In [355]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity

In [356]:
# Load workout data (Excel File)
workout_data_body_only = pd.read_excel('../datasets/gym-exercises/Body Only.xlsx')
workout_data_minimal_equipment = pd.read_excel('../datasets/gym-exercises/Minimal Equipment.xlsx')
workout_data_full_equipment = pd.read_excel('../datasets/gym-exercises/Full Gym Equipment.xlsx')

workout_data_body_only.head()

Unnamed: 0,exercise_name,short_description,instructions,guide_img_url,equipment,rating,body_group,youtube_links,youtube_title
0,Triceps dip,The triceps dip is a bodyweight exercise perfo...,"To get into the starting position, hold your b...",https://artifacts.bbcomcdn.com/@bbcom/exercise...,Body Only,9.4,Upper,https://www.youtube.com/watch?v=rjdpMVtMehw&pp...,How to do a Triceps Dip on a Chair at Home
1,Elbow plank,The elbow plank is a popular isometric abdomin...,"Get into a prone position on the floor, suppor...",https://artifacts.bbcomcdn.com/@bbcom/exercise...,Body Only,9.3,Lower,https://www.youtube.com/watch?v=ZqjFqDy3i70&pp...,How to do a Plank on Elbows with Upper Body El...
2,Bottoms Up,The lying leg lift is a popular bodyweight exe...,Begin by lying on your back on the ground. You...,https://artifacts.bbcomcdn.com/@bbcom/exercise...,Body Only,9.3,Lower,https://www.youtube.com/watch?v=RwZjU8CrUo8&pp...,Abdominal Exercises - Bottoms Up
3,Pullups,The pull-up is a multi-joint bodyweight exerci...,Grab the pull-up bar with the palms facing for...,https://artifacts.bbcomcdn.com/@bbcom/exercise...,Body Only,9.2,Upper,https://www.youtube.com/watch?v=asMtayTp0J0&pp...,The Secret to Pull-Ups | How to Go From 0 to 20+
4,Jumping rope,Jumping rope is a classic conditioning exercis...,Hold an end of the rope in each hand. Position...,https://artifacts.bbcomcdn.com/@bbcom/exercise...,Body Only,9.2,Lower,https://www.youtube.com/watch?v=BUouJzDSpJQ&pp...,Criss Cross Jump Rope Tutorial


## User Input
Feature engineering & data filtering

In [357]:
# Filter based on user preference
# body_group: Upper Body, Lower Body, Full Body
# workout_level: 5, 7, 10
# exercise_option: Body Only, Minimal Equipment, Full Gym Equipment
# user_favorites: []


# Ganti input dari CC
body_group = "Lower Body"
exercise_option = "Body Only"
user_favorites = ["Linear Acceleration Wall Drill","3/4 sit-up", "Bodyweight squat", "Decline Crunch", "Russian twist"]

In [358]:
if exercise_option == "Body Only":
    workout_data = workout_data_body_only
elif exercise_option == "Minimal Equipment":
    workout_data = workout_data_minimal_equipment
elif exercise_option == "Full Gym Equipment":
    workout_data = workout_data_full_equipment

In [359]:
## Drop fitur gakepake
workout_data = workout_data.drop(columns="guide_img_url")
workout_data = workout_data.drop(columns="youtube_links")
workout_data = workout_data.drop(columns="youtube_title")

# Filter Rows based on user preferences
if body_group == "Upper Body":
    workout_data = workout_data[workout_data['body_group'] == "Upper"]
elif body_group == "Lower Body":
    workout_data = workout_data[workout_data['body_group'] == "Lower"]

# Reset index agar data aligned kembali
workout_data = workout_data.reset_index(drop=True)

workout_data.head()

Unnamed: 0,exercise_name,short_description,instructions,equipment,rating,body_group
0,Elbow plank,The elbow plank is a popular isometric abdomin...,"Get into a prone position on the floor, suppor...",Body Only,9.3,Lower
1,Bottoms Up,The lying leg lift is a popular bodyweight exe...,Begin by lying on your back on the ground. You...,Body Only,9.3,Lower
2,Jumping rope,Jumping rope is a classic conditioning exercis...,Hold an end of the rope in each hand. Position...,Body Only,9.2,Lower
3,Spider crawl,The spider crawl is a bodyweight exercise that...,Begin in a prone position on the floor. Suppor...,Body Only,9.2,Lower
4,Single Leg Push-off,"The Single Leg Push-off targets the glutes, qu...",Stand on the ground with one foot resting on t...,Body Only,9.2,Lower


## Buat cosine similarity dari text data

In [360]:
# Buat fitur untuk dicari cosine similarity
workout_data['combined_features'] = workout_data['short_description'] + ' ' + workout_data['instructions'].astype(str)

# TFIDF Vectorizer untuk convert text data ke vector fitur
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(workout_data['combined_features'])

# Hitung cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# cosine_sim.shape

In [361]:
# FUngsi predict cosine similarity
def predict(user_workouts, cosine_sim=cosine_sim):

    # Seri indeks dari nama workout
    indices = pd.Series(workout_data.index, index=workout_data['exercise_name'])
    
    sim_scores = []
    for workout in user_workouts:
        # Cari workout (user's favorite) dalam dataset
        users_favorites_db = workout_data[workout_data['exercise_name'].str.contains(workout, case=False, na=False)]

        if not users_favorites_db.empty:
            for workout in users_favorites_db['exercise_name']:
                idx = indices[workout]

                # Nambah poin cosine sim workout ke sim_scores
                sim_scores.append(cosine_sim[idx])
                
        else:
            print(f"Workout '{workout}' not found in the dataset.")
    
    if not sim_scores:
        return []
    

    # Rata-rata simmilarity score: gabungkan score dari seluruh latihan
    sim_scores = sum(sim_scores) / len(sim_scores)

    # Urutkan workout berdasarkan simmilarity score
    workout_indices = sim_scores.argsort()[::-1]

    # Filter rekomendasi: Pastikan rekomendasi tidak ada di list favorite user sebelumnya
    similarity_scores_table = workout_data.iloc[workout_indices]['exercise_name'].tolist()
    filtered_recommendations = [workout for workout in similarity_scores_table if workout not in user_workouts]

    # Ambil nama dan skor dari workout yang direkomendasikan
    similarity_scores_table = workout_data.iloc[workout_indices].copy()
    
    # Drop kolom gakepake
    columns_to_drop = ["short_description", "instructions", "equipment", "rating", "body_group", "combined_features"]
    similarity_scores_table = similarity_scores_table.drop(columns=columns_to_drop, errors='ignore')  
    similarity_scores_table['similarity'] = sim_scores[workout_indices]

    # Sort berdasarkan final_score
    similarity_scores_table = similarity_scores_table.sort_values(by='similarity', ascending=False)

    return similarity_scores_table


In [362]:
recommended_workouts = predict(user_favorites)
output = recommended_workouts[:10]
output

Unnamed: 0,exercise_name,similarity
18,3/4 sit-up,0.320095
40,Russian twist,0.301162
21,Bodyweight squat,0.284856
13,Decline Crunch,0.264478
30,Crunches,0.253884
53,Linear Acceleration Wall Drill,0.247393
37,Decline oblique crunch,0.205424
11,Elbow-to-knee crunch,0.188666
42,Flat Bench Lying Leg Raise,0.185886
41,Seated Flat Bench Leg Pull-In,0.175793


## Buat CC

In [363]:
# Ubah ke list of exercise_name
output = output['exercise_name'].tolist()

print(output)

['3/4 sit-up', 'Russian twist', 'Bodyweight squat', 'Decline Crunch', 'Crunches', 'Linear Acceleration Wall Drill', 'Decline oblique crunch', 'Elbow-to-knee crunch', 'Flat Bench Lying Leg Raise', 'Seated Flat Bench Leg Pull-In']


In [364]:
# All Similarity Score
recommended_workouts

Unnamed: 0,exercise_name,similarity
18,3/4 sit-up,0.320095
40,Russian twist,0.301162
21,Bodyweight squat,0.284856
13,Decline Crunch,0.264478
30,Crunches,0.253884
53,Linear Acceleration Wall Drill,0.247393
37,Decline oblique crunch,0.205424
11,Elbow-to-knee crunch,0.188666
42,Flat Bench Lying Leg Raise,0.185886
41,Seated Flat Bench Leg Pull-In,0.175793
