In [7]:
# Phase 4: Recommendation System (Fixed)

import pandas as pd
import joblib
from sklearn.metrics.pairwise import cosine_similarity

# ==========================
# 1. Load dataset & models
# ==========================
# Cleaned workout dataset. Kept in its original (unencoded) form so that
# recommendations can be shown with readable values later on.
df = pd.read_csv("fitness_and_workout_dataset_cleaned.csv")

# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load .pkl artifacts that come from a trusted source.
# Used below as a mapping of column name -> fitted encoder exposing .transform().
label_encoders = joblib.load("label_encoders.pkl")
# Fitted model exposing .predict(); used to estimate the time per workout for
# the recommended rows.
time_model = joblib.load("time_per_workout_regressor.pkl")

# ==========================
# 2. Encode full dataset
# ==========================
# Numeric twin of `df`: every column that has a saved encoder is replaced by
# its encoded values, while `df` itself is left untouched.
encoded_df = df.copy()
for col, le in [
    (name, encoder)
    for name, encoder in label_encoders.items()
    if name in encoded_df.columns
]:
    encoded_column = le.transform(encoded_df[col])
    encoded_df[col] = encoded_column

# ==========================
# 3. Function to encode user input
# ==========================
def encode_input(user_input):
    """Return a copy of *user_input* with encodable fields label-encoded.

    Every key of ``user_input`` that also has an entry in the module-level
    ``label_encoders`` mapping is replaced by the value its encoder produces.
    All other keys are passed through unchanged. The caller's object is not
    modified.
    """
    result = user_input.copy()

    # Only fields that were actually supplied AND have a saved encoder.
    encodable = [name for name in label_encoders if name in result]
    for name in encodable:
        raw_value = result[name]
        # The encoder works on sequences, so wrap the scalar and unwrap the
        # single encoded value again.
        transformed = label_encoders[name].transform([raw_value])
        result[name] = transformed[0]

    return result

# ==========================
# 4. Recommendation function
# ==========================
def recommend_workouts(user_prefs, top_n=5):
    """Recommend the workouts most similar to a user's preferences.

    The preferences are label-encoded with ``encode_input`` and laid out in
    the column order of ``encoded_df`` (every column except
    ``"time_per_workout"``). Features the user did not specify are filled
    with the mean of the corresponding encoded column. Each dataset row is
    then scored against that vector with cosine similarity.

    Args:
        user_prefs: Mapping of feature name -> preferred value. Keys must
            match dataset column names to have any effect; keys that match
            no feature column trigger a ``UserWarning`` and are ignored.
        top_n: Number of recommendations to return. Must be at least 1. If
            it exceeds the number of rows, all rows are returned.

    Returns:
        A copy of the ``top_n`` best-matching rows of ``df`` (original,
        readable values), ordered from most to least similar, with an extra
        ``"predicted_time_per_workout"`` column from ``time_model.predict``.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    import warnings

    # Guard against the negative-slice pitfall: with top_n == 0 the slice
    # [-0:] would return *every* row, and a negative top_n would silently
    # drop the best matches instead of failing.
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n!r}")

    # Encode preferences
    encoded_prefs = encode_input(user_prefs)

    # Feature columns in dataset order (excluding the regression target)
    feature_cols = [col for col in encoded_df.columns if col != "time_per_workout"]

    # Surface preferences that cannot influence the result; otherwise a
    # mistyped key is silently replaced by the column mean.
    ignored_keys = [key for key in encoded_prefs.keys() if key not in feature_cols]
    if ignored_keys:
        warnings.warn(
            "Ignoring preferences that match no dataset feature column: "
            f"{ignored_keys}",
            stacklevel=2,
        )

    # Build the user vector; the column mean is the fallback for any feature
    # the user did not specify.
    user_vector = [
        encoded_prefs[col] if col in encoded_prefs else encoded_df[col].mean()
        for col in feature_cols
    ]

    # Calculate similarity against every row (all values must be numeric here)
    sim_scores = cosine_similarity([user_vector], encoded_df[feature_cols])[0]

    # Best matches first, then keep the leading top_n positions
    top_indices = sim_scores.argsort()[::-1][:top_n]
    recommendations = df.iloc[top_indices].copy()  # original data for readability

    # Predict time per workout using the regression model.
    # NOTE(review): assumes time_model was trained on exactly these feature
    # columns in this order - confirm against the training code.
    rec_encoded = encoded_df.iloc[top_indices][feature_cols]
    recommendations["predicted_time_per_workout"] = time_model.predict(rec_encoded)

    return recommendations

# ==========================
# 5. Example usage
# ==========================
# Sample preference profile for a quick demo run.
# NOTE(review): keys are matched against dataset column names inside
# recommend_workouts - confirm these names exist in the cleaned CSV.
user_preferences = dict(
    goal_type="weight loss",
    equipment_used="dumbbells",
    fitness_level="beginner",
    program_length=8,
    total_exercises=12,
)

results = recommend_workouts(user_preferences, top_n=5)
# One print call: the header and the table are separated by a newline.
print("\n🔹 Recommended Workouts:", results, sep="\n")



🔹 Recommended Workouts:
                                           title  \
2358             ultimate guts full body program   
1869                          quick bodybuilding   
2336                             tyrone and liam   
2561  programma in autoregolazione del cinghiale   
2187                                  twunk body   

                                            description  \
2358  this program uses ultimate hypertrophy program...   
1869  this is a more personalized program for me, an...   
2336                           to make u big and strong   
2561                           strength and hypertrophy   
2187  to increase muscle mass and develop a lean, sy...   

                             level                                    goal  \
2358              ['intermediate']                        ['powerlifting']   
1869    ['intermediate', 'novice']  ['bodybuilding', 'muscle & sculpting']   
2336                    ['novice']                        ['bodybuilding'