In [55]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Read the workout records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='fitinsights_data.csv')

# Exploratory Data Analysis
# First look at the data: count / mean / std / quartiles per described column
print(df.describe())



               Age    Weight_kg     Height_m      Max_BPM      Avg_BPM  \
count  1629.000000  1629.000000  1629.000000  1629.000000  1629.000000   
mean     33.320442    73.868938     1.713579   186.698588   136.101903   
std      11.677977    14.423329     0.097744    12.845304    16.932825   
min      18.000000    45.000000     1.500000   160.000000   100.000000   
25%      24.000000    62.900000     1.640000   178.000000   124.000000   
50%      32.000000    72.900000     1.710000   188.000000   136.000000   
75%      41.000000    83.500000     1.780000   196.000000   149.000000   
max      66.000000   113.800000     1.990000   210.000000   170.000000   

       Resting_BPM  Session_Duration_hours  Calories_Burned  Fat_Percentage  \
count  1629.000000             1629.000000      1629.000000     1629.000000   
mean     71.451811                0.720350       408.900552       24.471209   
std       6.055916                0.159805       143.165151        7.340761   
min      54.00000

In [56]:
# Derive the session length in minutes from the hours column (1 hour = 60 minutes)
df['Session_Duration_Minutes'] = df['Session_Duration_hours'].mul(60)
# Show the first rows so the new column can be checked by eye
df.head(5)

Unnamed: 0,Age,Gender,Weight_kg,Height_m,Max_BPM,Avg_BPM,Resting_BPM,Session_Duration_hours,Calories_Burned,Workout_Type,...,Water_Intake_liters,Workout_Frequency_days_per_week,Experience_Level,BMI,Workout_Intensity,Calories_per_Min,HRR,BMI_Category,Hydration_per_Kg,Session_Duration_Minutes
0,21,Male,88.4,1.81,209.0,147.0,74.0,0.68,396.0,Strength Training,...,2.7,4,Intermediate,27.1,0.703,9.706,135.0,Overweight,0.031,40.8
1,29,Male,68.9,1.78,194.0,137.0,67.0,0.99,464.0,Boxing,...,2.2,5,Advanced,21.8,0.706,7.811,127.0,Normal,0.032,59.4
2,32,Male,61.0,1.76,196.0,128.0,71.0,0.66,211.0,Pilates,...,2.0,4,Beginner,19.7,0.653,5.328,125.0,Normal,0.033,39.6
3,22,Male,83.8,1.67,192.0,138.0,62.0,0.82,529.0,Tennis,...,2.3,4,Advanced,30.2,0.719,10.752,130.0,Obese,0.027,49.2
4,18,Male,104.2,1.88,192.0,136.0,72.0,0.76,776.0,Swimming,...,2.9,4,Beginner,29.6,0.708,17.018,120.0,Overweight,0.028,45.6


In [57]:

# Define features and targets
# The column names live in one place so the feature list and the frame
# selection below cannot drift apart ('Weight_kg' is the actual column name;
# a bare 'Weight' column does not exist in the data).
features = ['Age', 'Gender', 'BMI', 'Weight_kg', 'Experience_Level', 'Calories_Burned']
target_workout = 'Workout_Type'
target_duration = 'Session_Duration_Minutes'

# Initial feature matrix and the two prediction targets
X = df[features]
y_workout = df[target_workout]      # classification target
y_duration = df[target_duration]    # regression target (minutes)

# Data Preprocessing Pipeline

In [59]:
# Identify categorical and numerical features
categorical_features = ['Gender', 'Experience_Level', 'BMI_Category']
numerical_features = ['Age', 'Weight_kg', 'BMI', 'Calories_Burned']

# Create preprocessor: scale the numeric columns, one-hot encode the
# categorical ones. handle_unknown='ignore' means a category not seen during
# fit is encoded as all zeros instead of raising at predict time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ])

# Feature matrix (numeric columns first, then categorical) and both targets
X = df[numerical_features + categorical_features]
y_workout = df[target_workout]
y_duration = df[target_duration]

# Split data into training and testing sets.
# A single call splits X and both targets with the same shuffled indices, so
# the workout and duration targets are guaranteed to stay row-aligned with
# X_train / X_test (no second call whose arguments must be kept in sync, and
# no assignment to `_`, which IPython uses for the last cell output).
(X_train, X_test,
 y_workout_train, y_workout_test,
 y_duration_train, y_duration_test) = train_test_split(
    X, y_workout, y_duration, test_size=0.2, random_state=42)


# Build and Train Models

In [61]:
from sklearn.base import clone

# Pipeline stores its steps by reference (it does not copy them), so handing
# the same ColumnTransformer object to both pipelines would make them share
# one fitted preprocessor: fitting the second pipeline would refit the first
# pipeline's preprocessing as well. clone() gives each pipeline its own
# unfitted copy with identical parameters.

# Workout Type Prediction (Classification)
workout_classifier = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Session Duration Prediction (Regression)
duration_regressor = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train the models on the same training rows
workout_classifier.fit(X_train, y_workout_train)
duration_regressor.fit(X_train, y_duration_train)

# Make predictions on the held-out rows
y_workout_pred = workout_classifier.predict(X_test)
y_duration_pred = duration_regressor.predict(X_test)


# Evaluate Models

In [63]:
from sklearn.metrics import classification_report, mean_absolute_error, r2_score

# Evaluate workout type model
print("Workout Type Classification Report:")
print(classification_report(y_workout_test, y_workout_pred))

# Evaluate duration model
print(f"Session Duration Mean Absolute Error: {mean_absolute_error(y_duration_test, y_duration_pred):.2f} minutes")
print(f"Session Duration R² Score: {r2_score(y_duration_test, y_duration_pred):.2f}")

# Feature importance analysis
workout_importances = workout_classifier.named_steps['classifier'].feature_importances_
duration_importances = duration_regressor.named_steps['regressor'].feature_importances_

# Get feature names after one-hot encoding.
# The fitted transformer is bound to its own name so the module-level
# `preprocessor` used to build the pipelines is not rebound by this cell.
fitted_preprocessor = workout_classifier.named_steps['preprocessor']
fitted_encoder = fitted_preprocessor.named_transformers_['cat']
# Order mirrors the transformer list: scaled numeric columns first, then the
# one-hot columns. Both pipelines use the same preprocessing configuration,
# so the same names label both importance vectors.
feature_names = (
    numerical_features +
    list(fitted_encoder.get_feature_names_out(categorical_features))
)

# Display feature importances, most important for the workout-type model first
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Workout_Type_Importance': workout_importances,
    'Duration_Importance': duration_importances
})
print(importance_df.sort_values('Workout_Type_Importance', ascending=False))


Workout Type Classification Report:
                   precision    recall  f1-score   support

         Aerobics       0.07      0.08      0.07        24
           Boxing       0.15      0.21      0.17        19
         CrossFit       0.04      0.07      0.05        14
          Cycling       0.11      0.14      0.12        14
             HIIT       0.26      0.16      0.20        31
          Pilates       0.30      0.24      0.27        25
           Rowing       0.00      0.00      0.00        18
          Running       0.27      0.26      0.26        58
           Soccer       0.12      0.09      0.10        23
Strength Training       0.04      0.05      0.04        20
         Swimming       0.00      0.00      0.00        12
           Tennis       0.11      0.09      0.10        23
          Walking       0.20      0.08      0.12        24
             Yoga       0.27      0.48      0.34        21

         accuracy                           0.16       326
        macro avg 

# Fine-Tune Models

In [65]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search for the workout type model.
# The 'classifier__' prefix routes each setting to the pipeline step named
# 'classifier'.
param_grid = dict(
    classifier__n_estimators=[50, 100, 200],
    classifier__max_depth=[None, 10, 20, 30],
)

# 5-fold cross-validated search, scored by weighted F1
grid_search = GridSearchCV(
    estimator=workout_classifier,
    param_grid=param_grid,
    cv=5,
    scoring='f1_weighted',
)
grid_search.fit(X_train, y_workout_train)

# Keep the refitted best pipeline for later predictions
best_workout_classifier = grid_search.best_estimator_
print(f"Best parameters: {grid_search.best_params_}")


Best parameters: {'classifier__max_depth': 10, 'classifier__n_estimators': 200}


# Create the Recommendation Function

In [67]:
def recommend_workout(age, gender, weight, bmi, experience_level, target_calories):
    """
    Predict workout type and session duration based on user inputs.

    Parameters
    ----------
    age : number
        Value for the 'Age' feature.
    gender : str
        Value for the 'Gender' feature. Surrounding whitespace is removed and
        the text is capitalised (e.g. ' male' -> 'Male') before prediction.
    weight : number
        Value for the 'Weight_kg' feature.
    bmi : number
        Value for the 'BMI' feature; also used to derive 'BMI_Category'.
    experience_level : str
        Value for the 'Experience_Level' feature, normalised like `gender`.
    target_calories : number
        Value for the 'Calories_Burned' feature; echoed back in the result.

    Returns
    -------
    dict
        'recommended_workout'   -- label predicted by best_workout_classifier
        'recommended_duration'  -- duration_regressor prediction rounded to an int
        'expected_calorie_burn' -- target_calories, unchanged
    """
    def _normalise_label(value):
        # The one-hot encoder is created with handle_unknown='ignore', so a
        # label that differs from the training spelling only by case or stray
        # spaces would be silently encoded as "no category" and skew the
        # prediction without any error.
        # NOTE(review): assumes training labels are capitalised like the
        # 'Male' / 'Beginner' values shown by df.head() -- confirm with
        # df['Gender'].unique() and df['Experience_Level'].unique().
        return value.strip().capitalize() if isinstance(value, str) else value

    # Create a single-row dataframe; columns are selected by name downstream,
    # so their order here does not need to match the training frame.
    user_data = pd.DataFrame({
        'Age': [age],
        'Gender': [_normalise_label(gender)],
        'Weight_kg': [weight],
        'BMI': [bmi],
        'BMI_Category': [get_bmi_category(bmi)],  # Function to convert BMI to category
        'Experience_Level': [_normalise_label(experience_level)],
        'Calories_Burned': [target_calories],
    })

    # Predict workout type and duration
    workout_type = best_workout_classifier.predict(user_data)[0]
    session_duration = duration_regressor.predict(user_data)[0]

    return {
        'recommended_workout': workout_type,
        # Convert explicitly so the result is a plain Python int regardless of
        # how the installed NumPy version implements round() on its scalars.
        'recommended_duration': int(round(float(session_duration))),
        'expected_calorie_burn': target_calories
    }

def get_bmi_category(bmi):
    """
    Map a BMI value to its category label.

    Returns 'Underweight' below 18.5, 'Normal' below 25, 'Overweight' below 30
    and 'Obese' for everything else.
    """
    # (exclusive upper bound, label) pairs, checked in ascending order
    upper_bounds = (
        (18.5, 'Underweight'),
        (25, 'Normal'),
        (30, 'Overweight'),
    )
    for limit, label in upper_bounds:
        if bmi < limit:
            return label
    # Nothing matched: the value is not below any bound
    return 'Obese'


# Deployment and Integration

In [97]:
# Example usage (non-interactive); 170 cm is an illustrative height
# recommendation = recommend_workout(
#     age=23,
#     gender='Male',
#     weight=58,
#     bmi=get_bmi(weight=58, height_cm=170),
#     experience_level='Beginner',
#     target_calories=300,
# )

# Note: 1 foot = 30.48 cm (for converting a height given in feet to centimetres)

def get_bmi(weight, height_cm):
    """
    Compute the body-mass index from a weight and a height in centimetres.

    Parameters
    ----------
    weight : number
        Body weight (the notebook prompts for it in kg). Must be positive.
    height_cm : number
        Height in centimetres. Must be positive.

    Returns
    -------
    float
        weight / (height in metres) ** 2

    Raises
    ------
    ValueError
        If `weight` or `height_cm` is zero or negative.
    """
    # Reject impossible measurements up front: a zero height would otherwise
    # surface as a bare ZeroDivisionError, and negative values would yield a
    # meaningless BMI that is then fed to the models.
    if height_cm <= 0:
        raise ValueError(f"height_cm must be positive, got {height_cm!r}")
    if weight <= 0:
        raise ValueError(f"weight must be positive, got {weight!r}")

    height_m = height_cm / 100  # Convert cm to meters
    return weight / (height_m ** 2)


# --- Collect the user's profile from the prompt (numeric answers are parsed) ---
age = int(input("Enter your age: "))
gender = input("Enter your gender (Male/Female): ")
weight = float(input("Enter your weight in kg: "))
height = float(input("Enter your height in cm: "))
experience_level = input("Enter your experience level (Beginner/Intermediate/Advanced): ")
target_calories = float(input("Enter your target calories to burn: "))

# --- Derive BMI from the entered weight and height ---
bmi = get_bmi(weight=weight, height_cm=height)

# --- Ask the models for a recommendation (arguments in signature order) ---
recommendation = recommend_workout(
    age, gender, weight, bmi, experience_level, target_calories
)

# --- Print results, one line per item ---
print(
    "\n--- Workout Recommendation ---",
    f"Recommended workout: {recommendation['recommended_workout']}",
    f"Session duration: {recommendation['recommended_duration']} Minutes",
    f"Expected calorie burn: {recommendation['expected_calorie_burn']} calories",
    sep="\n",
)

Enter your age:  21
Enter your gender (Male/Female):  Female
Enter your weight in kg:  50
Enter your height in cm:  160
Enter your experience level (Beginner/Intermediate/Advanced):  Beginner
Enter your target calories to burn:  159



--- Workout Recommendation ---
Recommended workout: Yoga
Session duration: 34 Minutes
Expected calorie burn: 159.0 calories
