In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [50]:
# Read the gym exercise dataset from the CSV file into a DataFrame
csv_path = '../data/gym_exercise2.csv'
data = pd.read_csv(csv_path)

In [51]:
# Preview the first five rows of the freshly loaded frame
data.head(n=5)

Unnamed: 0,Age,Gender,Weight,Height,BMI,Experience_Level,Fat_Percentage,Session_Duration,Workout_Type,Water_Intake,Workout_Frequency
0,56,1,88.3,1.71,30.2,3,12.6,1.69,Yoga,3.5,4
1,46,2,74.9,1.53,32.0,2,33.9,1.3,HIIT,2.1,4
2,32,2,68.1,1.66,24.71,2,33.4,1.11,Cardio,2.3,4
3,25,1,53.2,1.7,18.41,1,28.8,0.59,Strength,2.1,3
4,38,1,46.1,1.79,14.39,1,29.2,0.64,Strength,2.8,3


In [52]:
# Count missing values per column (isna is the canonical spelling of isnull)
data.isna().sum()

Age                  0
Gender               0
Weight               0
Height               0
BMI                  0
Experience_Level     0
Fat_Percentage       0
Session_Duration     0
Workout_Type         0
Water_Intake         0
Workout_Frequency    0
dtype: int64

In [53]:
# One-hot encode the frame's non-numeric columns, replacing `data` with the result
data = pd.get_dummies(data=data)

In [54]:
# Preview the first five rows after one-hot encoding
data.head(n=5)

Unnamed: 0,Age,Gender,Weight,Height,BMI,Experience_Level,Fat_Percentage,Session_Duration,Water_Intake,Workout_Frequency,Workout_Type_Cardio,Workout_Type_HIIT,Workout_Type_Strength,Workout_Type_Yoga
0,56,1,88.3,1.71,30.2,3,12.6,1.69,3.5,4,False,False,False,True
1,46,2,74.9,1.53,32.0,2,33.9,1.3,2.1,4,False,True,False,False
2,32,2,68.1,1.66,24.71,2,33.4,1.11,2.3,4,True,False,False,False
3,25,1,53.2,1.7,18.41,1,28.8,0.59,2.1,3,False,False,True,False
4,38,1,46.1,1.79,14.39,1,29.2,0.64,2.8,3,False,False,True,False


In [55]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder

In [56]:
# Convert the one-hot workout columns back into a single categorical label.
workout_type_prefix = 'Workout_Type_'
workout_type_columns = [
    'Workout_Type_Cardio',
    'Workout_Type_HIIT',
    'Workout_Type_Strength',
    'Workout_Type_Yoga',
]

# idxmax(axis=1) returns, per row, the name of the column holding the maximum
# (the single True flag); slicing off the shared prefix leaves the bare
# workout name, e.g. 'Yoga'.
workout_type_names = (
    data[workout_type_columns].idxmax(axis=1).str[len(workout_type_prefix):]
)

# Keep the fitted encoder instead of discarding it, so integer predictions can
# be turned back into workout names with
# workout_type_encoder.inverse_transform(...). Removing the common prefix does
# not change the sorted class order, so the integer codes are unchanged.
workout_type_encoder = LabelEncoder()
data['Workout_Type'] = workout_type_encoder.fit_transform(workout_type_names)  # Encode to numeric

In [63]:
# Preview the first five rows, now including the numeric Workout_Type column
data.head(n=5)

Unnamed: 0,Age,Gender,Weight,Height,BMI,Experience_Level,Fat_Percentage,Session_Duration,Water_Intake,Workout_Frequency,Workout_Type_Cardio,Workout_Type_HIIT,Workout_Type_Strength,Workout_Type_Yoga,Workout_Type
0,56,1,88.3,1.71,30.2,3,12.6,1.69,3.5,4,False,False,False,True,3
1,46,2,74.9,1.53,32.0,2,33.9,1.3,2.1,4,False,True,False,False,1
2,32,2,68.1,1.66,24.71,2,33.4,1.11,2.3,4,True,False,False,False,0
3,25,1,53.2,1.7,18.41,1,28.8,0.59,2.1,3,False,False,True,False,2
4,38,1,46.1,1.79,14.39,1,29.2,0.64,2.8,3,False,False,True,False,2


In [57]:
# Define input and outputs
X = data[['Age', 'Gender', 'Weight', 'Height', 'BMI', 'Experience_Level', 'Fat_Percentage']]
y_classification = data['Workout_Type']  # Single label for workout type
y_regression = data[['Session_Duration', 'Water_Intake', 'Workout_Frequency']]  # Continuous outputs


In [94]:
# Split data
X_train, X_test, y_train_class, y_test_class, y_train_reg, y_test_reg = train_test_split(
    X, y_classification, y_regression, test_size=0.2, random_state=50
)

In [95]:
# Train classification model for workout type
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train_class)

In [96]:
# Train regression model for other predictions
regr = MultiOutputRegressor(RandomForestRegressor(random_state=50))
regr.fit(X_train, y_train_reg)

In [97]:
# Predict and evaluate
y_pred_class = clf.predict(X_test)
y_pred_reg = regr.predict(X_test)

In [100]:
# Classification accuracy for workout type
accuracy = accuracy_score(y_test_class, y_pred_class)
print(f'Workout Type Prediction Accuracy: {accuracy}')

# Regression metrics
mse = mean_squared_error(y_test_reg, y_pred_reg, multioutput='raw_values')
print(f'Mean Squared Error - Session Duration: {mse[0]}, Water Intake: {mse[1]}, Workout Frequency: {mse[2]}')

Workout Type Prediction Accuracy: 0.24615384615384617
Mean Squared Error - Session Duration: 0.0565940017948718, Water Intake: 0.14137682051282052, Workout Frequency: 0.28644000000000003


In [98]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


def _print_classification_scores(heading, y_act, y_pred):
    """Print accuracy and weighted precision/recall/F1 under ``heading``.

    Args:
        heading: Title printed on the first line of the report.
        y_act: True class labels.
        y_pred: Predicted class labels, aligned with ``y_act``.

    Each metric is rounded to three decimals. Precision, recall and F1 use
    ``average='weighted'``, i.e. per-class scores weighted by class support.
    """
    acc = round(accuracy_score(y_act, y_pred), 3)
    pr = round(precision_score(y_act, y_pred, average='weighted'), 3)
    rec = round(recall_score(y_act, y_pred, average='weighted'), 3)
    f1 = round(f1_score(y_act, y_pred, average='weighted'), 3)
    print(f'{heading}:\n\tAccuracy = {acc} \n\tPrecision = {pr}\n\tRecall = {rec}\n\tF1score = {f1}')


def training_scores(y_act, y_pred):
    """Print classification metrics for the training partition.

    Args:
        y_act: True class labels.
        y_pred: Predicted class labels, aligned with ``y_act``.
    """
    _print_classification_scores('Training Scores', y_act, y_pred)


def validation_scores(y_act, y_pred):
    """Print classification metrics for the held-out (test) partition.

    Args:
        y_act: True class labels.
        y_pred: Predicted class labels, aligned with ``y_act``.
    """
    _print_classification_scores('Testing Scores', y_act, y_pred)

In [99]:
# Predict the workout type on the training rows, then on the held-out rows
y_train_pred_class, y_test_pred_class = (
    clf.predict(features) for features in (X_train, X_test)
)

# Print the training metrics first, followed by the held-out metrics
training_scores(y_act=y_train_class, y_pred=y_train_pred_class)
validation_scores(y_act=y_test_class, y_pred=y_test_pred_class)



Training Scores:
	Accuracy = 1.0 
	Precission = 1.0
	Recall = 1.0
	F1score = 1.0
Testing Scores:
	Accuracy = 0.246 
	Precission = 0.262
	Recall = 0.246
	F1score = 0.253
