In [299]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report

In [300]:
file_path = r'D:\FREELANCE_PROJECTS\Fitness-Lifestyle-Prediction\data\exercise_dataset.csv'
data = pd.read_csv(file_path)

In [301]:
data.head(), data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3864 entries, 0 to 3863
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  3864 non-null   int64  
 1   Exercise            3864 non-null   object 
 2   Calories Burn       3864 non-null   float64
 3   Dream Weight        3864 non-null   float64
 4   Actual Weight       3864 non-null   float64
 5   Age                 3864 non-null   int64  
 6   Gender              3864 non-null   object 
 7   Duration            3864 non-null   int64  
 8   Heart Rate          3864 non-null   int64  
 9   BMI                 3864 non-null   float64
 10  Weather Conditions  3864 non-null   object 
 11  Exercise Intensity  3864 non-null   int64  
dtypes: float64(4), int64(5), object(3)
memory usage: 362.4+ KB


(   ID     Exercise  Calories Burn  Dream Weight  Actual Weight  Age  Gender  \
 0   1   Exercise 2     286.959851     91.892531      96.301115   45    Male   
 1   2   Exercise 7     343.453036     64.165097      61.104668   25    Male   
 2   3   Exercise 4     261.223465     70.846224      71.766724   20    Male   
 3   4   Exercise 5     127.183858     79.477008      82.984456   33    Male   
 4   5  Exercise 10     416.318374     89.960226      85.643174   29  Female   
 
    Duration  Heart Rate        BMI Weather Conditions  Exercise Intensity  
 0        37         170  29.426275              Rainy                   5  
 1        43         142  21.286346              Rainy                   5  
 2        20         148  27.899592             Cloudy                   4  
 3        39         170  33.729552              Sunny                  10  
 4        34         118  23.286113             Cloudy                   3  ,
 None)

In [302]:
def classify_fitness(row):
    if 18.5 <= row['BMI'] <= 24.9 and row['Calories Burn'] > 300 and 60 <= row['Heart Rate'] <= 160:
        return "Good Fitness"
    elif 25 <= row['BMI'] <= 29.9 or (200 <= row['Calories Burn'] <= 300) or (161 <= row['Heart Rate'] <= 180):
        return "Average"
    else:
        return "Bad"

data['Fitness Level'] = data.apply(classify_fitness, axis=1)

data['Fitness Level'].value_counts()

Fitness Level
Average         2340
Bad              963
Good Fitness     561
Name: count, dtype: int64

In [303]:
X

Unnamed: 0,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions
0,286.959851,91.892531,96.301115,45,1,37,170,29.426275,0
1,343.453036,64.165097,61.104668,25,1,43,142,21.286346,0
2,261.223465,70.846224,71.766724,20,1,20,148,27.899592,1
3,127.183858,79.477008,82.984456,33,1,39,170,33.729552,2
4,416.318374,89.960226,85.643174,29,0,34,118,23.286113,1
...,...,...,...,...,...,...,...,...,...
3859,154.113144,98.147291,96.745133,20,0,22,139,32.447764,1
3860,486.392768,97.598957,92.700573,21,0,49,160,26.602475,0
3861,264.307731,94.946612,96.778936,57,1,56,167,31.435348,0
3862,185.951870,64.743906,68.662889,58,0,60,128,19.774614,0


In [304]:
categorical_cols = X.select_dtypes(include='object').columns
for i in categorical_cols:
    print(i)
    print(data[i].unique())

In [305]:
gender_map = {"Male": 1, "Female":0}
weather_map = {"Rainy": 0,"Cloudy":1,  "Sunny": 2}

data["Gender"] = data["Gender"].map(gender_map)
data["Weather Conditions"] = data["Weather Conditions"].map(weather_map)

In [306]:
data["Fitness Level"].value_counts()

Fitness Level
Average         2340
Bad              963
Good Fitness     561
Name: count, dtype: int64

In [307]:
data.head()

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity,Fitness Level
0,1,Exercise 2,286.959851,91.892531,96.301115,45,1,37,170,29.426275,0,5,Average
1,2,Exercise 7,343.453036,64.165097,61.104668,25,1,43,142,21.286346,0,5,Good Fitness
2,3,Exercise 4,261.223465,70.846224,71.766724,20,1,20,148,27.899592,1,4,Average
3,4,Exercise 5,127.183858,79.477008,82.984456,33,1,39,170,33.729552,2,10,Average
4,5,Exercise 10,416.318374,89.960226,85.643174,29,0,34,118,23.286113,1,3,Good Fitness


In [308]:
label_encoder = LabelEncoder()
data["Fitness Level"] = label_encoder.fit_transform(data["Fitness Level"])

In [309]:
data["Fitness Level"].value_counts()

Fitness Level
0    2340
1     963
2     561
Name: count, dtype: int64

In [310]:
data.head()

Unnamed: 0,ID,Exercise,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions,Exercise Intensity,Fitness Level
0,1,Exercise 2,286.959851,91.892531,96.301115,45,1,37,170,29.426275,0,5,0
1,2,Exercise 7,343.453036,64.165097,61.104668,25,1,43,142,21.286346,0,5,2
2,3,Exercise 4,261.223465,70.846224,71.766724,20,1,20,148,27.899592,1,4,0
3,4,Exercise 5,127.183858,79.477008,82.984456,33,1,39,170,33.729552,2,10,0
4,5,Exercise 10,416.318374,89.960226,85.643174,29,0,34,118,23.286113,1,3,2


In [311]:
X = data.drop(columns=['ID', 'Fitness Level', 'Exercise', 'Exercise Intensity'])
y = data['Fitness Level']

In [312]:
# scaler = StandardScaler()
# X[X.select_dtypes(include=['float64', 'int64']).columns] = scaler.fit_transform(
#     X.select_dtypes(include=['float64', 'int64'])
# )

In [313]:
# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)

In [314]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [315]:
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

rf_y_pred = rf_model.predict(X_test)
report = classification_report(y_test, rf_y_pred)
print(report)
print(accuracy_score(y_test, rf_y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00       464
           1       1.00      0.98      0.99       183
           2       0.99      1.00      1.00       126

    accuracy                           0.99       773
   macro avg       1.00      0.99      0.99       773
weighted avg       0.99      0.99      0.99       773

0.9948253557567918


In [316]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression()

lr.fit(X_train, y_train)
lr_y_pred = lr.predict(X_test)
report_lr = classification_report(y_test, lr_y_pred)
print(accuracy_score(y_test, lr_y_pred))

0.720569210866753


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [323]:
data.iloc[416]

ID                           417
Exercise              Exercise 9
Calories Burn         393.686028
Dream Weight           67.868765
Actual Weight          68.525386
Age                           38
Gender                         0
Duration                      23
Heart Rate                   111
BMI                    26.527797
Weather Conditions             2
Exercise Intensity             2
Fitness Level                  0
Name: 416, dtype: object

In [324]:
X_train

Unnamed: 0,Calories Burn,Dream Weight,Actual Weight,Age,Gender,Duration,Heart Rate,BMI,Weather Conditions
691,433.922575,63.515225,61.315538,34,0,36,135,34.987444,2
2177,122.826173,78.901584,79.872360,46,0,52,128,19.541354,0
416,393.686028,67.868765,68.525386,38,0,23,111,26.527797,2
1258,155.171376,68.500800,69.372802,23,1,47,118,33.699077,1
217,279.552529,88.233543,87.639509,59,0,35,126,34.002920,2
...,...,...,...,...,...,...,...,...,...
1130,220.192878,69.305525,67.086366,40,0,29,114,25.123054,2
1294,137.583475,50.955283,55.908696,60,1,29,112,28.788644,2
860,217.148170,94.555330,94.947673,24,1,42,162,26.053056,2
3507,340.077644,72.061134,73.841707,29,1,39,132,28.979937,1


In [325]:
import numpy as np
test_data = np.array([393.686028,	67.868765,	68.525386,	38,	0,	23,	111,	26.527797,	2]).reshape(1, -1)
test_pred = rf_model.predict(test_data)
print(test_pred)

[0]




In [320]:
import joblib
joblib.dump(rf_model, "../fitness_model.joblib")

['../fitness_model.joblib']

In [321]:
print(data["Heart Rate"].min()),
print(data["Heart Rate"].max())

100
180
