<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/fitness%20on%20random-2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
data_path = '/content/updated_cleaned_diet_split_exercises.csv'  # Update with the correct file path
data = pd.read_csv(data_path)

# Display basic information about the dataset
print("Dataset loaded successfully!")
print("Shape of the dataset:", data.shape)
print("Columns:", data.columns)
print("Preview of the dataset:")
print(data.head())

# Define input features and output labels
input_columns = ['Gender', 'Age', 'Height', 'Weight', 'Hypertension', 'Diabetes', 'BMI', 'BMI_Class',
                 'Duration_of_Workout', 'Fitness_Goal', 'Fitness_Type']
classification_columns = ['Exercise1', 'Exercise2', 'Exercise3', 'Equipment1', 'Equipment2',
                          'd_vegetables', 'd_juice', 'd_proteinintake']
regression_columns = ['Calories_Burnt', 'Water_Intake(Litres)']

X = data[input_columns]
y_class = data[classification_columns]
y_reg = data[regression_columns]

# Encode categorical variables
le = LabelEncoder()
for column in X.select_dtypes(include=['object']).columns:
    X[column] = le.fit_transform(X[column])

# Encode classification target columns separately
for column in y_class.select_dtypes(include=['object']).columns:
    y_class[column] = le.fit_transform(y_class[column])

# Standardizing numerical features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Splitting the dataset into training and testing sets
X_train, X_test, y_class_train, y_class_test = train_test_split(X, y_class, test_size=0.2, random_state=42)
_, _, y_reg_train, y_reg_test = train_test_split(X, y_reg, test_size=0.2, random_state=42)

# MultiOutputClassifier for categorical targets
model_class = MultiOutputClassifier(RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    max_features='sqrt',
    random_state=42
))

# Train the classification model
print("Training the Random Forest model for classification...")
model_class.fit(X_train, y_class_train)

# Predictions for classification
print("Predicting on test data (classification)...")
y_class_pred = model_class.predict(X_test)

# Evaluate the classification model
for i, column in enumerate(classification_columns):
    print(f"\nEvaluating predictions for {column} (classification):")
    accuracy = accuracy_score(y_class_test.iloc[:, i], y_class_pred[:, i])
    print("Accuracy:", accuracy)
    print("Classification Report:")
    print(classification_report(y_class_test.iloc[:, i], y_class_pred[:, i]))

# Calculate and print mean accuracy for the classification model
mean_accuracy = np.mean([
    accuracy_score(y_class_test.iloc[:, i], y_class_pred[:, i])
    for i in range(len(classification_columns))
])
print("\nMean Accuracy for the classification model:", mean_accuracy)

# MultiOutputRegressor for regression targets
model_reg = MultiOutputRegressor(RandomForestRegressor(
    n_estimators=100,
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42
))

# Train the regression model
print("\nTraining the Random Forest model for regression...")
model_reg.fit(X_train, y_reg_train)

# Predictions for regression
print("Predicting on test data (regression)...")
y_reg_pred = model_reg.predict(X_test)

# Evaluate the regression model
for i, column in enumerate(regression_columns):
    print(f"\nEvaluating predictions for {column} (regression):")
    mse = mean_squared_error(y_reg_test.iloc[:, i], y_reg_pred[:, i])
    print("Mean Squared Error:", mse)

# Save the models
import joblib
joblib.dump(model_class, 'fitness_model_class_rf.pkl')
joblib.dump(model_reg, 'fitness_model_reg_rf.pkl')
print("\nClassification model saved as 'fitness_model_class_rf.pkl'")
print("Regression model saved as 'fitness_model_reg_rf.pkl'")


Dataset loaded successfully!
Shape of the dataset: (14589, 22)
Columns: Index(['ID', 'Gender', 'Age', 'Height', 'Weight', 'Hypertension', 'Diabetes',
       'BMI', 'BMI_Class', 'Duration_of_Workout', 'Fitness_Goal',
       'Fitness_Type', 'Exercise1', 'Exercise2', 'Exercise3', 'Calories_Burnt',
       'Water_Intake(Litres)', 'Equipment1', 'Equipment2', 'd_vegetables',
       'd_juice', 'd_proteinintake'],
      dtype='object')
Preview of the dataset:
   ID Gender  Age  Height  Weight Hypertension Diabetes    BMI    BMI_Class  \
0   1   Male   18     168    47.5           No       No  16.83  Underweight   
1   2   Male   18     168    47.5          Yes       No  16.83  Underweight   
2   3   Male   18     168    47.5           No      Yes  16.83  Underweight   
3   4   Male   18     168    47.5          Yes      Yes  16.83  Underweight   
4   5   Male   18     168    47.5           No       No  16.83  Underweight   

   Duration_of_Workout  ... Exercise1   Exercise2       Exercise3  \
0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = le.fit_transform(X[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = le.fit_transform(X[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[column] = le.fit_transform(X[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

Predicting on test data (classification)...

Evaluating predictions for Exercise1 (classification):
Accuracy: 0.15969842357779301
Classification Report:
              precision    recall  f1-score   support

           0       0.17      0.20      0.18       500
           1       0.17      0.17      0.17       510
           2       0.18      0.16      0.17       479
           3       0.16      0.14      0.15       493
           4       0.13      0.14      0.13       463
           5       0.15      0.14      0.15       473

    accuracy                           0.16      2918
   macro avg       0.16      0.16      0.16      2918
weighted avg       0.16      0.16      0.16      2918


Evaluating predictions for Exercise2 (classification):
Accuracy: 0.1562714187799863
Classification Report:
              precision    recall  f1-score   support

           0       0.13      0.15      0.14       463
           1       0.16      0.19      0.18       500
           2       0.16      0.14

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicting on test data (regression)...

Evaluating predictions for Calories_Burnt (regression):
Mean Squared Error: 18085.99633958952

Evaluating predictions for Water_Intake(Litres) (regression):
Mean Squared Error: 0.8169018636813646

Classification model saved as 'fitness_model_class_rf.pkl'
Regression model saved as 'fitness_model_reg_rf.pkl'
