# Importing libraries

In [106]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import joblib

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and other diagnostic warnings
# raised by later cells; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# Load datasets

In [107]:
# Read both CSV files from the current working directory, in the same order
# as before (calories first, then exercise).
calories, exercise = (
    pd.read_csv(csv_path) for csv_path in ("calories.csv", "exercise.csv")
)

# Merge datasets

In [108]:
# Join the exercise table with the calories table on User_ID.
# validate="one_to_one" makes pandas raise a MergeError if User_ID repeats in
# either table, instead of silently multiplying rows in the joined frame.
# how="inner" is the default join type, spelled out here for the reader.
exercise_df = exercise.merge(
    calories,
    on="User_ID",
    how="inner",
    validate="one_to_one",
)
print("Dataset shape:", exercise_df.shape)

Dataset shape: (15000, 9)


# Add BMI column

In [109]:
# BMI = Weight / (Height / 100)^2, rounded to two decimals.
exercise_df["BMI"] = (
    exercise_df["Weight"]
    .div(exercise_df["Height"].div(100).pow(2))
    .round(2)
)

# Drop unnecessary columns

In [110]:
# User_ID is only the join key, and Height/Weight have been folded into BMI,
# so none of the three is kept.
exercise_df = exercise_df.drop(columns=["User_ID", "Height", "Weight"])

# Check for null values

In [111]:
# Report the number of missing values in each column.
print("Null values in dataset:\n", exercise_df.isna().sum())

Null values in dataset:
 Gender        0
Age           0
Duration      0
Heart_Rate    0
Body_Temp     0
Calories      0
BMI           0
dtype: int64


# Drop duplicates (if any)

In [112]:
# Remove rows that are exact duplicates across all remaining columns.
exercise_df = exercise_df.drop_duplicates()
print("Shape after dropping duplicates:", exercise_df.shape)


Shape after dropping duplicates: (14999, 7)


# Select the columns matching the app (input features plus the Calories target)

In [113]:
# Columns to keep, in this order. Note the list also contains the target
# column "Calories", which is separated from the inputs in a later cell.
features = [
    "Gender",
    "Age",
    "BMI",
    "Duration",
    "Heart_Rate",
    "Body_Temp",
    "Calories",
]
exercise_df = exercise_df.loc[:, features]

# Convert Gender to numerical (1 for Male, 0 for Female)

In [114]:
# One-hot encode Gender. drop_first=True drops the first category level so a
# single indicator column remains; dtype=int makes that indicator 1/0 rather
# than the boolean values recent pandas versions produce by default.
# The earlier rename of "Gender_male" to itself did nothing and is removed.
exercise_df = pd.get_dummies(
    exercise_df,
    columns=["Gender"],
    drop_first=True,
    dtype=int,
)
# Later cells build model input with a "Gender_male" column, so fail early if
# the category labels in the data produce a different column name.
assert "Gender_male" in exercise_df.columns, "expected a 'Gender_male' indicator column"

# Split features and target

In [115]:
# Inputs are every column except the target; the target is Calories.
X = exercise_df.drop(columns="Calories")
y = exercise_df["Calories"]

# Split into training and test sets

In [116]:
# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)

Training data shape: (11999, 6)
Test data shape: (3000, 6)


# Standardize features

In [117]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so no test-set statistics reach the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train RandomForestRegressor

# Define parameter grid for Random Forest

In [118]:
# Hyperparameter values to search: 3 x 3 x 3 = 27 combinations.
param_grid = dict(
    n_estimators=[100, 500, 1000],
    max_depth=[3, 6, 9],
    max_features=[2, 3, 4],
)

# Train RandomForestRegressor with GridSearchCV

In [119]:
# Search param_grid with 5-fold cross-validation, scoring by negated RMSE
# and passing n_jobs=-1 to GridSearchCV.
model = RandomForestRegressor(random_state=1)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

In [120]:
# Report the winning hyperparameters and keep the corresponding estimator.
print("Best Random Forest Parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

Best Random Forest Parameters: {'max_depth': 9, 'max_features': 4, 'n_estimators': 1000}


# Evaluate model

In [121]:
# Predict on the held-out split and compute each error metric, rounded to
# two decimals.
y_pred = best_model.predict(X_test_scaled)
mae, mse, rmse = (
    round(metric_fn(y_test, y_pred), 2)
    for metric_fn in (
        mean_absolute_error,
        mean_squared_error,
        root_mean_squared_error,
    )
)

In [122]:
# Print the header and the three metrics, one per line.
print(
    "Tuned Random Forest Metrics:",
    f"MAE: {mae}",
    f"MSE: {mse}",
    f"RMSE: {rmse}",
    sep="\n",
)

Tuned Random Forest Metrics:
MAE: 3.03
MSE: 21.54
RMSE: 4.64


# Save the model

In [123]:
# Serialize the tuned estimator to disk with joblib.
model_filename = "trained_model.pkl"
joblib.dump(value=best_model, filename=model_filename)
print(f"Model saved as {model_filename}")

Model saved as trained_model.pkl


# Save the scaler (needed at prediction time, because the model was trained on standardized features)

In [124]:
# Persist the fitted StandardScaler so the same scaling can be re-applied to
# new inputs before they are passed to the saved model.
# The extension was previously misspelled ".pk1" (digit one); it now matches
# the ".pkl" extension used for the model file.
# NOTE(review): any code that loads the old "scaler.pk1" name must be updated.
scaler_filename = "scaler.pkl"
joblib.dump(scaler, scaler_filename)
print(f"Scaler saved as {scaler_filename}")

Scaler saved as scaler.pk1


# Test prediction with sample input similar to app

In [125]:
# Build a one-row frame with the same column names and order as the training
# inputs, scale it with the fitted scaler, and predict.
sample_input = pd.DataFrame([
    {
        "Age": 30,
        "BMI": 20,
        "Duration": 30,
        "Heart_Rate": 90,
        "Body_Temp": 37.5,
        "Gender_male": 1,
    }
])
sample_input_scaled = scaler.transform(sample_input)
prediction = best_model.predict(sample_input_scaled)
print("Sample prediction (calories burned):", round(prediction[0], 2))

Sample prediction (calories burned): 119.48
