In [None]:
import pickle
from math import sqrt
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [3]:
# Load the exercise measurements and the calorie labels
df_exercise = pd.read_csv('data/exercise.csv')
df_calories = pd.read_csv('data/calories.csv')

# The two tables are joined purely by row position, so they must have the same
# number of rows; otherwise concat would silently pad the shorter one with NaN.
# NOTE(review): this also assumes both files list users in the same order — confirm.
assert len(df_exercise) == len(df_calories), "exercise.csv and calories.csv have different row counts"

# Attach the Calories column side by side (index-aligned)
df = pd.concat([df_exercise, df_calories['Calories']], axis=1)

# Encode Gender (Male = 1, Female = 0)
df['Gender'] = df['Gender'].map({'male': 1, 'female': 0})

# Series.map turns any label missing from the dict into NaN; fail here with a
# clear message rather than inside a later model fit.
assert df['Gender'].notna().all(), "unexpected Gender label (expected 'male' or 'female')"

df.head()


Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,1,68,190,94,29,105,40.8,231.0
1,14861698,0,20,166,60,14,94,40.3,66.0
2,11179863,1,69,179,79,5,88,38.7,26.0
3,16180408,0,34,179,71,13,100,40.5,71.0
4,17771927,0,27,154,58,10,81,39.8,35.0


# =====================
# **Phase 1:** Predict Heart Rate (Model A)
# =====================

In [4]:
# Model A: five predictor columns in, measured heart rate out
features_hr = ['Gender', 'Age', 'Height', 'Weight', 'Duration']
X_hr, y_hr = df[features_hr], df['Heart_Rate']


In [5]:
# Hold out 20% of the rows for evaluation (fixed seed for a reproducible split)
X_train_hr, X_test_hr, y_train_hr, y_test_hr = train_test_split(
    X_hr,
    y_hr,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regressor on the training portion (fit() returns the estimator itself)
model_hr = LinearRegression().fit(X_train_hr, y_train_hr)

# Heart-rate estimates for the held-out rows
y_pred_hr = model_hr.predict(X_test_hr)


In [6]:
# Score Model A on the held-out rows
r2_hr = r2_score(y_test_hr, y_pred_hr)
mae_hr = mean_absolute_error(y_test_hr, y_pred_hr)
rmse_hr = np.sqrt(mean_squared_error(y_test_hr, y_pred_hr))  # RMSE = square root of the MSE

print("Heart Rate Prediction - Linear Regression")
print("R² Score:", r2_hr)
print("MAE:", mae_hr)
print("RMSE:", rmse_hr)



Heart Rate Prediction - Linear Regression
R² Score: 0.7357173702544111
MAE: 4.022083659000314
RMSE: 5.012790258036366


# =====================
# **Phase 2:** Predict Body Temperature (Model B)
# =====================

In [7]:
# Model B: same five predictor columns as Model A; the target is body temperature
features_bt = ["Gender", "Age", "Height", "Weight", "Duration"]
X_bt, y_bt = df[features_bt], df["Body_Temp"]


In [8]:
# 80/20 hold-out with the same test_size and random_state as Model A
X_train_bt, X_test_bt, y_train_bt, y_test_bt = train_test_split(
    X_bt, y_bt, test_size=0.2, random_state=42
)

# Fit the body-temperature regressor on the training portion
model_bt = LinearRegression().fit(X_train_bt, y_train_bt)

# Held-out predictions and their summary metrics
y_pred_bt = model_bt.predict(X_test_bt)
mse_bt = mean_squared_error(y_test_bt, y_pred_bt)
rmse_bt = sqrt(mse_bt)
r2_bt = r2_score(y_test_bt, y_pred_bt)

In [9]:
# Report Model B's held-out metrics, formatted to two decimals
print(
    "Body Temperature Prediction - Linear Regression",
    f"R²: {r2_bt:.2f}",
    f"RMSE: {rmse_bt:.2f}",
    sep="\n",
)


Body Temperature Prediction - Linear Regression
R²: 0.81
RMSE: 0.34


# =====================
# **Phase 3:** Predict Calories Burned using outputs from previous models (Model C)
# =====================

In [10]:
# Build Model C's extra inputs: Model A / Model B estimates for every row of df
# (training and held-out rows alike), rounded to 1 decimal place.
# NOTE(review): both upstream models are linear in these same five columns, so apart
# from the rounding the two new columns are linear combinations of features that
# Model C already receives.
base_features = df[['Gender', 'Age', 'Height', 'Weight', 'Duration']]
df['Predicted_Heart_Rate'] = model_hr.predict(base_features).round(1)
df['Predicted_Body_Temp'] = model_bt.predict(base_features).round(1)

df.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Predicted_Heart_Rate,Predicted_Body_Temp
0,14733363,1,68,190,94,29,105,40.8,231.0,108.8,41.2
1,14861698,0,20,166,60,14,94,40.3,66.0,93.9,39.9
2,11179863,1,69,179,79,5,88,38.7,26.0,85.3,39.1
3,16180408,0,34,179,71,13,100,40.5,71.0,92.8,39.8
4,17771927,0,27,154,58,10,81,39.8,35.0,90.1,39.5


In [11]:
# Model C inputs: the five original predictors plus the two upstream estimates
features_cal = [
    'Gender', 'Age', 'Height', 'Weight', 'Duration',
    'Predicted_Heart_Rate', 'Predicted_Body_Temp',
]
X_cal, y_cal = df[features_cal], df['Calories']

# 80/20 hold-out with the same test_size and random_state used for Models A and B
X_train_cal, X_test_cal, y_train_cal, y_test_cal = train_test_split(
    X_cal, y_cal, test_size=0.2, random_state=42
)


In [12]:
# Fit Model C (calories burned) on the training portion
model_cal = LinearRegression()
model_cal.fit(X_train_cal, y_train_cal)

# Score on the held-out rows. Only calorie metrics are computed in this cell;
# Model B's RMSE is already computed in its own cell.
y_pred_cal = model_cal.predict(X_test_cal)
rmse_cal = np.sqrt(mean_squared_error(y_test_cal, y_pred_cal))
r2_cal = r2_score(y_test_cal, y_pred_cal)

# Show performance
print("Calories Prediction - Linear Regression")
print(f"RMSE: {rmse_cal:.2f}")
print(f"R²: {r2_cal:.2f}")


Calories Prediction - Linear Regression
RMSE: 16.29
R²: 0.93


# =====================
# Save Models
# =====================


In [None]:
# Persist the three fitted regressors so they can be reloaded without retraining.
# Pickle files can execute code when loaded, so only load these from a trusted location.
MODELS_DIR = Path('models')

# open(..., 'wb') does not create missing parent directories, so make sure the
# output folder exists before writing.
MODELS_DIR.mkdir(parents=True, exist_ok=True)

fitted_models = {
    'model_hr.pkl': model_hr,    # heart rate model (Phase 1)
    'model_bt.pkl': model_bt,    # body temperature model (Phase 2)
    'model_cal.pkl': model_cal,  # calories burned model (Phase 3)
}

for filename, fitted_model in fitted_models.items():
    with open(MODELS_DIR / filename, 'wb') as f:
        pickle.dump(fitted_model, f)
