In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# --- Data ingestion -------------------------------------------------------
# Both tables are read from the current working directory.
calories = pd.read_csv("calories.csv")
exercise = pd.read_csv("exercise.csv")

# Join the two tables on their shared User_ID key (pandas' default inner
# join), then discard the key: it is used only for the join and is not
# among the columns selected for modelling below.
df = pd.merge(exercise, calories, on="User_ID").drop(columns="User_ID")

# --- Feature engineering --------------------------------------------------
# BMI = Weight / (Height / 100) ** 2, kept to two decimals.
# NOTE(review): dividing Height by 100 presumably converts cm to m, with
# Weight in kg -- confirm the units against the dataset description.
height_scaled = df["Height"] / 100
df["BMI"] = (df["Weight"] / (height_scaled ** 2)).round(2)

# Keep only the modelling columns (predictors plus the Calories target),
# then dummy-encode whatever non-numeric columns remain. drop_first=True
# omits the first level of each encoded column.
model_columns = ["Gender", "Age", "BMI", "Duration", "Heart_Rate", "Body_Temp", "Calories"]
df = pd.get_dummies(df[model_columns], drop_first=True)

# --- Train / test partition -----------------------------------------------
y = df["Calories"]
X = df.drop(columns="Calories")

# 80/20 split with a fixed seed so the partition is reproducible.
# X_test / y_test are not used again within this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

# --- Model fitting --------------------------------------------------------
# Hyperparameters are gathered in one mapping; fit() returns the estimator
# itself, so construction and training can be chained.
forest_settings = {
    "n_estimators": 1000,
    "max_features": 3,
    "max_depth": 6,
    "random_state": 1,
}
model = RandomForestRegressor(**forest_settings).fit(X_train, y_train)

# --- Persistence ----------------------------------------------------------
# Serialise the fitted estimator with joblib, then report completion.
joblib.dump(model, "random_forest_calories.pkl")
print("Model training complete. Saved as 'random_forest_calories.pkl'")


Model training complete. Saved as 'random_forest_calories.pkl'
