In [None]:
# 1. Imports
import pandas as pd
import joblib
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# 2. Load dataset
# Read the CSV (path is relative to the working directory) into the frame
# that the later cells refer to as `df`.
df = pd.read_csv(filepath_or_buffer="merged_dataset.csv")
# Last expression of the cell: preview the first rows.
df.head()


In [None]:
# 3. Clean and prepare data
# Keep only rows where the target and every feature used by the calories model
# are present. `df` is rebound here, so later cells see the filtered frame.
df = df.dropna(subset=["Calories Burned", "Exercise Intensity", "Duration", "Heart Rate", "BMI_Category", "Fitness_Level"])

# Define features and target
# .copy() makes X an independent frame: the label encoding below assigns into
# X, and assigning into a plain selection of `df` raises pandas'
# SettingWithCopyWarning.
# NOTE(review): "Exercise Intensity" is passed to the model unencoded, so it is
# assumed to already be numeric — confirm against the dataset.
X = df[["Exercise Intensity", "Duration", "Heart Rate", "BMI_Category", "Fitness_Level"]].copy()
y = df["Calories Burned"]

# Encode categorical columns
categorical_cols = ["BMI_Category", "Fitness_Level"]
encoders = {}  # column name -> fitted LabelEncoder, kept in memory for reuse

os.makedirs("prediction_utils", exist_ok=True)

for col in categorical_cols:
    le = LabelEncoder()
    # Replace the string labels with integer codes in X.
    X[col] = le.fit_transform(X[col])
    encoders[col] = le
    # Persist the fitted encoder alongside the model artifacts.
    joblib.dump(le, f"prediction_utils/{col}_encoder.pkl")

# Save feature columns
# Stores the column order X was built with.
joblib.dump(X.columns.tolist(), "prediction_utils/feature_columns.pkl")


In [None]:
# 4. Train-test split and model training
# 20% of the rows are held out for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# 5. Evaluation
# Score the model on the held-out rows only.
y_pred = model.predict(X_test)
mae_calories = mean_absolute_error(y_test, y_pred)
r2_calories = r2_score(y_test, y_pred)
print("MAE:", mae_calories)
print("R² Score:", r2_calories)

# 6. Save model
joblib.dump(model, "prediction_utils/calories_burned_model.pkl")
print("Model and encoders saved.")


In [None]:
# Output locations for the macronutrient model artifacts.
base_path = "prediction_utils"

# Both artifact paths live directly under base_path.
macros_model_path, macros_features_path = (
    os.path.join(base_path, filename)
    for filename in ("macros_model.pkl", "macros_feature_columns.pkl")
)

# Create the output directory if it does not exist yet.
os.makedirs(base_path, exist_ok=True)

In [None]:
# Macronutrient Distribution Prediction

# Define features and targets for macronutrients
macro_feature_cols = ["Calories (kcal)", "Meal_Type", "Diet_Type", "BMI_Category"]
macro_target_cols = ["Protein (g)", "Carbohydrates (g)", "Fat (g)"]

# Drop rows missing any macro feature or target, mirroring the cleaning done
# for the calories model. A separate frame is used so `df` itself is untouched.
macros_df = df.dropna(subset=macro_feature_cols + macro_target_cols)

# .copy() makes X_macros an independent frame: the encoding loop assigns into
# it, and assigning into a plain selection raises SettingWithCopyWarning.
X_macros = macros_df[macro_feature_cols].copy()
y_macros = macros_df[macro_target_cols]

# Encode categorical features
macro_encoders = {}  # column name -> fitted LabelEncoder
cat_cols = ["Meal_Type", "Diet_Type", "BMI_Category"]

for col in cat_cols:
    le = LabelEncoder()
    # Fit on every non-null label in `df`, not just the rows kept above.
    # "BMI_Category_encoder.pkl" is also written by the calories-model cell to
    # this same path; fitting on the full column keeps the integer codes in the
    # overwritten file independent of the macro-specific row filter.
    le.fit(df[col].dropna())
    X_macros[col] = le.transform(X_macros[col])
    macro_encoders[col] = le
    joblib.dump(le, os.path.join(base_path, f"{col}_encoder.pkl"))

# Save feature names
# Stores the column order X_macros was built with.
joblib.dump(X_macros.columns.tolist(), macros_features_path)

# Train-test split
# 20% held out for evaluation; fixed seed keeps the split repeatable.
X_train_macros, X_test_macros, y_train_macros, y_test_macros = train_test_split(X_macros, y_macros, test_size=0.2, random_state=42)

# Train model
# y has three columns, so a single forest is fit as a multi-output regressor.
macros_model = RandomForestRegressor(n_estimators=100, random_state=42)
macros_model.fit(X_train_macros, y_train_macros)

# Evaluate
# Called with default arguments on a three-column target, so each metric is
# reported as a single number for all targets combined.
macros_pred = macros_model.predict(X_test_macros)
mae_macro = mean_absolute_error(y_test_macros, macros_pred)
r2_macro = r2_score(y_test_macros, macros_pred)

print("Macronutrient Prediction MAE:", mae_macro)
print("Macronutrient Prediction R² Score:", r2_macro)

# Save model
joblib.dump(macros_model, macros_model_path)
print("Macronutrient model and encoders saved.")
