In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pickle

# --- Data preparation -------------------------------------------------------
# Read the raw table; 'calories.csv' is resolved against the current working
# directory.
df = pd.read_csv('calories.csv')

# Discard every row that has a missing value in any column. Done in place so
# that `df` stays the same object for the column assignment below.
df.dropna(inplace=True)

# Swap the 'Gender' labels for integer codes. The fitted encoder is kept in
# `le` so the label -> code mapping can be inspected later.
le = LabelEncoder().fit(df['Gender'])
df['GenderIndexed'] = le.transform(df['Gender'])
df = df.drop(columns='Gender')

# Model inputs (order matters: it is the column order the model is fitted on)
# and the regression target.
feature_cols = [
    'GenderIndexed',
    'Age',
    'Height',
    'Weight',
    'Duration',
    'Heart_Rate',
    'Body_Temp',
]
X, y = df[feature_cols], df['Calories']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# --- Training and evaluation ------------------------------------------------
# Fit a random forest on the training split; the seed makes the forest
# deterministic for a given training set.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Score the held-out rows and report the error in the target's own units
# (square root of the mean squared error).
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Persist the fitted estimator to the current working directory.
# NOTE(review): only `model` is written; the fitted Gender encoder (`le`) is
# not stored, so whoever loads this pickle has to reproduce the same
# label -> integer mapping on their own.
with open('rf_calories_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
print("Model saved to 'rf_calories_model.pkl'.")

# --- Smoke test: predict for one hand-written observation --------------------
# Keys mirror the training feature columns, in the same order.
# NOTE(review): 'GenderIndexed' must use the integer coding produced by the
# encoder during training -- confirm which label maps to 1 (e.g. inspect
# `le.classes_`). Likewise, check that the other values use the same units
# as the corresponding columns in the training data.
sample_row = {
    'GenderIndexed': 1.0,
    'Age': 25.0,
    'Height': 175.0,
    'Weight': 70.0,
    'Duration': 30.0,
    'Heart_Rate': 110.0,
    'Body_Temp': 98.6,
}
sample_input = pd.DataFrame([sample_row])

# predict() returns one value per input row; there is exactly one row here.
sample_pred = model.predict(sample_input)
print(f"Prediction for sample input: {sample_pred[0]:.2f} calories")


Root Mean Squared Error (RMSE): 2.59
Model saved to 'rf_calories_model.pkl'.
Prediction for sample input: 187.41 calories
