In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_log_error, r2_score
import joblib

# 1. Read the training data
# NOTE: absolute, machine-specific Windows path -- adjust when running elsewhere.
df = pd.read_csv("D:\\Calorie_Predicitor_Project\\dataset\\train (1).csv")

# 2. Separate the target from the predictors
# 'id' is removed together with the target, so it is never used as a feature.
y = df['Calories']
X = df.drop(columns=['Calories', 'id'])

# 3. Define Feature Types
categorical_features = ['Sex']
numerical_features = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']

# 4. Preprocessing Pipeline
# Numeric columns are standardised and categorical columns are one-hot encoded.
# handle_unknown='ignore' makes the encoder emit an all-zero encoding for a
# category that was not present during fit instead of raising; this pipeline is
# saved for deployment, where such values can show up in incoming requests.
# The encoding of categories seen during fit is unaffected.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ])

# 5. Complete Pipeline with XGBoost
# Bundling the preprocessor with the regressor means the fitted scaler and
# encoder travel with the model, so callers pass raw feature columns.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', XGBRegressor(max_depth=4, n_estimators=400, learning_rate=0.1, random_state=42))
])

# 6. Hold out 20% of the rows for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 7. Fit the preprocessing steps and the regressor on the training portion only
pipeline.fit(X_train, y_train)

# 8-9. Predict on both portions and floor every prediction at zero, so that
# the log-based metric computed afterwards never receives a negative value.
y_train_pred, y_test_pred = (
    np.maximum(0, pipeline.predict(features))
    for features in (X_train, X_test)
)

# 10. Score the model on the training and the held-out portion
def _rmsle_and_r2(actual, predicted):
    """Return the pair (RMSLE, R²) for one set of targets and predictions."""
    rmsle = np.sqrt(mean_squared_log_error(actual, predicted))
    return rmsle, r2_score(actual, predicted)


rmsle_train, r2_train = _rmsle_and_r2(y_train, y_train_pred)
rmsle_test, r2_test = _rmsle_and_r2(y_test, y_test_pred)

print(f"Train RMSLE: {rmsle_train:.5f}, Train R²: {r2_train:.5f}")
print(f"Test RMSLE: {rmsle_test:.5f}, Test R²: {r2_test:.5f}")

# 11. Persist the fitted pipeline (preprocessing + regressor) for deployment.
# The file name is relative, so it is written to the current working directory.
joblib.dump(value=pipeline, filename='calorie_predictor_pipeline.pkl')
print("✅ Model pipeline saved as 'calorie_predictor_pipeline.pkl'")


Train RMSLE: 0.06598, Train R²: 0.99664
Test RMSLE: 0.06731, Test R²: 0.99641
✅ Model pipeline saved as 'calorie_predictor_pipeline.pkl'
