# **Calories Burnt Prediction**

**Dataset Link:-** https://www.kaggle.com/code/pragathiputhran/calories-burnt-prediction/data

In [None]:
# import required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
#from sklearn.linear_model import Ridge,Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from statsmodels.stats.outliers_influence import variance_inflation_factor 
import pickle

import warnings
from warnings import filterwarnings
# Mute every Python warning for the rest of the session so cell outputs stay uncluttered.
warnings.filterwarnings(action="ignore")

# Apply seaborn's default theme to all figures drawn afterwards.
sns.set()

In [None]:
# Read the calories table from disk and preview its first rows
calories_csv = "content/calories.csv"
df1 = pd.read_csv(calories_csv)
df1.head()

In [None]:
df1.shape

In [None]:
# Read the exercise table from disk and preview its first rows
exercise_csv = "content/exercise.csv"
df2 = pd.read_csv(exercise_csv)
df2.head()

In [None]:
df2.shape

**Now concatenate both DataFrames, i.e. df1 and df2**

In [None]:

# pd.concat(axis=1) pairs rows purely by index label. If the two files differ in
# length or row order, the result silently contains NaNs or attaches a target
# value to the wrong person, so verify the alignment before joining.
if len(df1) != len(df2):
    raise ValueError(
        "calories and exercise tables differ in length: {} vs {}".format(len(df1), len(df2))
    )
# Only possible when both tables carry the identifier column.
if "User_ID" in df1.columns and "User_ID" in df2.columns:
    if not df1["User_ID"].equals(df2["User_ID"]):
        raise ValueError("User_ID values/order differ between the calories and exercise tables")

# Exercise features on the left, the Calories target appended as the last column.
df = pd.concat([df2, df1["Calories"]], axis=1)

In [None]:
df.head()

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.isnull().sum()

In [None]:
# User_ID is not required for the analysis, so remove it from the main DataFrame.
# Rebinding `df` (instead of inplace=True) together with errors="ignore" keeps this
# cell safe to re-run: a second execution no longer raises KeyError once the
# column is already gone.
df = df.drop(columns=["User_ID"], errors="ignore")

In [None]:
df.head()

In [None]:
df.info()

#### **Separate Categorical and Numerical Features**

**1. Categorical Features**

In [None]:
# Fetch the categorical columns, i.e. those stored with object dtype ('O')
cat_col = [name for name, dtype in df.dtypes.items() if dtype == 'O']
cat_col

In [None]:
df["Gender"].value_counts()

In [None]:
# Count plot showing how many rows fall into each Gender category
gender = df['Gender']
sns.countplot(x=gender)
plt.show()

In [None]:
pd.get_dummies(df["Gender"],drop_first=True)

In [None]:
# Keep only the categorical columns in their own frame and preview it
categorical = df.loc[:, cat_col]
categorical.head()

In [None]:
# One-hot encode Gender straight from `df` rather than from `categorical`.
# This cell rebinds `categorical` to the encoded frame, so reading its input from
# `categorical` would raise KeyError ("Gender") the second time the cell is run.
# drop_first=True drops the first category's indicator column.
categorical = pd.get_dummies(df["Gender"], drop_first=True)

In [None]:
categorical

**2. Numerical Features**

In [None]:
# Names of every column whose dtype is not object, i.e. the numerical ones
Num_col = [name for name, dtype in df.dtypes.items() if dtype != "O"]
Num_col

In [None]:
df[Num_col].shape

In [None]:
# Put the numerical columns in their own frame and preview it
Numerical = df.loc[:, Num_col]
Numerical.head()

In [None]:
Numerical.shape

In [None]:
# Distribution plot of each numerical column on a 3x3 grid (at most the first 8 columns)
plt.figure(figsize=(20, 15))

for position, name in enumerate(Numerical.columns[:8], start=1):
    plt.subplot(3, 3, position)
    sns.distplot(Numerical[name])
    plt.xlabel(name, fontsize=15)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between the numerical columns
correlations = Numerical.corr()

plt.figure(figsize=(10, 10))
sns.heatmap(correlations, annot=True, cmap='Blues')

### **Concatenate Categorical and Numerical**

In [None]:
# Put the encoded categorical column(s) side by side with the numerical columns
data = pd.concat(objs=[categorical, Numerical], axis="columns")

In [None]:
data.head()

In [None]:
# Box plot of every column of `data` drawn on one shared set of axes
fig, ax = plt.subplots(figsize=(15, 10))
sns.boxplot(
    data=data,
    ax=ax,
    width=0.5,
    fliersize=3,
)

In [None]:
# Distribution plot of each column of the combined frame on a 3x3 grid
# (only the first 8 columns are drawn)
plt.figure(figsize=(20, 15))

shown_columns = list(data.columns)[:8]
for slot, feature in enumerate(shown_columns, start=1):
    plt.subplot(3, 3, slot)
    sns.distplot(data[feature])
    plt.xlabel(feature, fontsize=15)
plt.show()

In [None]:
data.columns

In [None]:
# Features are every column except the target; the target is the Calories column
target = "Calories"
X = data.drop(columns=[target])
y = data[target]

In [None]:
X.head()

In [None]:
y.head()

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable
split = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_test, y_train, y_test = split

In [None]:
# Report the dimensions of each piece of the train/test split
for label, part in (
    ("Shape of X Train: ", X_train),
    ("Shape of X Test: ", X_test),
    ("Shape of y Train: ", y_train),
    ("Shape of y Test: ", y_test),
):
    print(label, part.shape)

In [None]:
#from sklearn import metrics
def predict(ml_model):
    """Fit ``ml_model`` on the training split and report its test-split performance.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    Prints the score on the training data, the raw test predictions, and the
    test-set R2, MAE, MSE and RMSE, then draws the distribution of the
    residuals (``y_test - y_prediction``).

    Parameters
    ----------
    ml_model : estimator
        Object exposing ``fit``, ``score`` and ``predict``.

    Returns
    -------
    The object returned by ``ml_model.fit(X_train, y_train)`` (the fitted
    model), so that the caller can keep or persist it.
    """
    model = ml_model.fit(X_train, y_train)
    # Note: this score is measured on the training data, not on the held-out data.
    print('Score : {}'.format(model.score(X_train, y_train)))
    y_prediction = model.predict(X_test)
    print('predictions are: \n {}'.format(y_prediction))
    print('\n')

    r2 = metrics.r2_score(y_test, y_prediction)
    print('r2 score: {}'.format(r2))

    # Compute the MSE once and reuse it for the RMSE.
    mse = metrics.mean_squared_error(y_test, y_prediction)
    print('MAE:', metrics.mean_absolute_error(y_test, y_prediction))
    print('MSE:', mse)
    print('RMSE:', np.sqrt(mse))

    # Residual distribution.
    sns.distplot(y_test - y_prediction)

    # Hand the fitted model back instead of implicitly returning None.
    return model

**XGB Regressor**

In [None]:
# Keep our own reference to the estimator: XGBRegressor.fit trains the object in
# place, so after predict() has run `xgb_model` is the trained model. Binding
# `regression` to this object (rather than to whatever predict() returns)
# guarantees that the model saved later is the fitted regressor and never None.
xgb_model = XGBRegressor()
predict(xgb_model)
regression = xgb_model
regression

**Save the Model**

In [None]:
# Save the model to the local file system.
# The with-block closes (and therefore flushes) the file even if pickling raises,
# instead of leaving an open handle behind.
filename = 'finalized_model.pickle'
with open(filename, 'wb') as model_file:
    pickle.dump(regression, model_file)

**Linear Regression**

In [None]:
# Run the shared fit-and-report helper with a linear regression model
lin_reg = LinearRegression()
predict(lin_reg)

**DecisionTree Regression**

In [None]:
# Run the shared fit-and-report helper with a decision tree regressor
tree_reg = DecisionTreeRegressor()
predict(tree_reg)

**RandomForest Regression**

In [None]:
# Run the shared fit-and-report helper with a random forest regressor
forest_reg = RandomForestRegressor()
predict(forest_reg)