In [1]:
import pandas as pd

import warnings
warnings.simplefilter('ignore')

In [2]:
# Importing the Dataset

df = pd.read_csv(r'clean.csv')
df

Unnamed: 0,Age,Heart_Rate,Calories,Gender_male
0,68,105.0,231.0,1
1,20,94.0,66.0,0
2,69,88.0,26.0,1
3,34,100.0,71.0,0
4,27,81.0,35.0,0
...,...,...,...,...
14995,20,92.0,45.0,0
14996,27,85.0,23.0,0
14997,43,90.0,75.0,0
14998,78,84.0,11.0,1


In [3]:
# Seperating the Dependent and Independent variables

X = df.drop(columns=['Calories'])
y = df['Calories']

In [4]:
# Splitting the Data into Train and Test
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(X,y,random_state=42,train_size=0.8)

In [5]:
# Building the Model using Gradient Boost Regressor.
# Apply HyperParameter Tunning for best parameters.

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

estimator = GradientBoostingRegressor()

param_grid = {'n_estimators':list(range(1,50)),
              'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]}

grid = GridSearchCV(estimator,param_grid,cv=5,scoring='r2')
grid.fit(x_train,y_train)
info = grid.best_estimator_

In [6]:
features = pd.DataFrame(data=info.feature_importances_,columns=['Importances'],index=x_train.columns)

imp_feat = features[features['Importances']>0].index.tolist()
imp_feat

['Age', 'Heart_Rate', 'Gender_male']

In [7]:
model = grid.best_estimator_
model.fit(x_train,y_train)

# Predicting on Train Data
ypred_train = model.predict(x_train)

# Calculating the Train r2 score and Cross Validation Score
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

print('Train r2 score is :',r2_score(y_train,ypred_train))
print('Cross validation score is :',cross_val_score(model,x_train,y_train,cv=5).mean())

# Predicting on Test data
ypred_test = model.predict(x_test)

#  Calculating the Test r2 score
print('Test r2 score is :',r2_score(y_test,ypred_test))

Train r2 score is : 0.8684516976322078
Cross validation score is : 0.8648846077877845
Test r2 score is : 0.8717088216660027
