In [1]:
import pandas as pd

import warnings
warnings.simplefilter('ignore')

In [2]:
# Importing the Dataset

df = pd.read_csv(r'clean.csv')
df

Unnamed: 0,Age,Heart_Rate,Calories,Gender_male
0,68,105.0,231.0,1
1,20,94.0,66.0,0
2,69,88.0,26.0,1
3,34,100.0,71.0,0
4,27,81.0,35.0,0
...,...,...,...,...
14995,20,92.0,45.0,0
14996,27,85.0,23.0,0
14997,43,90.0,75.0,0
14998,78,84.0,11.0,1


In [3]:
# Seperating the Dependent and Independent variables.

X = df.drop(columns=['Calories'])
y = df['Calories']

In [4]:
# splitting the data into train and test
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(X,y,random_state=42,train_size=0.8)

In [5]:
# Apply Hyperparameter Tunning to determine alpha value.

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso

estimator = Lasso()
param_grid  = {'alpha':list(range(1,100))}

grid = GridSearchCV(estimator,param_grid,cv=5,scoring='r2')
grid.fit(x_train,y_train)
grid.best_params_

{'alpha': 1}

In [6]:
# Build Lasso Regression with alpha = 1

model = Lasso(alpha=1)
model.fit(x_train,y_train)

print('Intercept is :',model.intercept_)
print('Coefficients are :',model.coef_)

Intercept is : -489.59695718768694
Coefficients are : [0.52222044 5.82748904 0.        ]


- The Coefficient of Gender_male column is 0, which means that column is not necessary for building the model.
- So, we have to drop that column and build the final model with remaining variables.

In [7]:
df = df.drop(columns=['Gender_male'])

In [8]:
# Seperating the Dependent and Independent variables.

X = df.drop(columns=['Calories'])
y = df['Calories']

In [9]:
# splitting the data into train and test
from sklearn.model_selection import train_test_split

x_train,x_test,y_train,y_test = train_test_split(X,y,random_state=39,train_size=0.8)

In [10]:
# Build Lasso Regression final model with alpha = 1

model = Lasso(alpha=1)
model.fit(x_train,y_train)

print('Intercept is :',model.intercept_)
print('Coefficients are :',model.coef_)

Intercept is : -487.9893930766272
Coefficients are : [0.53193453 5.80763862]


In [11]:
# predicting on the train data

ypred_train = model.predict(x_train)

# Calculating train r2 score and cross validation score
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score

print('Train r2 score is :',r2_score(y_train,ypred_train))
print('cross validation score is :',cross_val_score(model,x_train,y_train,cv=5).mean())

# predicting on test data
ypred_test = model.predict(x_test)

# calculating the test r2 score
print('Test r2 score is :',r2_score(y_test,ypred_test))

Train r2 score is : 0.8285304435020524
cross validation score is : 0.8281625511117519
Test r2 score is : 0.8218548676005608
