# Import Statements

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import random

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn import metrics
from xgboost import XGBRegressor

# Data Preprocessing

In [2]:
# Load the two input tables from CSV files in the working directory:
# df_c holds the per-user calorie values, df_e the per-user exercise measurements.
df_c = pd.read_csv('./calories.csv')
df_e = pd.read_csv('./exercise.csv')

In [3]:
# Preview the first rows of the calories table.
df_c.head()

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0
2,11179863,26.0
3,16180408,71.0
4,17771927,35.0


In [4]:
# Preview the first rows of the exercise table.
df_e.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


In [5]:
# Compare the (rows, columns) of the two tables before joining them.
df_e.shape, df_c.shape

((15000, 8), (15000, 2))

In [6]:
# Join exercise measurements with their calorie values on User_ID;
# an inner join keeps only the users that appear in both tables.
df = pd.merge(left=df_e, right=df_c, on='User_ID', how='inner')

In [7]:
# Preview the merged table (exercise features plus the Calories column).
df.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


In [8]:
# Encode Gender numerically (male -> 0, female -> 1) so it can be used as a model feature.
# The column is assigned back instead of mutated with inplace=True, and the result is cast
# explicitly to int64: relying on replace() to downcast the object column is deprecated in
# recent pandas, and without the cast the column can stay object-typed.
# astype also fails loudly if any value other than 'male'/'female' (or 0/1) is present.
df['Gender'] = df['Gender'].replace({'male': 0, 'female': 1}).astype('int64')

In [9]:
# Feature matrix: every column except the identifier and the target.
x = df.drop(['User_ID', 'Calories'], axis='columns').values
# Target vector: the Calories column.
y = df.loc[:, 'Calories'].values

In [10]:
# Hold out 20% of the rows for evaluation. random_state is fixed so that the split,
# and therefore every metric computed from it, is reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [11]:
# Show the shapes of the training and test feature matrices.
print(x_train.shape, x_test.shape)

(12000, 7) (3000, 7)


## Creating a Linear Regression Model

In [12]:
# Model 1: linear regression with default settings.
model = LinearRegression()

In [13]:
# Fit model 1 on the training split.
model.fit(x_train, y_train)

## XGBRegressor Model

In [14]:
# Model 2: XGBoost regressor with default hyperparameters.
model_2 = XGBRegressor()

In [15]:
# Fit model 2 on the training split.
model_2.fit(x_train, y_train)

## Lasso model

In [16]:
# Model 3: Lasso (L1-regularised linear regression) with default settings.
model_3 = Lasso()

In [17]:
# Fit model 3 on the training split.
model_3.fit(x_train, y_train)

## Support Vector Machine (SVM) Model

In [18]:
# Disabled experiment.
# NOTE(review): SVC is a classifier; for the continuous Calories target a regressor
# such as sklearn.svm.SVR would be the matching estimator if this is ever re-enabled.
# model_4 = SVC(kernel='linear')

In [19]:
# model_4.fit(x_train, y_train)

## Evaluation of Models

In [20]:
# Model 1 (LinearRegression): predictions for the held-out test split.

test_pred_1 = model.predict(x_test)

In [21]:
# Spot-check 20 randomly chosen test rows: model 1 prediction vs. actual value.
# The random index gets its own name (the feature matrix is called `x`, so reusing that
# name would overwrite it), and its range follows len(y_test) instead of a hard-coded 2999.
for i in range(20):
    idx = random.randrange(len(y_test))
    print('predicted calories', round(test_pred_1[idx]), 'Actual Calories', round(y_test[idx]))

predicted calories 164 Actual Calories 162
predicted calories 180 Actual Calories 207
predicted calories 34 Actual Calories 46
predicted calories 171 Actual Calories 196
predicted calories -23 Actual Calories 6
predicted calories 102 Actual Calories 103
predicted calories -10 Actual Calories 21
predicted calories 156 Actual Calories 159
predicted calories 146 Actual Calories 150
predicted calories 26 Actual Calories 22
predicted calories 87 Actual Calories 82
predicted calories 40 Actual Calories 39
predicted calories 210 Actual Calories 226
predicted calories 28 Actual Calories 25
predicted calories 3 Actual Calories 5
predicted calories 64 Actual Calories 73
predicted calories 8 Actual Calories 17
predicted calories 124 Actual Calories 104
predicted calories 73 Actual Calories 63
predicted calories 91 Actual Calories 78


In [22]:
# NOTE: despite the name `mse`, this stores the mean absolute error (MAE) of model 1
# on the test split, not a mean squared error.
mse = metrics.mean_absolute_error(y_test, test_pred_1)

In [23]:
# Report model 1's metrics on the test split. Each metric is computed right here, in the
# same way as the model 2 and model 3 report cells, instead of reading the MAE from a
# variable named `mse` that is assigned in a different cell.
print('Model_1 Mean Absolute Error is:', metrics.mean_absolute_error(y_test, test_pred_1))
print("Model_1 R-Square Score: ", metrics.r2_score(y_test, test_pred_1))
print('Model_1 Mean Square Error', metrics.mean_squared_error(y_test, test_pred_1))

Model_1 Mean Absolute Error is: 8.297512381713673
Model_1 R-Square Score:  0.9678375527207863
Model_1 Mean Square Error 125.64177476127213


In [24]:
# Model 2 (XGBRegressor): predictions for the held-out test split.

test_pred_2 = model_2.predict(x_test)

In [25]:
# Spot-check 10 randomly chosen test rows: model 2 prediction vs. actual value.
# The random index gets its own name (the feature matrix is called `x`, so reusing that
# name would overwrite it), and its range follows len(y_test) instead of a hard-coded 2999.
for i in range(10):
    idx = random.randrange(len(y_test))
    print('predicted calories', round(test_pred_2[idx]), 'Actual Calories', round(y_test[idx]))

predicted calories 100 Actual Calories 100
predicted calories 173 Actual Calories 168
predicted calories 136 Actual Calories 137
predicted calories 188 Actual Calories 187
predicted calories 16 Actual Calories 17
predicted calories 125 Actual Calories 125
predicted calories 193 Actual Calories 192
predicted calories 45 Actual Calories 47
predicted calories 26 Actual Calories 27
predicted calories 6 Actual Calories 6


In [26]:
# Report model 2's test-split metrics: one (label, metric function) pair per printed line.
for label, score_fn in (
    ('Model_2 Mean Square Error:', metrics.mean_squared_error),
    ('Model_2 Mean Absolute Error is:', metrics.mean_absolute_error),
    ("Model_2 R-Square Score: ", metrics.r2_score),
):
    print(label, score_fn(y_test, test_pred_2))

Model_2 Mean Square Error: 4.984808142501161
Model_2 Mean Absolute Error is: 1.564584461838007
Model_2 R-Square Score:  0.9987239623971819


In [27]:
# Model 3 (Lasso): predictions for the held-out test split.

test_pred_3 = model_3.predict(x_test)

In [28]:
# Spot-check 10 randomly chosen test rows: model 3 prediction vs. actual value.
# The random index gets its own name (the feature matrix is called `x`, so reusing that
# name would overwrite it), and its range follows len(y_test) instead of a hard-coded 2999.
for i in range(10):
    idx = random.randrange(len(y_test))
    print('predicted calories', round(test_pred_3[idx]), 'Actual Calories', round(y_test[idx]))

predicted calories 96 Actual Calories 82
predicted calories 169 Actual Calories 181
predicted calories 32 Actual Calories 43
predicted calories 57 Actual Calories 53
predicted calories 123 Actual Calories 114
predicted calories 159 Actual Calories 153
predicted calories 40 Actual Calories 41
predicted calories 77 Actual Calories 68
predicted calories 137 Actual Calories 127
predicted calories 51 Actual Calories 43


In [29]:
# Report model 3's test-split metrics: one (label, metric function) pair per printed line.
for label, score_fn in (
    ('Model_3 Mean Square Error:', metrics.mean_squared_error),
    ('Model_3 Mean Absolute Error is:', metrics.mean_absolute_error),
    ("Model_3 R-Square Score: ", metrics.r2_score),
):
    print(label, score_fn(y_test, test_pred_3))

Model_3 Mean Square Error: 134.49705791489268
Model_3 Mean Absolute Error is: 8.649533046994938
Model_3 R-Square Score:  0.965570730414973


In [30]:
# # model_4

# test_pred_4 = model_4.predict(x_test)

In [31]:
# for i in range(10):
#     x = random.randint(0, 2999)
#     print('predicted calories', round(test_pred_4[x]), 'Actual Calories', round(y_test[x]))
#     print('\t')

In [32]:
# Disabled: metrics for model_4, which is not trained in this notebook.
# print('Model_4 Mean Square Error:', metrics.mean_squared_error(y_test, test_pred_4))
# print('Model_4 Mean Absolute Error is:', metrics.mean_absolute_error(y_test, test_pred_4))
# print("Model_4 R-Square Score: ", metrics.r2_score(y_test, test_pred_4))

### Saving the model

In [33]:
# Persist the fitted model_2 (the XGBRegressor) to a file in the working directory.
joblib.dump(model_2, './model.pkl')

['./model.pkl']

In [34]:
# Reload the model that was just written, to confirm the file round-trips.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
save_model = joblib.load('./model.pkl')

In [35]:
# Sanity check: predict with the reloaded model for training row 14.
# The double brackets keep the input two-dimensional (a single row of features).
save_model.predict(x_train[[14]])

array([75.64775], dtype=float32)

In [36]:
# Actual target value for the same training row, for comparison with the prediction.
y_train[14]

75.0