# Calories Burnt Predictor
The dataset contains 15,000 records.

## Data Preprocessing

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Source files: calories.csv holds the target (User_ID, Calories);
# exercise.csv holds the per-user features (Gender, Age, Height, ...).
calories_path = "data/calories.csv"
exercise_path = "data/exercise.csv"

data1 = pd.read_csv(calories_path)
data2 = pd.read_csv(exercise_path)

In [3]:
# Preview the first rows of the calories table (User_ID and Calories).
data1.head()

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0
2,11179863,26.0
3,16180408,71.0
4,17771927,35.0


In [4]:
# Preview the first rows of the exercise table (User_ID plus the candidate features).
data2.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


In [5]:
# Check row count, dtypes and non-null counts of the calories table.
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   User_ID   15000 non-null  int64  
 1   Calories  15000 non-null  float64
dtypes: float64(1), int64(1)
memory usage: 234.5 KB


In [6]:
# Check row count, dtypes and non-null counts of the exercise table;
# Gender is the only non-numeric (object) column and needs encoding.
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   User_ID     15000 non-null  int64  
 1   Gender      15000 non-null  object 
 2   Age         15000 non-null  int64  
 3   Height      15000 non-null  float64
 4   Weight      15000 non-null  float64
 5   Duration    15000 non-null  float64
 6   Heart_Rate  15000 non-null  float64
 7   Body_Temp   15000 non-null  float64
dtypes: float64(5), int64(2), object(1)
memory usage: 937.6+ KB


In [7]:
# User_ID is an identifier, not a predictor, so it is removed from both tables.
# X and y are later paired purely by row position, so before discarding the
# only join key, verify that both files list the same users in the same order.
if "User_ID" in data1.columns and "User_ID" in data2.columns:
    if not data1["User_ID"].equals(data2["User_ID"]):
        raise ValueError(
            "calories.csv and exercise.csv are not row-aligned on User_ID; "
            "merge on User_ID before splitting into features and target"
        )

# Reassign instead of using inplace=True and tolerate an already-missing
# column, so re-running this cell is a no-op rather than a KeyError.
data1 = data1.drop(columns=["User_ID"], errors="ignore")
data2 = data2.drop(columns=["User_ID"], errors="ignore")

In [8]:
# Gender column: list its distinct values to decide how to encode it numerically.
data2["Gender"].unique()

array(['male', 'female'], dtype=object)

In [9]:
# Encode Gender as male -> 0, female -> 1 using an explicit mapping.
# The identity entries (0 -> 0, 1 -> 1) make the cell safe to re-run: with the
# previous `0 if x == "male" else 1` lambda, a second execution turned every
# row into 1, and any unexpected value (NaN, typo, different case) was silently
# labelled 1 as well.
gender_codes = {"male": 0, "female": 1, 0: 0, 1: 1}
gender_encoded = data2["Gender"].map(gender_codes)

# Values missing from the mapping come back as NaN; fail loudly instead of
# training on mislabelled rows.
unmapped = gender_encoded.isna()
if unmapped.any():
    raise ValueError(
        "Unexpected Gender values: {!r}".format(
            list(data2.loc[unmapped, "Gender"].unique())
        )
    )

data2["Gender"] = gender_encoded.astype("int64")

In [10]:
# Confirm that User_ID is gone and Gender now holds the numeric codes.
data2.head()

Unnamed: 0,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,0,68,190.0,94.0,29.0,105.0,40.8
1,1,20,166.0,60.0,14.0,94.0,40.3
2,0,69,179.0,79.0,5.0,88.0,38.7
3,1,34,179.0,71.0,13.0,100.0,40.5
4,1,27,154.0,58.0,10.0,81.0,39.8


In [11]:
# Feature matrix is the exercise table; target is the calories table.
# Both names refer to the existing frames (no copy is made).
X, y = data2, data1

In [12]:
# Sanity check: features and target must have the same number of rows.
X.shape, y.shape

((15000, 7), (15000, 1))

In [13]:
from sklearn.model_selection import train_test_split
# Hold out a test split for evaluation. random_state pins the shuffle so the
# split is reproducible; no test_size is passed, so the library default applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state= 42)

## Fitting the Model

In [14]:
import xgboost
# XGBoost regressor with library-default hyperparameters (none are overridden),
# trained on the training split only so the test split stays unseen.
model = xgboost.XGBRegressor()
model.fit(X_train, y_train)

## Evaluation

In [15]:
# Score the fitted model on the held-out test split
# (the scikit-learn regressor `score` convention is R^2 — same metric as the CV cell below).
model.score(X_test, y_test)

0.9987088943678761

In [16]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation over the full dataset, scored with R^2, as a check
# that the single train/test split above was not unusually favourable.
cvs = cross_val_score(model, X, y, scoring='r2', cv=10)

# Summarise the per-fold scores: mean for overall fit, std for fold-to-fold spread.
r2_mean, r2_std = cvs.mean(), cvs.std()
print("R-Squared: {:.3f}".format(r2_mean))
print("STD: {:.3f}".format(r2_std))

R-Squared: 0.999
STD: 0.000


## Export Model

In [18]:
import pickle

# Serialise the fitted model (trained on the training split) to model.pkl
# in the current working directory.
# NOTE(review): a pickle is presumably only loadable with compatible
# xgboost/scikit-learn versions — confirm the consumer pins the same versions.
with open('model.pkl','wb') as model_file:
    pickle.dump(model, model_file)