Importing the dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from xgboost import XGBRegressor
import pickle

Data Collection and Processing

In [2]:
calories=pd.read_csv('calories.csv')

In [3]:
calories.head()

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0
2,11179863,26.0
3,16180408,71.0
4,17771927,35.0


In [4]:
exercise_data=pd.read_csv('exercise.csv')

In [5]:
exercise_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


In [6]:
# Combining the two Dataframes
calories_data=pd.concat([exercise_data,calories['Calories']],axis=1)

In [7]:
# Converting the text data to numerical values (Gender)
calories_data.replace({"Gender": {'male': 0, 'female': 1}}, inplace=True)


  calories_data.replace({"Gender": {'male': 0, 'female': 1}}, inplace=True)


In [8]:
# Calculating Lean Body Mass (LBM) for males and females
# Apply male-specific formula
calories_data.loc[calories_data['Gender'] == 0, 'Lean_Body_mass'] = (
    calories_data['Weight'] * 0.407 + calories_data['Height'] * 0.267 - 19.2
)

# Apply female-specific formula
calories_data.loc[calories_data['Gender'] == 1, 'Lean_Body_mass'] = (
    calories_data['Weight'] * 0.252 + calories_data['Height'] * 0.473 - 48.3
)

In [9]:
calories_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Lean_Body_mass
0,14733363,0,68,190.0,94.0,29.0,105.0,40.8,231.0,69.788
1,14861698,1,20,166.0,60.0,14.0,94.0,40.3,66.0,45.338
2,11179863,0,69,179.0,79.0,5.0,88.0,38.7,26.0,60.746
3,16180408,1,34,179.0,71.0,13.0,100.0,40.5,71.0,54.259
4,17771927,1,27,154.0,58.0,10.0,81.0,39.8,35.0,39.158


In [10]:
calories_data.shape

(15000, 10)

In [11]:
calories_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   User_ID         15000 non-null  int64  
 1   Gender          15000 non-null  int64  
 2   Age             15000 non-null  int64  
 3   Height          15000 non-null  float64
 4   Weight          15000 non-null  float64
 5   Duration        15000 non-null  float64
 6   Heart_Rate      15000 non-null  float64
 7   Body_Temp       15000 non-null  float64
 8   Calories        15000 non-null  float64
 9   Lean_Body_mass  15000 non-null  float64
dtypes: float64(7), int64(3)
memory usage: 1.1 MB


In [12]:
calories_data.isnull().sum()

User_ID           0
Gender            0
Age               0
Height            0
Weight            0
Duration          0
Heart_Rate        0
Body_Temp         0
Calories          0
Lean_Body_mass    0
dtype: int64

Data Analysis

In [13]:
calories_data.describe()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories,Lean_Body_mass
count,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
mean,14977360.0,0.503533,42.7898,174.465133,74.966867,15.5306,95.518533,40.025453,89.539533,55.355938
std,2872851.0,0.500004,16.980264,14.258114,15.035657,8.319203,9.583328,0.77923,62.456978,12.071046
min,10001160.0,0.0,20.0,123.0,36.0,1.0,67.0,37.1,1.0,19.959
25%,12474190.0,0.0,28.0,164.0,63.0,8.0,88.0,39.6,35.0,45.179
50%,14997280.0,1.0,39.0,175.0,74.0,16.0,96.0,40.2,79.0,55.494
75%,17449280.0,1.0,56.0,185.0,87.0,23.0,103.0,40.6,138.0,65.223
max,19999650.0,1.0,79.0,222.0,132.0,30.0,128.0,41.5,314.0,92.73


Separating features and target

In [14]:
X=calories_data.drop(columns=['User_ID','Calories'],axis=1)
Y=calories_data['Calories']

In [15]:
print(X)

       Gender  Age  Height  Weight  Duration  Heart_Rate  Body_Temp  \
0           0   68   190.0    94.0      29.0       105.0       40.8   
1           1   20   166.0    60.0      14.0        94.0       40.3   
2           0   69   179.0    79.0       5.0        88.0       38.7   
3           1   34   179.0    71.0      13.0       100.0       40.5   
4           1   27   154.0    58.0      10.0        81.0       39.8   
...       ...  ...     ...     ...       ...         ...        ...   
14995       1   20   193.0    86.0      11.0        92.0       40.4   
14996       1   27   165.0    65.0       6.0        85.0       39.2   
14997       1   43   159.0    58.0      16.0        90.0       40.1   
14998       0   78   193.0    97.0       2.0        84.0       38.3   
14999       0   63   173.0    79.0      18.0        92.0       40.5   

       Lean_Body_mass  
0              69.788  
1              45.338  
2              60.746  
3              54.259  
4              39.158  
...

In [16]:
print(Y)

0        231.0
1         66.0
2         26.0
3         71.0
4         35.0
         ...  
14995     45.0
14996     23.0
14997     75.0
14998     11.0
14999     98.0
Name: Calories, Length: 15000, dtype: float64


Splitting the data into training data and Test data

In [17]:
X_train, X_test, Y_train, Y_test=train_test_split(X,Y,test_size=0.2,random_state=2)

In [18]:
print(X.shape,X_train.shape,X_test.shape)

(15000, 8) (12000, 8) (3000, 8)


Model Training

XGBoost Regressor

In [19]:
#loading the model
model=XGBRegressor()

In [20]:
#training the model with X_train
model.fit(X_train,Y_train)

Prediction on Test Data

In [21]:
test_data_prediction=model.predict(X_test)

In [22]:
print(test_data_prediction)

[125.660706 221.76747   40.04258  ... 143.01523   23.873156  91.53726 ]


Mean Absolute Error

In [23]:
mae = mean_absolute_error(Y_test, test_data_prediction)
r2 = r2_score(Y_test, test_data_prediction)

In [24]:
print("Mean Absolute Error=",mae)
print(f"R² Score of XGBoost model: {r2}")

Mean Absolute Error= 1.512008231629928
R² Score of XGBoost model: 0.9987879640641213


In [25]:
# Function to calculate Lean Body Mass based on gender, weight, and height
def calculate_lbm(gender, weight, height):
    if gender == 0:  # Male
        lbm = 0.407 * weight + 0.267 * height - 19.2
    else:  # Female
        lbm = 0.252 * weight + 0.473 * height - 48.3
    return lbm


In [26]:
example_input_data = np.array([[1,34,179.0,71.0,13.0,100.0,40.5]])
example_lbm = calculate_lbm(example_input_data[0][0], example_input_data[0][2], example_input_data[0][3])
example_input_with_lbm = np.append(example_input_data, example_lbm).reshape(1, -1)


In [27]:
# Predicting Calories using the trained model
example_prediction = model.predict(example_input_with_lbm)
print(f"Predicted Calories for the example input: {example_prediction}")


Predicted Calories for the example input: [71.81158]


Saving the trained model

In [28]:
# Saving the model as a pickle file
with open("xgboost_model.pkl", "wb") as pickle_file:
    pickle.dump(model, pickle_file)

print("XGBoost model saved as 'xgboost_model.pkl'.")

XGBoost model saved as 'xgboost_model.pkl'.
