## Importing the Dependencies

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn import metrics

## Data Collection and Preprocessing

In [3]:
calories = pd.read_csv('calories.csv')

In [4]:
calories.head()

Unnamed: 0,User_ID,Calories
0,14733363,231.0
1,14861698,66.0
2,11179863,26.0
3,16180408,71.0
4,17771927,35.0


In [5]:
exercise_data = pd.read_csv('exercise.csv')

In [6]:
exercise_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8


### Combining the two dataframes

In [7]:
calories_data = pd.concat([exercise_data, calories['Calories']], axis=1)

In [8]:
calories_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,male,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,female,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,male,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,female,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,female,27,154.0,58.0,10.0,81.0,39.8,35.0


In [9]:
calories_data.shape

(15000, 9)

In [10]:
calories_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   User_ID     15000 non-null  int64  
 1   Gender      15000 non-null  object 
 2   Age         15000 non-null  int64  
 3   Height      15000 non-null  float64
 4   Weight      15000 non-null  float64
 5   Duration    15000 non-null  float64
 6   Heart_Rate  15000 non-null  float64
 7   Body_Temp   15000 non-null  float64
 8   Calories    15000 non-null  float64
dtypes: float64(6), int64(2), object(1)
memory usage: 1.0+ MB


In [11]:
calories_data.isnull().sum()

User_ID       0
Gender        0
Age           0
Height        0
Weight        0
Duration      0
Heart_Rate    0
Body_Temp     0
Calories      0
dtype: int64

### Data Analysis

In [12]:
calories_data.describe()

Unnamed: 0,User_ID,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
count,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0,15000.0
mean,14977360.0,42.7898,174.465133,74.966867,15.5306,95.518533,40.025453,89.539533
std,2872851.0,16.980264,14.258114,15.035657,8.319203,9.583328,0.77923,62.456978
min,10001160.0,20.0,123.0,36.0,1.0,67.0,37.1,1.0
25%,12474190.0,28.0,164.0,63.0,8.0,88.0,39.6,35.0
50%,14997280.0,39.0,175.0,74.0,16.0,96.0,40.2,79.0
75%,17449280.0,56.0,185.0,87.0,23.0,103.0,40.6,138.0
max,19999650.0,79.0,222.0,132.0,30.0,128.0,41.5,314.0


### Data Visualization

In [13]:
sns.countplot(calories_data['Gender'])

<Axes: xlabel='count', ylabel='Gender'>

In [14]:
sns.histplot(calories_data['Age'],kde=True)

<Axes: xlabel='count', ylabel='Gender'>

In [15]:
sns.histplot(calories_data['Height'],kde=True)

<Axes: xlabel='count', ylabel='Gender'>

In [16]:
sns.histplot(calories_data['Weight'],kde=True)

<Axes: xlabel='count', ylabel='Gender'>

In [17]:
calories_data.replace({"Gender":{'male':0,'female':1}}, inplace=True)

  calories_data.replace({"Gender":{'male':0,'female':1}}, inplace=True)


In [18]:
calories_data.head()

Unnamed: 0,User_ID,Gender,Age,Height,Weight,Duration,Heart_Rate,Body_Temp,Calories
0,14733363,0,68,190.0,94.0,29.0,105.0,40.8,231.0
1,14861698,1,20,166.0,60.0,14.0,94.0,40.3,66.0
2,11179863,0,69,179.0,79.0,5.0,88.0,38.7,26.0
3,16180408,1,34,179.0,71.0,13.0,100.0,40.5,71.0
4,17771927,1,27,154.0,58.0,10.0,81.0,39.8,35.0


In [19]:
X = calories_data.drop(columns=['User_ID','Calories'], axis=1)
Y = calories_data['Calories']

In [20]:
print(X)

       Gender  Age  Height  Weight  Duration  Heart_Rate  Body_Temp
0           0   68   190.0    94.0      29.0       105.0       40.8
1           1   20   166.0    60.0      14.0        94.0       40.3
2           0   69   179.0    79.0       5.0        88.0       38.7
3           1   34   179.0    71.0      13.0       100.0       40.5
4           1   27   154.0    58.0      10.0        81.0       39.8
...       ...  ...     ...     ...       ...         ...        ...
14995       1   20   193.0    86.0      11.0        92.0       40.4
14996       1   27   165.0    65.0       6.0        85.0       39.2
14997       1   43   159.0    58.0      16.0        90.0       40.1
14998       0   78   193.0    97.0       2.0        84.0       38.3
14999       0   63   173.0    79.0      18.0        92.0       40.5

[15000 rows x 7 columns]


In [21]:
print(Y)

0        231.0
1         66.0
2         26.0
3         71.0
4         35.0
         ...  
14995     45.0
14996     23.0
14997     75.0
14998     11.0
14999     98.0
Name: Calories, Length: 15000, dtype: float64


In [22]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=2)

In [23]:
print(X.shape, X_train.shape, X_test.shape)

(15000, 7) (12000, 7) (3000, 7)


## Model Training

In [24]:
model = XGBRegressor()

In [25]:
model.fit(X_train, Y_train)

## Prediction on Test Data

In [26]:
test_data_prediction = model.predict(X_test)
print(test_data_prediction)

[125.58828  222.11377   38.725952 ... 144.3179    23.425894  90.100494]


## Mean Absolute Error

In [27]:
mae = metrics.mean_absolute_error(Y_test, test_data_prediction)
print("Mean Absolute Error = ", mae)

Mean Absolute Error =  1.4833678883314132


In [33]:
import pickle

# ... your training code ...

# Save the model as a pickle file
with open('calories_model.pkl', 'wb') as f:
    pickle.dump(model, f)

print("Model saved as calories_model.pkl")

Model saved as calories_model.pkl


In [29]:
import streamlit as st

In [32]:
def calorie_burn_predictor():
    st.title("Calorie Burn Prediction")

    # Collect user inputs
    gender = st.number_input("Gender (0 for Male, 1 for Female)", min_value=0, max_value=1)
    age = st.number_input("Age", min_value=0, max_value=120)
    height = st.number_input("Height (in inches)", min_value=0)
    weight = st.number_input("Weight (in pounds)", min_value=0)
    duration = st.number_input("Duration (in minutes)", min_value=0)
    heart_rate = st.number_input("Heart Rate (BPM)", min_value=0)
    body_temp = st.number_input("Body Temperature (in Fahrenheit)", min_value=95, max_value=110)

    # Prepare input data for prediction
    input_data = np.array([[gender, age, height, weight, duration, heart_rate, body_temp]])

    # Make the prediction
    predicted_calories_burnt = model.predict(input_data)

    # Display the predicted calories burnt
    st.success("Predicted Calories Burnt: {:.2f}".format(predicted_calories_burnt[0]))

# Run the app
if __name__ == "__main__":
    calorie_burn_predictor()

