## Importing libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display, HTML

## Loading Data from CSV File

The data used in this project is sourced from Kaggle's [Gym Members Exercise Dataset](https://www.kaggle.com/datasets/valakhorasani/gym-members-exercise-dataset).

This dataset is published under the [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).


In [2]:
# Load the gym-members dataset. The relative path is resolved against the
# notebook's current working directory.
data = pd.read_csv('gym_members_exercise_tracking.csv')

# Preview the first five rows as an HTML table without the index column.
# Cell contents are HTML-escaped (the `to_html` default) because the values
# come from an external file and must not be rendered as raw markup.
html_table = data.head(5).to_html(index=False)

# Render the table in this cell's output
display(HTML(html_table))

Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.2
46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0
32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71
25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41
38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39


## Data Cleaning and Enhancement

In this section, we perform two main tasks to enhance the dataset:

1. **Adding a New Column `Calories_spent_per_hour`**  
   We calculate the calories burned per hour of workout by dividing the total calories burned by the session duration in hours. This helps us understand the workout intensity based on the calories burned over time.

2. **Data Cleaning**  
   - **Removing Rows with Missing Data**: We remove any rows with missing values to ensure the integrity of the dataset for analysis.
   - **Removing Outliers**: We filter out extreme values by removing individuals with a BMI below 15 or above 50, as such values are unrealistic and could skew the analysis.

After these modifications, the dataset is cleaner and more suitable for analysis.


In [3]:
# Inclusive BMI range kept by the outlier filter below; rows outside it are dropped.
BMI_MIN = 15
BMI_MAX = 50

# 1. Adding a new column 'Calories_spent_per_hour'
# Calories burned per hour of workout = total calories / session length in hours.
# NOTE: the column is added to `data` itself, not only to the cleaned copy.
data['Calories_spent_per_hour'] = data['Calories_Burned'] / data['Session_Duration (hours)']

# 2. Data cleaning
# - Removing rows with missing data.
#   A zero session duration makes the division above yield +/-inf, which
#   `dropna` does not treat as missing, so infinities are mapped to NaN first.
data_cleaned = data.replace([np.inf, -np.inf], np.nan).dropna()

# - Removing outliers: keep only rows whose BMI lies in [BMI_MIN, BMI_MAX]
#   (`between` is inclusive on both ends, matching `>= 15` and `<= 50`).
data_cleaned = data_cleaned[data_cleaned['BMI'].between(BMI_MIN, BMI_MAX)]

# Preview the first five cleaned rows as an HTML table without the index column.
# Cell contents are HTML-escaped (the `to_html` default) because the values
# come from an external file and must not be rendered as raw markup.
html_table = data_cleaned.head(5).to_html(index=False)

# Render the table in this cell's output
display(HTML(html_table))

Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI,Calories_spent_per_hour
56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.2,776.923077
46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0,679.230769
32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71,609.90991
25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41,901.694915
56,Female,58.0,1.68,168,156,74,1.59,1116.0,HIIT,15.5,2.7,5,3,20.55,701.886792
