In [2]:
# Import libraries
import pandas as pd

# Step 1: Read the raw gym-member CSV into a DataFrame.
# Point file_path at wherever the dataset lives on your machine.
file_path = "data/gym_members_exercise_tracking.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first rows exactly as loaded (original column names).
print("Initial Dataset:", data.head(), sep="\n")

# Step 2: Summarise column dtypes and non-null counts.
# DataFrame.info() writes its report directly to stdout.
print("\nDataset Info:")
data.info()

# Step 3: Data Cleaning
# 3.1 Rename columns for consistency: trim surrounding whitespace, lower-case,
# and replace spaces with underscores (e.g. "Weight (kg)" -> "weight_(kg)").
# regex=False makes the replacement a plain literal substitution.
data.columns = data.columns.str.strip().str.lower().str.replace(" ", "_", regex=False)

# 3.2 Drop rows with missing values BEFORE casting. Casting first would raise
# on a missing age (NaN cannot be converted to int) and could turn a missing
# gender into the literal string "nan", which dropna() would then keep.
data = data.dropna()

# 3.3 Convert relevant columns to appropriate types in a single pass
data = data.astype({
    'age': int,
    'gender': str,
    'weight_(kg)': float,
    'height_(m)': float,
    'session_duration_(hours)': float,
    'calories_burned': float,
    'water_intake_(liters)': float,
})

# 3.4 Keep only rows with a positive calorie count. The explicit .copy() gives
# an independent frame, so adding columns later does not trigger pandas'
# SettingWithCopyWarning.
data = data[data['calories_burned'] > 0].copy()

# Step 4: Add derived columns for analysis
# Session length in minutes; reused below for the calories-per-minute rate.
session_minutes = data['session_duration_(hours)'] * 60

# assign() returns a new frame instead of writing into a filtered slice, so it
# cannot trigger SettingWithCopyWarning and the cell is safe to re-run.
data = data.assign(
    # BMI categories follow the conventional adult cut-offs using left-closed
    # intervals: [0, 18.5) Underweight, [18.5, 25) Normal weight,
    # [25, 30) Overweight, [30, inf) Obese. With right=False a BMI of exactly
    # 18.5 is Normal weight, and values such as 24.95 or 29.95 no longer fall
    # into the next category up. The open upper bound keeps very large BMIs
    # labelled instead of turning them into NaN.
    bmi_category=pd.cut(
        data['bmi'],
        bins=[0, 18.5, 25, 30, float('inf')],
        labels=['Underweight', 'Normal weight', 'Overweight', 'Obese'],
        right=False,
    ),
    # Non-positive durations are masked to NaN so the rate is NaN rather than
    # inf (division by zero) or a meaningless negative value.
    calories_per_minute=data['calories_burned'] / session_minutes.where(session_minutes > 0),
)

# Preview the first rows of the frame as it stands after the steps above.
print("\nCleaned Dataset:", data.head(), sep="\n")

# Step 5: Persist the frame as CSV for the next steps.
# index=False leaves the row index out of the written file.
cleaned_file_path = "data/cleaned_gym_data.csv"
data.to_csv(path_or_buf=cleaned_file_path, index=False)
print(f"\nCleaned data saved to {cleaned_file_path}")


Initial Dataset:
   Age  Gender  Weight (kg)  Height (m)  Max_BPM  Avg_BPM  Resting_BPM  \
0   56    Male         88.3        1.71      180      157           60   
1   46  Female         74.9        1.53      179      151           66   
2   32  Female         68.1        1.66      167      122           54   
3   25    Male         53.2        1.70      190      164           56   
4   38    Male         46.1        1.79      188      158           68   

   Session_Duration (hours)  Calories_Burned Workout_Type  Fat_Percentage  \
0                      1.69           1313.0         Yoga            12.6   
1                      1.30            883.0         HIIT            33.9   
2                      1.11            677.0       Cardio            33.4   
3                      0.59            532.0     Strength            28.8   
4                      0.64            556.0     Strength            29.2   

   Water_Intake (liters)  Workout_Frequency (days/week)  Experience_Level  