In [4]:
import pandas as pd
import statistics
import numpy as np

# Load the gym members exercise dataset.
data = pd.read_csv("gym_members_exercise_tracking.csv")

# Descriptive Statistic : Analyzed Weight (kg), Height (m), Max_BPM, Avg_BPM, Resting_BPM, Session_Duration (hours), Calories_Burned, Fat_Percentage, Water_Intake (liters), Workout_Frequency (days/week),  Experience_Level, BMI
target_col = ['Weight (kg)','Height (m)','Max_BPM','Avg_BPM','Resting_BPM','Session_Duration (hours)','Calories_Burned','Fat_Percentage','Water_Intake (liters)','Workout_Frequency (days/week)','Experience_Level','BMI']

for dt in target_col:
    # Coerce to numeric; entries that cannot be parsed become NaN.
    data[dt] = pd.to_numeric(data[dt], errors='coerce')

    # Summarise a NaN-free view of this column only. Dropping rows from `data`
    # itself would also remove them from every column analysed afterwards.
    values = data[dt].dropna()
    if values.empty:
        # statistics.mode raises on empty input, so report and move on.
        print(f"No numeric values in {dt.title()}\n")
        continue

    max_val = values.max()
    min_val = values.min()
    range_val = max_val - min_val
    mean = round(values.mean(), 2)
    mode = round(statistics.mode(values), 2)

    # Sample variance (ddof=1). The standard deviation is derived from the
    # unrounded variance so the two-decimal rounding is applied only once.
    raw_var = np.nanvar(values, ddof=1)
    var = round(raw_var, 2)
    std = round(raw_var ** 0.5, 2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviation of {dt.title()} : {std}\n")

Max of Weight (Kg) : 129.9
Min of Weight (Kg) : 40.0
Range of Weight (Kg) : 89.9
Mean of Weight (Kg) : 73.85
Mode of Weight (Kg) : 75.6
Variance of Weight (Kg) : 449.76
Standard Deviance of Weight (Kg) : 21.21

Max of Height (M) : 2.0
Min of Height (M) : 1.5
Range of Height (M) : 0.5
Mean of Height (M) : 1.72
Mode of Height (M) : 1.62
Variance of Height (M) : 0.02
Standard Deviance of Height (M) : 0.14

Max of Max_Bpm : 199
Min of Max_Bpm : 160
Range of Max_Bpm : 39
Mean of Max_Bpm : 179.88
Mode of Max_Bpm : 198
Variance of Max_Bpm : 132.84
Standard Deviance of Max_Bpm : 11.53

Max of Avg_Bpm : 169
Min of Avg_Bpm : 120
Range of Avg_Bpm : 49
Mean of Avg_Bpm : 143.77
Mode of Avg_Bpm : 132
Variance of Avg_Bpm : 205.78
Standard Deviance of Avg_Bpm : 14.35

Max of Resting_Bpm : 74
Min of Resting_Bpm : 50
Range of Resting_Bpm : 24
Mean of Resting_Bpm : 62.22
Mode of Resting_Bpm : 50
Variance of Resting_Bpm : 53.69
Standard Deviance of Resting_Bpm : 7.33

Max of Session_Duration (Hours) : 2.0

In [10]:
import pandas as pd
import statistics
import numpy as np

# Load the gym members exercise dataset.
data = pd.read_csv("gym_members_exercise_tracking.csv")

# Descriptive Statistic : Show Top 7 Member With Most Exercise Hours Per Week
# BMI categories use left-closed intervals:
#   Underweight < 18.5 <= Normal < 25 <= Overweight < 30 <= Obese
# With right=False a BMI of exactly 18.5 is Normal, and values between
# 24.9 and 25 (or 29.9 and 30) stay in the lower category.
data['BMI Status'] = pd.cut(
    data['BMI'],
    bins=[0, 18.5, 25, 30, float('inf')],
    labels=['Underweight', 'Normal', 'Overweight', 'Obese'],
    right=False,
)

# Weekly exercise hours = hours per session x sessions per week.
data['Total Exercise per Week (hours)'] = data['Session_Duration (hours)'] * data['Workout_Frequency (days/week)']

# Rank members from most to least weekly exercise.
sorted_data = data.sort_values(by='Total Exercise per Week (hours)', ascending=False)

print(sorted_data[['Gender', 'Age', 'Total Exercise per Week (hours)','BMI Status']].head(7))

     Gender  Age  Total Exercise per Week (hours)   BMI Status
348  Female   41                             9.95       Normal
66     Male   32                             9.95        Obese
964  Female   56                             9.95       Normal
848  Female   28                             9.95  Underweight
943    Male   59                             9.95        Obese
440  Female   18                             9.90   Overweight
513    Male   36                             9.90       Normal


In [12]:
import pandas as pd
import statistics
import numpy as np

# Load the gym members exercise dataset.
data = pd.read_csv("gym_members_exercise_tracking.csv")

# Descriptive Statistic : Show Top 7 Member With Most Expensive and Cheapest Training Cost
# BMI categories use left-closed intervals:
#   Underweight < 18.5 <= Normal < 25 <= Overweight < 30 <= Obese
# With right=False a BMI of exactly 18.5 is Normal, and values between
# 24.9 and 25 (or 29.9 and 30) stay in the lower category.
data['BMI Status'] = pd.cut(
    data['BMI'],
    bins=[0, 18.5, 25, 30, float('inf')],
    labels=['Underweight', 'Normal', 'Overweight', 'Obese'],
    right=False,
)

# Promo flag -> BMI statuses that receive that flag.
is_promo = {
    "Yes": [
        "Underweight",
        "Obese",
        "Overweight",
    ],
    "No": [
        "Normal",
    ],
}


def assign_promo(promo):
    """Return the promo flag for a BMI status.

    Looks up which entry of ``is_promo`` lists ``promo`` and returns that
    entry's key ("Yes" or "No"). Returns "Unknown" when no entry lists it.
    """
    matching_flags = (
        flag for flag, statuses in is_promo.items() if promo in statuses
    )
    return next(matching_flags, "Unknown")
    
# Flag every member with the promo label derived from their BMI status.
promo_flags = data['BMI Status'].apply(assign_promo)
data['Promo'] = promo_flags

# Weekly exercise hours = hours per session x sessions per week.
hours_per_session = data['Session_Duration (hours)']
sessions_per_week = data['Workout_Frequency (days/week)']
data['Total Exercise per Week (hours)'] = hours_per_session * sessions_per_week

# Undiscounted cost: 10000 (Rp.) for each weekly exercise hour.
weekly_hours = data['Total Exercise per Week (hours)']
data['Training Cost Raw (Rp.)'] = weekly_hours * 10000

def assign_cost_promo(price, promo):
    """Return the price after the promo discount.

    When ``promo`` equals 'Yes' the price is multiplied by 0.90;
    for any other value the price is returned unchanged.
    """
    return price * 0.90 if promo == 'Yes' else price
    
def _discounted_cost(row):
    """Price one member row: raw cost adjusted by that row's promo flag."""
    return assign_cost_promo(row['Training Cost Raw (Rp.)'], row['Promo'])


data['Training Cost (Rp.)'] = data.apply(_discounted_cost, axis=1)

# Columns shown in both rankings.
cost_report_cols = [
    'Gender',
    'Age',
    'Total Exercise per Week (hours)',
    'BMI Status',
    'Promo',
    'Training Cost (Rp.)',
]

# Cheapest first, then most expensive first.
sorted_data_least = data.sort_values(by='Training Cost (Rp.)')
sorted_data_top = data.sort_values(by='Training Cost (Rp.)', ascending=False)

print('\nTop 7 Member with Lowest Cost Training\n')
print(sorted_data_least.head(7)[cost_report_cols])
print('\nTop 7 Member with Highest Cost Training\n')
print(sorted_data_top.head(7)[cost_report_cols])


Top 7 Member with Lowest Cost Training

     Gender  Age  Total Exercise per Week (hours)  BMI Status Promo  \
526    Male   22                             1.04  Overweight   Yes   
259    Male   53                             1.04       Obese   Yes   
298    Male   50                             1.06       Obese   Yes   
806    Male   52                             1.06       Obese   Yes   
151    Male   37                             1.08  Overweight   Yes   
604  Female   37                             1.08  Overweight   Yes   
619  Female   25                             1.10       Obese   Yes   

     Training Cost (Rp.)  
526               9360.0  
259               9360.0  
298               9540.0  
806               9540.0  
151               9720.0  
604               9720.0  
619               9900.0  

Top 7 Member with Highest Cost Training

     Gender  Age  Total Exercise per Week (hours) BMI Status Promo  \
348  Female   41                             9.95     Normal  

In [14]:
# Descriptive Statistic : Show Training Cost Max, Min, Average, Total Cost, and Total Cost Cut For Promo
# NOTE(review): `data` is not loaded in this cell; it must already hold the
# 'Training Cost Raw (Rp.)' and 'Training Cost (Rp.)' columns from an earlier cell.
# The f-strings use outer double quotes: before Python 3.12, reusing the enclosing
# quote character inside a replacement field is a syntax error.
print(f"Total Training Cost Rp. {data['Training Cost (Rp.)'].sum()}")
print(f"The Highest Training Cost Rp. {data['Training Cost (Rp.)'].max()}")
print(f"The Minimum Training Cost Rp. {data['Training Cost (Rp.)'].min()}")
# Built-in round() works for both Python floats and NumPy scalars.
print(f"The Average Training Cost Rp. {round(data['Training Cost (Rp.)'].mean(), 2)}")

# Per-member amount removed by the promo: undiscounted cost minus charged cost.
data['Cut For Promo'] = data['Training Cost Raw (Rp.)'] - data['Training Cost (Rp.)']
print(f"The Cut for Promo Rp. {data['Cut For Promo'].sum()}")

Total Training Cost Rp. 40034800.0
The Highest Training Cost Rp. 99500.0
The Minium Training Cost Rp. 9360.0
The Average Training Cost Rp. 41145.73
The Cut for Promo Rp. 2533800.0


In [24]:
import pandas as pd
import statistics
import numpy as np

# Descriptive Statistic : Show Top 7 Member With Most Expensive and Cheapest Training Cost After Extra Cost Tax
# Extra-cost flag -> workout types that receive that flag.
extra_cost = {
    "Yes": [
        "Cardio",
        "HIIT",
    ],
    "No": [
        "Yoga",
        "Strength",
    ],
}


def assign_extra_cost(extra):
    """Return the extra-cost flag for a workout type.

    Looks up which entry of ``extra_cost`` lists ``extra`` and returns that
    entry's key ("Yes" or "No"). Returns "Unknown" when no entry lists it.
    """
    matching_flags = (
        flag for flag, workouts in extra_cost.items() if extra in workouts
    )
    return next(matching_flags, "Unknown")
    
# Flag each member for the surcharge based on their workout type.
# NOTE(review): `data` is not loaded in this cell; it must already hold
# 'Workout_Type' and 'Training Cost Raw (Rp.)' from an earlier cell.
data['With Extra Cost'] = data['Workout_Type'].apply(assign_extra_cost)

# Multiplier applied to the price of flagged workouts.
EXTRA_COST_RATE = 1.05


def apply_extra_cost(price, with_extra_cost):
    """Return the price including the extra-cost surcharge.

    Deliberately not named ``assign_extra_cost``: that name belongs to the
    one-argument workout-type classifier used just above, and redefining it
    here would silently replace the classifier for any later caller.

    Args:
        price: Cost before the surcharge.
        with_extra_cost: Surcharge flag; only 'Yes' triggers the surcharge.

    Returns:
        ``price * EXTRA_COST_RATE`` when flagged 'Yes', otherwise ``price``.
    """
    if with_extra_cost == 'Yes':
        return price * EXTRA_COST_RATE
    return price


# NOTE(review): the cost is recomputed from the raw cost and written back to
# 'Training Cost (Rp.)', so whatever that column held before is overwritten
# rather than combined with the surcharge. Confirm that is intended.
data['Training Cost (Rp.)'] = data.apply(
    lambda row: apply_extra_cost(row['Training Cost Raw (Rp.)'], row['With Extra Cost']),
    axis=1,
)

# Columns shown in both rankings.
extra_cost_report_cols = [
    'Gender',
    'Age',
    'Total Exercise per Week (hours)',
    'Training Cost (Rp.)',
    'With Extra Cost',
]

# Cheapest first, then most expensive first.
sorted_data_least = data.sort_values(by='Training Cost (Rp.)')
sorted_data_top = data.sort_values(by='Training Cost (Rp.)', ascending=False)

print('\nTop 7 Member with Lowest Cost Training After Extra Cost Tax\n')
print(sorted_data_least.head(7)[extra_cost_report_cols])
print('\nTop 7 Member with Highest Cost Training After Extra Cost Tax\n')
print(sorted_data_top.head(7)[extra_cost_report_cols])


Top 7 Member with Lowest Cost Training After Extra Cost Tax

     Gender  Age  Total Exercise per Week (hours)  Training Cost (Rp.)  \
409  Female   20                             1.00              10000.0   
106    Male   41                             1.02              10200.0   
259    Male   53                             1.04              10400.0   
126  Female   32                             1.02              10710.0   
742  Female   23                             1.02              10710.0   
526    Male   22                             1.04              10920.0   
619  Female   25                             1.10              11000.0   

    With Extra Cost  
409              No  
106              No  
259              No  
126             Yes  
742             Yes  
526             Yes  
619              No  

Top 7 Member with Highest Cost Training After Extra Cost Tax

     Gender  Age  Total Exercise per Week (hours)  Training Cost (Rp.)  \
66     Male   32                

In [9]:
import pandas as pd
import statistics
import numpy as np

# Load the gym members exercise dataset.
data = pd.read_csv("gym_members_exercise_tracking.csv")

# Descriptive Statistic : Average Water Intake, Workout Frequency, Experience_Level, Avg BPM, Session Duration, Calories_Burned, Age per Workout Type
target_col = [
    'Water_Intake (liters)',
    'Workout_Frequency (days/week)',
    'Experience_Level',
    'Avg_BPM',
    'Session_Duration (hours)',
    'Calories_Burned',
    'Age',
]

# Coerce every analysed column to numeric (unparseable entries become NaN),
# then keep only rows complete in those columns and in the grouping key.
coerced_columns = {
    name: pd.to_numeric(data[name], errors='coerce') for name in target_col
}
data = data.assign(**coerced_columns).dropna(subset=[*target_col, 'Workout_Type'])

# Mean per workout type, highest calorie burn first, rounded to two decimals.
grouped = (
    data.groupby('Workout_Type')[target_col]
    .mean()
    .sort_values(by='Calories_Burned', ascending=False)
    .round(2)
)
display(grouped)

Unnamed: 0_level_0,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,Avg_BPM,Session_Duration (hours),Calories_Burned,Age
Workout_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
HIIT,2.65,3.35,1.84,143.52,1.29,925.81,38.95
Strength,2.6,3.36,1.8,144.31,1.26,910.7,38.95
Yoga,2.64,3.35,1.87,143.27,1.26,903.19,39.23
Cardio,2.62,3.24,1.75,143.89,1.22,884.51,37.67


In [4]:
import pandas as pd
import numpy as np
from datetime import datetime

# Load the gym members exercise dataset.
data = pd.read_csv("gym_members_exercise_tracking.csv")

# Descriptive Statistic : Average Water Intake, Workout Frequency, Experience_Level, Avg BPM, Session Duration, Calories_Burned, Age per Gen
target_col = [
    'Water_Intake (liters)',
    'Workout_Frequency (days/week)',
    'Experience_Level',
    'Avg_BPM',
    'Session_Duration (hours)',
    'Calories_Burned',
]

# Coerce every analysed column to numeric (unparseable entries become NaN),
# then keep only rows that are complete in those columns.
coerced_columns = {
    name: pd.to_numeric(data[name], errors='coerce') for name in target_col
}
data = data.assign(**coerced_columns).dropna(subset=target_col)

# Estimate each member's birth year from their age.
# NOTE(review): the year is taken from the clock at run time, so the derived
# birth years shift with the run date. Confirm which year 'Age' refers to.
current_year = datetime.now().year
data['Birth_Year'] = current_year - data['Age']

def get_generation(year):
    """Classify a birth year into a generation label.

    Inclusive ranges: 1997-2012 "Gen Z", 1981-1996 "Millennial",
    1965-1980 "Gen X", 1946-1964 "Boomer". Any other year gives "Other".
    """
    cohorts = (
        (1997, 2012, "Gen Z"),
        (1981, 1996, "Millennial"),
        (1965, 1980, "Gen X"),
        (1946, 1964, "Boomer"),
    )
    for first_year, last_year, label in cohorts:
        if first_year <= year <= last_year:
            return label
    return "Other"

# Label every member with the generation of their estimated birth year.
generation_labels = data['Birth_Year'].apply(get_generation)
data['Generation'] = generation_labels

# Mean per generation, highest calorie burn first, rounded to two decimals.
grouped = (
    data.groupby('Generation')[target_col]
    .mean()
    .sort_values(by='Calories_Burned', ascending=False)
    .round(2)
)
display(grouped)

Unnamed: 0_level_0,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,Avg_BPM,Session_Duration (hours),Calories_Burned
Generation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Gen Z,2.58,3.28,1.81,143.23,1.26,950.5
Millennial,2.64,3.36,1.82,143.8,1.26,919.07
Gen X,2.65,3.31,1.8,144.18,1.25,852.55
