In [7]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [8]:
import kagglehub

# Ask kagglehub for the dataset (no version pinned in the handle) and keep the
# directory it returns; later cells read the CSV files from there.
path = kagglehub.dataset_download("jockeroika/life-style-data")

# Echo the resolved location so the reader can see where the files landed.
print(f"Path to dataset files: {path}")

Path to dataset files: C:\Users\Nscoo\.cache\kagglehub\datasets\jockeroika\life-style-data\versions\7


In [9]:
# Load the main dataset.
# Build the CSV location from `path` (the directory returned by
# kagglehub.dataset_download in the previous cell) instead of a hard-coded
# absolute path, so the notebook runs on any machine and dataset version.
data_file = Path(path) / "Final_data.csv"
df = pd.read_csv(data_file)

# Quick look at the data
print(df.head())
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
df.info()
print(df.describe())


     Age  Gender  Weight (kg)  Height (m)  Max_BPM  Avg_BPM  Resting_BPM  \
0  34.91    Male        65.27        1.62   188.58   157.65        69.05   
1  23.37  Female        56.41        1.55   179.43   131.75        73.18   
2  33.20  Female        58.98        1.67   175.04   123.95        54.96   
3  38.69  Female        93.78        1.70   191.21   155.10        50.07   
4  45.09    Male        52.42        1.88   193.58   152.88        70.84   

   Session_Duration (hours)  Calories_Burned Workout_Type  ...  \
0                      1.00          1080.90     Strength  ...   
1                      1.37          1809.91         HIIT  ...   
2                      0.91           802.26       Cardio  ...   
3                      1.10          1450.79         HIIT  ...   
4                      1.08          1166.40     Strength  ...   

   cal_from_macros  pct_carbs  protein_per_kg   pct_HRR  pct_maxHR  \
0          2139.59   0.500432        1.624789  0.741237   0.835985   
1     

In [10]:
# Normalise the column headers in place: trim surrounding whitespace, turn
# spaces into underscores and drop parentheses, e.g.
# "Session_Duration (hours)" -> "Session_Duration_hours".
df.columns = [
    header.strip().replace(" ", "_").replace("(", "").replace(")", "")
    for header in df.columns
]

# Replace every missing value with 0 for simplicity.
# NOTE(review): zero-filling pulls down any mean computed later if the data
# really contains gaps — confirm the dataset has no (or few) NaNs.
df.fillna(value=0, inplace=True)

## Operational Dashboard – Mid-Manager

**Purpose:**  
This dashboard monitors day-to-day user engagement, session performance, and workout completion, helping managers ensure programs are running effectively.

**Audience:**  
Mid-level managers responsible for tracking user activity, workout adherence, and overall engagement metrics.

**Key KPIs:**  
- **Average Session Duration:** Shows how long a typical workout session lasts, in hours.  
- **Average Calories Burned per Session:** Measures workout intensity and effectiveness.  
- **Workout Type Distribution:** Shows which workouts are most popular to optimize class offerings.  
- **Average Heart Rate Metrics:** Ensures safe exercise intensity and monitors user health.


In [11]:
# Operational dashboard: compute the KPIs first, then render one chart and a
# set of single-number KPI cards.

# KPI 1: Average Calories Burned
avg_calories = df['Calories_Burned'].mean()

# KPI 2: Workout Type Counts
workout_counts = df['Workout_Type'].value_counts()

# KPI 3: Avg Heart Rate Metrics
avg_bpm = df['Avg_BPM'].mean()
avg_pct_maxHR = df['pct_maxHR'].mean()

# Supporting KPI: average session length
avg_session = df['Session_Duration_hours'].mean()

# Plots
# Bar chart: Workout counts.
# Convert the counts Series into a two-column frame with explicit names so the
# axes are labelled meaningfully regardless of how the installed pandas version
# names the value_counts() result.
workout_counts_df = workout_counts.rename_axis('Workout_Type').reset_index(name='Sessions')
fig1 = px.bar(workout_counts_df, x='Workout_Type', y='Sessions',
              title='Workout Type Distribution',
              labels={'Workout_Type': 'Workout Type', 'Sessions': 'Number of Sessions'})
fig1.show()

# KPI cards using Plotly
fig_kpi1 = go.Figure(go.Indicator(
    mode="number",
    value=avg_session,
    title={"text": "Average Session Duration (hours)"}))
fig_kpi1.show()

fig_kpi2 = go.Figure(go.Indicator(
    mode="number",
    value=avg_calories,
    title={"text": "Average Calories Burned per Session"}))
fig_kpi2.show()

# Heart-rate KPI cards: these averages were previously computed but never
# displayed, even though the dashboard lists them as a key KPI.
fig_kpi_bpm = go.Figure(go.Indicator(
    mode="number",
    value=avg_bpm,
    number={"valueformat": ".1f", "suffix": " bpm"},
    title={"text": "Average Heart Rate (Avg_BPM)"}))
fig_kpi_bpm.show()

# NOTE(review): pct_maxHR looks like a 0-1 fraction in the sample rows —
# confirm before switching this card to a percentage format.
fig_kpi_pct_maxhr = go.Figure(go.Indicator(
    mode="number",
    value=avg_pct_maxHR,
    number={"valueformat": ".3f"},
    title={"text": "Average pct_maxHR"}))
fig_kpi_pct_maxhr.show()

## Executive Dashboard

**Purpose:**  
Provides high-level insights into user demographics, engagement trends, and overall health improvements to guide strategic business decisions.

**Audience:**  
Executives and senior leadership who need to understand the overall performance, growth, and revenue potential of the platform.

**Key KPIs:**  
- **Total Users:** Measures overall growth and platform reach.  
- **Gender Distribution:** Identifies engagement differences across demographics for marketing strategies.  
- **Average Lean Mass by Gender:** Indicates effectiveness of programs on health outcomes.  
- **Average Calorie Balance:** Shows overall user adherence to diet and fitness programs, highlighting potential for upsell or premium features.


In [12]:
# Executive dashboard: headline numbers plus two demographic charts.

# --- KPI values -------------------------------------------------------------
# Total "users" is the number of rows in the frame.
# NOTE(review): this equals the user count only if each row is one user — confirm.
total_users = len(df)

# How many rows fall under each gender label.
gender_dist = df['Gender'].value_counts()

# Overall mean lean mass.
avg_lean_mass = df['lean_mass_kg'].mean()

# Mean calorie balance (the cal_balance column; no weight-change figure is used).
avg_cal_balance = df['cal_balance'].mean()

# --- Charts -----------------------------------------------------------------
# Pie: share of rows per gender.
fig3 = px.pie(
    gender_dist,
    names=gender_dist.index,
    values=gender_dist.values,
    title='User Gender Distribution',
)
fig3.show()

# Bar: mean lean mass for each gender, with the value printed above each bar.
avg_lean_by_gender = df.groupby('Gender', as_index=False)['lean_mass_kg'].mean()
fig4 = px.bar(
    avg_lean_by_gender,
    x='Gender',
    y='lean_mass_kg',
    title='Average Lean Mass by Gender',
    text='lean_mass_kg',
)
fig4.update_traces(texttemplate='%{text:.2f} kg', textposition='outside')
fig4.update_layout(yaxis_title='Lean Mass (kg)')
fig4.show()

# --- KPI cards --------------------------------------------------------------
users_card = go.Indicator(mode="number", value=total_users, title=dict(text="Total Users"))
fig_kpi3 = go.Figure(users_card)
fig_kpi3.show()

balance_card = go.Indicator(mode="number", value=avg_cal_balance, title=dict(text="Average Calorie Balance"))
fig_kpi4 = go.Figure(balance_card)
fig_kpi4.show()


## Custom Dashboard

**Purpose:**  
Explores correlations and trends between user activity, workouts, and calories burned to identify patterns and guide product innovation.

**Audience:**  
Product managers, researchers, or analysts looking for insights to design new features or optimize existing programs.

**Key KPIs:**  
- **Correlation between Session Duration and Calories Burned:** Helps understand how workout length impacts energy expenditure.  
- **Top 5 Workouts by Average Calories Burned:** Identifies the most effective exercises for users.  
- **Avg Session Duration vs Avg BPM by Workout Type:** Tracks exercise intensity and efficiency.  
- **Scatter Plot of Calories Burned vs Session Duration:** Helps discover trends and inform personalized recommendations.


In [13]:
# Custom dashboard: relationships between session length, heart rate and
# calories burned.

# KPI 1: Correlation between session duration and calories burned
corr = df['Session_Duration_hours'].corr(df['Calories_Burned'])

# KPI 2: Top Workout Types by Avg Calories Burned
# head(5) keeps at most five rows; fewer are returned when the data has fewer
# workout types.
top_workouts = df.groupby('Workout_Type')['Calories_Burned'].mean().sort_values(ascending=False).head(5)
top_workouts_df = top_workouts.reset_index().rename(columns={'Calories_Burned': 'Avg_Calories_Burned'})
print("Top 5 Workouts by Average Calories Burned:")
print(top_workouts_df)

# KPI 3: Avg Session Duration vs Avg BPM by Workout Type
# This KPI is listed for the dashboard but was never computed; one row per
# workout type holding the mean of both metrics.
intensity_by_workout = (
    df.groupby('Workout_Type')[['Session_Duration_hours', 'Avg_BPM']]
    .mean()
    .reset_index()
)

# Scatter plot: Calories Burned vs Session Duration
fig5 = px.scatter(
    df, x='Session_Duration_hours', y='Calories_Burned', color='Gender',
    title='Calories Burned vs Session Duration',
    labels={'Session_Duration_hours': 'Session Duration (hrs)', 'Calories_Burned': 'Calories Burned'},
    template='plotly_white'
)
fig5.update_traces(marker=dict(size=6, opacity=0.7))
fig5.show()

# Bar chart: Top Workouts by Avg Calories
fig6 = px.bar(
    top_workouts_df, x='Workout_Type', y='Avg_Calories_Burned',
    title='Top 5 Workouts by Average Calories Burned',
    text='Avg_Calories_Burned',
    labels={'Workout_Type': 'Workout Type', 'Avg_Calories_Burned': 'Avg Calories Burned'},
    template='plotly_white',
    color='Avg_Calories_Burned'
)
fig6.update_traces(texttemplate='%{text:.0f}', textposition='outside')
fig6.update_layout(yaxis_title='Avg Calories Burned')
fig6.show()

# Scatter plot: one labelled point per workout type (KPI 3)
fig7 = px.scatter(
    intensity_by_workout, x='Session_Duration_hours', y='Avg_BPM',
    color='Workout_Type', text='Workout_Type',
    title='Avg Session Duration vs Avg BPM by Workout Type',
    labels={'Session_Duration_hours': 'Avg Session Duration (hrs)', 'Avg_BPM': 'Avg BPM',
            'Workout_Type': 'Workout Type'},
    template='plotly_white'
)
fig7.update_traces(marker=dict(size=14), textposition='top center')
fig7.show()

print(f"Correlation between Session Duration and Calories Burned: {corr:.2f}")

Top 5 Workouts by Average Calories Burned:
  Workout_Type  Avg_Calories_Burned
0         HIIT          1652.533209
1     Strength          1361.430459
2       Cardio          1211.544680
3         Yoga           897.107216


Correlation between Session Duration and Calories Burned: 0.81
