In [None]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor


# Lifts kept for the analysis; titles must match `exercise_title` exactly.
LIFTS = ["Front Squat", "Bench Press (Barbell)", "Shoulder Press (Dumbbell)", "Clean", "Lat Pulldown (Cable)"]

# Load the workout log, keep only complete rows for the selected lifts,
# and add a per-row volume (reps x weight).
wd = (
    pd.read_csv("workout_data.csv")
    # unparseable timestamps become NaT and are removed by the dropna below
    .assign(start_time=lambda df: pd.to_datetime(df["start_time"], errors="coerce"))
    .dropna(subset=["start_time", "exercise_title", "weight_lbs", "reps"])
    .loc[lambda df: df["exercise_title"].isin(LIFTS)]
    .assign(volume=lambda df: df["reps"] * df["weight_lbs"])
)

# Collapse to one row per (workout start time, lift) with the modelling targets.
workout_targets = (
    wd.groupby(["start_time", "exercise_title"], as_index=False)
      .agg(
          total_volume=pd.NamedAgg(column="volume", aggfunc="sum"),
          max_weight=pd.NamedAgg(column="weight_lbs", aggfunc="max"),
          # highest rep count among the grouped rows -- not necessarily the
          # reps performed at `max_weight`
          top_set_reps=pd.NamedAgg(column="reps", aggfunc="max"),
          total_reps=pd.NamedAgg(column="reps", aggfunc="sum"),
      )
      # calendar date of the workout, used later as a join key
      .assign(workout_date=lambda df: df["start_time"].dt.date)
)
workout_targets.head()

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date
0,2024-03-01 15:00:00,Lat Pulldown (Cable),5400.0,160.0,10,36,2024-03-01
1,2024-03-04 12:45:00,Lat Pulldown (Cable),4312.0,143.0,8,32,2024-03-04
2,2024-03-05 12:17:00,Bench Press (Barbell),4545.0,225.0,6,29,2024-03-05
3,2024-03-07 12:30:00,Bench Press (Barbell),4895.0,205.0,6,31,2024-03-07
4,2024-03-07 12:30:00,Shoulder Press (Dumbbell),2400.0,70.0,8,37,2024-03-07


In [70]:
# Load the sleep export and reduce it to one row of averaged features per date.
sleep = pd.read_csv("sleeps.csv")
sleep.columns = sleep.columns.str.strip()  # headers may carry stray whitespace

# `assign` evaluates its keywords in order, so `sleep_date` is derived from the
# already-parsed "Cycle start time" (unparseable values become NaT).
sleep = sleep.assign(
    **{
        "Cycle start time": lambda df: pd.to_datetime(df["Cycle start time"], errors="coerce"),
        "sleep_date": lambda df: df["Cycle start time"].dt.date,
    }
)

# Mean of each sleep metric per cycle-start date.
# NOTE(review): if a date can hold several records (e.g. a nap plus the main
# sleep), the mean blends them -- confirm that is the intended behaviour.
sleep_feats = sleep.groupby("sleep_date", as_index=False).agg(
    **{
        feature: pd.NamedAgg(column=source, aggfunc="mean")
        for feature, source in {
            "sleep_eff": "Sleep efficiency %",           # share of time in bed spent asleep
            "sleep_dur": "Asleep duration (min)",        # minutes asleep
            "awake_min": "Awake duration (min)",         # minutes awake
            "light_min": "Light sleep duration (min)",   # minutes of light sleep
            "deep_min": "Deep (SWS) duration (min)",     # minutes of deep (slow-wave) sleep
            "rem_min": "REM duration (min)",             # minutes of REM sleep
        }.items()
    }
)

sleep_feats.head()

Unnamed: 0,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-02-07,87.0,426.0,65.0,227.0,98.0,101.0
1,2024-02-08,87.0,424.0,60.0,205.0,119.0,100.0
2,2024-02-09,89.0,453.0,50.0,247.0,98.0,108.0
3,2024-02-10,90.0,411.0,46.0,195.0,93.0,123.0
4,2024-02-11,86.0,411.0,65.0,288.0,53.0,70.0


In [71]:
# Pair every workout with the sleep record dated one day earlier.
# The join key is the workout's calendar date minus one day, converted back to
# a plain `date` so it matches the type of `sleep_feats["sleep_date"]`.
# NOTE(review): this assumes the prior night's sleep cycle starts on the
# previous calendar day; a cycle that starts after midnight would be dated the
# same day as the workout -- confirm against the export.
workout_targets["sleep_date"] = (
    pd.to_datetime(workout_targets["workout_date"]) - pd.Timedelta(days=1)
).dt.date

# Left join: workouts without a matching sleep record are kept with NaN features.
model_df = pd.merge(workout_targets, sleep_feats, how="left", on="sleep_date")
model_df.head()

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-03-01 15:00:00,Lat Pulldown (Cable),5400.0,160.0,10,36,2024-03-01,2024-02-29,85.0,418.0,74.0,227.0,92.0,99.0
1,2024-03-04 12:45:00,Lat Pulldown (Cable),4312.0,143.0,8,32,2024-03-04,2024-03-03,89.0,464.0,55.0,289.0,87.0,88.0
2,2024-03-05 12:17:00,Bench Press (Barbell),4545.0,225.0,6,29,2024-03-05,2024-03-04,91.0,453.0,40.0,220.0,81.0,152.0
3,2024-03-07 12:30:00,Bench Press (Barbell),4895.0,205.0,6,31,2024-03-07,2024-03-06,89.0,476.0,57.0,248.0,106.0,122.0
4,2024-03-07 12:30:00,Shoulder Press (Dumbbell),2400.0,70.0,8,37,2024-03-07,2024-03-06,89.0,476.0,57.0,248.0,106.0,122.0


In [72]:
# Load the physiological-cycle export (daily resting heart rate, heart rate
# variability, etc.), average it per calendar date, attach it to the modelling
# frame on the workout date, and save the result.
phys = pd.read_csv("physiological_cycles.csv")
phys.columns = phys.columns.str.strip()  # same header clean-up as the sleep export
phys["Cycle start time"] = pd.to_datetime(phys["Cycle start time"])  # parse datetime
phys["phys_date"] = phys["Cycle start time"].dt.date  # calendar date used as the join key

phys_feats = (
    phys.groupby("phys_date", as_index=False)
        .agg(
            recovery=("Recovery score %", "mean"), # recovery score percentage based on HRV, RHR, RR, sleep performance and quality, skin temp, SpO2
            rhr=("Resting heart rate (bpm)", "mean"), # resting heart rate in beats per minute
            hrv=("Heart rate variability (ms)", "mean"), # heart rate variability in milliseconds
            rr=("Respiratory rate (rpm)", "mean"), # respiratory rate in respirations per minute
            spo2=("Blood oxygen %", "mean"), # blood oxygen saturation percentage
            skin_temp=("Skin temp (celsius)", "mean"), # skin temperature in celsius
            # NOTE(review): the workout-day strain presumably includes the workout
            # itself -- confirm before using it as a predictor of lifting targets.
            day_strain=("Day Strain", "mean"), # day strain score based on cardiovascular strain
            sleep_need=("Sleep need (min)", "mean"), # sleep need in minutes
            sleep_debt=("Sleep debt (min)", "mean"), # sleep debt in minutes
            sleep_consistency=("Sleep consistency %", "mean"), # sleep consistency percentage based on regularity of sleep schedule
            sleep_performance=("Sleep performance %", "mean"), # sleep performance percentage based on sleep quality
            in_bed=("In bed duration (min)", "mean"), # in bed duration in minutes
        )
)

# Merge physiological features into the modelling frame (left join on date).
# Re-running this cell used to merge onto an already-merged `model_df`, which
# made pandas emit duplicate `_x`/`_y` columns. Dropping any physiological
# columns left over from a previous run first keeps the cell idempotent; on a
# fresh run nothing is dropped.
model_df = (
    model_df.drop(columns=phys_feats.columns, errors="ignore")
            .merge(phys_feats, left_on="workout_date", right_on="phys_date", how="left")
)

# save final modeling dataframe to csv
model_df.to_csv("Health_lifting.csv", index=False)

# Preview last so the notebook actually renders it (a bare expression in the
# middle of a cell is never displayed).
model_df.head()
