In [33]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor

# load workout data
wd = pd.read_csv("workout_data.csv")

# parse time -> datetime
wd["start_time"] = pd.to_datetime(wd["start_time"])

# basic workout targets per session 
wd["volume"] = wd["reps"] * wd["weight_lbs"]

workout_targets = (
    wd.groupby("start_time", as_index=False)
      .agg(total_volume=("volume", "sum"), # total volume per session
           max_weight=("weight_lbs", "max"), # max weight lifted per session
           total_reps=("reps", "sum"),
           exercise_title=("title", "first")) # total reps per session
)

workout_targets["workout_date"] = workout_targets["start_time"].dt.date
wd.head()

Unnamed: 0,title,start_time,end_time,description,exercise_title,superset_id,exercise_notes,set_index,set_type,weight_lbs,reps,distance_miles,duration_seconds,rpe,volume
0,Morning workout ☀️,2026-01-13 06:53:00,"13 Jan 2026, 08:15",,Clean,,,0,normal,195.0,3,,,,585.0
1,Morning workout ☀️,2026-01-13 06:53:00,"13 Jan 2026, 08:15",,Clean,,,1,normal,195.0,3,,,,585.0
2,Morning workout ☀️,2026-01-13 06:53:00,"13 Jan 2026, 08:15",,Clean,,,2,normal,195.0,3,,,,585.0
3,Morning workout ☀️,2026-01-13 06:53:00,"13 Jan 2026, 08:15",,Clean,,,3,normal,195.0,3,,,,585.0
4,Morning workout ☀️,2026-01-13 06:53:00,"13 Jan 2026, 08:15",,Front Squat,,,0,normal,225.0,3,,,,675.0


In [34]:
# load my sleep data
sleep = pd.read_csv("sleeps.csv")
sleep.columns = sleep.columns.str.strip()

sleep["Cycle start time"] = pd.to_datetime(sleep["Cycle start time"], errors="coerce")
sleep["sleep_date"] = sleep["Cycle start time"].dt.date

# aggregate sleep features by date, keep track of mean values of sleep efficiency, duration, and awake time
sleep_feats = (
    sleep.groupby("sleep_date", as_index=False) # group by sleep date
        .agg( # aggregate features
            sleep_eff=("Sleep efficiency %", "mean"), # sleep efficiency i.e percentage of time asleep while in bed
            sleep_dur=("Asleep duration (min)", "mean"), # sleep duration in minutes
            awake_min=("Awake duration (min)", "mean"), # awake time in minutes
            light_min=("Light sleep duration (min)", "mean"), # light sleep duration in minutes
            deep_min=("Deep (SWS) duration (min)", "mean"), # deep sleep duration in minutes
            rem_min=("REM duration (min)", "mean") # REM sleep duration in minutes
        )
)

sleep_feats.head()

Unnamed: 0,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-02-07,87.0,426.0,65.0,227.0,98.0,101.0
1,2024-02-08,87.0,424.0,60.0,205.0,119.0,100.0
2,2024-02-09,89.0,453.0,50.0,247.0,98.0,108.0
3,2024-02-10,90.0,411.0,46.0,195.0,93.0,123.0
4,2024-02-11,86.0,411.0,65.0,288.0,53.0,70.0


In [35]:
# merge workout targets with prior night's sleep features
# this line shifts the workout date back by one day to align with prior sleep data
workout_targets["sleep_date"] = pd.to_datetime(workout_targets["workout_date"]) - pd.Timedelta(days=1)
# convert back to date for merging
workout_targets["sleep_date"] = workout_targets["sleep_date"].dt.date

# merge dataframes
model_df = workout_targets.merge(sleep_feats, on="sleep_date", how="left")
model_df.head() 

Unnamed: 0,start_time,total_volume,max_weight,total_reps,exercise_title,workout_date,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-03-01 15:00:00,19595.0,545.0,160,Pull,2024-03-01,2024-02-29,85.0,418.0,74.0,227.0,92.0,99.0
1,2024-03-04 12:45:00,23250.0,545.0,235,Pull,2024-03-04,2024-03-03,89.0,464.0,55.0,289.0,87.0,88.0
2,2024-03-05 12:17:00,13235.0,225.0,211,Push,2024-03-05,2024-03-04,91.0,453.0,40.0,220.0,81.0,152.0
3,2024-03-06 12:41:00,15300.0,275.0,146,Leg,2024-03-06,2024-03-05,85.0,379.0,65.0,253.0,58.0,68.0
4,2024-03-07 12:30:00,13281.0,205.0,184,Push,2024-03-07,2024-03-06,89.0,476.0,57.0,248.0,106.0,122.0


In [36]:
# load physiological cycle data, physiological data is my daily resting heart rate and heart rate variability, etc 
phys = pd.read_csv("physiological_cycles.csv")
phys["Cycle start time"] = pd.to_datetime(phys["Cycle start time"]) # parse datetime
phys["phys_date"] = phys["Cycle start time"].dt.date # extract date

phys_feats = (
    phys.groupby("phys_date", as_index=False)
        .agg(
            recovery=("Recovery score %", "mean"), # recovery score percentage based on HRV, RHR, RR, sleep performance and quality, skin temp, SpO2
            rhr=("Resting heart rate (bpm)", "mean"), # resting heart rate in beats per minute
            hrv=("Heart rate variability (ms)", "mean"), # heart rate variability in milliseconds
            rr=("Respiratory rate (rpm)", "mean"), # respiratory rate in respirations per minute
            spo2=("Blood oxygen %", "mean"), # blood oxygen saturation percentage
            skin_temp=("Skin temp (celsius)", "mean"), # skin temperature in celsius
            day_strain=("Day Strain", "mean"), # day strain score based on cardiovascular strain 
            sleep_need=("Sleep need (min)", "mean"), # sleep need in minutes
            sleep_debt=("Sleep debt (min)", "mean"), # sleep debt in minutes
            sleep_consistency=("Sleep consistency %", "mean"), # sleep consistency percentage based on regularity of sleep schedule
            sleep_performance=("Sleep performance %", "mean"), # sleep performance percentage based on sleep quality
            in_bed=("In bed duration (min)", "mean"), # in bed duration in minutes
        )
)


# merge physiological features with model dataframe
model_df = model_df.merge(phys_feats, left_on="workout_date", right_on="phys_date", how="left")
model_df.head()


Unnamed: 0,start_time,total_volume,max_weight,total_reps,exercise_title,workout_date,sleep_date,sleep_eff,sleep_dur,awake_min,...,hrv,rr,spo2,skin_temp,day_strain,sleep_need,sleep_debt,sleep_consistency,sleep_performance,in_bed
0,2024-03-01 15:00:00,19595.0,545.0,160,Pull,2024-03-01,2024-02-29,85.0,418.0,74.0,...,94.0,12.5,95.86,33.67,15.6,671.0,127.0,85.0,31.0,227.0
1,2024-03-04 12:45:00,23250.0,545.0,235,Pull,2024-03-04,2024-03-03,89.0,464.0,55.0,...,124.0,12.7,92.39,33.6,14.5,594.0,58.0,73.0,76.0,493.0
2,2024-03-05 12:17:00,13235.0,225.0,211,Push,2024-03-05,2024-03-04,91.0,453.0,40.0,...,133.0,12.9,97.0,33.44,18.7,606.0,91.0,77.0,63.0,444.0
3,2024-03-06 12:41:00,15300.0,275.0,146,Leg,2024-03-06,2024-03-05,85.0,379.0,65.0,...,112.0,12.8,94.7,33.18,15.8,658.0,113.0,85.0,72.0,533.0
4,2024-03-07 12:30:00,13281.0,205.0,184,Push,2024-03-07,2024-03-06,89.0,476.0,57.0,...,117.0,12.8,94.83,32.93,15.6,641.0,116.0,91.0,66.0,502.0
