## Reflection 
I wanted to experiement with creating a nueral network for my trianing data. This model experiment was unsuccesful because the dataset is too small and noisy. A different model like XG boost is better. The goal was to create a network that takes my health and training data to predict the weights I can lift in the future. I wanted to include my health data so I have more features relating to performance, however it makes the data set noisy. 

## Modeling Approach

A feedforward neural network is implemented using PyTorch:

- Input layer matching feature dimension
- Two hidden layers with ReLU activation
- Output layer predicting delta max
- Mean Squared Error loss
- Adam optimizer

Features are standardized prior to training.



In [13]:
import pandas as pd 
import numpy as np
import torch 
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader



wd = pd.read_csv("workout_data.csv")
wd["start_time"] = pd.to_datetime(wd["start_time"], errors="coerce") # convert to datetime
wd = wd.dropna(subset=["start_time", "exercise_title", "weight_lbs", "reps"]) # drop rows with missing values

# filter to specific lifts
LIFTS = ["Front Squat", "Bench Press (Barbell)", "Shoulder Press (Dumbbell)", "Clean", "Lat Pulldown (Cable)"]

# make sure the lift titles match and then is filtered
wd = wd[wd["exercise_title"].isin(LIFTS)].copy()

# calculate volume
wd["volume"] = wd["reps"] * wd["weight_lbs"]

# aggregate to workout targets
workout_targets = (
    wd.groupby(["start_time", "exercise_title"], as_index=False)
      .agg(
          total_volume=("volume", "sum"),
          max_weight=("weight_lbs", "max"),
          top_set_reps=("reps", "max"),
          total_reps=("reps", "sum"),
      )
)

# add workout date column
workout_targets["workout_date"] = workout_targets["start_time"].dt.date
workout_targets.head()

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date
0,2024-03-01 15:00:00,Lat Pulldown (Cable),5400.0,160.0,10,36,2024-03-01
1,2024-03-04 12:45:00,Lat Pulldown (Cable),4312.0,143.0,8,32,2024-03-04
2,2024-03-05 12:17:00,Bench Press (Barbell),4545.0,225.0,6,29,2024-03-05
3,2024-03-07 12:30:00,Bench Press (Barbell),4895.0,205.0,6,31,2024-03-07
4,2024-03-07 12:30:00,Shoulder Press (Dumbbell),2400.0,70.0,8,37,2024-03-07


In [2]:
# load my sleep data
sleep = pd.read_csv("sleeps.csv")
sleep.columns = sleep.columns.str.strip()

sleep["Cycle start time"] = pd.to_datetime(sleep["Cycle start time"], errors="coerce")
sleep["sleep_date"] = sleep["Cycle start time"].dt.date

# aggregate sleep features by date, keep track of mean values of sleep efficiency, duration, and awake time
sleep_feats = (
    sleep.groupby("sleep_date", as_index=False) # group by sleep date
        .agg( # aggregate features
            sleep_eff=("Sleep efficiency %", "mean"), # sleep efficiency i.e percentage of time asleep while in bed
            sleep_dur=("Asleep duration (min)", "mean"), # sleep duration in minutes
            awake_min=("Awake duration (min)", "mean"), # awake time in minutes
            light_min=("Light sleep duration (min)", "mean"), # light sleep duration in minutes
            deep_min=("Deep (SWS) duration (min)", "mean"), # deep sleep duration in minutes
            rem_min=("REM duration (min)", "mean") # REM sleep duration in minutes
        )
)

sleep_feats.head()

Unnamed: 0,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-02-07,87.0,426.0,65.0,227.0,98.0,101.0
1,2024-02-08,87.0,424.0,60.0,205.0,119.0,100.0
2,2024-02-09,89.0,453.0,50.0,247.0,98.0,108.0
3,2024-02-10,90.0,411.0,46.0,195.0,93.0,123.0
4,2024-02-11,86.0,411.0,65.0,288.0,53.0,70.0


In [3]:
# merge workout targets with prior night's sleep features
# this line shifts the workout date back by one day to align with prior sleep data
workout_targets["sleep_date"] = pd.to_datetime(workout_targets["workout_date"]) - pd.Timedelta(days=1)
# convert back to date for merging
workout_targets["sleep_date"] = workout_targets["sleep_date"].dt.date

# merge dataframes
model_df = workout_targets.merge(sleep_feats, on="sleep_date", how="left")
model_df.head() 

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date,sleep_date,sleep_eff,sleep_dur,awake_min,light_min,deep_min,rem_min
0,2024-03-01 15:00:00,Lat Pulldown (Cable),5400.0,160.0,10,36,2024-03-01,2024-02-29,85.0,418.0,74.0,227.0,92.0,99.0
1,2024-03-04 12:45:00,Lat Pulldown (Cable),4312.0,143.0,8,32,2024-03-04,2024-03-03,89.0,464.0,55.0,289.0,87.0,88.0
2,2024-03-05 12:17:00,Bench Press (Barbell),4545.0,225.0,6,29,2024-03-05,2024-03-04,91.0,453.0,40.0,220.0,81.0,152.0
3,2024-03-07 12:30:00,Bench Press (Barbell),4895.0,205.0,6,31,2024-03-07,2024-03-06,89.0,476.0,57.0,248.0,106.0,122.0
4,2024-03-07 12:30:00,Shoulder Press (Dumbbell),2400.0,70.0,8,37,2024-03-07,2024-03-06,89.0,476.0,57.0,248.0,106.0,122.0


In [4]:
# load physiological cycle data, physiological data is my daily resting heart rate and heart rate variability, etc 
phys = pd.read_csv("physiological_cycles.csv")
phys["Cycle start time"] = pd.to_datetime(phys["Cycle start time"]) # parse datetime
phys["phys_date"] = phys["Cycle start time"].dt.date # extract date

phys_feats = (
    phys.groupby("phys_date", as_index=False)
        .agg(
            recovery=("Recovery score %", "mean"), # recovery score percentage based on HRV, RHR, RR, sleep performance and quality, skin temp, SpO2
            rhr=("Resting heart rate (bpm)", "mean"), # resting heart rate in beats per minute
            hrv=("Heart rate variability (ms)", "mean"), # heart rate variability in milliseconds
            rr=("Respiratory rate (rpm)", "mean"), # respiratory rate in respirations per minute
            spo2=("Blood oxygen %", "mean"), # blood oxygen saturation percentage
            skin_temp=("Skin temp (celsius)", "mean"), # skin temperature in celsius
            day_strain=("Day Strain", "mean"), # day strain score based on cardiovascular strain 
            sleep_need=("Sleep need (min)", "mean"), # sleep need in minutes
            sleep_debt=("Sleep debt (min)", "mean"), # sleep debt in minutes
            sleep_consistency=("Sleep consistency %", "mean"), # sleep consistency percentage based on regularity of sleep schedule
            sleep_performance=("Sleep performance %", "mean"), # sleep performance percentage based on sleep quality
            in_bed=("In bed duration (min)", "mean"), # in bed duration in minutes
        )
)


# merge physiological features with model dataframe
model_df = model_df.merge(phys_feats, left_on="workout_date", right_on="phys_date", how="left")
model_df.head()
# save final modeling dataframe to csv
model_df.to_csv("Health_lifting.csv", index=False)


In [5]:
# load new data from csv 
df = pd.read_csv("Health_lifting.csv")
df["start_time"] = pd.to_datetime(df["start_time"], errors="coerce") # convert to datetime
df.head()

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date,sleep_date,sleep_eff,sleep_dur,...,hrv,rr,spo2,skin_temp,day_strain,sleep_need,sleep_debt,sleep_consistency,sleep_performance,in_bed
0,2024-03-01 15:00:00,Lat Pulldown (Cable),5400.0,160.0,10,36,2024-03-01,2024-02-29,85.0,418.0,...,94.0,12.5,95.86,33.67,15.6,671.0,127.0,85.0,31.0,227.0
1,2024-03-04 12:45:00,Lat Pulldown (Cable),4312.0,143.0,8,32,2024-03-04,2024-03-03,89.0,464.0,...,124.0,12.7,92.39,33.6,14.5,594.0,58.0,73.0,76.0,493.0
2,2024-03-05 12:17:00,Bench Press (Barbell),4545.0,225.0,6,29,2024-03-05,2024-03-04,91.0,453.0,...,133.0,12.9,97.0,33.44,18.7,606.0,91.0,77.0,63.0,444.0
3,2024-03-07 12:30:00,Bench Press (Barbell),4895.0,205.0,6,31,2024-03-07,2024-03-06,89.0,476.0,...,117.0,12.8,94.83,32.93,15.6,641.0,116.0,91.0,66.0,502.0
4,2024-03-07 12:30:00,Shoulder Press (Dumbbell),2400.0,70.0,8,37,2024-03-07,2024-03-06,89.0,476.0,...,117.0,12.8,94.83,32.93,15.6,641.0,116.0,91.0,66.0,502.0


In [6]:
# quick check of the dataframe, want to make sure everything looks good before starting modeling
print(df.head)
print(df.shape)
print(df.columns)

<bound method NDFrame.head of              start_time             exercise_title  total_volume  max_weight  \
0   2024-03-01 15:00:00       Lat Pulldown (Cable)        5400.0       160.0   
1   2024-03-04 12:45:00       Lat Pulldown (Cable)        4312.0       143.0   
2   2024-03-05 12:17:00      Bench Press (Barbell)        4545.0       225.0   
3   2024-03-07 12:30:00      Bench Press (Barbell)        4895.0       205.0   
4   2024-03-07 12:30:00  Shoulder Press (Dumbbell)        2400.0        70.0   
..                  ...                        ...           ...         ...   
454 2026-01-07 14:49:00      Bench Press (Barbell)        2515.0       245.0   
455 2026-01-11 14:38:00      Bench Press (Barbell)        3375.0       225.0   
456 2026-01-12 09:07:00  Shoulder Press (Dumbbell)         960.0        80.0   
457 2026-01-13 06:53:00                      Clean        2340.0       195.0   
458 2026-01-13 06:53:00                Front Squat        2700.0       225.0   

     top_

### Creating a nueral network to predict my bench press heavist weight lifted 
This model will provide both a raw and more conservative output
the raw output will serve as a "if you feel good" target weight while the coservative output is a recommended weight based on recovery + gaurdrail features 

In [7]:
# filter df so there is only data for "Bench Press (Barbell)"
bench_df = df[df["exercise_title"] == "Bench Press (Barbell)"].copy()

# Parse dates and sort by workout_date
bench_df["workout_date"] = pd.to_datetime(bench_df["workout_date"], errors="coerce")
bench_df = bench_df.sort_values("workout_date").reset_index(drop=True)

# add lag features, the previous workout's max weight, total volume, total reps, and days since previous workout
bench_df["prev_max_weight"]   = bench_df["max_weight"].shift(1)
bench_df["prev_total_volume"] = bench_df["total_volume"].shift(1)
bench_df["prev_total_reps"]   = bench_df["total_reps"].shift(1)
bench_df["days_since_prev"]   = (bench_df["workout_date"] - bench_df["workout_date"].shift(1)).dt.days

# Target = current session max_weight
bench_df["target_delta"] = bench_df["max_weight"] - bench_df["prev_max_weight"]

# health features to be used in modeling from the csv 
health_features = ["recovery", "hrv", "rhr", "sleep_dur", "sleep_eff"]


# Keep only features that exist (prevents crashes if a column name differs)
health_features = [c for c in health_features if c in bench_df.columns]

# final feature columns for modeling
feature_cols = [
    "prev_max_weight",
    "prev_total_volume",
    "prev_total_reps",
    "days_since_prev",
] + health_features

# drop rows with missing values
data = bench_df.dropna(subset=feature_cols + ["target_delta"]).copy()
data["days_since_prev"] = data["days_since_prev"].clip(lower=-15, upper=15)

# train-test split (80-20 split)
N = len(data)
split = int(N * 0.8)
 
train_df = data.iloc[:split].copy() # training data
test_df  = data.iloc[split:].copy() # testing data

# prepare numpy arrays for modeling
X_train = train_df[feature_cols].astype(float).values
y_train = train_df["target_delta"].astype(float).values.reshape(-1, 1)

# prepare test data
X_test  = test_df[feature_cols].astype(float).values
y_test  = test_df["target_delta"].astype(float).values.reshape(-1, 1)

# Baseline: predict same as last time
baseline_pred = test_df["prev_max_weight"].values.reshape(-1, 1)
baseline_mae = np.mean(np.abs(baseline_pred - y_test))

# standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s  = scaler.transform(X_test)

# Torch tensors, this is needed to create a DataLoader for batching i.e feeding data to the model in batches
X_train_t = torch.tensor(X_train_s, dtype=torch.float32)
y_train_t = torch.tensor(y_train,   dtype=torch.float32)

X_test_t  = torch.tensor(X_test_s,  dtype=torch.float32)
y_test_t  = torch.tensor(y_test,    dtype=torch.float32)

# DataLoader for batching
train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=32, shuffle=True)

In [8]:
# defining a simple regression MLP
# this is a feedforward neural network with two hidden layers: 32 and 16 neurons respectively 
class MLP(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
        )
    def forward(self, x):
        return self.net(x)

model = MLP(in_dim=X_train_t.shape[1])
loss_fn = nn.SmoothL1Loss()
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

In [9]:
# training loop 
epochs = 300
for epoch in range(epochs):
    model.train()
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()


In [12]:
# y_test is delta now, so baseline predicts 0
baseline_pred = np.zeros_like(y_test)          # same shape as y_test
baseline_mae = np.mean(np.abs(baseline_pred - y_test))

# model evaluation 
model.eval()
with torch.no_grad():
    test_pred = model(X_test_t).cpu().numpy()  # predicted deltas

model_mae = np.mean(np.abs(test_pred - y_test))

# print the results 
print(f"Rows used (bench sessions): {N}")
print(f"Features: {feature_cols}")
print(f"Baseline MAE (predict delta=0): {baseline_mae:.2f} lb") # shows the baseline mean absolute error, i.e how well the baseline performed in this case predicting no change
print(f"Model MAE (delta): {model_mae:.2f} lb") # shows the model mean absolute error, i.e how well the model performed in predicting the change in max weight
print("Improvement over baseline:", baseline_mae - model_mae) # shows the improvement of the model over the baseline, negative values indicate the model performed worse than the baseline


Rows used (bench sessions): 107
Features: ['prev_max_weight', 'prev_total_volume', 'prev_total_reps', 'days_since_prev', 'recovery', 'hrv', 'rhr', 'sleep_dur', 'sleep_eff']
Baseline MAE (predict delta=0): 14.55 lb
Model MAE (delta): 29.67 lb
Improvement over baseline: -15.121349638158625
