In [3]:
import pandas as pd 
import numpy as np
from xgboost import XGBRegressor


df = pd.read_csv('Health_lifting.csv')

# group by exercise title and create the lagging features for all lifts

# Convert date column to datetime and sort
df["workout_date"] = pd.to_datetime(df["workout_date"], errors="coerce")
df = df.sort_values(["exercise_title", "workout_date"]).reset_index(drop=True)

g = df.groupby("exercise_title")

df["prev_max"] = g["max_weight"].shift(1)

# rolling workload 
df["volume_3"]  = g["total_volume"].transform(lambda x: x.rolling(3, min_periods=1).mean())
df["volume_7"]  = g["total_volume"].transform(lambda x: x.rolling(7, min_periods=1).mean())
df["volume_14"] = g["total_volume"].transform(lambda x: x.rolling(14, min_periods=1).mean())

# rolling recovery
df["recovery_3"] = g["recovery"].transform(lambda x: x.rolling(3, min_periods=1).mean())
df["recovery_7"] = g["recovery"].transform(lambda x: x.rolling(7, min_periods=1).mean())

# dats since last lift was performed
df["days_since"] = g["workout_date"].diff().dt.days.fillna(0).astype(int)

# pr and delta features
df["delta"] = df["max_weight"] - df["prev_max"]
df["is_pr"] = (df["delta"] > 0).astype(int)

df["last_was_pr"]   = g["is_pr"].shift(1).fillna(0).astype(int)
df["last_pr_delta"] = g["delta"].shift(1).fillna(0)

# drop rows with missing values
df = df.dropna().reset_index(drop=True)

In [7]:
# filter for main lifts only
LIFTS = ["Front Squat", "Clean", "Shoulder Press (Dumbbell)", "Lat Pulldown (Cable)"]
df = df[df['exercise_title'].isin(LIFTS)] # filter for lifts 
df.head()

Unnamed: 0,start_time,exercise_title,total_volume,max_weight,top_set_reps,total_reps,workout_date,sleep_date,sleep_eff,sleep_dur,...,volume_3,volume_7,volume_14,recovery_3,recovery_7,days_since,delta,is_pr,last_was_pr,last_pr_delta
106,2024-03-20 13:46:00,Clean,3670.0,185.0,5,30,2024-03-20,2024-03-19,89.0,431.0,...,3087.5,3087.5,3087.5,65.0,65.0,3,10.0,1,0,0.0
107,2024-03-26 16:01:00,Clean,3200.0,175.0,5,26,2024-03-26,2024-03-25,86.0,407.0,...,3125.0,3125.0,3125.0,62.0,62.0,6,-10.0,0,1,10.0
108,2024-04-06 13:54:00,Clean,2325.0,195.0,4,17,2024-04-06,2024-04-05,94.0,384.0,...,2618.333333,2806.0,2806.0,57.0,61.0,3,30.0,1,0,-10.0
109,2024-04-10 13:47:00,Clean,1455.0,205.0,2,9,2024-04-10,2024-04-09,88.0,363.0,...,2036.666667,2580.833333,2580.833333,60.5,61.4,4,10.0,1,1,30.0
110,2024-04-13 15:23:00,Clean,1800.0,205.0,3,12,2024-04-13,2024-04-12,85.0,356.0,...,1860.0,2469.285714,2469.285714,60.0,61.0,3,0.0,0,1,10.0


In [23]:

results = {} # to store results

# train and evaluate a model for each lift
for lift in LIFTS:
    lift_df = df[df["exercise_title"] == lift].sort_values("workout_date").reset_index(drop=True)
    
    # define features and target
    features = [
        "prev_max",
        "volume_3",
        "volume_7",
        "volume_14",
        "recovery_3",
        "recovery_7",
        "days_since",
        "last_was_pr",
        "last_pr_delta"
    ]

    # prepare data
    X = lift_df[features]
    y = lift_df["max_weight"]

    # time split
    split = int(len(lift_df) * 0.8)
    X_train, X_test = X.iloc[:split], X.iloc[split:]
    y_train, y_test = y.iloc[:split], y.iloc[split:]

    # train model on training set
    model = XGBRegressor(
        n_estimators=300,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )

    # fit the model
    model.fit(X_train, y_train)

    # predict absolute max
    pred = model.predict(X_test)

    # MAE
    model_mae = np.mean(np.abs(pred - y_test))

    # baseline: predict no change -> predict prev_max
    baseline_mae = np.mean(np.abs(y_test - X_test["prev_max"]))

    # calculate improvement
    improvement = baseline_mae - model_mae

    # store results
    results[lift] = (model_mae, baseline_mae, improvement)

    # display results
    print("-------------------------")
    print(f"Results for {lift}:")
    print("Model MAE:", model_mae) 
    print("Baseline MAE:", baseline_mae)
    print("Improvement:", baseline_mae - model_mae)
   

-------------------------
Results for Front Squat:
Model MAE: 35.9505672454834
Baseline MAE: 45.0
Improvement: 9.049432754516602
-------------------------
Results for Clean:
Model MAE: 26.97388053894043
Baseline MAE: 31.25
Improvement: 4.276119461059569
-------------------------
Results for Shoulder Press (Dumbbell):
Model MAE: 3.2945431795987217
Baseline MAE: 5.454545454545454
Improvement: 2.1600022749467325
-------------------------
Results for Lat Pulldown (Cable):
Model MAE: 18.25463144402755
Baseline MAE: 11.789473684210526
Improvement: -6.465157759817023


In [26]:
target = "max_weight" 
WEEKS = 12 # number of weeks to predict

# function to forecast next weeks
def forecast_next_weeks(model, last_row_df, features, weeks=12, assume_days_since=7):
    """
    last_row_df: 1-row DataFrame (the last session for that lift) with all feature columns present
    returns: list of predicted maxes for the next weeks
    """
    future = last_row_df.copy() # 1-row DF to roll forward, 1 row is used to keep feature names and types
    preds_max = [] # store the predicted maxes

    # loop for number of weeks
    for _ in range(WEEKS):
        X_future = future[features] # 1-row DF, so predict returns 1-element array
 
        yhat = float(model.predict(X_future)[0]) # predicted max_weight

        # compute next_max depending on target type
        if target == "delta": # change from previous max
            delta_pred = yhat # predicted change
            next_max = float(future["prev_max"].iloc[0]) + delta_pred # new max
        else: # absolute max
            next_max = yhat # predicted absolute max
            delta_pred = next_max - float(future["prev_max"].iloc[0]) # predicted change

        preds_max.append(next_max)

        # roll state forward (still 1-row DF)
        new_row = future.copy()
        new_row["prev_max"] = next_max
        new_row["max_weight"] = next_max
        new_row["delta"] = delta_pred

        # PR-ish flags for next session
        new_row["last_was_pr"] = int(delta_pred > 0)
        new_row["last_pr_delta"] = delta_pred

        # assumptions for next session timing + keep workload/recovery constant
        new_row["days_since"] = assume_days_since
        for col in ["total_volume", "volume_3", "volume_7", "volume_14",
                    "recovery", "recovery_3", "recovery_7"]:
            if col in new_row.columns:
                new_row[col] = float(future[col].iloc[0])

        future = new_row

    return preds_max


all_forecasts = {}

# generate forecasts for each lift
for lift in LIFTS:
    # filer for lift 
    lift_df = df[df["exercise_title"] == lift].sort_values("workout_date").reset_index(drop=True).copy()

    # skip if not enough rows
    if len(lift_df) < 10:
        print(f"Skipping {lift} (not enough rows: {len(lift_df)})")
        continue

    #  train/test split (time-based)
    split = int(len(lift_df) * 0.8)
    train_df = lift_df.iloc[:split]
    test_df  = lift_df.iloc[split:]

    X_train = train_df[features]
    y_train = train_df[target]
    X_test  = test_df[features]
    y_test  = test_df[target]

    # model 
    model = XGBRegressor(
        n_estimators=300,
        max_depth=4,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )
    model.fit(X_train, y_train)

    # start forecasting from last session of that lift
    last_row = lift_df.iloc[[-1]] # 1-row DF
    preds = forecast_next_weeks(model, last_row, features, weeks = WEEKS, assume_days_since=7) # forecast

    # store forecasts
    all_forecasts[lift] = preds

In [27]:
# display forecasts
for lift, preds in all_forecasts.items():
    print(f"\n{lift} next {WEEKS} weeks:")
    for i, w in enumerate(preds, 1):
        print(f"  Week {i}: {round(w, 1)} lb")


Front Squat next 12 weeks:
  Week 1: 209.0 lb
  Week 2: 220.3 lb
  Week 3: 219.3 lb
  Week 4: 220.1 lb
  Week 5: 219.3 lb
  Week 6: 220.1 lb
  Week 7: 219.3 lb
  Week 8: 220.1 lb
  Week 9: 219.3 lb
  Week 10: 220.1 lb
  Week 11: 219.3 lb
  Week 12: 220.1 lb

Clean next 12 weeks:
  Week 1: 210.0 lb
  Week 2: 216.5 lb
  Week 3: 215.8 lb
  Week 4: 213.0 lb
  Week 5: 211.0 lb
  Week 6: 211.0 lb
  Week 7: 211.0 lb
  Week 8: 211.0 lb
  Week 9: 213.5 lb
  Week 10: 213.5 lb
  Week 11: 211.0 lb
  Week 12: 211.0 lb

Shoulder Press (Dumbbell) next 12 weeks:
  Week 1: 83.8 lb
  Week 2: 79.3 lb
  Week 3: 79.1 lb
  Week 4: 79.1 lb
  Week 5: 78.6 lb
  Week 6: 79.1 lb
  Week 7: 78.2 lb
  Week 8: 79.1 lb
  Week 9: 78.2 lb
  Week 10: 79.1 lb
  Week 11: 78.2 lb
  Week 12: 79.1 lb

Lat Pulldown (Cable) next 12 weeks:
  Week 1: 168.4 lb
  Week 2: 171.7 lb
  Week 3: 170.3 lb
  Week 4: 169.2 lb
  Week 5: 169.8 lb
  Week 6: 170.9 lb
  Week 7: 170.3 lb
  Week 8: 169.3 lb
  Week 9: 169.9 lb
  Week 10: 170.9 lb