In [2]:
# ------------------------------------------------------------
# 0. Imports
# ------------------------------------------------------------
import pandas as pd, numpy as np, tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from tensorflow.keras import layers, models, callbacks

# ------------------------------------------------------------
# 1. Load & resample to daily granularity
# ------------------------------------------------------------
df = pd.read_csv("Fixed_cleaned_activities.csv",
                 parse_dates=["Begin Timestamp"], dayfirst=True)

df = df.sort_values("Begin Timestamp")
df_daily = (
    df.set_index("Begin Timestamp")
      .resample("D")
      .agg({"Distance (Raw)": "sum",        # or first/mean as sensible
            "Calories": "sum",
            "Average Heart Rate (bpm)": "mean"})
      .rename(columns={"Distance (Raw)": "dist"})
)

# fill missing days (no session) with zeros / NaNs as appropriate
df_daily["dist"].fillna(0, inplace=True)
df_daily["Calories"].fillna(0, inplace=True)

# ------------------------------------------------------------
# 2. Rolling-load features
# ------------------------------------------------------------
df_daily["acute_load"]   = df_daily["dist"].rolling("7D").sum()
df_daily["chronic_load"] = df_daily["dist"].rolling("28D").sum()/4
df_daily["acwr"]         = df_daily["acute_load"] / df_daily["chronic_load"]
df_daily["acwr"]         = df_daily["acwr"].replace([np.inf, -np.inf], np.nan)

# binary label: risk today (threshold 1.5)
df_daily["injury_risk"]  = (df_daily["acwr"] > 1.5).astype(int)

# drop first 28 days (no full window yet) and rows with NaNs
df_daily = df_daily.dropna()

# ------------------------------------------------------------
# 3. Normalise numeric columns (fit on train split only)
# ------------------------------------------------------------
feature_cols = ["dist", "Calories", "Average Heart Rate (bpm)",
                "acute_load", "chronic_load", "acwr"]

# chronological split idx
test_frac = 0.2
val_frac  = 0.2
n_total   = len(df_daily)
n_test    = int(n_total * test_frac)
n_val     = int(n_total * val_frac)

train_df = df_daily.iloc[:-(n_val + n_test)]
val_df   = df_daily.iloc[-(n_val + n_test):-n_test]
test_df  = df_daily.iloc[-n_test:]

scaler = StandardScaler().fit(train_df[feature_cols])
for sub in (train_df, val_df, test_df):
    sub[feature_cols] = scaler.transform(sub[feature_cols])

# ------------------------------------------------------------
# 4. Turn into 3-D arrays (samples, time, features)
# ------------------------------------------------------------
def make_windows(dataframe, window_size=28, label_offset=0):
    X, y = [], []
    values = dataframe[feature_cols + ["injury_risk"]].values
    for i in range(window_size, len(values) - label_offset):
        X.append(values[i-window_size:i, :-1])
        y.append(values[i + label_offset, -1])
    return np.array(X), np.array(y)

window_size, label_offset = 28, 0
X_train, y_train = make_windows(train_df, window_size, label_offset)
X_val, y_val     = make_windows(val_df,   window_size, label_offset)
X_test, y_test   = make_windows(test_df,  window_size, label_offset)

# ------------------------------------------------------------
# 5. Compute class weights (handle imbalance)
# ------------------------------------------------------------
cw = class_weight.compute_class_weight(
        "balanced",
        classes=np.unique(y_train),
        y=y_train
     )
class_wt = {0: cw[0], 1: cw[1]}

# ------------------------------------------------------------
# 6. LSTM model
# ------------------------------------------------------------
tf.random.set_seed(42)
model = models.Sequential([
    layers.Input(shape=(window_size, len(feature_cols))),
    layers.Masking(mask_value=0.0),           # optional, if you zero-pad
    layers.LSTM(64, return_sequences=False),
    layers.Dropout(0.3),
    layers.Dense(32, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=[tf.keras.metrics.AUC(name="auc")])

cb = callbacks.EarlyStopping(monitor="val_auc",
                             patience=10, mode="max",
                             restore_best_weights=True)

history = model.fit(X_train, y_train,
                    epochs=200,
                    batch_size=32,
                    validation_data=(X_val, y_val),
                    class_weight=class_wt,
                    callbacks=[cb],
                    verbose=2)

# ------------------------------------------------------------
# 7. Evaluation
# ------------------------------------------------------------
test_preds = model.predict(X_test).flatten()
test_auc   = tf.keras.metrics.AUC()(y_test, test_preds).numpy()
print(f"Test AUC = {test_auc:.3f}")

# Confusion threshold at 0.5 (tune if needed)
test_labels = (test_preds >= 0.5).astype(int)
from sklearn.metrics import classification_report
print(classification_report(y_test, test_labels))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_daily["dist"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_daily["Calories"].fillna(0, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the

Epoch 1/200
3/3 - 7s - 2s/step - auc: 0.4545 - loss: 0.6984 - val_auc: 0.0000e+00 - val_loss: 0.7602
Epoch 2/200
3/3 - 0s - 64ms/step - auc: 0.7731 - loss: 0.6699 - val_auc: 0.0000e+00 - val_loss: 0.8001
Epoch 3/200
3/3 - 0s - 63ms/step - auc: 0.8310 - loss: 0.6541 - val_auc: 0.0000e+00 - val_loss: 0.8395
Epoch 4/200
3/3 - 0s - 67ms/step - auc: 0.8380 - loss: 0.6332 - val_auc: 0.0000e+00 - val_loss: 0.8784
Epoch 5/200
3/3 - 0s - 68ms/step - auc: 0.8573 - loss: 0.6155 - val_auc: 0.0000e+00 - val_loss: 0.9190
Epoch 6/200
3/3 - 0s - 63ms/step - auc: 0.8711 - loss: 0.6028 - val_auc: 0.0000e+00 - val_loss: 0.9674
Epoch 7/200
3/3 - 0s - 78ms/step - auc: 0.8619 - loss: 0.5859 - val_auc: 0.0000e+00 - val_loss: 1.0040
Epoch 8/200
3/3 - 0s - 136ms/step - auc: 0.8789 - loss: 0.5582 - val_auc: 0.0000e+00 - val_loss: 1.0218
Epoch 9/200
3/3 - 0s - 77ms/step - auc: 0.8719 - loss: 0.5473 - val_auc: 0.0000e+00 - val_loss: 1.0297
Epoch 10/200
3/3 - 1s - 203ms/step - auc: 0.8557 - loss: 0.5545 - val_auc:

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
