In [None]:
# train_model.py
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# --- Configuration ---------------------------------------------------------
# CSV read below; it must provide a "timestamp" column plus the feature and
# target columns named here.
CSV_PATH = "synthetic_wsht_weather_migraine_prob_600days_hourly_FIXED.csv"
# Columns used as model inputs.
FEATURES = ["workload_0_10", "stress_0_10", "hrv_rmssd_ms"]
# Column used as the prediction target.
TARGET_LABEL = "migraine_prob_next_hour"
# Number of consecutive rows in each input window.
SEQ_LEN = 24
# Seed applied to both NumPy and TensorFlow below.
RANDOM_SEED = 123

# --- Reproducibility -------------------------------------------------------
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# --- Data loading ----------------------------------------------------------
# Load, order rows by time, and renumber the index from zero.
df = (
    pd.read_csv(CSV_PATH, parse_dates=["timestamp"])
    .sort_values("timestamp")
    .reset_index(drop=True)
)
# Keep only rows that actually have a target value.
df = df[df[TARGET_LABEL].notna()].reset_index(drop=True)
# NOTE(review): astype(int) truncates toward zero. If this column holds
# fractional probabilities (as its name suggests) rather than 0/1 flags,
# every value below 1.0 becomes 0 -- confirm against the CSV contents.
df[TARGET_LABEL] = df[TARGET_LABEL].astype(int)

def build_sequences(frame, features, seq_len, fit_rows=None):
    """Scale the feature columns and cut them into fixed-length sliding windows.

    A ``StandardScaler`` is fitted, written to ``scaler.pkl`` in the current
    working directory, and then used to transform ``frame[features]``.
    Window ``i`` holds the scaled rows ``i .. i + seq_len - 1`` and is paired
    with the ``TARGET_LABEL`` value of row ``i + seq_len`` (the first row
    after the window).

    NOTE(review): the label row's own features are not part of its window.
    Confirm this offset is intended for a "next hour" target.

    Args:
        frame: DataFrame holding the ``features`` columns and the
            ``TARGET_LABEL`` column, already in the desired row order.
        features: List of column names used as model inputs.
        seq_len: Number of consecutive rows per window; must be >= 1.
        fit_rows: Optional number of leading rows used to fit the scaler.
            ``None`` (the default) fits on every row of ``frame``. If the
            result is later split into train/validation/test, that default
            lets held-out rows influence the scaling statistics; pass the
            number of training rows to avoid it.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape
        ``(n_windows, seq_len, len(features))`` and ``y`` has shape
        ``(n_windows,)``, where ``n_windows = max(len(frame) - seq_len, 0)``.
        ``X`` keeps its three dimensions even when there are no windows.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    feature_frame = frame[features]

    scaler = StandardScaler()
    # iloc[:None] selects every row, so the default matches fitting on all data.
    scaler.fit(feature_frame.iloc[:fit_rows])
    joblib.dump(scaler, "scaler.pkl")  # save scaler

    feat = scaler.transform(feature_frame)
    y = frame[TARGET_LABEL].to_numpy().astype(int)

    # Row indices of every window, shape (n_windows, seq_len). Gathering with
    # this index matrix yields (n_windows, seq_len, n_features) in one step
    # and keeps that shape when n_windows is 0.
    n_windows = max(len(frame) - seq_len, 0)
    row_idx = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    return feat[row_idx], y[seq_len:]

# Window the full, time-sorted frame once; the split below happens afterwards.
# NOTE(review): build_sequences fits its scaler on every row of df, so
# validation/test rows contribute to the scaling statistics applied to the
# training windows.
X, y = build_sequences(df, FEATURES, SEQ_LEN)

# Chronological split: the first 70% of windows train, the next 15% validate,
# and whatever remains is the test set. No shuffling, so order is preserved.
N = len(X)
train_n = int(N * 0.7)
val_n = int(N * 0.15)
X_train, X_val, X_test = np.split(X, [train_n, train_n + val_n])
y_train, y_val, y_test = np.split(y, [train_n, train_n + val_n])

def make_model(input_len, input_dim):
    """Build and compile the LSTM binary classifier.

    Layer stack: Masking -> LSTM(64) -> Dropout(0.3) -> Dense(32, relu)
    -> Dense(1, sigmoid). The model is compiled with Adam (learning rate
    1e-3), binary cross-entropy loss, and the metrics AUC (named "auc")
    and accuracy.

    Args:
        input_len: Number of timesteps in each input sequence.
        input_dim: Number of features per timestep.

    Returns:
        The compiled ``keras.Model``.
    """
    seq_input = keras.Input(shape=(input_len, input_dim))

    # NOTE(review): Masking targets timesteps whose features are all exactly
    # 0.0. The sequences built in this file are standardized and not padded,
    # so this layer is presumably a no-op here -- confirm it is wanted.
    masked = layers.Masking(mask_value=0.0)(seq_input)
    encoded = layers.LSTM(64)(masked)
    regularized = layers.Dropout(0.3)(encoded)
    hidden = layers.Dense(32, activation="relu")(regularized)
    probability = layers.Dense(1, activation="sigmoid")(hidden)

    net = keras.Model(inputs=seq_input, outputs=probability)

    tracked_metrics = [keras.metrics.AUC(name="auc"), "accuracy"]
    net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=tracked_metrics,
    )
    return net

# One input row per timestep in the window, one column per feature.
model = make_model(input_len=SEQ_LEN, input_dim=len(FEATURES))

# Both callbacks watch validation AUC ("val_auc" matches the metric named
# "auc" at compile time) and treat higher as better.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_auc",
    mode="max",
    factor=0.5,   # halve the learning rate ...
    patience=3,   # ... after 3 epochs without improvement
    verbose=1,
)
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_auc",
    mode="max",
    patience=8,                 # stop after 8 epochs without improvement
    restore_best_weights=True,  # roll back to the best epoch's weights
    verbose=1,
)
callbacks = [reduce_lr, early_stop]

history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=callbacks,
    verbose=1,
)

# evaluate() returns the loss followed by the compiled metrics.
test_results = model.evaluate(X_test, y_test, verbose=0)
print("Test evaluation:", test_results)

# Persist the trained model; the scaler file named in the message below is
# written by build_sequences.
model.save("lstm_migraine_model.h5")
print("Saved lstm_migraine_model.h5 and scaler.pkl")
