In [None]:
import pandas as pd
import matplotlib.pyplot as plt

import torch
import math

from pathlib import Path

import training.training_config as cfg
from config import RUNS_PATH, PROCESSED_CSV_PATH
from training.data import make_dataloaders
from training.io import load_or_prepare_encoded
from training.models import DecisionRegressor
from training.trainer import Trainer
from utils import get_logger, prepare_run_dir, save_json

In [None]:
# Notebook-wide logger named "train"; the level and the on/off switch come from
# the training config. Later cells guard on `if logger:`, so it may be falsy.
logger = get_logger(
    "train",
    cfg.LOG_LEVEL,
    cfg.USE_LOGGING,
)

In [None]:
# Create the (timestamped) output directory for this run under RUNS_PATH and
# report its location when a logger is available.
run_dir = prepare_run_dir(RUNS_PATH)
if logger:
    logger.info(
        "Run directory: %s",
        str(run_dir),
    )

In [None]:
# Data: fetch the train / eval / test frames together with `vocab_sizes`
# (which is later handed to the model constructor). cfg.FORCE_REBUILD is
# forwarded as the `force_rebuild` flag.
(df_train, df_eval, df_test, vocab_sizes) = load_or_prepare_encoded(
    logger=logger,
    force_rebuild=cfg.FORCE_REBUILD,
)

In [None]:
# Loader options that come straight from the training config.
loader_options = {
    "batch_size": cfg.BATCH_SIZE,
    "num_workers": cfg.NUM_WORKERS,
    "pin_memory": cfg.PIN_MEMORY,
}

# One loader per split, all built with the same options.
train_dl, eval_dl, test_dl = make_dataloaders(
    df_train=df_train,
    df_eval=df_eval,
    df_test=df_test,
    logger=logger,
    **loader_options,
)

In [None]:
# Model: a DecisionRegressor configured from `vocab_sizes` and the embedding /
# hidden-layer / dropout settings in the training config.
model = DecisionRegressor(
    vocab_sizes=vocab_sizes,
    hidden=cfg.HIDDEN,
    dropout=cfg.DROPOUT,
    embed_dim_single=cfg.EMBED_DIM_SINGLE,
    embed_dim_multi=cfg.EMBED_DIM_MULTI,
)

In [None]:
# Record the hyper-parameters used for this run next to its other artifacts so
# the run can be reproduced later. Keys are written in the order listed here.
cfg_snapshot = dict(
    # data loading
    batch_size=cfg.BATCH_SIZE,
    num_workers=cfg.NUM_WORKERS,
    pin_memory=cfg.PIN_MEMORY,
    # optimisation
    epochs=cfg.EPOCHS,
    lr=cfg.LR,
    weight_decay=cfg.WEIGHT_DECAY,
    device=cfg.DEVICE,
    use_huber=cfg.USE_HUBER,
    # architecture
    embed_dim_single=cfg.EMBED_DIM_SINGLE,
    embed_dim_multi=cfg.EMBED_DIM_MULTI,
    hidden=list(cfg.HIDDEN),  # stored as a plain list
    dropout=cfg.DROPOUT,
    vocab_sizes=vocab_sizes,
)
save_json(run_dir / "config_snapshot.json", cfg_snapshot)

In [None]:
# Trainer: wraps the model with the optimisation settings from the config and
# is told where this run's directory is (run_dir) and whether to save the best
# model (cfg.SAVE_BEST).
trainer = Trainer(
    model,
    device=cfg.DEVICE,
    lr=cfg.LR,
    weight_decay=cfg.WEIGHT_DECAY,
    use_huber=cfg.USE_HUBER,
    run_dir=run_dir,
    save_best=cfg.SAVE_BEST,
    logger=logger,
)

In [None]:
# Train for cfg.EPOCHS epochs, using the eval split as the validation set;
# whatever `fit` returns is kept in `metrics`.
metrics = trainer.fit(train_dl=train_dl, val_dl=eval_dl, epochs=cfg.EPOCHS)

In [None]:
# Plot the train / validation loss curves of the run trained above.
#
# The history is read from the current `run_dir` rather than from a hardcoded,
# machine-specific absolute path, so the notebook survives Restart & Run All on
# any machine and does not clobber `run_dir` with a stale run.
# To inspect an earlier run instead, set `run_dir = RUNS_PATH / "<run name>"`
# in a separate cell before this one.
# NOTE(review): assumes the Trainer writes `metrics.csv` with the columns
# `epoch`, `train_loss` and `val_loss` into `run_dir` — confirm against
# training.trainer.
history_path = Path(run_dir) / "metrics.csv"
if not history_path.exists():
    # Fail with an actionable message instead of a bare read_csv error.
    raise FileNotFoundError(
        f"No metrics history at {history_path}; run the training cell first."
    )

df = pd.read_csv(history_path)

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(df["epoch"], df["train_loss"], label="Train Loss")
ax.plot(df["epoch"], df["val_loss"], label="Validation Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss Over Time")
ax.legend()
ax.grid(True)
plt.show()
