In [None]:
import os
import pickle
import torch
from torch.utils.data import DataLoader
from torch import nn, optim

In [None]:
# ---------- Helpers ----------
def load_pickle(path):
    """Read the file at ``path`` in binary mode and return the unpickled object.

    Args:
        path: Location of the pickle file; passed straight to ``open``.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Note:
        Unpickling can execute arbitrary code, so only use this on files
        that come from a trusted source.
    """
    with open(path, mode="rb") as source:
        restored = pickle.load(source)
    return restored

# ---------- device ----------
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---------- load the pickled train / test data ----------
# The unpickled objects are handed to PrefixDataset as its first argument.
train_df = load_pickle("data_pickles/train_prefix_dataset.pkl")
test_df  = load_pickle("data_pickles/test_prefix_dataset.pkl")

# PrefixDataset and the *_col / *_cols variables are expected to be defined in
# earlier cells (not shown in this section).
train_dataset = PrefixDataset(train_df, activity_col, resource_col, month_col, trace_cols, y_cols)
test_dataset  = PrefixDataset(test_df, activity_col, resource_col, month_col, trace_cols, y_cols)

# Pinned (page-locked) host memory only speeds up host->GPU copies. On a
# CPU-only run it cannot be used and PyTorch emits a warning, so enable it
# only when we are actually training on CUDA.
use_pinned_memory = device.type == "cuda"

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True,
                          num_workers=4, pin_memory=use_pinned_memory)
test_loader  = DataLoader(test_dataset, batch_size=128, shuffle=False,
                          num_workers=2, pin_memory=use_pinned_memory)

In [None]:
# ---------- model ----------
# Vocabulary sizes and column lists are expected to come from earlier cells.
model = LSTMCollectiveIDP(
    activity_vocab_size=act_vocab,
    resource_vocab_size=res_vocab,
    month_vocab_size=month_vocab,
    num_trace_features=len(trace_cols),
    num_output_labels=len(y_cols),
    embedding_dim=16,
    lstm_hidden=128,
    fc_hidden=128,
    dropout=0.1,
)
model.to(device)   # place the model's parameters on the selected device

# ---------- optimizer + loss ----------
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# BCEWithLogitsLoss applies the sigmoid internally, so the model must output
# raw logits (no final sigmoid layer) — TODO confirm against the model definition.
criterion = nn.BCEWithLogitsLoss()

# ---------- optional: mixed precision ----------
# AMP is only switched on for CUDA runs; on CPU there is no gradient scaler.
use_amp = device.type == "cuda"
if use_amp:
    scaler = torch.cuda.amp.GradScaler()
else:
    scaler = None

In [None]:
# ---------- training loop ----------
# Runs `num_epochs` passes over `train_loader`, prints the sample-weighted mean
# training loss after each epoch, and writes one checkpoint file per epoch.
num_epochs = 10

# torch.save does not create missing parent directories. Create the checkpoint
# folder before training starts so the first save cannot fail after a full
# epoch of work has already been spent.
checkpoint_dir = "checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0  # running sum of (batch mean loss * batch size)
    for x_act, x_res, x_month, x_trace, y in train_loader:
        # Move the batch to the training device. non_blocking=True allows the
        # copy to overlap with compute when the source tensors are pinned.
        # Shapes/dtypes below are the original author's notes — TODO confirm
        # against PrefixDataset.
        x_act = x_act.to(device, non_blocking=True)      # LongTensor (B, T)
        x_res = x_res.to(device, non_blocking=True)
        x_month = x_month.to(device, non_blocking=True)
        x_trace = x_trace.to(device, non_blocking=True)  # FloatTensor (B, num_trace)
        y = y.to(device, non_blocking=True)              # FloatTensor (B, num_labels)

        optimizer.zero_grad()

        if use_amp:
            # Mixed-precision path: forward + loss under autocast, then let the
            # GradScaler scale the loss, step the optimizer, and adapt its scale.
            with torch.cuda.amp.autocast():
                # forward: adapt if your model expects grouped inputs
                logits = model(x_act, x_res, x_month, x_trace)  # prefer raw logits
                loss = criterion(logits, y)
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            # Full-precision path (no scaler exists when AMP is off).
            logits = model(x_act, x_res, x_month, x_trace)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()

        # Weight the batch loss by batch size so a smaller final batch does
        # not skew the epoch average.
        total_loss += loss.item() * x_act.size(0)

    avg_loss = total_loss / len(train_loader.dataset)
    # The separator is a real em dash; the previous literal was a mis-decoded
    # UTF-8 sequence that printed as garbage.
    print(f"Epoch {epoch+1}/{num_epochs} — train loss: {avg_loss:.4f}")

    # ---------- validation (optional) ----------
    # Switch to eval mode for metric computation; model.train() at the top of
    # the next epoch switches it back.
    model.eval()
    # compute metrics on test_loader...

    # ---------- save checkpoint ----------
    # Everything needed to resume: epoch counter, model weights, optimizer
    # state, and the AMP scaler state (None when AMP is disabled).
    ckpt = {
        "epoch": epoch+1,
        "model_state": model.state_dict(),
        "optim_state": optimizer.state_dict(),
        "scaler_state": scaler.state_dict() if scaler is not None else None
    }
    torch.save(ckpt, os.path.join(checkpoint_dir, f"ckpt_epoch{epoch+1}.pt"))