In [1]:
"""
TCN multi-output pipeline (poll_lens & poll_pw)
- DOWNSAMPLE = 20, SEQ_LEN = 300, HORIZON = 1 hour
- Clip targets to [0,100], robust global scalers, NaN/Inf sanitization
- Vectorized windows via sliding_window_view
- K-Fold (5) + final forced test on files 23 & 25
- Designed for Google Colab A100 (num_workers=0, AMP)
"""
import os, gc, time, math, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import joblib
from numpy.lib.stride_tricks import sliding_window_view

# sklearn
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.model_selection import KFold

# torch
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.cuda.amp import autocast, GradScaler


In [None]:
# -----------------------
# USER SETTINGS
# -----------------------
# Data location and file selection. File numbers are 1-based indices.
CSV_PATH = "/content/drive/MyDrive/Dataset_csv_merged_v2"
N_FILES = 35
EXCLUDE_FILES = {8, 19, 33}
MANDATORY_TEST = {23, 25}

# Sampling rate bookkeeping.
DOWNSAMPLE = 20
RAW_STEP_SECONDS = 0.05            # 50 ms
# round() instead of int(): int() truncates, so a quotient that lands a hair
# below the exact value because of float rounding would lose a whole step.
RAW_STEPS_PER_HOUR = round(3600 / RAW_STEP_SECONDS)  # 72 000
STEPS_PER_HOUR = RAW_STEPS_PER_HOUR // DOWNSAMPLE   # 3 600 when DOWNSAMPLE=20

SEQ_LEN = 300                      # context window
HORIZON = STEPS_PER_HOUR           # prediction horizon -> 1 hour
K_FOLDS = 5

MAX_ROWS_PER_FILE = 200_000
MAX_SEQS_PER_FILE = 20_000         # cap windows per file
SAMPLE_ROWS_FOR_SCALER = 120_000   # rows sampled across files to fit scalers
MAX_SAMPLES_PER_FILE_FOR_SCALER = 12_000

# Optimisation hyper-parameters.
BATCH_SIZE = 128
EPOCHS = 12
LR = 8e-4
PATIENCE = 4

# Reproducibility: seed the global NumPy and torch generators once, up front,
# so weight initialisation, dropout and shuffling repeat across kernel restarts.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Output locations (created eagerly so later cells can write into them).
CACHE_DIR = "/content/sequence_cache_final"
MODEL_DIR = "/content/tcn_models_final"
RESULTS_FILE = "/content/tcn_results_final.pkl"
os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

# Raw CSV columns that are read; everything else in the files is ignored.
FEATURE_COLS_BASE = [
    'Time', 'Temperature lens IR', 'Temperature window IR', 'Temperature window BTS',
    'Pressure', 'iPartOpType', 'AC_VACTW', 'Laser set power', 'Gas set pressure',
    'Gas type', 'Mach code', 'Bar material', 'Bar thickness', 'csv_time_sec'
]
TARGETS = ['poll_lens', 'poll_pw']
# Columns that are one-hot encoded instead of being used as numbers.
CATEGORICAL_COLS = ['Gas type', 'Mach code', 'Bar material', 'iPartOpType']

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)
print(f"SEQ_LEN={SEQ_LEN}, DOWNSAMPLE={DOWNSAMPLE}, HORIZON={HORIZON}, STEPS_PER_HOUR={STEPS_PER_HOUR}")


In [None]:
# -----------------------
# UTIL: file mapping (1-based indices)
# -----------------------
def get_file_map(csv_folder):
    """Return a dict mapping 1-based positions to CSV paths in `csv_folder`.

    Only directory entries whose name ends with '.csv' are considered. They are
    sorted by name and numbered 1, 2, 3, ... in that order; each value is the
    entry name joined onto `csv_folder`.
    """
    csv_names = sorted(name for name in os.listdir(csv_folder) if name.endswith('.csv'))
    return {
        position: os.path.join(csv_folder, name)
        for position, name in enumerate(csv_names, start=1)
    }

# Build the index -> path mapping once; later cells look files up by index.
file_map = get_file_map(CSV_PATH)
# Keep indices 1..N_FILES that exist in the mapping and are not explicitly excluded.
usable_files = [i for i in range(1, N_FILES+1) if i in file_map and i not in EXCLUDE_FILES]
print("Usable files (1-based):", usable_files)

# -----------------------
# CATEGORY LEVEL COLLECTION (stable one-hot)
# -----------------------
def collect_category_levels(file_indices, max_rows_each=3000):
    """Collect the distinct string levels of each categorical column.

    Parameters
    ----------
    file_indices : iterable of int
        Keys into the module-level `file_map`; unknown indices are skipped.
    max_rows_each : int
        Only the first `max_rows_each` rows of every file are inspected, so a
        level that first appears later in a file is not collected.

    Returns
    -------
    dict
        `{column: sorted list of levels}` for every name in CATEGORICAL_COLS,
        each list truncated to at most 200 entries. Values are compared as
        strings (`astype(str)`) and missing values are ignored.
    """
    levels = {c: set() for c in CATEGORICAL_COLS}
    for idx in file_indices:
        path = file_map.get(idx)
        if path is None:
            continue
        try:
            # A callable `usecols` keeps whichever categorical columns this file
            # has in a single read, instead of a separate header-only pass.
            df = pd.read_csv(path, nrows=max_rows_each, usecols=lambda c: c in CATEGORICAL_COLS)
        except Exception as exc:
            # Best effort: an unreadable file must not abort the scan, but say
            # so instead of dropping it silently.
            print(f"[collect_category_levels] skipping file {idx} ({path}): {exc}")
            continue
        for c in CATEGORICAL_COLS:
            if c in df.columns:
                levels[c].update(df[c].dropna().astype(str).unique().tolist())
        del df; gc.collect()
    # Sort for a stable one-hot column order; cap the vocabulary at 200 levels.
    return {c: sorted(found)[:200] for c, found in levels.items()}

# -----------------------
# FEATURIZE single raw df (pandas) -> returns cleaned numeric df with targets at end
# includes clipping of targets to [0,100] and global NaN/Inf handling
# -----------------------
def featurize_df(raw_df, categorical_levels):
    """Turn one raw frame into a numeric feature frame with the targets last.

    Steps, in order:
      1. clip both targets to [0, 100];
      2. replace +/-inf by NaN, then forward-fill, back-fill and zero-fill;
      3. add time-of-day, temperature, pressure and target rolling features
         (each group only when its source columns are present);
      4. one-hot encode every column in CATEGORICAL_COLS using the supplied
         levels plus an `<col>__other` flag, then drop the original column;
      5. drop 'Time' and move 'poll_lens' / 'poll_pw' to the last two columns.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Raw rows; not modified (a copy is taken).
    categorical_levels : dict
        `{column: list of known levels}`; a column missing from the dict only
        gets its `__other` flag.

    Returns
    -------
    pandas.DataFrame or None
        None when either target column is absent.
    """
    df = raw_df.copy()
    # ensure targets exist
    if 'poll_lens' not in df.columns or 'poll_pw' not in df.columns:
        return None
    # Clip targets to [0,100]
    df['poll_lens'] = pd.to_numeric(df['poll_lens'], errors='coerce').clip(0,100)
    df['poll_pw']   = pd.to_numeric(df['poll_pw'], errors='coerce').clip(0,100)

    # Replace inf and fill NaN (forward/backfill then zeros)
    df = df.replace([np.inf, -np.inf], np.nan).ffill().bfill().fillna(0)

    def _numeric(col):
        # Coerce a column to numbers; unparseable entries become NaN.
        return pd.to_numeric(df[col], errors='coerce')

    # Time cyclical
    if 'Time' in df.columns:
        day_ms = 24*3600*1000
        tnorm = (df['Time'] % day_ms) / day_ms
        df['time_sin'] = np.sin(2*np.pi*tnorm)
        df['time_cos'] = np.cos(2*np.pi*tnorm)
        df['hour'] = (df['Time'] % day_ms) / (3600*1000)

    # Temperature lens features
    # (.ffill()/.bfill() replace the deprecated fillna(method=...) spelling.)
    if 'Temperature lens IR' in df.columns:
        t = _numeric('Temperature lens IR').ffill().bfill().fillna(0)
        df['temp_lens_ma5'] = t.rolling(5, min_periods=1).mean()
        df['temp_lens_trend'] = t.diff().fillna(0)

    # Pressure features
    if 'Pressure' in df.columns:
        p = _numeric('Pressure').ffill().bfill().fillna(0)
        df['press_ma10'] = p.rolling(10, min_periods=1).mean()
        df['press_diff'] = p.diff().fillna(0)

    # Window diffs
    if 'Temperature window IR' in df.columns and 'Temperature window BTS' in df.columns:
        window_ir = _numeric('Temperature window IR').fillna(0)
        df['temp_window_diff'] = window_ir - _numeric('Temperature window BTS').fillna(0)
        # The lens column is optional; without this guard a file lacking it
        # raised KeyError here.
        if 'Temperature lens IR' in df.columns:
            df['lens_window_diff'] = _numeric('Temperature lens IR').fillna(0) - window_ir

    # Poll rollings (targets are already numeric after the clipping above)
    df['poll_lens_ma7'] = df['poll_lens'].rolling(7, min_periods=1).mean().fillna(0)
    df['poll_lens_trend'] = df['poll_lens'].diff().fillna(0)
    df['poll_pw_ma7'] = df['poll_pw'].rolling(7, min_periods=1).mean().fillna(0)
    df['poll_pw_trend'] = df['poll_pw'].diff().fillna(0)

    # Categorical one-hot stable.
    # All indicator columns are collected first and attached with one concat;
    # inserting them one at a time fragments the frame.
    onehot = {}
    for c in CATEGORICAL_COLS:
        levels = categorical_levels.get(c, [])
        if c in df.columns:
            # No NaN can be left at this point (see the sanitize step above),
            # so a fillna after astype(str) would be a no-op.
            as_text = df[c].astype(str)
        else:
            as_text = pd.Series('NA', index=df.index)
        for lvl in levels:
            onehot[f"{c}__{lvl}"] = (as_text == lvl).astype(float)
        onehot[f"{c}__other"] = (~as_text.isin(levels)).astype(float)
    if onehot:
        df = pd.concat([df, pd.DataFrame(onehot, index=df.index)], axis=1)

    # Drop originals cat columns and Time
    df = df.drop(columns=[c for c in CATEGORICAL_COLS if c in df.columns], errors='ignore')
    # Place targets at the end
    target_lens = df['poll_lens'].astype(float).copy()
    target_pw = df['poll_pw'].astype(float).copy()
    df = df.drop(columns=['Time', 'poll_lens', 'poll_pw'], errors='ignore')
    df['poll_lens'] = target_lens
    df['poll_pw'] = target_pw
    # Final sanitize
    df = df.replace([np.inf, -np.inf], np.nan).ffill().bfill().fillna(0)
    return df

# -----------------------
# Vectorized window creation: takes scaled array (features + scaled targets last 2 cols),
# returns windows X shape (N_windows, SEQ_LEN, n_features) and targets y shape (N_windows, 2)
# Uses sliding_window_view, with uniform sampling if > max_seqs
# -----------------------
def create_windows_from_scaled_array(arr_scaled, seq_len, horizon, max_seqs_per_file):
    """Cut a scaled 2-D array into (window, future-target) training pairs.

    Parameters
    ----------
    arr_scaled : numpy.ndarray, shape (n_rows, n_features + 2)
        Feature columns followed by the two target columns.
    seq_len : int
        Number of consecutive rows in each input window.
    horizon : int
        Distance, in rows, between the last row of a window and its target row.
    max_seqs_per_file : int
        If more windows are available, keep this many, evenly spaced.

    Returns
    -------
    (windows, targets)
        `windows` is float32 with shape (N, seq_len, n_features); `targets` is
        float32 with shape (N, 2). Window `s` covers rows `s .. s+seq_len-1`
        and is paired with the targets of row `s + seq_len - 1 + horizon`.
        Pairs containing NaN/Inf are dropped. Returns `(None, None)` when the
        array is too short or nothing valid remains.
    """
    n = arr_scaled.shape[0]
    total_starts = n - seq_len - horizon + 1
    if total_starts <= 0:
        return None, None
    # Feature rows a window may cover. `n - horizon` instead of `-horizon` so
    # that horizon == 0 does not slice down to an empty array.
    arr_feat = arr_scaled[:n - horizon, :-2]
    # sliding_window_view appends the window axis LAST, giving
    # (total_starts, n_features, seq_len); swap the last two axes to get the
    # documented (total_starts, seq_len, n_features). Both steps are views.
    windows = sliding_window_view(arr_feat, window_shape=seq_len, axis=0).transpose(0, 2, 1)
    # Targets of the row `horizon` steps after each window's last row.
    target_start_idx = seq_len + horizon - 1
    targets = arr_scaled[target_start_idx: target_start_idx + windows.shape[0], -2:]
    total = windows.shape[0]
    if total > max_seqs_per_file:
        # Uniform subsampling; fancy indexing materialises only the kept windows.
        idxs = np.linspace(0, total-1, max_seqs_per_file, dtype=np.int32)
        windows = windows[idxs]
        targets = targets[idxs]
    # Drop any pair with NaN or Inf (isfinite is False for both).
    mask_valid = np.isfinite(windows).all(axis=(1, 2)) & np.isfinite(targets).all(axis=1)
    if not np.all(mask_valid):
        windows = windows[mask_valid]
        targets = targets[mask_valid]
    if windows.shape[0] == 0:
        return None, None
    # np.array(...) always copies, so the result is an independent, C-contiguous
    # array and never a read-only view into `arr_scaled`.
    return np.array(windows, dtype='float32', order='C'), targets.astype('float32')

In [None]:
# -----------------------
# Small TCN model (PyTorch)
# -----------------------
class Chomp1d(nn.Module):
    """Remove the last `chomp_size` steps along the final (time) axis.

    Expects a 3-D input indexed as `x[:, :, t]`. With `chomp_size == 0` the
    input is passed through unchanged; a plain `x[:, :, :-0]` slice would
    return an empty tensor instead.
    """
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size
    def forward(self, x):
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, :-self.chomp_size].contiguous()

class TemporalBlock(nn.Module):
    """Residual block made of two dilated 1-D convolutions.

    Each convolution is followed by a Chomp1d that trims `padding` trailing
    steps, a ReLU and a dropout. The block input is added back to the result
    (through a 1x1 convolution when `in_ch != out_ch`) and a final ReLU is
    applied.
    """
    def __init__(self, in_ch, out_ch, kernel_size, dilation, padding, dropout):
        super().__init__()
        conv_opts = dict(padding=padding, dilation=dilation)

        # First conv stage.
        self.conv1 = nn.Conv1d(in_ch, out_ch, kernel_size, **conv_opts)
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        # Second conv stage.
        self.conv2 = nn.Conv1d(out_ch, out_ch, kernel_size, **conv_opts)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # The same modules, chained in execution order.
        stages = [
            self.conv1, self.chomp1, self.relu1, self.dropout1,
            self.conv2, self.chomp2, self.relu2, self.dropout2,
        ]
        self.net = nn.Sequential(*stages)

        # Shortcut projection, only needed when the channel count changes.
        if in_ch != out_ch:
            self.downsample = nn.Conv1d(in_ch, out_ch, 1)
        else:
            self.downsample = None
        self.relu = nn.ReLU()

    def forward(self, x):
        branch = self.net(x)
        shortcut = self.downsample(x) if self.downsample is not None else x
        return self.relu(branch + shortcut)

class TCN(nn.Module):
    """Stack of TemporalBlocks followed by a pooled regression head.

    Parameters
    ----------
    input_size : int
        Number of input features per time step.
    output_size : int
        Number of values predicted per sequence.
    num_channels : sequence of int
        Output channels of each TemporalBlock; block `i` uses dilation `2**i`
        and padding `(kernel_size - 1) * 2**i`. Must not be empty.
    kernel_size : int
        Convolution kernel size used by every block.
    dropout : float
        Dropout probability inside the TemporalBlocks. The head uses its own
        fixed dropout of 0.2.

    Raises
    ------
    ValueError
        If `num_channels` is empty.
    """
    def __init__(self, input_size, output_size, num_channels=(32, 32), kernel_size=3, dropout=0.2):
        super().__init__()
        # Tuple default instead of a shared mutable list; any sequence
        # (list, tuple, ...) is accepted.
        num_channels = list(num_channels)
        if not num_channels:
            raise ValueError("num_channels must contain at least one entry")
        layers = []
        in_ch = input_size
        for i, out_ch in enumerate(num_channels):
            dilation = 2 ** i
            padding = (kernel_size-1) * dilation
            layers.append(TemporalBlock(in_ch, out_ch, kernel_size, dilation, padding, dropout))
            in_ch = out_ch
        self.network = nn.Sequential(*layers)
        # Head: average over time, then a small MLP.
        self.fc = nn.Sequential(nn.AdaptiveAvgPool1d(1), nn.Flatten(), nn.Linear(num_channels[-1], 64), nn.ReLU(), nn.Dropout(0.2), nn.Linear(64, output_size))
    def forward(self, x):
        # x: (B, seq_len, features) -> (B, features, seq_len)
        x = x.permute(0,2,1)
        y = self.network(x)
        out = self.fc(y)
        return out

In [None]:
# -----------------------
# Metrics (unscaled)
# -----------------------
def compute_metrics(true, pred):
    """Compute error metrics over the samples whose true value exceeds 0.5.

    Returns None when no sample passes that filter. Otherwise returns a dict
    with 'mae', 'rmse', 'mape' (percent), 'r2', the share (percent) of samples
    whose relative error is below 5 / 10 / 15 percent ('pct5', 'pct10',
    'pct15'), and the 25/50/75/90/95th percentiles of the relative error in
    percent ('percentiles').
    """
    keep = true > 0.5
    if np.sum(keep) == 0:
        return None
    actual = true[keep]
    estimate = pred[keep]
    residual = actual - estimate

    mae = float(np.mean(np.abs(residual)))
    rmse = float(np.sqrt(np.mean(residual**2)))
    with np.errstate(divide='ignore', invalid='ignore'):
        relative = np.abs(residual / actual)
        mape = float(np.mean(relative) * 100)
        err_pct = relative * 100

    ss_res = np.sum(residual**2)
    ss_tot = np.sum((actual - np.mean(actual))**2) + 1e-12  # epsilon avoids 0/0 on constant targets
    r2 = float(1 - (ss_res / ss_tot))

    pct5, pct10, pct15 = (float(np.mean(err_pct < limit) * 100) for limit in (5, 10, 15))
    percentiles = np.percentile(err_pct, [25,50,75,90,95]).tolist()
    return {
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
        'r2': r2,
        'pct5': pct5,
        'pct10': pct10,
        'pct15': pct15,
        'percentiles': percentiles,
    }

# -----------------------
# PREP: build global scalers using samples across all usable files
# -----------------------
print("Fitting global scalers from sample rows across usable files...")
usable = usable_files
cat_levels_global = collect_category_levels(usable, max_rows_each=3000)

# One sample per usable file: the first MAX_SAMPLES_PER_FILE_FOR_SCALER raw
# rows, downsampled and featurized exactly like the training data.
# NOTE(review): the sample covers every usable file, including files that are
# later held out as fold/test data, so the scaler statistics are not strictly
# train-only.
X_samples = []; y_samples = []
for idx in usable:
    p = file_map.get(idx)
    if p is None: continue
    try:
        raw = pd.read_csv(p, usecols=lambda c: c in FEATURE_COLS_BASE + TARGETS, nrows=MAX_SAMPLES_PER_FILE_FOR_SCALER)
    except Exception as exc:
        # Best effort, but report the file instead of dropping it silently.
        print(f"  [scaler] skipping file {idx}: {exc}")
        continue
    if DOWNSAMPLE > 1:
        raw = raw.iloc[::DOWNSAMPLE].reset_index(drop=True)
    if len(raw) < 10:
        continue
    fdf = featurize_df(raw, cat_levels_global)
    if fdf is None:
        continue
    vals = fdf.values
    # append entire small sample (features, then the two trailing target columns)
    X_samples.append(vals[:, :-2])
    y_samples.append(vals[:, -2:])
    del raw, fdf, vals; gc.collect()

if len(X_samples) == 0:
    raise RuntimeError("No sample rows to fit scalers! Check CSVs and columns.")

X_sample = np.vstack(X_samples)
y_sample = np.vstack(y_samples)
print("  Scaler sample shapes:", X_sample.shape, y_sample.shape)

# Use RobustScaler for X, StandardScaler for y (targets). StandardScaler ok since targets clipped [0,100]
scaler_X_global = RobustScaler().fit(X_sample)
scaler_y_global = StandardScaler().fit(y_sample)

# free memory
del X_samples, y_samples, X_sample, y_sample
gc.collect()

In [None]:
# -----------------------
# K-Fold training: vectorized windows per file (fast), training on stacked windows
# -----------------------
kf = KFold(n_splits=K_FOLDS, shuffle=True, random_state=42)
all_fold_results = []
global_true_lens = []; global_pred_lens = []
global_true_pw = []; global_pred_pw = []

# Dedicated generator so the train/validation shuffle is reproducible.
fold_rng = np.random.default_rng(42)
_fold_loss_fn = torch.nn.SmoothL1Loss()


def _fold_windows(file_indices, levels, max_seqs):
    """Load, featurize, scale and window the given files with the global scalers.

    Returns `(X, y)` stacked over all files, or `(None, None)` when no file
    produced any window. Files that cannot be read, are shorter than
    SEQ_LEN + HORIZON after downsampling, or lack the targets are skipped.
    """
    X_parts = []; y_parts = []
    for idx in file_indices:
        path = file_map.get(idx)
        if path is None: continue
        try:
            raw = pd.read_csv(path, usecols=lambda c: c in FEATURE_COLS_BASE + TARGETS)
        except Exception as exc:
            print(f"  [fold] skipping file {idx}: {exc}")
            continue
        if DOWNSAMPLE > 1:
            raw = raw.iloc[::DOWNSAMPLE].reset_index(drop=True)
        if len(raw) < SEQ_LEN + HORIZON:
            continue
        fdf = featurize_df(raw, levels)
        if fdf is None: continue
        arr = fdf.values.astype('float32')
        # scale features and targets separately, then re-join (targets last)
        X_feats = scaler_X_global.transform(arr[:, :-2])
        y_t = scaler_y_global.transform(arr[:, -2:])
        arr_scaled = np.hstack([X_feats, y_t]).astype('float32')
        Xw, yw = create_windows_from_scaled_array(arr_scaled, SEQ_LEN, HORIZON, max_seqs)
        del raw, fdf, arr, arr_scaled, X_feats, y_t
        gc.collect()
        if Xw is None: continue
        X_parts.append(Xw); y_parts.append(yw)
    if len(X_parts) == 0:
        return None, None
    return np.vstack(X_parts), np.vstack(y_parts)


fold_no = 0
for train_idx, test_idx in kf.split(usable):
    fold_no += 1
    train_files = [usable[i] for i in train_idx]
    test_files = [usable[i] for i in test_idx]
    print(f"\n=== Fold {fold_no}/{K_FOLDS} - train {len(train_files)} files, test {len(test_files)} files ===")
    # The global scalers were fitted on the column layout produced by
    # cat_levels_global. Per-fold levels can yield a different number of
    # one-hot columns, which makes scaler_X_global.transform fail (or misalign
    # columns), so every fold must featurize with the same global levels.
    cat_levels = cat_levels_global

    # build train windows (vectorized) across files
    X_all, y_all = _fold_windows(train_files, cat_levels, MAX_SEQS_PER_FILE)
    if X_all is None:
        print("No windows for train in this fold -> skip")
        continue
    # shuffle and split
    # NOTE(review): windows of the same file overlap, so a random window-level
    # split gives an optimistic validation score; it is only used for early
    # stopping here.
    perm = fold_rng.permutation(len(X_all))
    X_all = X_all[perm]; y_all = y_all[perm]
    split = int(0.90 * len(X_all))
    X_tr = X_all[:split]; y_tr = y_all[:split]
    X_val = X_all[split:]; y_val = y_all[split:]
    print("Train windows:", X_tr.shape, "Val windows:", X_val.shape)
    # free some RAM
    del X_all, y_all; gc.collect()

    # dataloaders (TensorDataset)
    ds_tr = TensorDataset(torch.from_numpy(X_tr), torch.from_numpy(y_tr))
    ds_val = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
    dl_tr = DataLoader(ds_tr, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    dl_val = DataLoader(ds_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    # model
    n_features = X_tr.shape[2]
    model = TCN(input_size=n_features, output_size=2, num_channels=[32,32], kernel_size=3, dropout=0.2).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    amp = GradScaler()

    best_val = 1e9; best_state = None; wait = 0
    for epoch in range(1, EPOCHS+1):
        t0 = time.time()
        model.train()
        train_losses = []; train_maes = []
        for Xb, yb in dl_tr:
            # sanitize & move to GPU (nan_to_num already removes NaN/Inf, so no
            # further validity check is needed)
            Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
            yb = torch.nan_to_num(yb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
            optimizer.zero_grad()
            with autocast():
                out = model(Xb)
                loss = _fold_loss_fn(out, yb)
            amp.scale(loss).backward()
            amp.step(optimizer)
            amp.update()
            train_losses.append(loss.item())
            train_maes.append(torch.mean(torch.abs(out - yb)).item())
        # validation
        model.eval()
        val_losses = []; val_maes = []
        with torch.no_grad():
            for Xb, yb in dl_val:
                Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
                yb = torch.nan_to_num(yb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
                with autocast():
                    out = model(Xb)
                    loss = _fold_loss_fn(out, yb)
                val_losses.append(loss.item()); val_maes.append(torch.mean(torch.abs(out - yb)).item())
        avg_val_mae = float(np.mean(val_maes)) if len(val_maes)>0 else float('inf')
        val_loss_mean = float(np.mean(val_losses)) if len(val_losses)>0 else (np.mean(train_losses) if len(train_losses)>0 else 0.0)
        scheduler.step(val_loss_mean)
        elapsed = time.time() - t0
        print(f"Epoch {epoch}/{EPOCHS} train_loss={np.mean(train_losses) if train_losses else float('nan'):.5f} train_mae={np.mean(train_maes) if train_maes else float('nan'):.5f} val_mae={avg_val_mae:.5f} time={elapsed:.1f}s")
        if avg_val_mae < best_val - 1e-6:
            best_val = avg_val_mae
            # clone(): on CPU `.cpu()` returns the live tensor itself, so without
            # a copy the "best" snapshot would keep changing with training.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
            wait = 0
        else:
            wait += 1
            if wait >= PATIENCE:
                print("Early stopping")
                break

    # restore best weights
    if best_state is not None:
        model.load_state_dict(best_state)
    torch.save(model.state_dict(), os.path.join(MODEL_DIR, f"tcn_fold_{fold_no}.pth"))

    # Evaluate on test files for this fold (half the per-file window cap)
    X_test_all, y_test_all = _fold_windows(test_files, cat_levels, MAX_SEQS_PER_FILE // 2)
    if X_test_all is None:
        print("No test windows for this fold -> continue")
        continue
    dl_test = DataLoader(TensorDataset(torch.from_numpy(X_test_all), torch.from_numpy(y_test_all)), batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    preds = []; trues = []
    model.eval()
    with torch.no_grad():
        for Xb, yb in dl_test:
            Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
            with autocast():
                out = model(Xb)
            # Under autocast the output can be float16; cast back to float32
            # before un-scaling so the metrics are not computed in half precision.
            out_np = out.float().cpu().numpy(); y_np = yb.numpy()
            out_inv = scaler_y_global.inverse_transform(out_np)
            y_inv = scaler_y_global.inverse_transform(y_np)
            preds.append(out_inv); trues.append(y_inv)
    if len(preds) == 0:
        print("No predictions for fold -> continue")
        continue
    preds = np.vstack(preds); trues = np.vstack(trues)
    pred_lens = preds[:,0]; pred_pw = preds[:,1]
    true_lens = trues[:,0]; true_pw = trues[:,1]
    global_pred_lens.extend(pred_lens.tolist()); global_true_lens.extend(true_lens.tolist())
    global_pred_pw.extend(pred_pw.tolist()); global_true_pw.extend(true_pw.tolist())

    metrics_lens = compute_metrics(true_lens, pred_lens)
    metrics_pw = compute_metrics(true_pw, pred_pw)
    all_fold_results.append({'fold': fold_no, 'metrics_lens': metrics_lens, 'metrics_pw': metrics_pw})
    print(f"Fold {fold_no} metrics (poll_lens): {metrics_lens}")
    print(f"Fold {fold_no} metrics (poll_pw)  : {metrics_pw}")

    # cleanup
    del model, optimizer, dl_tr, dl_val, dl_test, ds_tr, ds_val
    gc.collect()
    if torch.cuda.is_available(): torch.cuda.empty_cache()


In [None]:
# -----------------------
# FINAL forced test on MANDATORY_TEST
# -----------------------
print("\n=== FINAL forced test on MANDATORY_TEST:", sorted(list(MANDATORY_TEST)))
train_final = [i for i in usable if i not in MANDATORY_TEST]
test_final = sorted(list(MANDATORY_TEST))

cat_levels_final = collect_category_levels(train_final, max_rows_each=3000)

# Dedicated generator so the final train/validation shuffle is reproducible.
final_rng = np.random.default_rng(42)
_final_loss_fn = torch.nn.SmoothL1Loss()

# Fit final scalers on a sample across train_final only (the mandatory test
# files never contribute to the scaler statistics).
SAMPLE_ROWS_FINAL = SAMPLE_ROWS_FOR_SCALER  # kept for compatibility; not used below
X_parts = []; y_parts = []
for idx in train_final:
    p = file_map.get(idx)
    if p is None: continue
    try:
        raw = pd.read_csv(p, usecols=lambda c: c in FEATURE_COLS_BASE + TARGETS, nrows=MAX_SAMPLES_PER_FILE_FOR_SCALER)
    except Exception as exc:
        print(f"  [final scaler] skipping file {idx}: {exc}")
        continue
    if DOWNSAMPLE > 1:
        raw = raw.iloc[::DOWNSAMPLE].reset_index(drop=True)
    if len(raw) < 10: continue
    fdf = featurize_df(raw, cat_levels_final)
    if fdf is None: continue
    vals = fdf.values
    X_parts.append(vals[:, :-2]); y_parts.append(vals[:, -2:])
    del raw, fdf, vals; gc.collect()

if len(X_parts) == 0:
    raise RuntimeError("No sample rows for final scalers")
X_sample = np.vstack(X_parts); y_sample = np.vstack(y_parts)
scaler_X_final = RobustScaler().fit(X_sample)
scaler_y_final = StandardScaler().fit(y_sample)
del X_parts, y_parts, X_sample, y_sample; gc.collect()


def _final_windows(file_indices, max_seqs):
    """Load, featurize, scale (final scalers) and window the given files.

    Returns `(X, y)` stacked over all files, or `(None, None)` when no file
    produced any window.
    """
    X_chunks = []; y_chunks = []
    for idx in file_indices:
        path = file_map.get(idx)
        if path is None: continue
        try:
            raw = pd.read_csv(path, usecols=lambda c: c in FEATURE_COLS_BASE + TARGETS)
        except Exception as exc:
            print(f"  [final] skipping file {idx}: {exc}")
            continue
        if DOWNSAMPLE > 1:
            raw = raw.iloc[::DOWNSAMPLE].reset_index(drop=True)
        if len(raw) < SEQ_LEN + HORIZON: continue
        fdf = featurize_df(raw, cat_levels_final)
        if fdf is None: continue
        arr = fdf.values.astype('float32')
        X_feats = scaler_X_final.transform(arr[:, :-2])
        y_t = scaler_y_final.transform(arr[:, -2:])
        arr_scaled = np.hstack([X_feats, y_t]).astype('float32')
        Xw, yw = create_windows_from_scaled_array(arr_scaled, SEQ_LEN, HORIZON, max_seqs)
        del raw, fdf, arr, arr_scaled, X_feats, y_t
        gc.collect()
        if Xw is None: continue
        X_chunks.append(Xw); y_chunks.append(yw)
    if len(X_chunks) == 0:
        return None, None
    return np.vstack(X_chunks), np.vstack(y_chunks)


# Build final train windows
X_tr_all, y_tr_all = _final_windows(train_final, MAX_SEQS_PER_FILE)
if X_tr_all is None:
    raise RuntimeError("No final train windows")
perm = final_rng.permutation(len(X_tr_all)); X_tr_all = X_tr_all[perm]; y_tr_all = y_tr_all[perm]
split = int(0.95 * len(X_tr_all))
X_tr = X_tr_all[:split]; y_tr = y_tr_all[:split]
X_val = X_tr_all[split:]; y_val = y_tr_all[split:]
del X_tr_all, y_tr_all; gc.collect()

# Build final test windows (half the per-file window cap)
X_test_all, y_test_all = _final_windows(test_final, MAX_SEQS_PER_FILE//2)
if X_test_all is None:
    raise RuntimeError("No final test windows")

# Dataloaders final
dl_tr = DataLoader(TensorDataset(torch.from_numpy(X_tr), torch.from_numpy(y_tr)), batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
dl_val = DataLoader(TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val)), batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
dl_test = DataLoader(TensorDataset(torch.from_numpy(X_test_all), torch.from_numpy(y_test_all)), batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Train final model
n_features_final = X_tr.shape[2]
model_final = TCN(input_size=n_features_final, output_size=2, num_channels=[32,32], kernel_size=3, dropout=0.2).to(DEVICE)
opt_final = torch.optim.AdamW(model_final.parameters(), lr=LR, weight_decay=1e-5)
sch_final = torch.optim.lr_scheduler.ReduceLROnPlateau(opt_final, mode='min', factor=0.5, patience=3)
amp_final = GradScaler()

best_val = 1e9; best_state = None; wait = 0
for epoch in range(1, EPOCHS+1):
    t0 = time.time()
    model_final.train()
    losses = []; maes = []
    for Xb, yb in dl_tr:
        # nan_to_num already removes NaN/Inf, so no further validity check is needed
        Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
        yb = torch.nan_to_num(yb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
        opt_final.zero_grad()
        with autocast():
            out = model_final(Xb)
            loss = _final_loss_fn(out, yb)
        amp_final.scale(loss).backward()
        amp_final.step(opt_final); amp_final.update()
        losses.append(loss.item()); maes.append(torch.mean(torch.abs(out-yb)).item())
    # val
    model_final.eval()
    val_losses=[]; val_maes=[]
    with torch.no_grad():
        for Xb, yb in dl_val:
            Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
            yb = torch.nan_to_num(yb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
            with autocast():
                out = model_final(Xb)
                loss = _final_loss_fn(out, yb)
            val_losses.append(loss.item()); val_maes.append(torch.mean(torch.abs(out-yb)).item())
    avg_val_mae = float(np.mean(val_maes)) if len(val_maes)>0 else float('inf')
    val_loss_mean = float(np.mean(val_losses)) if len(val_losses)>0 else (np.mean(losses) if losses else 0.0)
    sch_final.step(val_loss_mean)
    print(f"[FINAL] Epoch {epoch}/{EPOCHS} train_loss={np.mean(losses) if losses else float('nan'):.5f} val_mae={avg_val_mae:.5f} time={time.time()-t0:.1f}s")
    if avg_val_mae < best_val - 1e-6:
        best_val = avg_val_mae
        # clone(): on CPU `.cpu()` returns the live tensor itself, so without a
        # copy the "best" snapshot would keep changing with training.
        best_state = {k: v.detach().cpu().clone() for k, v in model_final.state_dict().items()}
        wait = 0
    else:
        wait += 1
        if wait >= PATIENCE:
            print("[FINAL] Early stopping"); break

if best_state is not None:
    model_final.load_state_dict(best_state)
torch.save(model_final.state_dict(), os.path.join(MODEL_DIR, "tcn_final_optionB_cleaned.pth"))

# Final evaluation
preds = []; trues = []
model_final.eval()
with torch.no_grad():
    for Xb, yb in dl_test:
        Xb = torch.nan_to_num(Xb, nan=0.0, posinf=0.0, neginf=0.0).to(DEVICE, dtype=torch.float)
        with autocast():
            out = model_final(Xb)
        # Under autocast the output can be float16; cast back to float32 before
        # un-scaling so the metrics are not computed in half precision.
        out_np = out.float().cpu().numpy(); y_np = yb.numpy()
        out_inv = scaler_y_final.inverse_transform(out_np)
        y_inv = scaler_y_final.inverse_transform(y_np)
        preds.append(out_inv); trues.append(y_inv)
preds = np.vstack(preds); trues = np.vstack(trues)
pred_lens_final = preds[:,0]; true_lens_final = trues[:,0]
pred_pw_final = preds[:,1]; true_pw_final = trues[:,1]
metrics_final_lens = compute_metrics(true_lens_final, pred_lens_final)
metrics_final_pw = compute_metrics(true_pw_final, pred_pw_final)
print("\nFINAL TEST METRICS (poll_lens):", metrics_final_lens)
print("FINAL TEST METRICS (poll_pw)  :", metrics_final_pw)

# Aggregated folds
agg_lens = compute_metrics(np.array(global_true_lens), np.array(global_pred_lens)) if len(global_true_lens)>0 else None
agg_pw = compute_metrics(np.array(global_true_pw), np.array(global_pred_pw)) if len(global_true_pw)>0 else None
print("\nAGGREGATED FOLDS (lens):", agg_lens)
print("AGGREGATED FOLDS (pw)  :", agg_pw)

# Save results and scalers
out = {
    'config': {'DOWNSAMPLE': DOWNSAMPLE, 'SEQ_LEN': SEQ_LEN, 'HORIZON': HORIZON, 'usable_files': usable},
    'folds': all_fold_results,
    'final_test': {'lens': metrics_final_lens, 'pw': metrics_final_pw},
    'aggregated': {'lens': agg_lens, 'pw': agg_pw}
}
joblib.dump(out, RESULTS_FILE)
# Scalers used by the per-fold models.
joblib.dump(scaler_X_global, os.path.join(MODEL_DIR, "scaler_X_global.pkl"))
joblib.dump(scaler_y_global, os.path.join(MODEL_DIR, "scaler_y_global.pkl"))
# The final model was trained with the *final* scalers and category levels, so
# those are what must accompany tcn_final_optionB_cleaned.pth at inference time.
joblib.dump(scaler_X_final, os.path.join(MODEL_DIR, "scaler_X_final.pkl"))
joblib.dump(scaler_y_final, os.path.join(MODEL_DIR, "scaler_y_final.pkl"))
joblib.dump(cat_levels_final, os.path.join(MODEL_DIR, "cat_levels_final.pkl"))
print("Saved results to", RESULTS_FILE)
print("Saved scalers to", os.path.join(MODEL_DIR, "scaler_*_global.pkl"))
print("Saved final scalers to", os.path.join(MODEL_DIR, "scaler_*_final.pkl"))
