# 🏴‍☠️ Ensemble di 9 Modelli Migliori - PyTorch\n\nQuesto notebook implementa un ensemble di **9 modelli** (3 configurazioni × 3 seeds) usando:\n- Architettura **Conv1D + BiLSTM**\n- Pesatura **EWA (Exponentially Weighted Average)** basata su F1 validation\n- Predizione con **soft voting pesato**\n\n## Pipeline Completa\n1. Caricamento dati\n2. Preprocessing (ADVICE 07/11, 11/11, 12/11)\n3. Creazione finestre\n4. **Definizione architettura Conv1D + BiLSTM**\n5. **Training ensemble 9 modelli**\n6. **Valutazione e calcolo pesi EWA**\n7. **Predizione test con ensemble**\n8. Generazione submission

In [18]:
# Core libraries
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Stats and ML
from statsmodels.tsa.stattools import acf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight

# Set seeds
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

print(f'PyTorch: {torch.__version__}')
print(f'Device: {device}')
print('✅ Environment ready!')

PyTorch: 2.9.0+cpu
Device: cpu
✅ Environment ready!


In [19]:
# Load dataset
X_train = pd.read_csv('pirate_pain_train.csv')
y_train = pd.read_csv('pirate_pain_train_labels.csv')

print('📊 Dataset Shape:')
print(f'  Features: {X_train.shape}')
print(f'  Labels: {y_train.shape}')
print(f'  Samples: {X_train["sample_index"].nunique()}')
print(f'  Timesteps/sample: {X_train.groupby("sample_index").size().iloc[0]}')

# Feature groups
pain_survey_cols = [c for c in X_train.columns if 'pain_survey' in c]
categorical_cols = ['n_legs', 'n_hands', 'n_eyes']
joint_cols = [c for c in X_train.columns if 'joint_' in c]

print(f'\n📋 Features: {len(pain_survey_cols)} pain_survey + {len(categorical_cols)} categorical + {len(joint_cols)} joints')

# ADVICE 08/11: Check class imbalance
print(f'\n🏷️ Labels (IMBALANCED - need class weighting):')
for label, count in y_train['label'].value_counts().items():
    print(f'  {label}: {count} ({100*count/len(y_train):.1f}%)')

📊 Dataset Shape:
  Features: (105760, 40)
  Labels: (661, 2)
  Samples: 661
  Timesteps/sample: 160

📋 Features: 4 pain_survey + 3 categorical + 31 joints

🏷️ Labels (IMBALANCED - need class weighting):
  no_pain: 511 (77.3%)
  low_pain: 94 (14.2%)
  high_pain: 56 (8.5%)


In [20]:
# ADVICE 11/11: Analyze autocorrelation to determine optimal window
print('🔍 Analyzing autocorrelation...')
samples_analyze = X_train['sample_index'].unique()[:10]
key_features = joint_cols[:6]

optimal_lags = {}
for feature in key_features:
    sample_lags = []
    for sid in samples_analyze:
        data = X_train[X_train['sample_index']==sid][feature].values
        if len(data) >= 50:
            max_lags = min(len(data)//2-1, 80)
            acf_vals = acf(data, nlags=max_lags)
            sig_bound = 1.96/np.sqrt(len(data))
            for lag in range(1, len(acf_vals)):
                if abs(acf_vals[lag]) < sig_bound:
                    sample_lags.append(lag)
                    break
            else:
                sample_lags.append(max_lags)
    if sample_lags:
        optimal_lags[feature] = int(np.median(sample_lags))

if optimal_lags:
    suggested = int(np.median(list(optimal_lags.values())))
    WINDOW_SIZE = max(min(suggested, 100), 40)
else:
    WINDOW_SIZE = 60

WINDOW_STRIDE = WINDOW_SIZE // 2

print(f'✅ WINDOW_SIZE from autocorrelation: {WINDOW_SIZE}')
print(f'   STRIDE: {WINDOW_STRIDE}')
print(f'💡 ADVICE 11/11: Data-driven window size!')

🔍 Analyzing autocorrelation...
✅ WINDOW_SIZE from autocorrelation: 40
   STRIDE: 20
💡 ADVICE 11/11: Data-driven window size!


In [21]:
# ADVICE 07/11: Map categorical features
cat_map = {
    'n_legs': {'two': 0, 'one+peg_leg': 1},
    'n_hands': {'two': 0, 'one+hook_hand': 1},
    'n_eyes': {'two': 0, 'one+eye_patch': 1}
}

X_proc = X_train.copy()
for col, mapping in cat_map.items():
    X_proc[col] = X_proc[col].map(mapping).fillna(0).astype(int)

# ADVICE 12/11: Add cyclical time features
max_time = X_proc['time'].max()
X_proc['time_sin'] = np.sin(2*np.pi*X_proc['time']/max_time)
X_proc['time_cos'] = np.cos(2*np.pi*X_proc['time']/max_time)
X_proc['time_norm'] = X_proc['time']/max_time

print('✅ Preprocessing done:')
print('   - ADVICE 07/11: Categorical mapped')
print('   - ADVICE 12/11: Time features (sin, cos, norm) added')
print(f'   Shape: {X_proc.shape}')

✅ Preprocessing done:
   - ADVICE 07/11: Categorical mapped
   - ADVICE 12/11: Time features (sin, cos, norm) added
   Shape: (105760, 43)


In [22]:
# Create sliding windows
def create_windows(df, sample_idx, window_size, stride):
    sample = df[df['sample_index']==sample_idx].sort_values('time')
    feat_cols = [c for c in sample.columns if c not in ['sample_index','time']]
    features = sample[feat_cols].values
    
    windows = []
    for start in range(0, max(1, len(features)-window_size+1), stride):
        end = min(start+window_size, len(features))
        win = features[start:end]
        if len(win) < window_size:
            pad = np.zeros((window_size-len(win), win.shape[1]))
            win = np.vstack([win, pad])
        windows.append(win)
    return windows

print('🔄 Creating windows...')
all_windows = []
all_labels = []

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_train['label'])

for sid, label in zip(y_train['sample_index'], y_encoded):
    wins = create_windows(X_proc, sid, WINDOW_SIZE, WINDOW_STRIDE)
    all_windows.extend(wins)
    all_labels.extend([label]*len(wins))

X_windows = np.array(all_windows, dtype=np.float32)
y_windows = np.array(all_labels, dtype=np.int64)

print(f'✅ Windows: {X_windows.shape}')
print(f'   Labels: {y_windows.shape}')

🔄 Creating windows...


✅ Windows: (4627, 40, 41)
   Labels: (4627,)


In [23]:
# Split and normalize
X_tr, X_val, y_tr, y_val = train_test_split(
    X_windows, y_windows, test_size=0.2, random_state=SEED, stratify=y_windows
)

scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr.reshape(-1, X_tr.shape[-1])).reshape(X_tr.shape)
X_val = scaler.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)

print(f'📊 Split: Train {X_tr.shape}, Val {X_val.shape}')

# ADVICE 08/11: Compute class weights
class_weights_array = compute_class_weight('balanced', classes=np.unique(y_tr), y=y_tr)
class_weights_tensor = torch.FloatTensor(class_weights_array).to(device)

print(f'\n⚖️ ADVICE 08/11 - Class Weights:')
for i, w in enumerate(class_weights_array):
    print(f'   {label_encoder.classes_[i]}: {w:.3f}')

# Create DataLoaders
train_dataset = TensorDataset(
    torch.FloatTensor(X_tr),
    torch.LongTensor(y_tr)
)
val_dataset = TensorDataset(
    torch.FloatTensor(X_val),
    torch.LongTensor(y_val)
)

BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f'\n✅ DataLoaders ready (batch_size={BATCH_SIZE})')

📊 Split: Train (3701, 40, 41), Val (926, 40, 41)

⚖️ ADVICE 08/11 - Class Weights:
   high_pain: 3.929
   low_pain: 2.345
   no_pain: 0.431

✅ DataLoaders ready (batch_size=32)


---\n## 🎯 ENSEMBLE: Definizioni e Utility\n\nOra definiamo tutte le funzioni per l'ensemble approach.

In [None]:
"""
PyTorch reimplementation of Conv1D + BiLSTM ensemble with EWA weighting.

This script trains 9 models (3 configurations × 3 seeds) and creates
an ensemble using Exponentially Weighted Average (EWA) based on validation F1 scores.

Assumes that the following variables already exist in the namespace when imported:
    X_tr        : (N_train, WINDOW_SIZE, n_features), float32
    y_tr_cat    : (N_train, n_classes), one-hot
    X_val       : (N_val,   WINDOW_SIZE, n_features), float32
    y_val_cat   : (N_val,   n_classes), one-hot
    X_test_proc : feature matrix for test (used in create_windows)
    X_test      : DataFrame with column "sample_index"
    WINDOW_SIZE : int
    WINDOW_STRIDE: int (for create_windows)
    scaler      : fitted sklearn scaler (for test windows)
    label_encoder: fitted sklearn LabelEncoder
    create_windows: function as in the original code
"""

# =========================================================
# 1. IMPORTS & GLOBAL CONFIG
# =========================================================

import os
import gc
import random
from copy import deepcopy

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import f1_score, classification_report, confusion_matrix
from scipy.stats import mode
from scipy.special import softmax
from tqdm.auto import tqdm

# Device detection: MPS (Mac M4) > CUDA > CPU
DEVICE = torch.device("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else
                      "cuda" if torch.cuda.is_available() else
                      "cpu")
print(f"Using device: {DEVICE}")


# =========================================================
# 2. SEEDING
# =========================================================

def set_seed(seed: int):
    """Set all random seeds for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# =========================================================
# 3. DATASET & DATALOADER HELPERS
# =========================================================

class TimeSeriesDataset(Dataset):
    """Dataset for time series data.
    
    Args:
        X: shape (N, T, F) - N samples, T timesteps, F features
        y: shape (N,) with class indices (0..C-1), or None for test data
    """
    def __init__(self, X: np.ndarray, y: np.ndarray | None = None):
        super().__init__()
        self.X = torch.from_numpy(X).float()
        self.y = None if y is None else torch.from_numpy(y).long()

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        x = self.X[idx]  # (T, F)
        if self.y is None:
            return x
        return x, self.y[idx]


def make_dataloaders(X_tr, y_tr, X_val, y_val, batch_size: int = 16):
    """Create train and validation dataloaders."""
    train_ds = TimeSeriesDataset(X_tr, y_tr)
    val_ds   = TimeSeriesDataset(X_val, y_val)

    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,  drop_last=False)
    val_loader   = DataLoader(val_ds,   batch_size=batch_size, shuffle=False, drop_last=False)
    return train_loader, val_loader


# =========================================================
# 4. MODEL: Conv1D + BiLSTM
# =========================================================

class ConvLSTMNet(nn.Module):
    """Conv1D + Bidirectional LSTM network for time series classification."""
    
    def __init__(
        self,
        input_dim: int,       # n_features
        conv_dimension: int,  # conv_dim
        lstm_units: int,
        num_classes: int,
        dense_layer: int = 64,
        dropout: float = 0.25,
        noise_std: float = 0.01,
    ):
        super().__init__()
        self.noise_std = noise_std

        # Conv1D expects (B, C=F, T). We'll permute in forward.
        self.conv1 = nn.Conv1d(in_channels=input_dim, out_channels=conv_dimension,
                               kernel_size=3, padding=1)
        self.bn1   = nn.BatchNorm1d(conv_dimension)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.drop1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(in_channels=conv_dimension, out_channels=conv_dimension,
                               kernel_size=3, padding=1)
        self.bn2   = nn.BatchNorm1d(conv_dimension)
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        self.drop2 = nn.Dropout(dropout)

        # BiLSTM (return_sequences=False => use last timestep)
        self.lstm = nn.LSTM(
            input_size=conv_dimension,
            hidden_size=lstm_units,
            num_layers=1,
            batch_first=True,
            bidirectional=True,
        )

        self.bn_lstm = nn.BatchNorm1d(2 * lstm_units)

        self.fc1     = nn.Linear(2 * lstm_units, dense_layer)
        self.drop_fc = nn.Dropout(dropout)
        self.fc_out  = nn.Linear(dense_layer, num_classes)

    def forward(self, x):
        # x: (B, T, F)
        if self.training and self.noise_std > 0.0:
            x = x + torch.randn_like(x) * self.noise_std

        # Conv1D blocks
        x = x.permute(0, 2, 1)          # (B, F, T)
        x = F.relu(self.conv1(x))
        x = self.bn1(x)
        x = self.pool1(x)
        x = self.drop1(x)

        x = F.relu(self.conv2(x))
        x = self.bn2(x)
        x = self.pool2(x)
        x = self.drop2(x)

        # LSTM
        x = x.permute(0, 2, 1)          # (B, T', C)
        out, _ = self.lstm(x)           # out: (B, T', 2*lstm_units)
        x = out[:, -1, :]               # last timestep (return_sequences=False)
        x = self.bn_lstm(x)

        # Dense head
        x = F.relu(self.fc1(x))
        x = self.drop_fc(x)
        logits = self.fc_out(x)         # (B, num_classes)
        return logits


def build_conv_lstm_model(input_shape, num_classes, conv_dimension, lstm_units, dropout, dense_layer=64):
    """Build Conv1D + LSTM model."""
    _, n_features = input_shape  # (WINDOW_SIZE, n_features)
    model = ConvLSTMNet(
        input_dim=n_features,
        conv_dimension=conv_dimension,
        lstm_units=lstm_units,
        num_classes=num_classes,
        dense_layer=dense_layer,
        dropout=dropout,
        noise_std=0.01,
    )
    return model


# =========================================================
# 5. UTILS: PREDICTION HELPERS
# =========================================================

@torch.no_grad()
def predict_proba_on_array(model: nn.Module, X: np.ndarray, batch_size: int = 256) -> np.ndarray:
    """Predict class probabilities on numpy array.
    
    Args:
        model: PyTorch model
        X: (N, T, F) numpy array
        batch_size: batch size for prediction
        
    Returns:
        (N, n_classes) array with softmax probabilities
    """
    model.eval()
    ds = TimeSeriesDataset(X, y=None)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False)

    all_probs = []
    for xb in loader:
        xb = xb.to(DEVICE)
        logits = model(xb)           # (B, C)
        probs = F.softmax(logits, dim=1)
        all_probs.append(probs.cpu().numpy())

    return np.concatenate(all_probs, axis=0)


# =========================================================
# 6. ENSEMBLE UTILS
# =========================================================

def majority_voting(models, X: np.ndarray, batch_size: int = 256) -> np.ndarray:
    """Ensemble prediction using majority voting."""
    preds = []
    for m in models:
        probs = predict_proba_on_array(m, X, batch_size=batch_size)
        preds.append(np.argmax(probs, axis=1))

    preds = np.stack(preds, axis=0)  # (n_models, N)
    final = mode(preds, axis=0, keepdims=False).mode
    return final


def soft_weighted_ensemble_predict(models, X: np.ndarray, weights: np.ndarray, batch_size: int = 256):
    """Ensemble prediction using weighted soft voting.
    
    Args:
        models: list of PyTorch models
        X: input data (N, T, F)
        weights: model weights for averaging
        batch_size: batch size for prediction
        
    Returns:
        preds: predicted class indices
        weighted_probs: weighted probability distribution
    """
    weighted_probs = None
    for w, m in zip(weights, models):
        probs = predict_proba_on_array(m, X, batch_size=batch_size)  # (N, C)
        if weighted_probs is None:
            weighted_probs = w * probs
        else:
            weighted_probs += w * probs

    weighted_probs = softmax(weighted_probs, axis=1)
    preds = np.argmax(weighted_probs, axis=1)
    return preds, weighted_probs


# =========================================================
# 7. TRAINING LOOP + EARLY STOPPING
# =========================================================

class EarlyStopping:
    """Early stopping based on validation metric."""
    
    def __init__(self, patience: int = 10, mode: str = "max"):
        self.patience = patience
        self.mode = mode
        self.best_score = None
        self.best_state = None
        self.counter = 0

    def step(self, score: float, model: nn.Module) -> bool:
        """Returns True if training should stop."""
        if self.best_score is None:
            self.best_score = score
            self.best_state = deepcopy(model.state_dict())
            self.counter = 0
            return False

        improve = score > self.best_score if self.mode == "max" else score < self.best_score

        if improve:
            self.best_score = score
            self.best_state = deepcopy(model.state_dict())
            self.counter = 0
            return False
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True
            return False


def train_one_model(
    cfg: dict,
    seed: int,
    X_tr: np.ndarray,
    y_tr: np.ndarray,
    X_val: np.ndarray,
    y_val: np.ndarray,
    batch_size: int = 16,
):
    """Train a single model with given hyperparameters and seed.
    
    Args:
        cfg: configuration dict with hyperparameters
        seed: random seed
        X_tr, y_tr: training data
        X_val, y_val: validation data
        batch_size: batch size for training
        
    Returns:
        model: trained model with best weights
        history: training history
    """
    set_seed(seed)
    gc.collect()

    n_features = X_tr.shape[2]
    n_classes = len(np.unique(y_tr))
    window_size = X_tr.shape[1]

    model = build_conv_lstm_model(
        input_shape=(window_size, n_features),
        num_classes=n_classes,
        conv_dimension=cfg["conv_dim"],
        lstm_units=cfg["lstm_units"],
        dropout=cfg["dropout"],
        dense_layer=64,
    ).to(DEVICE)

    # optimizer + loss with label smoothing
    criterion = nn.CrossEntropyLoss(label_smoothing=float(cfg["label_smoothing"]))
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=float(cfg["lr"]),
        weight_decay=float(cfg["wd"]),
    )

    train_loader, val_loader = make_dataloaders(X_tr, y_tr, X_val, y_val, batch_size=batch_size)

    es = EarlyStopping(patience=cfg["early_stop_patience"], mode="max")
    max_epochs = max(100, cfg["max_epochs"])

    history = []

    for epoch in range(1, max_epochs + 1):
        # ---- TRAIN ----
        model.train()
        train_losses = []

        for xb, yb in train_loader:
            xb = xb.to(DEVICE)
            yb = yb.to(DEVICE)

            optimizer.zero_grad()
            logits = model(xb)
            loss = criterion(logits, yb)
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        # ---- VALIDATION (acc + F1 macro) ----
        model.eval()
        val_losses = []
        all_preds = []
        all_targets = []

        with torch.no_grad():
            for xb, yb in val_loader:
                xb = xb.to(DEVICE)
                yb = yb.to(DEVICE)

                logits = model(xb)
                loss = criterion(logits, yb)
                val_losses.append(loss.item())

                probs = F.softmax(logits, dim=1)
                preds = probs.argmax(dim=1)

                all_preds.append(preds.cpu().numpy())
                all_targets.append(yb.cpu().numpy())

        all_preds = np.concatenate(all_preds)
        all_targets = np.concatenate(all_targets)

        val_f1 = f1_score(all_targets, all_preds, average="macro")
        val_acc = (all_preds == all_targets).mean()

        epoch_log = {
            "epoch": epoch,
            "train_loss": float(np.mean(train_losses)),
            "val_loss": float(np.mean(val_losses)),
            "val_acc": float(val_acc),
            "val_f1": float(val_f1),
        }
        history.append(epoch_log)

        print(f"Epoch {epoch:03d} | train_loss={epoch_log['train_loss']:.4f} "
              f"val_loss={epoch_log['val_loss']:.4f} val_acc={val_acc:.4f} val_f1={val_f1:.4f}")

        # early stopping on val_f1
        stop = es.step(val_f1, model)
        if stop:
            print(f"Early stopping triggered at epoch {epoch} (best val_f1={es.best_score:.4f})")
            break

    # restore best weights
    if es.best_state is not None:
        model.load_state_dict(es.best_state)

    return model, history


# =========================================================
# 8. BEST CONFIGS
# =========================================================

best_configs = [
    {   # trial 223
        "name": "128x128x128",
        "conv_dim": 128,
        "lstm_units": 128,
        "dropout": 0.117753,
        "lr": 0.000506,
        "label_smoothing": 0.034647,
        "early_stop_patience": 999,
        "max_epochs": 23,
        "wd": 1e-6,
    },
    {   # trial 23
        "name": "128x128x160",
        "conv_dim": 128,
        "lstm_units": 160,
        "dropout": 0.107198,
        "lr": 0.000302,
        "label_smoothing": 0.051445,
        "early_stop_patience": 999,
        "max_epochs": 37,
        "wd": 8.31e-4,
    },
    {   # trial 122
        "name": "160x160x160",
        "conv_dim": 160,
        "lstm_units": 160,
        "dropout": 0.114202,
        "lr": 0.000335,
        "label_smoothing": 0.051782,
        "early_stop_patience": 999,
        "max_epochs": 29,
        "wd": 3e-5,
    },
]


# =========================================================
# 9. MAIN ENSEMBLE TRAINING FUNCTION
# =========================================================

def train_ensemble(X_tr, y_tr_cat, X_val, y_val_cat, configs=None, seeds_per_config=3):
    """Train ensemble of models.
    
    Args:
        X_tr: training features (N, T, F)
        y_tr_cat: training labels, one-hot encoded
        X_val: validation features (N, T, F)
        y_val_cat: validation labels, one-hot encoded
        configs: list of config dicts (default: best_configs)
        seeds_per_config: number of seeds per config
        
    Returns:
        models: list of trained models
        model_info: list of dicts with model metadata
        histories: list of training histories
    """
    if configs is None:
        configs = best_configs
    
    # Convert one-hot to indices
    y_tr = np.argmax(y_tr_cat, axis=1).astype(np.int64)
    y_val = np.argmax(y_val_cat, axis=1).astype(np.int64)

    n_features = X_tr.shape[2]
    n_classes = len(np.unique(y_tr))

    print("✅ Conv1D + LSTM (PyTorch) ensemble - training start")
    print(f"   Input: ({X_tr.shape[1]}, {n_features})")
    print(f"   Output: {n_classes} classes")

    models = []
    model_info = []
    histories = []

    TOTAL_MODELS = len(configs) * seeds_per_config
    model_idx = 0

    for cfg_idx, cfg in enumerate(configs):
        for s in range(seeds_per_config):
            model_idx += 1
            seed = 100 * cfg_idx + s

            print("\n==============================")
            print(f"  TRAINING MODEL {model_idx}/{TOTAL_MODELS}")
            print(f"  config: {cfg['name']} | seed: {seed}")
            print("==============================")

            model, history = train_one_model(
                cfg=cfg,
                seed=seed,
                X_tr=X_tr,
                y_tr=y_tr,
                X_val=X_val,
                y_val=y_val,
                batch_size=16,
            )

            models.append(model)
            model_info.append({
                "name": f"{cfg['name']}_seed{seed}",
                "config": cfg,
                "seed": seed,
            })
            histories.append(history)

            gc.collect()

    return models, model_info, histories


# =========================================================
# 10. ENSEMBLE EVALUATION AND WEIGHTING
# =========================================================

def evaluate_ensemble(models, X_val, y_val_cat, eta=15.0):
    """Evaluate ensemble and compute EWA weights.
    
    Args:
        models: list of trained models
        X_val: validation features
        y_val_cat: validation labels (one-hot)
        eta: EWA parameter (higher = more weight to best models)
        
    Returns:
        weights: EWA weights for each model
        single_f1: F1 scores for individual models
        ensemble_f1: F1 score of the ensemble
    """
    y_val_true = np.argmax(y_val_cat, axis=1)

    # Compute F1 for each model
    single_f1 = []
    for i, m in enumerate(models):
        probs = predict_proba_on_array(m, X_val, batch_size=256)
        preds = np.argmax(probs, axis=1)
        f1 = f1_score(y_val_true, preds, average="macro")
        single_f1.append(f1)
        print(f"Model {i} F1: {f1:.4f}")

    single_f1 = np.array(single_f1)
    print("\nF1 scores:", single_f1)

    # Compute EWA weights: Loss = 1 - F1
    losses = 1.0 - single_f1
    print("Losses (1 - F1):", losses)

    raw_weights = np.exp(-eta * losses)
    weights = raw_weights / raw_weights.sum()

    print(f"\nEWA weights (eta = {eta:.1f}):", weights)
    print("Sum of weights:", weights.sum())

    # Ensemble prediction
    y_val_pred_ewa, _ = soft_weighted_ensemble_predict(models, X_val, weights, batch_size=256)
    f1_ens_ewa = f1_score(y_val_true, y_val_pred_ewa, average="macro")
    print(f"\nEnsemble F1 (soft + EWA, eta={eta}):", f1_ens_ewa)

    print("\n=== Classification Report (Validation, Ensemble EWA) ===\n")
    print(classification_report(y_val_true, y_val_pred_ewa, digits=4))

    print("\n=== Confusion Matrix ===")
    print(confusion_matrix(y_val_true, y_val_pred_ewa))

    return weights, single_f1, f1_ens_ewa


# =========================================================
# 11. TEST PREDICTION
# =========================================================

def ensemble_predict_test(models, X_test_proc, df_test, weights, 
                         WINDOW_SIZE, WINDOW_STRIDE, scaler, create_windows,
                         batch_size_windows: int = 256):
    """Make ensemble predictions on test set.
    
    Args:
        models: list of trained models
        X_test_proc: preprocessed test features
        df_test: test DataFrame with sample_index column
        weights: EWA weights
        WINDOW_SIZE: window size
        WINDOW_STRIDE: window stride
        scaler: fitted scaler
        create_windows: window creation function
        batch_size_windows: batch size for prediction
        
    Returns:
        results: predicted class indices
        test_ids: sample indices
    """
    results = []
    test_ids = df_test["sample_index"].unique()

    for sid in tqdm(test_ids, desc="Predict ensemble (soft+EWA)"):
        # Create windows for this sample
        windows = create_windows(X_test_proc, sid, WINDOW_SIZE, WINDOW_STRIDE)

        if len(windows) == 0:
            results.append(0)
            continue

        X_sample = np.array(windows, dtype=np.float32)  # (n_windows, T, F)

        # Scale windows
        X_sample = scaler.transform(
            X_sample.reshape(-1, X_sample.shape[-1])
        ).reshape(X_sample.shape)

        # Weighted average of probabilities across all windows and models
        weighted_probs = None

        for w, m in zip(weights, models):
            probs = predict_proba_on_array(m, X_sample, batch_size=batch_size_windows)  # (n_windows, C)
            avg = probs.mean(axis=0)  # average over windows

            if weighted_probs is None:
                weighted_probs = w * avg
            else:
                weighted_probs += w * avg

        weighted_probs = softmax(weighted_probs)
        final_class = int(np.argmax(weighted_probs))
        results.append(final_class)

    return np.array(results, dtype=int), test_ids


# =========================================================
# 12. MAIN EXECUTION FUNCTION
# =========================================================

def run_ensemble(X_tr, y_tr_cat, X_val, y_val_cat, 
                 X_test_proc=None, X_test=None, 
                 WINDOW_SIZE=None, WINDOW_STRIDE=None,
                 scaler=None, label_encoder=None, create_windows=None,
                 save_submission=True, submission_filename="submission_ewa_eta15_pytorch.csv"):
    """Run complete ensemble training, evaluation, and prediction.
    
    Args:
        X_tr, y_tr_cat: training data
        X_val, y_val_cat: validation data
        X_test_proc: preprocessed test features (optional)
        X_test: test DataFrame (optional)
        WINDOW_SIZE, WINDOW_STRIDE: window parameters
        scaler: fitted scaler
        label_encoder: fitted label encoder
        create_windows: window creation function
        save_submission: whether to save submission file
        submission_filename: output filename
        
    Returns:
        models: list of trained models
        weights: EWA weights
        submission: submission DataFrame (if test data provided)
    """
    print("="*60)
    print("ENSEMBLE TRAINING AND EVALUATION")
    print("="*60)
    
    # Train ensemble
    models, model_info, histories = train_ensemble(X_tr, y_tr_cat, X_val, y_val_cat)
    
    # Evaluate and compute weights
    weights, single_f1, ensemble_f1 = evaluate_ensemble(models, X_val, y_val_cat, eta=15.0)
    
    # Test prediction (if test data provided)
    submission = None
    if all(v is not None for v in [X_test_proc, X_test, WINDOW_SIZE, WINDOW_STRIDE, 
                                     scaler, label_encoder, create_windows]):
        print("\n" + "="*60)
        print("TEST PREDICTION")
        print("="*60)
        
        test_pred, test_ids = ensemble_predict_test(
            models, X_test_proc, X_test, weights,
            WINDOW_SIZE, WINDOW_STRIDE, scaler, create_windows
        )

        submission = pd.DataFrame({
            "sample_index": test_ids,
            "label": label_encoder.inverse_transform(test_pred),
        })

        if save_submission:
            submission.to_csv(submission_filename, index=False)
            print(f"\n✅ Submission saved as '{submission_filename}'")
    else:
        print("\n[INFO] Skipped test prediction: missing required variables")
    
    return models, weights, submission


# =========================================================
# 13. STANDALONE EXECUTION (if run as script)
# =========================================================

if __name__ == "__main__":
    print("""
    This script is designed to be imported and run from a notebook.
    It expects the following variables to be defined:
        - X_tr, y_tr_cat, X_val, y_val_cat
        - X_test_proc, X_test (for test prediction)
        - WINDOW_SIZE, WINDOW_STRIDE
        - scaler, label_encoder, create_windows
    
    Example usage in notebook:
        import ensemble_pytorch
        models, weights, submission = ensemble_pytorch.run_ensemble(
            X_tr, y_tr_cat, X_val, y_val_cat,
            X_test_proc, X_test, WINDOW_SIZE, WINDOW_STRIDE,
            scaler, label_encoder, create_windows
        )
    """)


---\n## 🚀 Training Ensemble\n\nOra alleniamo i 9 modelli.

In [None]:
# Prepara i dati per l'ensemble\nfrom sklearn.preprocessing import label_binarize\n\n# Convert to one-hot\ny_tr_cat = label_binarize(y_tr, classes=range(len(label_encoder.classes_)))\ny_val_cat = label_binarize(y_val, classes=range(len(label_encoder.classes_)))\n\nprint(f"✅ Dati preparati:")\nprint(f"   X_tr: {X_tr.shape}")\nprint(f"   y_tr_cat: {y_tr_cat.shape}")\nprint(f"   X_val: {X_val.shape}")\nprint(f"   y_val_cat: {y_val_cat.shape}")

In [None]:
# Train ensemble\nprint("\n" + "="*60)\nprint("INIZIO TRAINING ENSEMBLE")\nprint("="*60)\n\nmodels, model_info, histories = train_ensemble(\n    X_tr, y_tr_cat, X_val, y_val_cat,\n    configs=best_configs,\n    seeds_per_config=3\n)\n\nprint(f"\n✅ Training completato! {len(models)} modelli allenati.")

---\n## 📊 Valutazione Ensemble e Calcolo Pesi EWA

In [None]:
# Evaluate ensemble\nprint("\n" + "="*60)\nprint("VALUTAZIONE ENSEMBLE")\nprint("="*60)\n\nweights, single_f1, ensemble_f1 = evaluate_ensemble(\n    models, X_val, y_val_cat, eta=15.0\n)\n\nprint(f"\n✅ F1 Ensemble: {ensemble_f1:.4f}")\nprint(f"   F1 medio singoli modelli: {single_f1.mean():.4f}")\nprint(f"   Miglioramento: {(ensemble_f1 - single_f1.mean())*100:.2f}%")

---\n## 🧪 Test Prediction con Ensemble\n\nOra facciamo le predizioni sul test set usando l'ensemble.

In [None]:
# Load test data\nX_test = pd.read_csv('pirate_pain_test.csv')\nprint(f'📊 Test Data:')\nprint(f'   Shape: {X_test.shape}')\nprint(f'   Samples: {X_test["sample_index"].nunique()}')\n\n# Apply preprocessing to test\nX_test_proc = X_test.copy()\n\n# ADVICE 07/11: Map categorical\nfor col, mapping in cat_map.items():\n    X_test_proc[col] = X_test_proc[col].map(mapping).fillna(0).astype(int)\n\n# ADVICE 12/11: Add time features\nmax_time_test = X_test_proc['time'].max()\nX_test_proc['time_sin'] = np.sin(2*np.pi*X_test_proc['time']/max_time_test)\nX_test_proc['time_cos'] = np.cos(2*np.pi*X_test_proc['time']/max_time_test)\nX_test_proc['time_norm'] = X_test_proc['time']/max_time_test\n\nprint('✅ Test preprocessing done')

In [None]:
# Ensemble prediction on test\nprint("\n" + "="*60)\nprint("PREDIZIONE TEST CON ENSEMBLE")\nprint("="*60)\n\ntest_pred, test_ids = ensemble_predict_test(\n    models, X_test_proc, X_test, weights,\n    WINDOW_SIZE, WINDOW_STRIDE, scaler, create_windows,\n    batch_size_windows=256\n)\n\nprint(f"\n✅ Predizioni completate per {len(test_pred)} campioni")

---\n## 💾 Creazione Submission

In [None]:
# Create submission\nsubmission = pd.DataFrame({\n    "sample_index": test_ids,\n    "label": label_encoder.inverse_transform(test_pred),\n})\n\nsubmission.to_csv("submission_ensemble_9models.csv", index=False)\n\nprint("✅ Submission salvata come 'submission_ensemble_9models.csv'")\nprint(f"\n📋 Preview:")\nprint(submission.head(10))\nprint(f"\nTotal predictions: {len(submission)}")\nprint(f"\nLabel distribution:")\nprint(submission['label'].value_counts())

---\n## 📈 Analisi Risultati\n\nVisualizziamo i risultati dell'ensemble.

In [None]:
import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Plot F1 scores\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))\n\n# Individual model F1 scores\nax1.bar(range(len(single_f1)), single_f1, alpha=0.7)\nax1.axhline(y=ensemble_f1, color='r', linestyle='--', label=f'Ensemble F1: {ensemble_f1:.4f}')\nax1.axhline(y=single_f1.mean(), color='g', linestyle='--', label=f'Mean F1: {single_f1.mean():.4f}')\nax1.set_xlabel('Model Index')\nax1.set_ylabel('F1 Score (macro)')\nax1.set_title('F1 Scores: Individual Models vs Ensemble')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# Model weights\nax2.bar(range(len(weights)), weights, alpha=0.7, color='orange')\nax2.set_xlabel('Model Index')\nax2.set_ylabel('Weight')\nax2.set_title('EWA Weights (eta=15.0)')\nax2.grid(True, alpha=0.3)\n\nplt.tight_layout()\nplt.show()\n\nprint("\n📊 Statistiche:")\nprint(f"   Best individual F1: {single_f1.max():.4f}")\nprint(f"   Worst individual F1: {single_f1.min():.4f}")\nprint(f"   Mean F1: {single_f1.mean():.4f}")\nprint(f"   Ensemble F1: {ensemble_f1:.4f}")\nprint(f"   Improvement over mean: {(ensemble_f1 - single_f1.mean())*100:.2f}%")\nprint(f"   Improvement over best: {(ensemble_f1 - single_f1.max())*100:.2f}%")

---\n## 🎉 Conclusioni\n\nL'ensemble di 9 modelli è stato completato con successo!\n\n### Vantaggi dell'Ensemble:\n1. **Riduzione della varianza**: mediando più modelli si riduce l'overfitting\n2. **Migliore generalizzazione**: seed diversi esplorano diversi minimi locali\n3. **Voting pesato**: i modelli migliori hanno più influenza (EWA)\n4. **Robustezza**: meno sensibile a singoli modelli fallimentari\n\n### File generato:\n- `submission_ensemble_9models.csv`: file di submission con predizioni ensemble