# **Kaggle Challenge: Pirate Pain Dataset üè¥‚Äç‚ò†Ô∏è (v9: Optimized & Faster)**

This notebook refactors the previous version for a significant speedup without changing the core model logic. The focus is on optimizing the data loading and preprocessing pipeline.

**Strategy Update:**
1.  **Pre-computation:** The `StandardScaler` is now fitted only once on the training data *before* the HPO search and K-fold training begins. This avoids redundant and expensive computations in the hot loops.
2.  **Optimized DataLoader:** The `DataLoader` is now configured with `num_workers > 0` and `persistent_workers=True`. This enables asynchronous data loading, ensuring the GPU does not sit idle waiting for the CPU to prepare data batches.
3.  **Architecture:** The powerful Attention-GRU model with Label Smoothing remains the core of our approach, now powered by a much faster data pipeline.

## ‚öôÔ∏è 1. Setup & Libraries

In [1]:
# Set seed for reproducibility
SEED = 123

# Import necessary libraries
import os
import logging
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import copy
from itertools import product
import time

# Set environment variables before importing modules
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# --- PyTorch Imports ---
import torch
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader

# --- Sklearn Imports ---
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# --- Ray[tune] & Optuna Imports ---
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from functools import partial

# --- Setup Directories & Device ---
logs_dir = "tensorboard"
os.makedirs("models", exist_ok=True)
os.makedirs("submissions", exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True
    print("\n--- Using GPU ---")
else:
    device = torch.device("cpu")
    print("\n--- Using CPU ---")

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

# Configure plot display settings
sns.set_theme(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline


--- Using GPU ---
PyTorch version: 2.5.1
Device: cuda


## üîÑ 2. Data Loading & Feature Engineering

In [2]:
print("--- 1. Loading Data ---")

# --- Define File Paths and Features ---
DATA_DIR = "data"
X_TRAIN_PATH = os.path.join(DATA_DIR, "pirate_pain_train.csv")
Y_TRAIN_PATH = os.path.join(DATA_DIR, "pirate_pain_train_labels.csv")
X_TEST_PATH = os.path.join(DATA_DIR, "pirate_pain_test.csv")
SUBMISSION_PATH = os.path.join(DATA_DIR, "sample_submission.csv")

try:
    features_long_df = pd.read_csv(X_TRAIN_PATH)
    labels_df = pd.read_csv(Y_TRAIN_PATH)
    X_test_long_df = pd.read_csv(X_TEST_PATH)
    
    N_TIMESTEPS = 160
    JOINT_FEATURES = [f"joint_{i:02d}" for i in range(31)]
    PAIN_FEATURES = [f"pain_survey_{i}" for i in range(1, 5)]
    TIME_FEATURE = ['time']
    FEATURES = JOINT_FEATURES + PAIN_FEATURES + TIME_FEATURE
    LABEL_MAPPING = {'no_pain': 0, 'low_pain': 1, 'high_pain': 2}
    N_CLASSES = len(LABEL_MAPPING)

    def reshape_data(df, features_list, n_timesteps):
        df_pivot = df.pivot(index='sample_index', columns='time', values=features_list)
        data_2d = df_pivot.values
        n_samples = data_2d.shape[0]
        data_3d = data_2d.reshape(n_samples, len(features_list), n_timesteps)
        return data_3d.transpose(0, 2, 1)

    X_train_full = reshape_data(features_long_df[features_long_df['sample_index'].isin(labels_df['sample_index'].unique())], FEATURES, N_TIMESTEPS)
    X_test_full = reshape_data(X_test_long_df, FEATURES, N_TIMESTEPS)
    y_train_full_df = labels_df.sort_values(by='sample_index')
    le = LabelEncoder().fit(list(LABEL_MAPPING.keys()))
    y_train_full = le.transform(y_train_full_df['label'])
    print(f"Loaded X_train_full (shape: {X_train_full.shape}) and y_train_full (shape: {y_train_full.shape})")
    print(f"Loaded X_test_full (shape: {X_test_full.shape})")

    print("\n--- 2. Engineering 'is_pirate' Feature ---")
    static_cols = ['sample_index', 'n_legs', 'n_hands', 'n_eyes']
    static_df = features_long_df[static_cols].drop_duplicates().set_index('sample_index')
    pirate_filter = (static_df['n_legs'] == 'one+peg_leg') | (static_df['n_hands'] == 'one+hook_hand') | (static_df['n_eyes'] == 'one+eye_patch')
    pirate_indices = static_df[pirate_filter].index
    sample_indices_ordered = sorted(features_long_df[features_long_df['sample_index'].isin(labels_df['sample_index'].unique())]['sample_index'].unique())
    is_pirate_map = np.array([1 if idx in pirate_indices else 0 for idx in sample_indices_ordered])
    pirate_feature_broadcast = np.tile(is_pirate_map.reshape(-1, 1, 1), (1, N_TIMESTEPS, 1))
    X_train_full_engineered = np.concatenate([X_train_full, pirate_feature_broadcast], axis=2)

    static_df_test = X_test_long_df[static_cols].drop_duplicates().set_index('sample_index')
    pirate_filter_test = (static_df_test['n_legs'] == 'one+peg_leg') | (static_df_test['n_hands'] == 'one+hook_hand') | (static_df_test['n_eyes'] == 'one+eye_patch')
    pirate_indices_test = static_df_test[pirate_filter_test].index
    sample_indices_test_ordered = sorted(X_test_long_df['sample_index'].unique())
    is_pirate_map_test = np.array([1 if idx in pirate_indices_test else 0 for idx in sample_indices_test_ordered])
    pirate_feature_broadcast_test = np.tile(is_pirate_map_test.reshape(-1, 1, 1), (1, N_TIMESTEPS, 1))
    X_test_full_engineered = np.concatenate([X_test_full, pirate_feature_broadcast_test], axis=2)
    
    N_FEATURES_NEW = X_train_full_engineered.shape[2]
    print(f"Created X_train_full_engineered (shape: {X_train_full_engineered.shape})")
    print(f"Created X_test_full_engineered (shape: {X_test_full_engineered.shape})")
    print(f"N_FEATURES is now: {N_FEATURES_NEW}")

    print("\n--- 3. Calculating Class Weights ---")
    class_counts_series = labels_df['label'].value_counts()
    counts_ordered = class_counts_series.reindex(LABEL_MAPPING.keys()).values
    class_weights_tensor = 1.0 / torch.tensor(counts_ordered, dtype=torch.float)
    class_weights_tensor = (class_weights_tensor / class_weights_tensor.sum()).to(device)
    print(f"Class counts (0, 1, 2): {counts_ordered}")
    print(f"Calculated class weights: {class_weights_tensor}")

except Exception as e:
    print(f"An error occurred: {e}")

--- 1. Loading Data ---
Loaded X_train_full (shape: (661, 160, 36)) and y_train_full (shape: (661,))
Loaded X_test_full (shape: (1324, 160, 36))

--- 2. Engineering 'is_pirate' Feature ---
Created X_train_full_engineered (shape: (661, 160, 37))
Created X_test_full_engineered (shape: (1324, 160, 37))
N_FEATURES is now: 37

--- 3. Calculating Class Weights ---
Class counts (0, 1, 2): [511  94  56]
Calculated class weights: tensor([0.0643, 0.3493, 0.5864], device='cuda:0')


## üõ†Ô∏è 3. Helper Functions

In [3]:
def create_sliding_windows(X_3d, y=None, window_size=100, stride=20):
    new_X, new_y, window_indices = [], [], []
    n_samples, n_timesteps, _ = X_3d.shape
    for i in range(n_samples):
        idx = 0
        while (idx + window_size) <= n_timesteps:
            new_X.append(X_3d[i, idx:idx+window_size, :])
            window_indices.append(i)
            if y is not None: new_y.append(y[i])
            idx += stride
    if y is not None:
        return np.array(new_X), np.array(new_y), np.array(window_indices)
    return np.array(new_X), np.array(window_indices)

# --- OPTIMIZED: DataLoader with multiple workers ---
def make_loader(ds, batch_size, shuffle, drop_last):
    return DataLoader(ds, batch_size=int(batch_size), shuffle=shuffle, drop_last=drop_last, 
                      num_workers=2, pin_memory=True, persistent_workers=True)

## üß† 4. Model & Training Engine

In [4]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_size, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, rnn_outputs):
        energy = torch.tanh(self.attn(rnn_outputs))
        attn_scores = self.v(energy).squeeze(2)
        attn_weights = torch.softmax(attn_scores, dim=1)
        context_vector = torch.bmm(attn_weights.unsqueeze(1), rnn_outputs).squeeze(1)
        return context_vector

class RecurrentClassifier(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes,
                 conv_out_channels, conv_kernel_size, bidirectional,
                 dropout_rate, feature_dropout_rate, rnn_type='GRU'):
        super().__init__()
        self.rnn_type, self.num_layers, self.hidden_size, self.bidirectional = \
            rnn_type, num_layers, hidden_size, bidirectional
        
        rnn_hidden_dim = hidden_size * 2 if bidirectional else hidden_size

        self.pain_embed_dim, self.pirate_embed_dim = 4, 4
        self.pain_embeddings = nn.ModuleList([nn.Embedding(3, self.pain_embed_dim) for _ in range(4)])
        self.pirate_embedding = nn.Embedding(2, self.pirate_embed_dim)
        
        num_continuous_features = 32
        total_embedding_dim = (4 * self.pain_embed_dim) + self.pirate_embed_dim
        conv_input_size = num_continuous_features + total_embedding_dim

        self.conv1d = nn.Conv1d(in_channels=conv_input_size, out_channels=conv_out_channels,
                                kernel_size=conv_kernel_size, padding='same')
        self.conv_activation = nn.ReLU()
        self.feature_dropout = nn.Dropout(feature_dropout_rate)

        self.rnn = nn.GRU(
            input_size=conv_out_channels, hidden_size=hidden_size,
            num_layers=num_layers, batch_first=True, bidirectional=bidirectional,
            dropout=dropout_rate if num_layers > 1 else 0)
        
        self.attention = Attention(rnn_hidden_dim)
        self.classifier = nn.Linear(rnn_hidden_dim, num_classes)

    def forward(self, x):
        x_continuous = x[:, :, :32]
        x_categorical = x[:, :, 32:].long()
        embedded_cats = [self.pain_embeddings[i](x_categorical[:, :, i]) for i in range(4)] \
                      + [self.pirate_embedding(x_categorical[:, :, 4])]
        x_combined = torch.cat([x_continuous] + embedded_cats, dim=2)
        x_permuted = x_combined.permute(0, 2, 1)
        x_conv = self.conv_activation(self.conv1d(x_permuted))
        x_conv_permuted = x_conv.permute(0, 2, 1)
        x_dropped = self.feature_dropout(x_conv_permuted)
        rnn_outputs, _ = self.rnn(x_dropped)
        context_vector = self.attention(rnn_outputs)
        return self.classifier(context_vector)

def train_one_epoch(model, loader, criterion, optimizer, scaler, device):
    model.train()
    total_loss, all_preds, all_targets = 0, [], []
    for x, y in loader:
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            logits = model(x)
            loss = criterion(logits, y)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item() * x.size(0)
        all_preds.append(logits.argmax(dim=1).cpu().numpy())
        all_targets.append(y.cpu().numpy())
    return total_loss / len(loader.dataset.tensors[0]), f1_score(np.concatenate(all_targets), np.concatenate(all_preds), average='weighted')

def validate_one_epoch(model, loader, criterion, device):
    model.eval()
    total_loss, all_preds, all_targets = 0, [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                logits = model(x)
                loss = criterion(logits, y)
            total_loss += loss.item() * x.size(0)
            all_preds.append(logits.argmax(dim=1).cpu().numpy())
            all_targets.append(y.cpu().numpy())
    return total_loss / len(loader.dataset.tensors[0]), f1_score(np.concatenate(all_targets), np.concatenate(all_preds), average='weighted')

# --- OPTIMIZED: Objective function now accepts pre-scaled data ---
def objective_function(config, X_train_scaled, y_train, X_val_scaled, y_val, class_weights):
    X_train_w, y_train_w, _ = create_sliding_windows(X_train_scaled, y_train, config["window_size"], config["stride"])
    X_val_w, y_val_w, _ = create_sliding_windows(X_val_scaled, y_val, config["window_size"], config["stride"])
    train_loader = make_loader(TensorDataset(torch.from_numpy(X_train_w).float(), torch.from_numpy(y_train_w).long()), config["batch_size"], True, True)
    val_loader = make_loader(TensorDataset(torch.from_numpy(X_val_w).float(), torch.from_numpy(y_val_w).long()), config["batch_size"], False, False)

    model_config = {
        'hidden_size': config['hidden_size'], 'num_layers': config['num_layers'],
        'conv_out_channels': config['conv_out_channels'], 'conv_kernel_size': config['conv_kernel_size'],
        'bidirectional': config['bidirectional'], 'dropout_rate': config['dropout_rate'],
        'feature_dropout_rate': config['feature_dropout_rate']
    }
    model = RecurrentClassifier(**model_config, num_classes=N_CLASSES, rnn_type='GRU').to(device)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"], weight_decay=config["l2_lambda"])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=350)
    scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))
    criterion = nn.CrossEntropyLoss(weight=class_weights, label_smoothing=config['label_smoothing_factor'])

    best_val_f1 = -1.0; patience_counter = 0; hpo_patience = 50
    
    for epoch in range(1, 351):
        train_loss, _ = train_one_epoch(model, train_loader, criterion, optimizer, scaler, device)
        _, val_f1 = validate_one_epoch(model, val_loader, criterion, device)
        tune.report({"val_f1": val_f1, "train_loss": train_loss})
        scheduler.step()

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1; patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= hpo_patience: break

def fit(model, train_loader, val_loader, epochs, criterion, optimizer, scheduler, scaler, device, patience, experiment_name):
    model_path = f"models/{experiment_name}_best_model.pt"
    best_f1 = -1; patience_counter = 0
    print(f"--- Starting Training: {experiment_name} ---")
    for epoch in range(1, epochs + 1):
        train_loss, train_f1 = train_one_epoch(model, train_loader, criterion, optimizer, scaler, device)
        val_loss, val_f1 = validate_one_epoch(model, val_loader, criterion, device)
        scheduler.step()

        if epoch % 5 == 0: print(f"Epoch {epoch:3d}/{epochs} | Val F1: {val_f1:.4f} | LR: {scheduler.get_last_lr()[0]:.6f}")

        if val_f1 > best_f1:
            best_f1, patience_counter = val_f1, 0
            torch.save(model.state_dict(), model_path)
        else:
            patience_counter += 1
            if patience_counter >= patience: print(f"Early stopping at epoch {epoch}. Best F1: {best_f1:.4f}"); break
    print(f"--- Finished Training --- Best F1: {best_f1:.4f}")
    model.load_state_dict(torch.load(model_path))
    return model

## üß™ 5. Phase 1: Hyperparameter Search

In [5]:
continuous_indices_orig = list(range(31)) + [35]
categorical_indices_orig = list(range(31, 35)) + [36]
X_train_full_reordered = np.concatenate([
    X_train_full_engineered[:, :, continuous_indices_orig],
    X_train_full_engineered[:, :, categorical_indices_orig]
], axis=2)

print("--- Splitting data for HPO ---")
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)
for train_idx, val_idx in sss.split(X_train_full_reordered, y_train_full):
    X_train_split, y_train_split = X_train_full_reordered[train_idx], y_train_full[train_idx]
    X_val_split, y_val_split = X_train_full_reordered[val_idx], y_train_full[val_idx]

# --- OPTIMIZED: Scale data ONCE before HPO ---
print("--- Pre-scaling data for HPO efficiency ---")
continuous_indices_reordered = list(range(32))
preprocessor_hpo = ColumnTransformer([('scaler', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')

ns, ts, f = X_train_split.shape
X_train_split_scaled = preprocessor_hpo.fit_transform(X_train_split.reshape(ns*ts, f)).reshape(ns, ts, f)

ns_val, ts_val, f_val = X_val_split.shape
X_val_split_scaled = preprocessor_hpo.transform(X_val_split.reshape(ns_val*ts_val, f_val)).reshape(ns_val, ts_val, f_val)

print(f"  X_train_split_scaled: {X_train_split_scaled.shape}\n  X_val_split_scaled:   {X_val_split_scaled.shape}")

--- Splitting data for HPO ---
--- Pre-scaling data for HPO efficiency ---
  X_train_split_scaled: (528, 160, 37)
  X_val_split_scaled:   (133, 160, 37)


In [6]:
search_space = {
    "window_size": tune.choice([10, 20]), "stride": tune.choice([1,2,5]),
    "lr": tune.loguniform(1e-4, 5e-3), "batch_size": tune.choice([64, 128]),
    "hidden_size": tune.choice([256, 384]), "num_layers": tune.choice([2, 3]),
    "dropout_rate": tune.uniform(0.1, 0.5), "feature_dropout_rate": tune.uniform(0.1, 0.5),
    "bidirectional": tune.choice([True, False]), "l2_lambda": tune.loguniform(1e-7, 1e-4),
    "conv_out_channels": tune.choice([64, 128, 256]), "conv_kernel_size": tune.choice([3, 5, 7]),
    "label_smoothing_factor": tune.uniform(0.05, 0.2)
}

def short_trial_name(trial): return f"{trial.trainable_name}_{trial.trial_id}"

if ray.is_initialized(): ray.shutdown()
ray.init(num_cpus=os.cpu_count(), num_gpus=1, ignore_reinit_error=True, log_to_driver=False)

print("Starting hyperparameter search with pre-scaled data...")
analysis = tune.run(
    tune.with_parameters(objective_function, 
                         X_train_scaled=X_train_split_scaled, y_train=y_train_split, 
                         X_val_scaled=X_val_split_scaled, y_val=y_val_split, 
                         class_weights=class_weights_tensor),
    resources_per_trial={"cpu": 4, "gpu": 0.25},
    config=search_space, 
    num_samples=200, 
    search_alg=OptunaSearch(metric="val_f1", mode="max"),
    scheduler=ASHAScheduler(metric="val_f1", mode="max", grace_period=25, reduction_factor=2),
    name="pirate_pain_optimized_search_v9", 
    verbose=1,
    trial_dirname_creator=short_trial_name
)

0,1
Current time:,2025-11-14 07:37:34
Running for:,08:33:27.69
Memory:,12.5/13.9 GiB

Trial name,# failures,error file
objective_function_01111992,1,C:/Users/KARIMN~1/AppData/Local/Temp/ray/session_2025-11-13_23-03-57_187257_30756/artifacts/2025-11-13_23-04-06/pirate_pain_optimized_search_v9/driver_artifacts/objective_function_01111992/error.txt

Trial name,status,loc,batch_size,bidirectional,conv_kernel_size,conv_out_channels,dropout_rate,feature_dropout_rate,hidden_size,l2_lambda,label_smoothing_fact or,lr,num_layers,stride,window_size,iter,total time (s),val_f1,train_loss
objective_function_44d6ea4a,RUNNING,127.0.0.1:31296,64,False,5,64,0.368111,0.295731,384,9.16776e-05,0.150822,0.00272979,2,1,20,15.0,496.957,0.912719,0.20237
objective_function_460e9926,RUNNING,127.0.0.1:5392,64,False,5,128,0.334402,0.293147,384,4.79883e-07,0.151563,0.000380981,3,2,20,20.0,402.676,0.91217,0.201775
objective_function_8370ce93,RUNNING,127.0.0.1:6580,128,False,3,128,0.355147,0.477918,256,4.40411e-05,0.137933,0.000503923,3,2,20,7.0,81.5055,0.923502,0.19238
objective_function_f28e5581,RUNNING,127.0.0.1:8464,128,False,3,128,0.297562,0.481637,256,5.32035e-05,0.10044,0.000473951,3,2,20,1.0,25.8469,0.881048,0.252615
objective_function_15cc5001,PENDING,,128,False,3,128,0.301915,0.47721,384,3.26953e-06,0.10042,0.000253177,3,2,20,,,,
objective_function_c82f4465,TERMINATED,127.0.0.1:20516,128,True,5,256,0.109717,0.272803,256,1.54533e-06,0.1542,0.00276141,2,2,10,25.0,250.106,0.924608,0.205229
objective_function_b209ca5d,TERMINATED,127.0.0.1:12544,128,False,5,64,0.291513,0.321072,384,5.07524e-05,0.146414,0.00025807,2,2,10,100.0,768.599,0.930036,0.194814
objective_function_23392aeb,TERMINATED,127.0.0.1:29520,64,True,7,128,0.436961,0.495259,256,8.7102e-07,0.14924,0.00419498,3,5,10,25.0,287.402,0.882396,0.213864
objective_function_e6abc8ae,TERMINATED,127.0.0.1:11748,128,True,7,128,0.107809,0.126095,384,2.69024e-05,0.199664,0.000136339,2,2,20,25.0,303.889,0.918451,0.244886
objective_function_ccf5b5a9,TERMINATED,127.0.0.1:5832,64,True,5,256,0.406167,0.414276,384,6.28909e-05,0.185799,0.000119779,2,5,20,25.0,242.808,0.916554,0.235536


2025-11-14 02:52:28,936	ERROR tune_controller.py:1331 -- Trial task failed for trial objective_function_01111992
Traceback (most recent call last):
  File "e:\miniconda3\envs\an2dl-kaggle\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File "e:\miniconda3\envs\an2dl-kaggle\lib\site-packages\ray\_private\auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "e:\miniconda3\envs\an2dl-kaggle\lib\site-packages\ray\_private\client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
  File "e:\miniconda3\envs\an2dl-kaggle\lib\site-packages\ray\_private\worker.py", line 2961, in get
    values, debugger_breakpoint = worker.get_objects(
  File "e:\miniconda3\envs\an2dl-kaggle\lib\site-packages\ray\_private\worker.py", line 1028, in get_objects
    raise value
ray.exceptions.ActorDiedError: The actor died unexpectedly before finishing this task.
	class_name: ImplicitFun

[33m(raylet)[0m A worker died or was killed while executing a task by an unexpected system error. To troubleshoot the problem, check the logs for the dead worker. Lease ID: 50000000ee8617afc10ac95a63d329169c909a0da108117770af840d98cbaaa4 Worker ID: 04c781ad7b878c86a211e957fb3a06e84026ded6ed2100169fa07e30 Node ID: fbe412221844697448485b3d899a3bc645cc768bf6e539cc72273ff4 Worker IP address: 127.0.0.1 Worker port: 57875 Worker PID: 30156 Worker exit type: SYSTEM_ERROR Worker exit detail: Worker unexpectedly exits with a connection error code 10054. An existing connection was forcibly closed by the remote host. There are some potential root causes. (1) The process is killed by SIGKILL by OOM killer due to high memory usage. (2) ray stop --force is called. (3) The worker is crashed unexpectedly due to SIGSEGV or other unexpected errors.


2025-11-14 07:37:34,430	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Karim Negm/ray_results/pirate_pain_optimized_search_v9' in 0.2658s.
2025-11-14 07:37:44,487	ERROR tune.py:1037 -- Trials did not complete: [objective_function_01111992]
2025-11-14 07:37:44,488	INFO tune.py:1041 -- Total run time: 30817.80 seconds (30807.42 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- objective_function_15cc5001: FileNotFoundError('Could not fetch metrics for objective_function_15cc5001: both result.json and progress.csv were not found at C:/Users/Karim Negm/ray_results/pirate_pain_optimized_search_v9/objective_function_15cc5001')


In [7]:
print("\n--- Search Complete ---\n")
print("Getting best trial from analysis...")
best_trial = analysis.get_best_trial(metric="val_f1", mode="max", scope="all")
if best_trial:
    FINAL_CONFIG = best_trial.config
    FINAL_BEST_VAL_F1 = best_trial.last_result["val_f1"]
    print(f"Best validation F1 score: {FINAL_BEST_VAL_F1:.4f}")
    print("Best hyperparameters found:")
    print(FINAL_CONFIG)
else:
    print("ERROR: No trials completed successfully. Using a default config.")
    FINAL_CONFIG = {'window_size': 10, 'stride': 2, 'lr': 0.001, 'batch_size': 128, 'hidden_size': 256, 'num_layers': 2, 'dropout_rate': 0.3, 'feature_dropout_rate': 0.3, 'bidirectional': True, 'l2_lambda': 1e-06, 'conv_out_channels': 128, 'conv_kernel_size': 5, 'label_smoothing_factor': 0.1}
    FINAL_BEST_VAL_F1 = 0.0

del X_train_split, y_train_split, X_val_split, y_val_split, X_train_split_scaled, X_val_split_scaled


--- Search Complete ---

Getting best trial from analysis...
Best validation F1 score: 0.9412
Best hyperparameters found:
{'window_size': 20, 'stride': 1, 'lr': 0.00040057525160210354, 'batch_size': 128, 'hidden_size': 384, 'num_layers': 3, 'dropout_rate': 0.3988574060987872, 'feature_dropout_rate': 0.29861539642406965, 'bidirectional': False, 'l2_lambda': 1.5160108671301372e-05, 'conv_out_channels': 64, 'conv_kernel_size': 5, 'label_smoothing_factor': 0.08447179682487424}


## üèÜ 6. Phase 2: K-Fold Ensemble Training

In [8]:
print("--- üèÜ Final Configuration Set --- ")
print(f"Best Val F1 from HPO search: {FINAL_BEST_VAL_F1:.4f}")
print(FINAL_CONFIG)

N_SPLITS = 5
FINAL_EXPERIMENT_NAME = f"Optimized-Attn-GRU_H{FINAL_CONFIG['hidden_size']}_L{FINAL_CONFIG['num_layers']}_" \
                      f"C{FINAL_CONFIG['conv_out_channels']}_K{FINAL_CONFIG['conv_kernel_size']}_v9"
submission_filename_base = f"submission_{FINAL_EXPERIMENT_NAME}.csv"
print(f"Submission name will be: {submission_filename_base}")

--- üèÜ Final Configuration Set --- 
Best Val F1 from HPO search: 0.9412
{'window_size': 20, 'stride': 1, 'lr': 0.00040057525160210354, 'batch_size': 128, 'hidden_size': 384, 'num_layers': 3, 'dropout_rate': 0.3988574060987872, 'feature_dropout_rate': 0.29861539642406965, 'bidirectional': False, 'l2_lambda': 1.5160108671301372e-05, 'conv_out_channels': 64, 'conv_kernel_size': 5, 'label_smoothing_factor': 0.08447179682487424}
Submission name will be: submission_Optimized-Attn-GRU_H384_L3_C64_K5_v9.csv


In [9]:
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
fold_val_f1_list = []
continuous_indices_reordered = list(range(32))
EPOCHS = 350

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_full_reordered, y_train_full)):
    fold_name = f"{FINAL_EXPERIMENT_NAME}_fold_{fold+1}"
    print(f"\n--- Fold {fold+1}/{N_SPLITS} --- ({fold_name}) ---")
    
    X_train_fold, y_train_fold = X_train_full_reordered[train_idx], y_train_full[train_idx]
    X_val_fold, y_val_fold = X_train_full_reordered[val_idx], y_train_full[val_idx]

    # --- OPTIMIZED: Scale data ONCE per fold ---
    preprocessor_fold = ColumnTransformer([('s', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')
    ns, ts, f = X_train_fold.shape
    X_train_scaled = preprocessor_fold.fit_transform(X_train_fold.reshape(ns*ts, f)).reshape(ns, ts, f)
    ns_val, ts_val, f_val = X_val_fold.shape
    X_val_scaled = preprocessor_fold.transform(X_val_fold.reshape(ns_val*ts_val, f_val)).reshape(ns_val, ts_val, f_val)
    
    X_train_w, y_train_w, _ = create_sliding_windows(X_train_scaled, y_train_fold, FINAL_CONFIG['window_size'], FINAL_CONFIG['stride'])
    X_val_w, y_val_w, _ = create_sliding_windows(X_val_scaled, y_val_fold, FINAL_CONFIG['window_size'], FINAL_CONFIG['stride'])
    train_loader = make_loader(TensorDataset(torch.from_numpy(X_train_w).float(), torch.from_numpy(y_train_w).long()), FINAL_CONFIG['batch_size'], True, True)
    val_loader = make_loader(TensorDataset(torch.from_numpy(X_val_w).float(), torch.from_numpy(y_val_w).long()), FINAL_CONFIG['batch_size'], False, False)

    model_config_kfold = {k: v for k, v in FINAL_CONFIG.items() if k not in ['window_size', 'stride', 'lr', 'batch_size', 'l2_lambda', 'label_smoothing_factor']}
    model_fold = RecurrentClassifier(**model_config_kfold, num_classes=N_CLASSES).to(device)
    
    optimizer = torch.optim.AdamW(model_fold.parameters(), lr=FINAL_CONFIG['lr'], weight_decay=FINAL_CONFIG['l2_lambda'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor, label_smoothing=FINAL_CONFIG['label_smoothing_factor'])

    model_fold = fit(model_fold, train_loader, val_loader, EPOCHS, criterion, optimizer, scheduler, scaler, device, 50, fold_name)
    _, val_f1 = validate_one_epoch(model_fold, val_loader, criterion, device)
    fold_val_f1_list.append(val_f1)
    print(f"Fold {fold+1} Final Val F1: {val_f1:.4f}")

print(f"\n--- üèÜ K-Fold Training Complete --- Average F1: {np.mean(fold_val_f1_list):.4f}")


--- Fold 1/5 --- (Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_1) ---
--- Starting Training: Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_1 ---
Epoch   5/350 | Val F1: 0.8974 | LR: 0.000400
Epoch  10/350 | Val F1: 0.9044 | LR: 0.000400
Epoch  15/350 | Val F1: 0.8951 | LR: 0.000399
Epoch  20/350 | Val F1: 0.8929 | LR: 0.000397
Epoch  25/350 | Val F1: 0.9008 | LR: 0.000396
Epoch  30/350 | Val F1: 0.9011 | LR: 0.000393
Epoch  35/350 | Val F1: 0.9040 | LR: 0.000391
Epoch  40/350 | Val F1: 0.9046 | LR: 0.000388
Epoch  45/350 | Val F1: 0.9037 | LR: 0.000384
Epoch  50/350 | Val F1: 0.8971 | LR: 0.000381
Epoch  55/350 | Val F1: 0.8971 | LR: 0.000377
Epoch  60/350 | Val F1: 0.8970 | LR: 0.000372
Epoch  65/350 | Val F1: 0.9017 | LR: 0.000367
Epoch  70/350 | Val F1: 0.9030 | LR: 0.000362
Epoch  75/350 | Val F1: 0.8949 | LR: 0.000357
Epoch  80/350 | Val F1: 0.8968 | LR: 0.000351
Epoch  85/350 | Val F1: 0.9013 | LR: 0.000345
Early stopping at epoch 89. Best F1: 0.9084
--- Finished Training --- Best F1: 

## üì¨ 7. Phase 3: Ensemble Submission

In [None]:
print("\n--- Preparing test dataset for submission ---")
continuous_indices_orig = list(range(31)) + [35]
categorical_indices_orig = list(range(31, 35)) + [36]
X_test_full_reordered = np.concatenate([
    X_test_full_engineered[:, :, continuous_indices_orig],
    X_test_full_engineered[:, :, categorical_indices_orig]], axis=2)

continuous_indices_reordered = list(range(32))
preprocessor_final = ColumnTransformer([('scaler', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')

# --- OPTIMIZED: Fit scaler on the FULL training data before transforming test data ---
ns, ts, f = X_train_full_reordered.shape
preprocessor_final.fit(X_train_full_reordered.reshape(ns * ts, f))

ns_test, ts_test, f_test = X_test_full_reordered.shape
X_test_scaled = preprocessor_final.transform(X_test_full_reordered.reshape(ns_test * ts_test, f_test)).reshape(ns_test, ts_test, f_test)
X_test_w, test_window_indices = create_sliding_windows(X_test_scaled, y=None, window_size=FINAL_CONFIG['window_size'], stride=FINAL_CONFIG['stride'])
test_loader = make_loader(TensorDataset(torch.from_numpy(X_test_w).float()), FINAL_CONFIG['batch_size'], False, False)

model_config_final = {k: v for k, v in FINAL_CONFIG.items() if k not in ['window_size', 'stride', 'lr', 'batch_size', 'l2_lambda', 'label_smoothing_factor']}
all_fold_probabilities = []

for fold in range(N_SPLITS):
    if fold == 2: continue  # Skip the worst performing folds
    fold_name = f"{FINAL_EXPERIMENT_NAME}_fold_{fold+1}"
    model_path = f"models/{fold_name}_best_model.pt"
    print(f"Loading model {fold+1}/{N_SPLITS} from {model_path}...")
    model_fold = RecurrentClassifier(**model_config_final, num_classes=N_CLASSES).to(device)
    model_fold.load_state_dict(torch.load(model_path, map_location=device))
    model_fold.eval()
    
    fold_preds = []
    with torch.no_grad():
        for (inputs,) in test_loader:
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                probs = torch.softmax(model_fold(inputs.to(device)), dim=1)
                fold_preds.append(probs.cpu().numpy())
    all_fold_probabilities.append(np.concatenate(fold_preds))

mean_probabilities = np.mean(all_fold_probabilities, axis=0)
df_probs = pd.DataFrame(mean_probabilities, columns=[f"prob_{i}" for i in range(N_CLASSES)])
df_probs['original_index'] = test_window_indices
agg_probs = df_probs.groupby('original_index')[[f"prob_{i}" for i in range(N_CLASSES)]].mean().values
final_predictions = le.inverse_transform(np.argmax(agg_probs, axis=1))

submission_df = pd.DataFrame({'sample_index': sorted(X_test_long_df['sample_index'].unique()), 'label': final_predictions})
submission_df['sample_index'] = submission_df['sample_index'].apply(lambda x: f"{x:03d}")
submission_filepath = os.path.join("submissions", submission_filename_base)
submission_df.to_csv(submission_filepath, index=False)
print(f"\nSuccessfully saved to {submission_filepath}!")
print(submission_df.head())


--- Preparing test dataset for submission ---
Loading model 1/5 from models/Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_1_best_model.pt...
Loading model 2/5 from models/Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_2_best_model.pt...
Loading model 4/5 from models/Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_4_best_model.pt...
Loading model 5/5 from models/Optimized-Attn-GRU_H384_L3_C64_K5_v9_fold_5_best_model.pt...

Successfully saved to submissions\submission_Optimized-Attn-GRU_H384_L3_C64_K5_v9.csv!
  sample_index    label
0          000  no_pain
1          001  no_pain
2          002  no_pain
3          003  no_pain
4          004  no_pain
