# **Kaggle Challenge: Pirate Pain Dataset üè¥‚Äç‚ò†Ô∏è (v13: Final Strategy)**

This notebook contains a final, consolidated strategy designed to address the critical class imbalance issue, particularly the low recall for the `high_pain` class. It incorporates several major enhancements based on the data analysis.

**üî• Final Strategy Updates:**
1.  **Feature Cleaning (`joint_30` Removed):** The zero-variance feature `joint_30` has been removed. All column indexing and model layers have been updated to reflect the new feature count.
2.  **WeightedRandomSampler:** To combat imbalance at the batch level, a `WeightedRandomSampler` is now used for the training `DataLoader`. This oversamples minority classes (`low_pain`, `high_pain`) to ensure the model sees a more balanced set of examples in every batch.
3.  **Noise Augmentation:** To improve generalization for minority classes, simple Gaussian noise is added to the training samples for `low_pain` and `high_pain`. This creates new, synthetic training data and forces the model to learn more robust features.
4.  **Top-K Averaging for Submission:** The submission logic has been upgraded. Instead of averaging predictions from all sliding windows (which dilutes the signal), it now identifies the **Top 5 windows** most indicative of `high_pain` and averages their predictions for the final classification. This focuses the decision on the most critical moments in the time series.
5.  **Compiled Model & OneCycleLR:** We retain the use of `torch.compile()` for speed and the `OneCycleLR` scheduler for efficient training.

## ‚öôÔ∏è 1. Setup & Libraries

In [1]:
# Set seed for reproducibility
SEED = 1234

# Import necessary libraries
import os
import logging
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import copy
from itertools import product
import time
import gc

# Set environment variables before importing modules
os.environ['MPLCONFIGDIR'] = os.getcwd() + '/configs/'

# Suppress warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

# --- PyTorch Imports ---
import torch
from sklearn.utils.class_weight import compute_class_weight
from torch import nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler

# --- Sklearn Imports ---
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# --- Ray[tune] & Optuna Imports ---
import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from functools import partial

# --- Setup Directories & Device ---
logs_dir = "tensorboard"
os.makedirs("models", exist_ok=True)
os.makedirs("submissions", exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)

if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.benchmark = True
    print("\n--- Using GPU ---")
else:
    device = torch.device("cpu")
    print("\n--- Using CPU ---")

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")

# Configure plot display settings
sns.set_theme(font_scale=1.4)
sns.set_style('white')
plt.rc('font', size=14)
%matplotlib inline


--- Using GPU ---
PyTorch version: 2.5.1
Device: cuda


## üîÑ 2. Data Loading & Feature Engineering

In [2]:
print("--- 1. Loading Data ---")

# --- Define File Paths and Features ---
DATA_DIR = "data"
X_TRAIN_PATH = os.path.join(DATA_DIR, "pirate_pain_train.csv")
Y_TRAIN_PATH = os.path.join(DATA_DIR, "pirate_pain_train_labels.csv")
X_TEST_PATH = os.path.join(DATA_DIR, "pirate_pain_test.csv")
SUBMISSION_PATH = os.path.join(DATA_DIR, "sample_submission.csv")

try:
    features_long_df = pd.read_csv(X_TRAIN_PATH)
    labels_df = pd.read_csv(Y_TRAIN_PATH)
    X_test_long_df = pd.read_csv(X_TEST_PATH)
    
    # --- MODIFICATION: Remove joint_30 (zero variance) ---
    if 'joint_30' in features_long_df.columns:
        features_long_df = features_long_df.drop(columns=['joint_30'])
        X_test_long_df = X_test_long_df.drop(columns=['joint_30'])
        print("Removed zero-variance feature: 'joint_30'")
    
    N_TIMESTEPS = 160
    # --- MODIFICATION: JOINT_FEATURES now goes up to 29, not 30 ---
    JOINT_FEATURES = [f"joint_{i:02d}" for i in range(30)]
    PAIN_FEATURES = [f"pain_survey_{i}" for i in range(1, 5)]
    TIME_FEATURE = ['time']
    FEATURES = JOINT_FEATURES + PAIN_FEATURES + TIME_FEATURE
    LABEL_MAPPING = {'no_pain': 0, 'low_pain': 1, 'high_pain': 2}
    N_CLASSES = len(LABEL_MAPPING)

    def reshape_data(df, features_list, n_timesteps):
        df_pivot = df.pivot(index='sample_index', columns='time', values=features_list)
        data_2d = df_pivot.values
        n_samples = data_2d.shape[0]
        data_3d = data_2d.reshape(n_samples, len(features_list), n_timesteps)
        return data_3d.transpose(0, 2, 1)

    X_train_full = reshape_data(features_long_df[features_long_df['sample_index'].isin(labels_df['sample_index'].unique())], FEATURES, N_TIMESTEPS)
    X_test_full = reshape_data(X_test_long_df, FEATURES, N_TIMESTEPS)
    y_train_full_df = labels_df.sort_values(by='sample_index')
    le = LabelEncoder().fit(list(LABEL_MAPPING.keys()))
    y_train_full = le.transform(y_train_full_df['label'])
    print(f"Loaded X_train_full (shape: {X_train_full.shape}) and y_train_full (shape: {y_train_full.shape})")
    print(f"Loaded X_test_full (shape: {X_test_full.shape})")

    print("\n--- 2. Engineering 'is_pirate' Feature ---")
    static_cols = ['sample_index', 'n_legs', 'n_hands', 'n_eyes']
    static_df = features_long_df[static_cols].drop_duplicates().set_index('sample_index')
    pirate_filter = (static_df['n_legs'] == 'one+peg_leg') | (static_df['n_hands'] == 'one+hook_hand') | (static_df['n_eyes'] == 'one+eye_patch')
    pirate_indices = static_df[pirate_filter].index
    sample_indices_ordered = sorted(features_long_df[features_long_df['sample_index'].isin(labels_df['sample_index'].unique())]['sample_index'].unique())
    is_pirate_map = np.array([1 if idx in pirate_indices else 0 for idx in sample_indices_ordered])
    pirate_feature_broadcast = np.tile(is_pirate_map.reshape(-1, 1, 1), (1, N_TIMESTEPS, 1))
    X_train_full_engineered = np.concatenate([X_train_full, pirate_feature_broadcast], axis=2)

    static_df_test = X_test_long_df[static_cols].drop_duplicates().set_index('sample_index')
    pirate_filter_test = (static_df_test['n_legs'] == 'one+peg_leg') | (static_df_test['n_hands'] == 'one+hook_hand') | (static_df_test['n_eyes'] == 'one+eye_patch')
    pirate_indices_test = static_df_test[pirate_filter_test].index
    sample_indices_test_ordered = sorted(X_test_long_df['sample_index'].unique())
    is_pirate_map_test = np.array([1 if idx in pirate_indices_test else 0 for idx in sample_indices_test_ordered])
    pirate_feature_broadcast_test = np.tile(is_pirate_map_test.reshape(-1, 1, 1), (1, N_TIMESTEPS, 1))
    X_test_full_engineered = np.concatenate([X_test_full, pirate_feature_broadcast_test], axis=2)
    
    N_FEATURES_NEW = X_train_full_engineered.shape[2]
    print(f"Created X_train_full_engineered (shape: {X_train_full_engineered.shape})")
    print(f"Created X_test_full_engineered (shape: {X_test_full_engineered.shape})")
    print(f"N_FEATURES is now: {N_FEATURES_NEW}")

    print("\n--- 3. Calculating Alpha Weights for Focal Loss ---")
    class_counts_series = labels_df['label'].value_counts()
    counts_ordered = class_counts_series.reindex(LABEL_MAPPING.keys()).values
    class_weights_tensor = 1.0 / torch.tensor(counts_ordered, dtype=torch.float)
    alpha_tensor = (class_weights_tensor / class_weights_tensor.sum()).to(device)
    print(f"Class counts (0, 1, 2): {counts_ordered}")
    print(f"Calculated alpha weights: {alpha_tensor}")

except Exception as e:
    print(f"An error occurred: {e}")

--- 1. Loading Data ---
Removed zero-variance feature: 'joint_30'
Loaded X_train_full (shape: (661, 160, 35)) and y_train_full (shape: (661,))
Loaded X_test_full (shape: (1324, 160, 35))

--- 2. Engineering 'is_pirate' Feature ---
Created X_train_full_engineered (shape: (661, 160, 36))
Created X_test_full_engineered (shape: (1324, 160, 36))
N_FEATURES is now: 36

--- 3. Calculating Alpha Weights for Focal Loss ---
Class counts (0, 1, 2): [511  94  56]
Calculated alpha weights: tensor([0.0643, 0.3493, 0.5864], device='cuda:0')


In [3]:
print("\n\n======================================================================")
print("--- EARLY SANITY CHECK: Verifying Feature Indexing Logic ---")
print("======================================================================\n")

# This check ensures that after removing 'joint_30', our manual indexing correctly\n",
# separates the remaining continuous and categorical features.

# --- 1. Recreate the list of engineered feature names in order ---
engineered_feature_names = JOINT_FEATURES + PAIN_FEATURES + TIME_FEATURE + ['is_pirate']
total_features = len(engineered_feature_names)
print(f"Detected a total of {total_features} features in the engineered data (X_train_full_engineered).")

# --- 2. Define the exact index lists that will be used later in the pipeline ---
# This logic MUST match the logic used in the HPO and K-Fold cells.
continuous_indices_orig = list(range(30)) + [34]  # 30 joints (0-29) + time (34)
categorical_indices_orig = list(range(30, 34)) + [35] # 4 pain surveys (30-33) + is_pirate (35)

print(f"Defined {len(continuous_indices_orig)} continuous indices and {len(categorical_indices_orig)} categorical indices.\n")

# --- 3. Map indices to names to verify what we are grabbing ---
feature_array = np.array(engineered_feature_names)
grabbed_continuous_features = feature_array[continuous_indices_orig].tolist()
grabbed_categorical_features = feature_array[categorical_indices_orig].tolist()

print("--- CONTINUOUS features being grabbed:")
print(f"First 5: {grabbed_continuous_features[:5]}")
print(f"Last 5: {grabbed_continuous_features[-5:]}")
print("  -> OK if this list contains all 'joint_XX' features and the 'time' feature.\n")

print("--- CATEGORICAL features being grabbed:")
print(grabbed_categorical_features)
print("  -> OK if this list contains all 'pain_survey_X' features and 'is_pirate'.\n")

# --- 4. Perform automated checks for correctness ---
print("--- AUTOMATED CHECKS ---")
errors_found = False

# Check for completeness
if len(continuous_indices_orig) + len(categorical_indices_orig) != total_features:
    print(f"  -> !!! ERROR: Index count mismatch! Sum of indices ({len(continuous_indices_orig) + len(categorical_indices_orig)}) != Total Features ({total_features})")
    errors_found = True
else:
    print("  -> OK: Sum of indices matches total feature count.")

# Check for overlap
overlap = set(continuous_indices_orig).intersection(set(categorical_indices_orig))
if overlap:
    print(f"  -> !!! ERROR: Overlap detected between continuous and categorical indices: {overlap}")
    errors_found = True
else:
    print("  -> OK: No overlap between continuous and categorical indices.")

# Check if 'time' was correctly identified as continuous
if 'time' not in grabbed_continuous_features:
    print("  -> !!! ERROR: 'time' feature was not correctly assigned to the continuous group.")
    errors_found = True
else:
    print("  -> OK: 'time' feature correctly identified as continuous.")

print("\n======================================================================")
if errors_found:
    print("--- SANITY CHECK FAILED: Review the index definitions before proceeding! ---")
    # This will stop the notebook execution if an error is found
    raise ValueError("Sanity check for feature indexing failed. Please review the output.")
else:
    print("--- SANITY CHECK PASSED: Feature indexing logic is correct. ---")
print("======================================================================\n")



--- EARLY SANITY CHECK: Verifying Feature Indexing Logic ---

Detected a total of 36 features in the engineered data (X_train_full_engineered).
Defined 31 continuous indices and 5 categorical indices.

--- CONTINUOUS features being grabbed:
First 5: ['joint_00', 'joint_01', 'joint_02', 'joint_03', 'joint_04']
Last 5: ['joint_26', 'joint_27', 'joint_28', 'joint_29', 'time']
  -> OK if this list contains all 'joint_XX' features and the 'time' feature.

--- CATEGORICAL features being grabbed:
['pain_survey_1', 'pain_survey_2', 'pain_survey_3', 'pain_survey_4', 'is_pirate']
  -> OK if this list contains all 'pain_survey_X' features and 'is_pirate'.

--- AUTOMATED CHECKS ---
  -> OK: Sum of indices matches total feature count.
  -> OK: No overlap between continuous and categorical indices.
  -> OK: 'time' feature correctly identified as continuous.

--- SANITY CHECK PASSED: Feature indexing logic is correct. ---



## üõ†Ô∏è 3. Helper Functions & Custom Loss

In [4]:
class FocalLoss(nn.Module):
    """Implements Focal Loss for cost-sensitive learning."""
    def __init__(self, alpha=None, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, logits, targets):
        ce_loss = F.cross_entropy(logits, targets, reduction='none')
        pt = torch.exp(-ce_loss)
        focal_loss = (1 - pt) ** self.gamma * ce_loss

        if self.alpha is not None:
            alpha_t = self.alpha[targets].to(focal_loss.device)
            focal_loss = alpha_t * focal_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

def create_sliding_windows(X_3d, y=None, window_size=100, stride=20):
    new_X, new_y, window_indices = [], [], []
    n_samples, n_timesteps, _ = X_3d.shape
    for i in range(n_samples):
        idx = 0
        while (idx + window_size) <= n_timesteps:
            new_X.append(X_3d[i, idx:idx+window_size, :])
            window_indices.append(i)
            if y is not None: new_y.append(y[i])
            idx += stride
    if y is not None:
        return np.array(new_X), np.array(new_y), np.array(window_indices)
    return np.array(new_X), np.array(window_indices)

# --- MODIFICATION: make_loader now accepts a sampler for weighted sampling ---
def make_loader(ds, batch_size, shuffle, drop_last, sampler=None):
    # Sampler and shuffle are mutually exclusive in PyTorch's DataLoader
    use_shuffle = shuffle if sampler is None else False
    return DataLoader(ds, batch_size=int(batch_size), shuffle=use_shuffle, drop_last=drop_last, 
                      num_workers=2, pin_memory=True, persistent_workers=True, sampler=sampler)

# --- MODIFICATION: New function for data augmentation ---
def augment_minority_classes(X_w, y_w, continuous_feature_count, noise_level=0.01, aug_factor=2):
    """Augments minority classes (1 and 2) with noise injection."""
    print(f"Augmenting minority classes... Original sample count: {len(X_w)}")
    X_aug_list, y_aug_list = [X_w], [y_w]
    
    # Find indices for low_pain (1) and high_pain (2)
    minority_indices = np.where((y_w == 1) | (y_w == 2))[0]
    
    if len(minority_indices) == 0:
        print("No minority class samples found to augment.")
        return X_w, y_w
        
    for i in range(aug_factor):
        X_to_augment = X_w[minority_indices]
        
        # Create Gaussian noise with the same shape as the data to be augmented
        noise = np.random.normal(0, noise_level, X_to_augment.shape)
        
        # Create a zero-filled copy to store the augmented data
        X_augmented = X_to_augment.copy()
        
        # Add noise ONLY to the continuous features, leaving categoricals untouched
        X_augmented[:, :, :continuous_feature_count] += noise[:, :, :continuous_feature_count]
        
        X_aug_list.append(X_augmented)
        y_aug_list.append(y_w[minority_indices])
        
    X_final_aug = np.concatenate(X_aug_list, axis=0)
    y_final_aug = np.concatenate(y_aug_list, axis=0)
    print(f"Augmentation complete. New sample count: {len(X_final_aug)}")
    
    return X_final_aug, y_final_aug

## üß† 4. Model & Training Engine

In [5]:
class Attention(nn.Module):
    def __init__(self, hidden_size):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_size, hidden_size)
        self.v = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, rnn_outputs):
        energy = torch.tanh(self.attn(rnn_outputs))
        attn_scores = self.v(energy).squeeze(2)
        attn_weights = torch.softmax(attn_scores, dim=1)
        context_vector = torch.bmm(attn_weights.unsqueeze(1), rnn_outputs).squeeze(1)
        return context_vector

class RecurrentClassifier(nn.Module):
    def __init__(self, hidden_size, num_layers, num_classes,
                 conv_out_channels, conv_kernel_size, bidirectional,
                 dropout_rate, feature_dropout_rate, rnn_type='GRU'):
        super().__init__()
        self.rnn_type, self.num_layers, self.hidden_size, self.bidirectional = \
            rnn_type, num_layers, hidden_size, bidirectional
        
        rnn_hidden_dim = hidden_size * 2 if bidirectional else hidden_size

        self.pain_embed_dim, self.pirate_embed_dim = 4, 4
        self.pain_embeddings = nn.ModuleList([nn.Embedding(3, self.pain_embed_dim) for _ in range(4)])
        self.pirate_embedding = nn.Embedding(2, self.pirate_embed_dim)
        
        # --- MODIFICATION: Continuous features count is now 31 (30 joints + 1 time), not 32 ---
        num_continuous_features = 31
        total_embedding_dim = (4 * self.pain_embed_dim) + self.pirate_embed_dim
        conv_input_size = num_continuous_features + total_embedding_dim

        self.conv1d = nn.Conv1d(in_channels=conv_input_size, out_channels=conv_out_channels,
                                kernel_size=conv_kernel_size, padding='same')
        self.conv_activation = nn.ReLU()
        self.feature_dropout = nn.Dropout(feature_dropout_rate)

        if rnn_type == 'GRU':
            self.rnn = nn.GRU(
                input_size=conv_out_channels, hidden_size=hidden_size,
                num_layers=num_layers, batch_first=True, bidirectional=bidirectional,
                dropout=dropout_rate if num_layers > 1 else 0)
        elif rnn_type == 'LSTM':
            self.rnn = nn.LSTM(
                input_size=conv_out_channels, hidden_size=hidden_size,
                num_layers=num_layers, batch_first=True, bidirectional=bidirectional,
                dropout=dropout_rate if num_layers > 1 else 0)
        
        self.attention = Attention(rnn_hidden_dim)
        self.classifier = nn.Linear(rnn_hidden_dim, num_classes)

    def forward(self, x):
        # --- MODIFICATION: Slicing adjusted for 31 continuous features ---
        x_continuous = x[:, :, :31]
        x_categorical = x[:, :, 31:].long()
        
        embedded_cats = [self.pain_embeddings[i](x_categorical[:, :, i]) for i in range(4)] \
                      + [self.pirate_embedding(x_categorical[:, :, 4])]
        x_combined = torch.cat([x_continuous] + embedded_cats, dim=2)
        x_permuted = x_combined.permute(0, 2, 1)
        x_conv = self.conv_activation(self.conv1d(x_permuted))
        x_conv_permuted = x_conv.permute(0, 2, 1)
        x_dropped = self.feature_dropout(x_conv_permuted)
        rnn_outputs, _ = self.rnn(x_dropped)
        context_vector = self.attention(rnn_outputs)
        return self.classifier(context_vector)

def train_one_epoch(model, loader, criterion, optimizer, scaler, scheduler, device):
    model.train()
    total_loss, all_preds, all_targets = 0, [], []
    for x, y in loader:
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)
        with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
            logits = model(x)
            loss = criterion(logits, y)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step() # <-- OneCycleLR is stepped each batch
        total_loss += loss.item() * x.size(0)
        all_preds.append(logits.argmax(dim=1).cpu().numpy())
        all_targets.append(y.cpu().numpy())
    return total_loss / len(loader.sampler if loader.sampler else loader.dataset), f1_score(np.concatenate(all_targets), np.concatenate(all_preds), average='weighted')

def validate_one_epoch(model, loader, criterion, device):
    model.eval()
    total_loss, all_preds, all_targets = 0, [], []
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                logits = model(x)
                loss = criterion(logits, y)
            total_loss += loss.item() * x.size(0)
            all_preds.append(logits.argmax(dim=1).cpu().numpy())
            all_targets.append(y.cpu().numpy())
    return total_loss / len(loader.dataset.tensors[0]), f1_score(np.concatenate(all_targets), np.concatenate(all_preds), average='weighted')

def objective_function(config, X_train_w, y_train_w, X_val_w, y_val_w, alpha_tensor):
    EPOCHS = 150
    train_loader = make_loader(TensorDataset(X_train_w, y_train_w), config["batch_size"], True, True)
    val_loader = make_loader(TensorDataset(X_val_w, y_val_w), config["batch_size"], False, False)

    model_config = {k: v for k, v in config.items() if k not in ['lr', 'batch_size', 'l2_lambda', 'focal_loss_gamma']}
    model = RecurrentClassifier(**model_config, num_classes=N_CLASSES).to(device)
    model = torch.compile(model, backend="eager") # <-- SPEEDUP
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"], weight_decay=config["l2_lambda"])
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=config["lr"], epochs=EPOCHS, steps_per_epoch=len(train_loader)
    )
    scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))
    criterion = FocalLoss(alpha=alpha_tensor, gamma=config['focal_loss_gamma'])

    best_val_f1 = -1.0; patience_counter = 0; hpo_patience = 30
    
    for epoch in range(1, EPOCHS + 1):
        train_loss, _ = train_one_epoch(model, train_loader, criterion, optimizer, scaler, scheduler, device)
        _, val_f1 = validate_one_epoch(model, val_loader, criterion, device)
        tune.report({"val_f1": val_f1, "train_loss": train_loss})
        
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1; patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= hpo_patience: break

def fit(model, train_loader, val_loader, epochs, criterion, optimizer, scheduler, scaler, device, patience, experiment_name):
    model_path = f"models/{experiment_name}_best_model.pt"
    best_f1 = -1; patience_counter = 0
    print(f"--- Starting Training: {experiment_name} ---")
    for epoch in range(1, epochs + 1):
        train_loss, train_f1 = train_one_epoch(model, train_loader, criterion, optimizer, scaler, scheduler, device)
        val_loss, val_f1 = validate_one_epoch(model, val_loader, criterion, device)

        if val_f1 > best_f1:
            best_f1, patience_counter = val_f1, 0
            torch.save(model._orig_mod.state_dict() if hasattr(model, '_orig_mod') else model.state_dict(), model_path)
        else:
            patience_counter += 1
            if patience_counter >= patience: print(f"Early stopping at epoch {epoch}. Best F1: {best_f1:.4f}"); break
        if epoch % 3 == 0: print(f"Epoch {epoch:3d}/{epochs} | Best Val F1: {best_f1:.4f} | Val F1: {val_f1:.4f} | LR: {optimizer.param_groups[0]['lr']:.6f}")
    print(f"--- Finished Training --- Best F1: {best_f1:.4f}")
    uncompiled_model = model._orig_mod if hasattr(model, '_orig_mod') else model
    uncompiled_model.load_state_dict(torch.load(model_path))
    return uncompiled_model

## üß™ 5. Phase 1: Hyperparameter Search

We will run the HPO search as before to find the best set of hyperparameters. The core model and training logic inside the search remains the same, but the final K-Fold training will incorporate our new balancing techniques.

In [6]:
# Define fixed windowing parameters
WINDOW_SIZE = 10
STRIDE = 2

# --- MODIFICATION: Column indices updated to reflect removal of joint_30 ---
# Original features: 30 joints (0-29), 4 pain (30-33), 1 time (34), 1 pirate (35)
continuous_indices_orig = list(range(30)) + [34] # 30 joints + time
categorical_indices_orig = list(range(30, 34)) + [35] # 4 pain surveys + is_pirate

X_train_full_reordered = np.concatenate([
    X_train_full_engineered[:, :, continuous_indices_orig],
    X_train_full_engineered[:, :, categorical_indices_orig]
], axis=2)

print("--- Splitting data for HPO ---")
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)
for train_idx, val_idx in sss.split(X_train_full_reordered, y_train_full):
    X_train_split, y_train_split = X_train_full_reordered[train_idx], y_train_full[train_idx]
    X_val_split, y_val_split = X_train_full_reordered[val_idx], y_train_full[val_idx]

print("--- Pre-scaling data for HPO efficiency ---")
# --- MODIFICATION: After reordering, there are 31 continuous features ---
continuous_indices_reordered = list(range(31)) 
preprocessor_hpo = ColumnTransformer([('scaler', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')
ns, ts, f = X_train_split.shape
X_train_split_scaled = preprocessor_hpo.fit_transform(X_train_split.reshape(ns*ts, f)).reshape(ns, ts, f)
ns_val, ts_val, f_val = X_val_split.shape
X_val_split_scaled = preprocessor_hpo.transform(X_val_split.reshape(ns_val*ts_val, f_val)).reshape(ns_val, ts_val, f_val)

print("--- Creating fixed sliding windows for HPO ---")
X_train_w, y_train_w, _ = create_sliding_windows(X_train_split_scaled, y_train_split, WINDOW_SIZE, STRIDE)
X_val_w, y_val_w, _ = create_sliding_windows(X_val_split_scaled, y_val_split, WINDOW_SIZE, STRIDE)

# Convert to tensors once before HPO
X_train_w_torch = torch.from_numpy(X_train_w).float()
y_train_w_torch = torch.from_numpy(y_train_w).long()
X_val_w_torch = torch.from_numpy(X_val_w).float()
y_val_w_torch = torch.from_numpy(y_val_w).long()

print(f"Created training windows of shape: {X_train_w_torch.shape}")

# Clean up memory
del X_train_split_scaled, X_val_split_scaled, X_train_w, y_train_w, X_val_w, y_val_w
gc.collect()

--- Splitting data for HPO ---
--- Pre-scaling data for HPO efficiency ---
--- Creating fixed sliding windows for HPO ---
Created training windows of shape: torch.Size([40128, 10, 36])


56

In [7]:
search_space = {
    "rnn_type": tune.choice(['GRU', 'LSTM']),
    "focal_loss_gamma": tune.uniform(1.0, 4.0),
    "lr": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.choice([64, 128]),
    "hidden_size": tune.choice([256, 384, 512]), 
    "num_layers": tune.choice([2, 3]),
    "dropout_rate": tune.uniform(0, 0.5), 
    "feature_dropout_rate": tune.uniform(0, 0.5),
    "bidirectional": tune.choice([True, False]), 
    "l2_lambda": tune.loguniform(1e-8, 1e-5),
    "conv_out_channels": tune.choice([128]), 
    "conv_kernel_size": tune.choice([5])
}

def short_trial_name(trial): return f"{trial.trainable_name}_{trial.trial_id}"

if ray.is_initialized(): ray.shutdown()
ray.init(num_cpus=os.cpu_count(), num_gpus=1, ignore_reinit_error=True, log_to_driver=False)

print("--- Starting HPO with Focal Loss and RNN-Type search ---")
analysis = tune.run(
    tune.with_parameters(objective_function, 
                         X_train_w=X_train_w_torch, y_train_w=y_train_w_torch,
                         X_val_w=X_val_w_torch, y_val_w=y_val_w_torch,
                         alpha_tensor=alpha_tensor),
    resources_per_trial={"cpu": 4, "gpu": 0.25},
    config=search_space, 
    num_samples=0, 
    search_alg=OptunaSearch(metric="val_f1", mode="max"),
    scheduler=ASHAScheduler(metric="val_f1", mode="max", grace_period=25, reduction_factor=2),
    name="pirate_pain_focalloss_search_v13_final", 
    verbose=1,
    trial_dirname_creator=short_trial_name,
    resume=True
)

0,1
Current time:,2025-11-17 09:49:23
Running for:,00:01:02.70
Memory:,12.2/13.9 GiB

Trial name,status,loc,batch_size,bidirectional,conv_kernel_size,conv_out_channels,dropout_rate,feature_dropout_rate,focal_loss_gamma,hidden_size,l2_lambda,lr,num_layers,rnn_type,iter,total time (s),val_f1,train_loss
objective_function_d3f059ea,RUNNING,127.0.0.1:33552,64,False,5,128,0.471509,0.362726,1.45168,512,6.58493e-08,0.00330047,2,GRU,1.0,37.6862,0.859859,0.0446891
objective_function_7dde1ec4,RUNNING,127.0.0.1:33584,64,False,5,128,0.4558,0.422645,1.82882,512,3.13501e-07,0.00309611,2,GRU,1.0,37.6992,0.878423,0.037524
objective_function_2a625bf7,RUNNING,127.0.0.1:33616,64,False,5,128,0.468796,0.370299,1.50751,512,5.85567e-08,0.00368539,2,GRU,1.0,37.6937,0.866599,0.0442824
objective_function_ab2aa931,RUNNING,127.0.0.1:33688,64,True,5,128,0.345147,0.410382,1.4584,512,1.29261e-07,0.00239552,2,GRU,1.0,40.9352,0.874057,0.0447165
objective_function_cec0c729,PENDING,,64,True,5,128,0.234849,0.419,1.82654,512,1.493e-07,0.00402668,2,GRU,,,,
objective_function_a861e309,TERMINATED,127.0.0.1:21932,64,False,5,128,0.383319,0.0104024,1.27821,384,6.00886e-07,0.000232323,2,GRU,25.0,378.408,0.928738,0.00276643
objective_function_2df3d805,TERMINATED,127.0.0.1:29536,64,False,5,128,0.47144,0.365539,1.2182,384,3.39011e-07,0.00153987,2,GRU,25.0,376.554,0.932257,0.00226642
objective_function_69b1b00d,TERMINATED,127.0.0.1:20052,64,False,5,128,0.393727,0.374004,1.29281,512,4.30389e-07,0.00140723,2,GRU,85.0,1312.17,0.947306,0.000238619
objective_function_ee121a90,TERMINATED,127.0.0.1:29312,64,False,5,128,0.376464,0.0509729,1.33862,512,4.56294e-07,0.00163282,2,GRU,50.0,769.492,0.935686,0.000829346
objective_function_ad3c91f6,TERMINATED,127.0.0.1:19116,64,False,5,128,0.393205,0.371099,1.34248,512,4.65911e-07,0.00147239,2,GRU,80.0,1263.5,0.959049,0.000316566


2025-11-17 09:48:21,034	INFO tune_controller.py:444 -- Restoring the run from the latest experiment state file: experiment_state-2025-11-17_09-07-44.json
2025-11-17 09:49:23,981	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Karim Negm/ray_results/pirate_pain_focalloss_search_v13_final' in 0.0368s.
2025-11-17 09:49:30,843	INFO tune.py:1041 -- Total run time: 69.87 seconds (62.67 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- objective_function_cec0c729: FileNotFoundError('Could not fetch metrics for objective_function_cec0c729: both result.json and progress.csv were not found at C:/Users/Karim Negm/ray_results/pirate_pain_focalloss_search_v13_final/objective_function_cec0c729')


In [8]:
print("\n--- Loading HPO Search Results ---")

experiment_path = os.path.expanduser("~/ray_results/pirate_pain_focalloss_search_v13_final")
print(f"Loading analysis from: {experiment_path}")
try:
    # Note: If you re-run this notebook, the experiment name might increment (e.g., _v13_final_2).
    # Make sure the path matches the one output by the tune.run() command.
    analysis = tune.ExperimentAnalysis(experiment_path)
    best_trial = analysis.get_best_trial(metric="val_f1", mode="max", scope="all")
    
    if best_trial:
        FINAL_CONFIG = best_trial.config
        FINAL_BEST_VAL_F1 = best_trial.last_result.get("val_f1", 0.0) 
        print(f"Best validation F1 score from completed trials: {FINAL_BEST_VAL_F1:.4f}")
        print("Best hyperparameters found:")
        print(FINAL_CONFIG)
    else:
        print("ERROR: No trials completed successfully. Using a default config.")
        raise ValueError("HPO run failed to produce a valid trial.")

except Exception as e:
    print(f"\nWARNING: Could not load analysis from {experiment_path}. The error was: {e}")
    print("\n--- USING FALLBACK CONFIGURATION ---")
    # This is a strong configuration from a previous run, adjusted for final strategy.
    FINAL_CONFIG = {'rnn_type': 'GRU', 'focal_loss_gamma': 2.8, 'lr': 0.0015, 'batch_size': 128, 'hidden_size': 512, 'num_layers': 2, 'dropout_rate': 0.35, 'feature_dropout_rate': 0.33, 'bidirectional': False, 'l2_lambda': 4.2e-06, 'conv_out_channels': 128, 'conv_kernel_size': 5}
    FINAL_BEST_VAL_F1 = 0.95 # Placeholder value
    print("Best hyperparameters (fallback):")
    print(FINAL_CONFIG)


# Add the fixed windowing params to the final config for the next steps
FINAL_CONFIG['window_size'] = WINDOW_SIZE
FINAL_CONFIG['stride'] = STRIDE

# Clean up HPO data to save memory
try:
    del X_train_w_torch, y_train_w_torch, X_val_w_torch, y_val_w_torch
    del X_train_split, y_train_split, X_val_split, y_val_split
except NameError:
    print("Data already cleaned up or not in memory.")
gc.collect()

print("\n--- Ready to proceed to K-Fold Training ---")

- objective_function_cec0c729: FileNotFoundError('Could not fetch metrics for objective_function_cec0c729: both result.json and progress.csv were not found at C:/Users/Karim Negm/ray_results/pirate_pain_focalloss_search_v13_final/objective_function_cec0c729')



--- Loading HPO Search Results ---
Loading analysis from: C:\Users\Karim Negm/ray_results/pirate_pain_focalloss_search_v13_final
Best validation F1 score from completed trials: 0.9421
Best hyperparameters found:
{'rnn_type': 'GRU', 'focal_loss_gamma': 1.51103656149311, 'lr': 0.00065379125036077, 'batch_size': 64, 'hidden_size': 384, 'num_layers': 3, 'dropout_rate': 0.36726022005612846, 'feature_dropout_rate': 0.2514972946679169, 'bidirectional': True, 'l2_lambda': 1.2035123100344062e-06, 'conv_out_channels': 128, 'conv_kernel_size': 5}

--- Ready to proceed to K-Fold Training ---


## üèÜ 6. Phase 2: K-Fold Ensemble Training with Final Strategy

This is the main training block. It incorporates the best hyperparameters from the HPO search along with our new strategies for handling class imbalance:
1.  **WeightedRandomSampler**: A sampler is created for each fold's training data to ensure balanced batches.
2.  **Noise Augmentation**: The training data for each fold is augmented by adding noisy copies of the minority class samples.

In [9]:
print("--- üèÜ Final Configuration Set --- ")
print(f"Best Val F1 from HPO search: {FINAL_BEST_VAL_F1:.4f}")
print(FINAL_CONFIG)

N_SPLITS = 5
FINAL_EXPERIMENT_NAME = f"FinalStrategy-{FINAL_CONFIG['rnn_type']}_H{FINAL_CONFIG['hidden_size']}_L{FINAL_CONFIG['num_layers']}_"\
                      f"C{FINAL_CONFIG['conv_out_channels']}_K{FINAL_CONFIG['conv_kernel_size']}_v13"
submission_filename_base = f"submission_{FINAL_EXPERIMENT_NAME}.csv"
print(f"Submission name will be: {submission_filename_base}")

--- üèÜ Final Configuration Set --- 
Best Val F1 from HPO search: 0.9421
{'rnn_type': 'GRU', 'focal_loss_gamma': 1.51103656149311, 'lr': 0.00065379125036077, 'batch_size': 64, 'hidden_size': 384, 'num_layers': 3, 'dropout_rate': 0.36726022005612846, 'feature_dropout_rate': 0.2514972946679169, 'bidirectional': True, 'l2_lambda': 1.2035123100344062e-06, 'conv_out_channels': 128, 'conv_kernel_size': 5, 'window_size': 10, 'stride': 2}
Submission name will be: submission_FinalStrategy-GRU_H384_L3_C128_K5_v13.csv


In [11]:
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
fold_val_f1_list = []
# --- MODIFICATION: The number of reordered continuous features is now 31 ---
continuous_indices_reordered = list(range(31))
EPOCHS = 150
PATIENCE = 70

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train_full_reordered, y_train_full)):
    fold_name = f"{FINAL_EXPERIMENT_NAME}_fold_{fold+1}"
    print(f"\n--- Fold {fold+1}/{N_SPLITS} --- ({fold_name}) ---")
    
    X_train_fold, y_train_fold = X_train_full_reordered[train_idx], y_train_full[train_idx]
    X_val_fold, y_val_fold = X_train_full_reordered[val_idx], y_train_full[val_idx]

    preprocessor_fold = ColumnTransformer([('s', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')
    ns, ts, f = X_train_fold.shape
    X_train_scaled = preprocessor_fold.fit_transform(X_train_fold.reshape(ns*ts, f)).reshape(ns, ts, f)
    ns_val, ts_val, f_val = X_val_fold.shape
    X_val_scaled = preprocessor_fold.transform(X_val_fold.reshape(ns_val*ts_val, f_val)).reshape(ns_val, ts_val, f_val)
    
    X_train_w, y_train_w, _ = create_sliding_windows(X_train_scaled, y_train_fold, FINAL_CONFIG['window_size'], FINAL_CONFIG['stride'])
    X_val_w, y_val_w, _ = create_sliding_windows(X_val_scaled, y_val_fold, FINAL_CONFIG['window_size'], FINAL_CONFIG['stride'])
    
    # --- MODIFICATION: Augment the training data with noise injection for minority classes ---
    X_train_w_aug, y_train_w_aug = augment_minority_classes(X_train_w, y_train_w, continuous_feature_count=len(continuous_indices_reordered), aug_factor=2)

    # --- MODIFICATION: Create WeightedRandomSampler to handle class imbalance at the batch level ---
    print("Creating WeightedRandomSampler for the training loader...")
    class_counts = np.bincount(y_train_w_aug)
    class_weights = 1. / class_counts
    sample_weights = np.array([class_weights[t] for t in y_train_w_aug])
    sampler = WeightedRandomSampler(torch.from_numpy(sample_weights).double(), num_samples=len(sample_weights), replacement=True)

    # --- MODIFICATION: Pass sampler to the training loader and use augmented data ---
    train_ds = TensorDataset(torch.from_numpy(X_train_w_aug).float(), torch.from_numpy(y_train_w_aug).long())
    val_ds = TensorDataset(torch.from_numpy(X_val_w).float(), torch.from_numpy(y_val_w).long())
    train_loader = make_loader(train_ds, FINAL_CONFIG['batch_size'], shuffle=False, drop_last=True, sampler=sampler) # Shuffle is False when sampler is used
    val_loader = make_loader(val_ds, FINAL_CONFIG['batch_size'], shuffle=False, drop_last=False)

    model_config_kfold = {k: v for k, v in FINAL_CONFIG.items() if k not in ['window_size', 'stride', 'lr', 'batch_size', 'l2_lambda', 'focal_loss_gamma']}
    model_fold = RecurrentClassifier(**model_config_kfold, num_classes=N_CLASSES).to(device)
    model_fold = torch.compile(model_fold, backend="eager")
    
    optimizer = torch.optim.AdamW(model_fold.parameters(), lr=FINAL_CONFIG['lr'], weight_decay=FINAL_CONFIG['l2_lambda'])
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=FINAL_CONFIG['lr'], epochs=EPOCHS, steps_per_epoch=len(train_loader))
    scaler = torch.amp.GradScaler(enabled=(device.type == 'cuda'))
    criterion = FocalLoss(alpha=alpha_tensor, gamma=FINAL_CONFIG['focal_loss_gamma'])

    model_fold_uncompiled = fit(model_fold, train_loader, val_loader, EPOCHS, criterion, optimizer, scheduler, scaler, device, PATIENCE, fold_name)
    
    _, val_f1 = validate_one_epoch(model_fold_uncompiled, val_loader, criterion, device)
    fold_val_f1_list.append(val_f1)
    print(f"Fold {fold+1} Final Val F1: {val_f1:.4f}")
    
    # Memory cleanup for the next fold
    del X_train_w, y_train_w, X_val_w, y_val_w, X_train_w_aug, y_train_w_aug, train_ds, val_ds, train_loader, val_loader, model_fold, model_fold_uncompiled
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print(f"\n--- üèÜ K-Fold Training Complete --- Average F1: {np.mean(fold_val_f1_list):.4f}")


--- Fold 1/5 --- (FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_1) ---
Augmenting minority classes... Original sample count: 40128
Augmentation complete. New sample count: 113696
Creating WeightedRandomSampler for the training loader...
--- Starting Training: FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_1 ---
Epoch   3/150 | Best Val F1: 0.9391 | Val F1: 0.9391 | LR: 0.000033
Epoch   6/150 | Best Val F1: 0.9391 | Val F1: 0.9381 | LR: 0.000053
Epoch   9/150 | Best Val F1: 0.9391 | Val F1: 0.9373 | LR: 0.000086
Epoch  12/150 | Best Val F1: 0.9412 | Val F1: 0.9310 | LR: 0.000130
Epoch  15/150 | Best Val F1: 0.9427 | Val F1: 0.9354 | LR: 0.000183
Epoch  18/150 | Best Val F1: 0.9427 | Val F1: 0.9379 | LR: 0.000243
Epoch  21/150 | Best Val F1: 0.9427 | Val F1: 0.9240 | LR: 0.000307
Epoch  24/150 | Best Val F1: 0.9427 | Val F1: 0.9334 | LR: 0.000373
Epoch  27/150 | Best Val F1: 0.9427 | Val F1: 0.9302 | LR: 0.000437
Epoch  30/150 | Best Val F1: 0.9427 | Val F1: 0.9386 | LR: 0.000497
Epoch  33/150

## üì¨ 7. Phase 3: Ensemble Submission 

In [14]:
print("\n--- Preparing test dataset for submission ---")
# --- MODIFICATION: Updated column indices to reflect removal of joint_30 ---
continuous_indices_orig = list(range(30)) + [34]
categorical_indices_orig = list(range(30, 34)) + [35]
X_test_full_reordered = np.concatenate([
    X_test_full_engineered[:, :, continuous_indices_orig],
    X_test_full_engineered[:, :, categorical_indices_orig]], axis=2)

continuous_indices_reordered = list(range(31))
preprocessor_final = ColumnTransformer([('scaler', StandardScaler(), continuous_indices_reordered)], remainder='passthrough')

# Fit the final scaler on the ENTIRE reordered training data
ns, ts, f = X_train_full_reordered.shape
preprocessor_final.fit(X_train_full_reordered.reshape(ns * ts, f))

ns_test, ts_test, f_test = X_test_full_reordered.shape
X_test_scaled = preprocessor_final.transform(X_test_full_reordered.reshape(ns_test * ts_test, f_test)).reshape(ns_test, ts_test, f_test)
X_test_w, test_window_indices = create_sliding_windows(X_test_scaled, y=None, window_size=FINAL_CONFIG['window_size'], stride=FINAL_CONFIG['stride'])
test_loader = make_loader(TensorDataset(torch.from_numpy(X_test_w).float()), FINAL_CONFIG['batch_size'], False, False)

model_config_final = {k: v for k, v in FINAL_CONFIG.items() if k not in ['window_size', 'stride', 'lr', 'batch_size', 'l2_lambda', 'focal_loss_gamma']}
all_fold_probabilities = []

for fold in range(N_SPLITS):
    fold_name = f"{FINAL_EXPERIMENT_NAME}_fold_{fold+1}"
    model_path = f"models/{fold_name}_best_model.pt"
    print(f"Loading model {fold+1}/{N_SPLITS} from {model_path}...")
    model_fold = RecurrentClassifier(**model_config_final, num_classes=N_CLASSES).to(device)
    model_fold.load_state_dict(torch.load(model_path, map_location=device))
    # Using 'eager' backend for inference is fine and avoids potential cudagraphs state issues.
    model_fold = torch.compile(model_fold, backend="eager")
    model_fold.eval()
    
    fold_preds = []
    with torch.no_grad():
        for (inputs,) in test_loader:
            with torch.amp.autocast(device_type=device.type, enabled=(device.type == 'cuda')):
                probs = torch.softmax(model_fold(inputs.to(device)), dim=1)
                fold_preds.append(probs.cpu().numpy())
    all_fold_probabilities.append(np.concatenate(fold_preds))

# --- REVERTED TO NORMAL: Using robust mean aggregation as requested ---
print("\n--- Aggregating predictions using standard Mean Aggregation ---")

# First, get the mean probabilities across all folds for each window
mean_probabilities = np.mean(all_fold_probabilities, axis=0)
df_probs = pd.DataFrame(mean_probabilities, columns=[f"prob_{i}" for i in range(N_CLASSES)])
df_probs['original_index'] = test_window_indices

# Group by the original sample index and calculate the mean of the probabilities for all its windows
agg_probs = df_probs.groupby('original_index')[[f"prob_{i}" for i in range(N_CLASSES)]].mean().values
final_predictions = le.inverse_transform(np.argmax(agg_probs, axis=1))

submission_df = pd.DataFrame({'sample_index': sorted(X_test_long_df['sample_index'].unique()), 'label': final_predictions})
submission_df['sample_index'] = submission_df['sample_index'].apply(lambda x: f"{x:03d}")
submission_filepath = os.path.join("submissions", submission_filename_base)
submission_df.to_csv(submission_filepath, index=False)
print(f"\nSuccessfully saved to {submission_filepath}!")
print(submission_df.head())


--- Preparing test dataset for submission ---
Loading model 1/5 from models/FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_1_best_model.pt...
Loading model 2/5 from models/FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_2_best_model.pt...
Loading model 3/5 from models/FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_3_best_model.pt...
Loading model 4/5 from models/FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_4_best_model.pt...
Loading model 5/5 from models/FinalStrategy-GRU_H384_L3_C128_K5_v13_fold_5_best_model.pt...

--- Aggregating predictions using standard Mean Aggregation ---

Successfully saved to submissions\submission_FinalStrategy-GRU_H384_L3_C128_K5_v13.csv!
  sample_index    label
0          000  no_pain
1          001  no_pain
2          002  no_pain
3          003  no_pain
4          004  no_pain
