## Setup

## my setup

### My imports

In [1]:
import os
import json
from glob import glob
from functools import partial
from datetime import datetime
from itertools import pairwise
from os.path import join, realpath
from typing import Optional, Literal

import torch
import kagglehub
import numpy as np
import pandas as pd
import plotly.express as px
from torch import nn, Tensor
from pandas import DataFrame as DF
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader as DL
from rich.progress import Progress, Task, track
from torch.optim.lr_scheduler import ConstantLR, LRScheduler
metric_package = kagglehub.package_import('wasupandceacar/cmi-metric', bypass_confirmation=True)

### config

In [2]:
# --- Cross-validation / optimisation settings --------------------------------
NB_CROSS_VALIDATIONS = 5   # upper bound on the number of fold directories used
TRAINING_EPOCHS = 60       # epochs per fold
STARTING_LR = 5e-4         # learning rate handed to the optimizer constructor
BATCH_SIZE = 256           # batch size of the train/validation loaders

# --- Gesture labels -----------------------------------------------------------
# Listed alphabetically for readability; sorted() still guarantees the order,
# so the position of a name in TARGET_NAMES does not depend on how it is typed.
TARGET_NAMES = sorted((
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Drink from bottle/cup",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Glasses on/off",
    "Neck - pinch skin",
    "Neck - scratch",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
))
# Same labels as an ndarray so they can be indexed with an array of class ids.
TARGET_NAMES_NDARRAY = np.array(TARGET_NAMES)

# --- Learning-rate schedule settings -------------------------------------------
MOCK_TRAINING_GAMMA = 1.01
MAX_LR_TO_MIN_DIV_FACTOR = 10  # passed as `div_factor` to the LR scheduler

### device setup

In [3]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Dataset Setup

In [None]:
# Download the preprocessed competition dataset through kagglehub and keep the
# directory path it returns; later cells build file paths from it.
dataset_path = kagglehub.dataset_download(handle="mauroabidalcarrer/prepocessed-cmi-2025")

In [None]:
class CMIDataset(TensorDataset):
    """TensorDataset built from the ``X``/``Y`` ``.npy`` files of one directory.

    The Kaggle dataset is resolved through ``kagglehub.dataset_download`` on
    every instantiation, then ``<split>_X.npy`` and ``<split>_Y.npy`` (or plain
    ``X.npy`` / ``Y.npy`` when ``split`` is None) are loaded from
    ``<dataset>/preprocessed_dataset/<parent_dir>``.

    Args:
        parent_dir: directory holding the arrays, joined onto the dataset's
            ``preprocessed_dataset`` folder (an absolute path is used as-is
            by ``os.path.join``).
        split: optional file-name prefix, ``"train"`` or ``"validation"``.
        subset: if given, keep only the first ``subset`` samples.
        force_download: forwarded to ``kagglehub.dataset_download``.
    """

    def __init__(
        self,
        parent_dir: str,
        split: Optional[Literal["train", "validation"]]=None,
        subset: Optional[int]=None,
        force_download=False
    ):
        download_root = kagglehub.dataset_download(
            handle="mauroabidalcarrer/prepocessed-cmi-2025",
            force_download=force_download,
        )
        arrays_dir = join(download_root, "preprocessed_dataset", parent_dir)
        prefix = f"{split}_" if split is not None else ""

        # Axes 1 and 2 are swapped on load -- presumably turning
        # (samples, time, features) into the (samples, channels, time) layout
        # Conv1d consumes; TODO confirm against the preprocessing code.
        features = np.load(join(arrays_dir, prefix + "X.npy")).swapaxes(1, 2)
        targets = np.load(join(arrays_dir, prefix + "Y.npy"))

        if subset is not None:
            features, targets = features[:subset], targets[:subset]

        super().__init__(torch.from_numpy(features), torch.from_numpy(targets))

In [None]:
# Load the JSON metadata that ships with the preprocessed dataset.
meta_data_path = join(dataset_path, "preprocessed_dataset", "full_dataset_meta_data.json")
with open(meta_data_path, "r") as fp:
    meta_data = json.load(fp)
# Convert target names into a ndarray to index it batchwise.
# meta_data["target_names"] = np.asarray(meta_data["target_names"])

# A feature counts as "non IMU" when its name starts with "thm" or "tof";
# everything else is treated as an IMU feature.
_tof_thm_flags = [name.startswith(("thm", "tof")) for name in meta_data["feature_cols"]]
non_imu_feats_idx = [pos for pos, flagged in enumerate(_tof_thm_flags) if flagged]
imu_feats_idx = [pos for pos, flagged in enumerate(_tof_thm_flags) if not flagged]
non_imu_feats = [meta_data["feature_cols"][pos] for pos in non_imu_feats_idx]
imu_feats = [meta_data["feature_cols"][pos] for pos in imu_feats_idx]

### Kaggle notebook imports

In [None]:
import random
import numpy as np
import torch
import os

def seed_everything(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    the current CUDA device), switches cuDNN to deterministic mode with
    benchmarking off, exports ``PYTHONHASHSEED`` and
    ``CUBLAS_WORKSPACE_CONFIG``, and asks PyTorch for deterministic
    algorithms (warning instead of raising when one is unavailable).

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ.update({
        'PYTHONHASHSEED': str(seed),
        'CUBLAS_WORKSPACE_CONFIG': ':4096:8',
    })

    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True, warn_only=True)


# Base seed for the whole notebook; individual folds later offset from it.
SEED = 42
seed_everything(seed=SEED)

import pandas as pd
import polars as pl
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GroupKFold
from sklearn.utils.class_weight import compute_class_weight
import joblib
from tqdm import tqdm

from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tensorflow.keras.preprocessing.sequence import pad_sequences as keras_pad_sequences

import kaggle_evaluation.cmi_inference_server
from matplotlib import pyplot as plt

2025-06-14 10:01:01.640826: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749895261.806557      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749895261.850009      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


## **Read data**

In [None]:
print("Loading datasets...")
# Read the four CSV files (sensor rows + demographics, train + test) in order.
train_df, train_dem_df, test_df, test_dem_df = (
    pd.read_csv(join(dataset_path, csv_name))
    for csv_name in (
        "train.csv",
        "train_demographics.csv",
        "test.csv",
        "test_demographics.csv",
    )
)
print(f"Train rows: {len(train_df)}, Test rows: {len(test_df)}")

# Replace the gesture strings by integer class ids; `gesture_classes[i]` maps
# an id back to its name.
label_encoder = LabelEncoder()
train_df['gesture'] = label_encoder.fit_transform(train_df['gesture'].astype(str))
gesture_classes = label_encoder.classes_

# Gestures counted as BFRB by the metric code in the training loop, and their
# integer ids under `label_encoder`.
bfrb_gestures = [
    'Above ear - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Neck - pinch skin',
    'Neck - scratch',
    'Cheek - pinch skin',
]
bfrb_indices = label_encoder.transform(bfrb_gestures)

# imu_cols = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']
# tof_thm_cols = [c for c in train_df.columns if c.startswith('thm_') or c.startswith('tof_')]

# # Reorder so that IMU features come first
# feature_cols = imu_cols + tof_thm_cols
# imu_dim = len(imu_cols)
# tof_thm_dim = len(tof_thm_cols)

# print(f"IMU features: {imu_dim}, TOF/Thermal features: {tof_thm_dim}, Total features: {len(feature_cols)}")

# # Check for missing values
# nan_counts = train_df[feature_cols].isna().sum().sum()
# print("Total NaNs in train features:", nan_counts)


# # to remove hand dependency in IMU data
# # im not sure if the rotation is on the x axis but this give me the best CV
# def apply_symmetry(data):
#     transformed = data.copy()
#     transformed['acc_z'] = -transformed['acc_z']
#     transformed['acc_y'] = -transformed['acc_y']
    
#     transformed['rot_w'] = transformed['rot_w']
#     transformed['rot_x'] = transformed['rot_x']
#     transformed['rot_y'] = -transformed['rot_y']
#     transformed['rot_z'] = -transformed['rot_z']
#     return transformed


# train_df = train_df.merge(
#     train_dem_df,
#     on='subject',
#     how='left',
#     validate='many_to_one'
# )

# right_handed_mask = train_df['handedness'] == 1
# train_df.loc[right_handed_mask, imu_cols] = apply_symmetry(train_df.loc[right_handed_mask, imu_cols])

Loading datasets...
Train rows: 574945, Test rows: 107
IMU features: 7, TOF/Thermal features: 325, Total features: 332
Total NaNs in train features: 3597807


### Create kaggle notebook dataset

In [None]:
# sequences = train_df.groupby('sequence_id')
# X_list = []
# lengths = []
# y_list = []

# sequence_info = []
# for i, (seq_id, seq) in enumerate(sequences):
#     seq_data = seq[feature_cols].ffill().bfill().fillna(0).values
#     X_list.append(seq_data)
#     lengths.append(seq_data.shape[0])
#     sequence_info.append({
#         'sequence_id': seq_id,
#         'subject': seq['subject'].iloc[0],
#         'gesture': seq['gesture'].iloc[0]
#     })

# pad_len = int(np.percentile(lengths, 90))
# print(f"Pad/truncate all sequences to length {pad_len} (90th percentile).")

# seq_df = pd.DataFrame(sequence_info)
# X_array = keras_pad_sequences(
#     X_list,
#     maxlen=pad_len,
#     dtype='float32',
#     padding='post',
#     truncating='post'
# )  # shape: (n_samples, pad_len, total_features)

# y_array = seq_df['gesture'].values  # shape: (n_samples,)

# num_classes = len(np.unique(y_array))
# y_array = np.eye(num_classes)[y_array]  # shape: (n_samples, num_classes)

# # Transpose to (n_samples, features, seq_len) for PyTorch
# X_array = np.transpose(X_array, (0, 2, 1))


# class SequenceDataset(Dataset):
#     def __init__(self, X, y=None):
#         """
#         X: np.ndarray of shape (n_samples, features, seq_len)
#         y: np.ndarray of shape (n_samples, num_classes) or None for test
#         """
#         self.X = torch.from_numpy(X).float()
#         self.y = torch.from_numpy(y).float() if y is not None else None

#     def __len__(self):
#         return self.X.size(0)

#     def __getitem__(self, idx):
#         if self.y is not None:
#             return self.X[idx], self.y[idx]
#         else:
#             return self.X[idx]

Pad/truncate all sequences to length 103 (90th percentile).


## Model definition

In [None]:
class ResidualBlock(nn.Module):
    """1-D residual block: two conv/batch-norm stages plus a shortcut branch.

    The main branch is Conv1d(k=3, pad=1) -> BatchNorm1d -> ReLU ->
    Conv1d(k=3, pad=1) -> BatchNorm1d, so the temporal length is preserved.
    The shortcut is the identity when the channel count does not change and a
    1x1 Conv1d followed by BatchNorm1d otherwise. Both branches are summed and
    passed through a final ReLU.

    Args:
        in_chns: number of input channels.
        out_chns: number of output channels.
    """

    def __init__(self, in_chns:int, out_chns:int):
        super().__init__()
        main_branch = [
            nn.Conv1d(in_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
            nn.ReLU(),
            nn.Conv1d(out_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
        ]
        self.blocks = nn.Sequential(*main_branch)

        if in_chns != out_chns:
            # Channel counts differ: project the input so the sum is defined.
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1),
                nn.BatchNorm1d(out_chns),
            )
        else:
            self.skip_connection = nn.Identity()

    def forward(self, x:Tensor) -> Tensor:
        """Return ``relu(shortcut(x) + main_branch(x))``."""
        shortcut = self.skip_connection(x)
        residual = self.blocks(x)
        return nn.functional.relu(shortcut + residual)

class Resnet(nn.Module):
    def __init__(
            self,
            in_channels:int,
            depth:int,
            # n_res_block_per_depth:int,
            mlp_width:int,
            n_class:int,
        ):
        super().__init__()
        chs_per_depth = [in_channels * 2 ** i for i in range(depth)]
        blocks_chns_it = pairwise(chs_per_depth)
        self.res_blocks = [ResidualBlock(in_chns, out_chns) for in_chns, out_chns in blocks_chns_it]
        self.res_blocks = nn.ModuleList(self.res_blocks)
        self.mlp_head = nn.Sequential(
            nn.LazyLinear(mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, n_class),
            nn.Softmax(dim=1),
        )
        
        
    def forward(self, x:Tensor) -> Tensor:
        activation_maps = x
        for res_block in self.res_blocks:
            activation_maps = nn.functional.max_pool1d(res_block(activation_maps), 2)
        out = activation_maps.view(activation_maps.shape[0], -1)
        out = self.mlp_head(out)
        return out

### Create model function

In [None]:
def mk_model() -> nn.Module:
    """Create a fresh, untrained ``Resnet`` and move it to ``device``.

    The input channel count is the number of entries in
    ``meta_data["feature_cols"]``; depth, MLP width and class count are fixed
    to 4, 256 and 18.
    """
    network = Resnet(
        in_channels=len(meta_data["feature_cols"]),
        depth=4,
        mlp_width=256,
        n_class=18,
    )
    return network.to(device)

print("input channels:", len(meta_data["feature_cols"]))

input channels: 66


# **Training loop**

In [None]:
seed_everything(seed=SEED)

# CrossEntropyLoss expects raw, unnormalised scores from the model.
# NOTE(review): the targets are passed to the loss exactly as stored in Y.npy
# and the validation code below takes their argmax, so they are presumably
# one-hot rows; CrossEntropyLoss only accepts such targets when they are
# floating point -- confirm the dtype of Y.npy.
# criterion = soft_cross_entropy
criterion = torch.nn.CrossEntropyLoss()

# NOTE(review): n_splits, batch_size and gkf are not used by the loop below
# (folds are read pre-split from disk and the loaders use BATCH_SIZE). They
# are left in place in case other cells rely on them.
n_splits = 5
batch_size = 128
gkf = GroupKFold(n_splits=n_splits)

fold_metrics = []
best_fold_metrics = []
best_models = []

fold_patterns = join(dataset_path, "preprocessed_dataset", "fold*")
# glob() returns paths in arbitrary order: sort them so that fold numbering
# (and therefore per-fold seeds and checkpoint names) is reproducible.
fold_pths = sorted(glob(fold_patterns))[:NB_CROSS_VALIDATIONS]
all_training_metrics = {}


for fold, fold_pth in enumerate(fold_pths):
    print("training:", fold + 1)
    train_dataset = CMIDataset(fold_pth, "train")
    train_loader = DL(train_dataset, BATCH_SIZE, shuffle=True)
    validation_dataset = CMIDataset(fold_pth, "validation")
    validation_loader = DL(validation_dataset, BATCH_SIZE, shuffle=False)
    print(f"\n{'='*50}")
    # Report the number of folds actually found on disk.
    print(f"Fold {fold + 1}/{len(fold_pths)}")

    seed_everything(seed=SEED + fold)
    model = mk_model()

    # The model contains a LazyLinear layer whose weights only exist after a
    # first forward pass. Run one sample through it (eval mode, no grad, so
    # batch-norm statistics are untouched) BEFORE handing the parameters to
    # the optimizer, as recommended by the PyTorch lazy-module documentation.
    model.eval()
    with torch.no_grad():
        model(train_dataset[0][0].unsqueeze(0).to(device))

    # Optimizer and scheduler.
    # OneCycleLR takes over the learning rate: it starts at
    # max_lr / div_factor, so STARTING_LR is overwritten on construction.
    optimizer = torch.optim.AdamW(model.parameters(), STARTING_LR)
    steps_per_epoch = len(train_loader)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,  # required first argument; it was missing before
        max_lr=0.002 / 2,
        div_factor=MAX_LR_TO_MIN_DIV_FACTOR,
        epochs=TRAINING_EPOCHS,
        steps_per_epoch=steps_per_epoch,
    )

    # Early stopping
    best_metric = -np.inf
    best_binary_f1 = -np.inf
    best_macro_f1 = -np.inf
    patience = 15
    epochs_no_improve = 0

    for epoch in range(1, TRAINING_EPOCHS + 1):
        # Training phase
        model.train()
        train_loss = 0.0
        total = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            # Mixup is currently disabled: the batch is used unchanged.
            # mixed_x, mixed_y = mixup_data(batch_x, batch_y, alpha=0.2)
            mixed_x, mixed_y = batch_x, batch_y

            optimizer.zero_grad()
            outputs = model(mixed_x)
            loss = criterion(outputs, mixed_y)
            loss.backward()
            optimizer.step()
            # OneCycleLR is stepped once per batch, not once per epoch.
            scheduler.step()

            # Weight the batch loss by its size to get a per-sample average.
            train_loss += loss.item() * batch_x.size(0)
            total += batch_x.size(0)
        train_loss /= total

        # Validation phase
        model.eval()
        val_loss = 0.0
        total = 0
        all_true = []
        all_pred = []

        with torch.no_grad():
            for batch_x, batch_y in validation_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item() * batch_x.size(0)
                total += batch_x.size(0)

                # Get predicted class indices
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                # Get true class indices from one-hot
                trues = torch.argmax(batch_y, dim=1).cpu().numpy()

                all_true.append(trues)
                all_pred.append(preds)

        val_loss /= total
        all_true = np.concatenate(all_true)
        all_pred = np.concatenate(all_pred)

        # Compute competition metrics
        # Binary classification: BFRB (1) vs non-BFRB (0)
        binary_true = np.isin(all_true, bfrb_indices).astype(int)
        binary_pred = np.isin(all_pred, bfrb_indices).astype(int)
        binary_f1 = f1_score(binary_true, binary_pred)

        # Collapse non-BFRB gestures into a single class
        collapsed_true = np.where(
            np.isin(all_true, bfrb_indices),
            all_true,
            len(bfrb_gestures)  # Single non-BFRB class
        )
        collapsed_pred = np.where(
            np.isin(all_pred, bfrb_indices),
            all_pred,
            len(bfrb_gestures)  # Single non-BFRB class
        )

        # Macro F1 on collapsed classes
        macro_f1 = f1_score(collapsed_true, collapsed_pred, average='macro')
        final_metric = (binary_f1 + macro_f1) / 2

        print(f"Epoch {epoch:02d}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
        print(f"  Binary F1 = {binary_f1:.4f}, Macro F1 = {macro_f1:.4f}, Final Metric = {final_metric:.4f}")

        if final_metric > best_metric:
            best_metric = final_metric
            best_binary_f1 = binary_f1
            best_macro_f1 = macro_f1
            epochs_no_improve = 0
            # state_dict() returns references to the live parameters, which
            # keep changing while training continues. Clone every tensor so
            # the snapshot really holds the weights of the best epoch.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            print(f"  New best metric! Saving model...")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered at epoch {epoch}")
                model.load_state_dict(best_model_state)
                break

    torch.save(best_model_state, f"best_model_fold{fold}.pth")
    best_models.append(best_model_state)

    # Metrics of the last epoch that ran for this fold.
    fold_metrics.append({
        'binary_f1': binary_f1,
        'macro_f1': macro_f1,
        'final_metric': final_metric
    })

    # Metrics of the best epoch of this fold.
    best_fold_metrics.append({
        'binary_f1': best_binary_f1,
        'macro_f1': best_macro_f1,
        'final_metric': best_metric
    })

    print(f"\nFold {fold + 1} completed.")
    print(f"Final validation metrics - Binary F1: {binary_f1:.4f}, Macro F1: {macro_f1:.4f}, Final: {final_metric:.4f}")
    print(f"Best validation metrics - Binary F1: {best_binary_f1:.4f}, Macro F1: {best_macro_f1:.4f}, Final: {best_metric:.4f}")

print("\n" + "="*50)
print("Cross-Validation Results")
print("="*50)

# Statistics over the best metrics of each fold
best_binary_f1 = [m['binary_f1'] for m in best_fold_metrics]
best_macro_f1 = [m['macro_f1'] for m in best_fold_metrics]
best_metrics = [m['final_metric'] for m in best_fold_metrics]

print("\nBest Fold-wise Metrics:")
for i, (bf1, mf1, fm) in enumerate(zip(best_binary_f1, best_macro_f1, best_metrics)):
    print(f"Fold {i+1}: Binary F1 = {bf1:.4f}, Macro F1 = {mf1:.4f}, Final = {fm:.4f}")

print("\nGlobal Statistics (Best Metrics):")
print(f"Mean Best Final Metric: {np.mean(best_metrics):.4f} ± {np.std(best_metrics):.4f}")
print(f"Mean Best Binary F1: {np.mean(best_binary_f1):.4f} ± {np.std(best_binary_f1):.4f}")
print(f"Mean Best Macro F1: {np.mean(best_macro_f1):.4f} ± {np.std(best_macro_f1):.4f}")


Fold 1/5
Train subjects: 65
Val subjects: 16
Epoch 01: Train Loss = 2.5398, Val Loss = 2.2454
  Binary F1 = 0.8421, Macro F1 = 0.2179, Final Metric = 0.5300
  New best metric! Saving model...
Epoch 02: Train Loss = 2.1876, Val Loss = 1.9619
  Binary F1 = 0.9034, Macro F1 = 0.2610, Final Metric = 0.5822
  New best metric! Saving model...
Epoch 03: Train Loss = 2.0925, Val Loss = 1.8313
  Binary F1 = 0.9156, Macro F1 = 0.2893, Final Metric = 0.6024
  New best metric! Saving model...
Epoch 04: Train Loss = 1.9823, Val Loss = 1.7746
  Binary F1 = 0.9256, Macro F1 = 0.3012, Final Metric = 0.6134
  New best metric! Saving model...
Epoch 05: Train Loss = 1.8570, Val Loss = 1.7468
  Binary F1 = 0.9249, Macro F1 = 0.3054, Final Metric = 0.6152
  New best metric! Saving model...
Epoch 06: Train Loss = 1.9488, Val Loss = 1.7515
  Binary F1 = 0.9189, Macro F1 = 0.2939, Final Metric = 0.6064
Epoch 07: Train Loss = 1.8011, Val Loss = 1.6515
  Binary F1 = 0.9432, Macro F1 = 0.3307, Final Metric = 0.

# Reloading best model

In [7]:
# Rebuild one network per fold and load the checkpoint written by the training
# loop. The checkpoints are state dicts of the model returned by mk_model(), so
# the same factory must be used here; loading them into a different
# architecture would fail on mismatched keys.
model_ensemble = []
for fold in range(NB_CROSS_VALIDATIONS):
    model = mk_model()
    checkpoint = torch.load(f"best_model_fold{fold}.pth", map_location=device)
    # Lazy layers are materialised from the shapes stored in the checkpoint.
    model.load_state_dict(checkpoint)
    model.eval()
    model_ensemble.append(model)

# **Submission**

In [8]:
def preprocess_sequence(df_seq: pd.DataFrame):
    """
    Turn one sequence DataFrame (pandas) into a model-ready tensor:
    - select ``feature_cols`` and fill gaps forward, then backward, then with 0
    - post-pad / post-truncate to ``pad_len`` time steps
    - transpose to channels-first and add a batch axis

    Returns a float torch.Tensor of shape (1, features, pad_len).

    NOTE(review): no scaling is applied here. ``feature_cols`` and ``pad_len``
    are module-level names that, in the part of the notebook visible here, are
    only assigned inside commented-out code -- confirm they are defined before
    this function is called.
    """
    filled = df_seq[feature_cols].ffill().bfill().fillna(0)
    padded_batch = keras_pad_sequences(
        [filled.values],
        maxlen=pad_len,
        dtype='float32',
        padding='post',
        truncating='post',
    )
    time_major = padded_batch[0]  # (pad_len, total_features)
    channel_major = torch.from_numpy(time_major.T)  # (features, pad_len)
    return channel_major.unsqueeze(0).float()  # (1, features, pad_len)
    
def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """
    Entry point handed to the Kaggle inference server: classify one sequence.

    sequence: polars DataFrame for a single sequence
    demographics: polars DataFrame joined on 'subject' to read 'handedness'
    Returns: predicted gesture string

    Rows whose 'handedness' equals 1 get their IMU columns passed through
    ``apply_symmetry``; the sequence is then preprocessed, every model of
    ``model_ensemble`` is evaluated, their softmax outputs are averaged and
    the arg-max class is mapped to its name via ``gesture_classes``.

    NOTE(review): ``imu_cols`` and ``apply_symmetry`` only appear inside
    commented-out code in the part of the notebook visible here -- confirm
    they are defined before the server calls this function.
    """
    seq_pd = sequence.to_pandas().merge(
        demographics.to_pandas(),
        on='subject',
        how='left',
        validate='many_to_one',
    )
    is_right_handed = seq_pd['handedness'] == 1
    seq_pd.loc[is_right_handed, imu_cols] = apply_symmetry(seq_pd.loc[is_right_handed, imu_cols])

    model_input = preprocess_sequence(seq_pd).to(device)

    with torch.no_grad():
        per_model_probs = [net(model_input).softmax(dim=-1) for net in model_ensemble]

    # Average over the ensemble axis, then pick the most likely class.
    mean_probs = torch.stack(per_model_probs).mean(dim=0)
    winner = mean_probs.argmax(dim=1).item()

    return str(gesture_classes[winner])

In [9]:
# Wrap `predict` in the competition's inference server.
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

# Without the KAGGLE_IS_COMPETITION_RERUN environment variable, exercise the
# server through the local gateway on the public test files; with it, serve
# requests instead.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )
else:
    inference_server.serve()

Credit to [Tarun Mishra](https://www.kaggle.com/tarundirector) – this code is heavily based on his [notebook](https://www.kaggle.com/code/tarundirector/sensor-pulse-viz-eda-for-bfrb-detection?scriptVersionId=243465321).