# Training & inference notebook
Credit to [Tarun Mishra](https://www.kaggle.com/tarundirector) – this code is heavily based on his [notebook](https://www.kaggle.com/code/tarundirector/sensor-pulse-viz-eda-for-bfrb-detection?scriptVersionId=243465321).

## Setup

## my setup

### My imports

#### Training imports

In [14]:
import os
import json
from glob import glob
from functools import partial
from datetime import datetime
from itertools import pairwise
from os.path import join, realpath
from typing import Optional, Literal

import torch
import kagglehub
import numpy as np
import pandas as pd
import plotly.express as px
from torch import nn, Tensor
from pandas import DataFrame as DF
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader as DL
from rich.progress import Progress, Task, track
from torch.optim.lr_scheduler import ConstantLR, LRScheduler
metric_package = kagglehub.package_import('wasupandceacar/cmi-metric', bypass_confirmation=True)

#### inference imports

In [15]:
import os
import json
import warnings
from os.path import join
from tqdm.notebook import tqdm
from itertools import pairwise, product

import torch
import numpy as np
import pandas as pd
import polars as pl
from numpy import ndarray
from torch import nn, Tensor
from numpy.linalg import norm
from pandas import DataFrame as DF
from scipy.spatial.transform import Rotation
# from kagglehub import competition_download, dataset_download, model_download
import kagglehub
metric_package = kagglehub.package_import('wasupandceacar/cmi-metric', bypass_confirmation=True)

import kaggle_evaluation.cmi_inference_server

### config

In [16]:
NB_CROSS_VALIDATIONS = 5
TRAINING_EPOCHS = 60
STARTING_LR = 0.0005
BATCH_SIZE = 256
TARGET_NAMES = sorted([
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Neck - pinch skin",
    "Neck - scratch",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
    "Drink from bottle/cup",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Glasses on/off"
])
TARGET_NAMES_NDARRAY = np.asarray(TARGET_NAMES)
MOCK_TRAINING_GAMMA = 1.01
MAX_LR_TO_MIN_DIV_FACTOR = 10

COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"

#### Inference config 

In [17]:
QUATERNION_COLS = ['rot_w', 'rot_x', 'rot_y', 'rot_z']
GRAVITY_WORLD = np.array([0, 0, 9.81], "float32")
RAW_ACCELRATION_COLS = ["acc_x", "acc_y", "acc_z"]
LINEAR_ACC_COLS = ["gravity_free_" + col for col in RAW_ACCELRATION_COLS]
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
CATEGORY_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
META_DATA_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'sequence_counter',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
DATASET_DF_DTYPES = {
    "acc_x": "float32", "acc_y": "float32", "acc_z": "float32",
    "thm_1":"float32", "thm_2":"float32", "thm_3":"float32", "thm_4":"float32", "thm_5":"float32",
    "sequence_counter": "int32",
    **{col: "category" for col in CATEGORY_COLUMNS},
    **{f"tof_{i_1}_v{i_2}": "float32" for i_1, i_2 in product(range(1, 5), range(64))},
}
PREPROCESSED_DATASET_HANDLE = "mauroabidalcarrer/prepocessed-cmi-2025"
# The quantile of the sequences len used to pad/truncate during preprocessing
SEQUENCE_NORMED_LEN_QUANTILE = 0.95
# SAMPLING_FREQUENCY = 10 #Hz
N_FOLDS = 5
VALIDATION_FRACTION = 0.2
TARGET_NAMES = sorted([
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Neck - pinch skin",
    "Neck - scratch",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
    "Drink from bottle/cup",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Glasses on/off"
])
EPSILON=1e-8
DELTA_ROTATION_ANGULAR_VELOCITY_COLS = ["angular_vel_x", "angular_vel_y", "angular_vel_z"]
DELTA_ROTATION_AXES_COLS = ["rotation_axis_x", "rotation_axis_y", "rotation_axis_z"]
EULER_ANGLES_COLS = ["euler_x", "euler_y", "euler_z"]

### Define function to get the feature columns
Feature columns change over time so it's better to have a function to get them than manually update a variable every time we add/remove features.

In [18]:
def get_feature_cols(df:DF) -> list[str]:
    return sorted(list(set(df.columns) - set(META_DATA_COLUMNS) - set(TARGET_NAMES)))

### Supress performance warngings

In [19]:
warnings.filterwarnings(
    "ignore",
    message=(
        "DataFrame is highly fragmented.  This is usually the result of "
        "calling `frame.insert` many times.*"
    ),
    category=pd.errors.PerformanceWarning,
)

### device setup

In [20]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Dataset Setup

In [21]:
dataset_path = kagglehub.dataset_download(
    handle="mauroabidalcarrer/prepocessed-cmi-2025",
)

In [22]:
class CMIDataset(TensorDataset):
    def __init__(
        self,
        parent_dir: str,
        split: Optional[Literal["train", "validation"]]=None,
        subset: Optional[int]=None,
        force_download=False
    ):
        dataset_path = kagglehub.dataset_download(
            handle="mauroabidalcarrer/prepocessed-cmi-2025",
            force_download=force_download
        )
        parent_dir = join(dataset_path, "preprocessed_dataset", parent_dir)
        split = "" if split is None else split + "_"
        x = np.load(join(parent_dir, f"{split}X.npy")).swapaxes(1, 2)
        y = np.load(join(parent_dir, f"{split}Y.npy"))
        if subset is not None:
            x = x[:subset]
            y = y[:subset]
        super().__init__(
            torch.from_numpy(x), 
            torch.from_numpy(y),
        )

In [23]:
meta_data_path = join(
    dataset_path,
    "preprocessed_dataset",
    "full_dataset_meta_data.json"
)
with open(meta_data_path, "r") as fp:
    meta_data = json.load(fp)
# Convert target names into a ndarray to index it batchwise.
# meta_data["target_names"] = np.asarray(meta_data["target_names"])
non_imu_feats_idx = [feat_idx for feat_idx, feat in enumerate(meta_data["feature_cols"]) if feat.startswith(("thm", "tof"))]
non_imu_feats = [feat for feat in meta_data["feature_cols"] if feat.startswith(("thm", "tof"))]
imu_feats_idx = [feat_idx for feat_idx, feat in enumerate(meta_data["feature_cols"]) if not feat.startswith(("thm", "tof"))]
imu_feats = [feat for feat in meta_data["feature_cols"] if not feat.startswith(("thm", "tof"))]

### kaggle notbook Imports

In [24]:
import random
import numpy as np
import torch
import os

def seed_everything(seed=42):
    """Set all random seeds for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.use_deterministic_algorithms(True, warn_only=True)

SEED = 42
seed_everything(seed=SEED)

import pandas as pd
import polars as pl
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GroupKFold
from sklearn.utils.class_weight import compute_class_weight
import joblib
from tqdm import tqdm

from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# from tensorflow.keras.preprocessing.sequence import pad_sequences as keras_pad_sequences

import kaggle_evaluation.cmi_inference_server
from matplotlib import pyplot as plt

## Read data

In [25]:
print("Loading datasets...")
competition_dataset_path = kagglehub.competition_download(COMPETITION_HANDLE)
train_df = pd.read_csv(join(competition_dataset_path, "train.csv"))
train_dem_df = pd.read_csv(join(competition_dataset_path, "train_demographics.csv"))
test_df = pd.read_csv(join(competition_dataset_path, "test.csv"))
test_dem_df = pd.read_csv(join(competition_dataset_path, "test_demographics.csv"))
print(f"Train rows: {len(train_df)}, Test rows: {len(test_df)}")

# Encode labels
label_encoder = LabelEncoder()
train_df['gesture'] = label_encoder.fit_transform(train_df['gesture'].astype(str))
gesture_classes = label_encoder.classes_

bfrb_gestures = [
    'Above ear - pull hair',
    'Forehead - pull hairline',
    'Forehead - scratch',
    'Eyebrow - pull hair',
    'Eyelash - pull hair',
    'Neck - pinch skin',
    'Neck - scratch',
    'Cheek - pinch skin'
]
bfrb_indices = label_encoder.transform(bfrb_gestures)

# imu_cols = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']
# tof_thm_cols = [c for c in train_df.columns if c.startswith('thm_') or c.startswith('tof_')]

# # Reorder so that IMU features come first
# feature_cols = imu_cols + tof_thm_cols
# imu_dim = len(imu_cols)
# tof_thm_dim = len(tof_thm_cols)

# print(f"IMU features: {imu_dim}, TOF/Thermal features: {tof_thm_dim}, Total features: {len(feature_cols)}")

# # Check for missing values
# nan_counts = train_df[feature_cols].isna().sum().sum()
# print("Total NaNs in train features:", nan_counts)


# # to remove hand dependency in IMU data
# # im not sure if the rotation is on the x axis but this give me the best CV
# def apply_symmetry(data):
#     transformed = data.copy()
#     transformed['acc_z'] = -transformed['acc_z']
#     transformed['acc_y'] = -transformed['acc_y']
    
#     transformed['rot_w'] = transformed['rot_w']
#     transformed['rot_x'] = transformed['rot_x']
#     transformed['rot_y'] = -transformed['rot_y']
#     transformed['rot_z'] = -transformed['rot_z']
#     return transformed


# train_df = train_df.merge(
#     train_dem_df,
#     on='subject',
#     how='left',
#     validate='many_to_one'
# )

# right_handed_mask = train_df['handedness'] == 1
# train_df.loc[right_handed_mask, imu_cols] = apply_symmetry(train_df.loc[right_handed_mask, imu_cols])

Loading datasets...
Train rows: 574945, Test rows: 107


### Create kaggle notebook dataset

In [26]:
# sequences = train_df.groupby('sequence_id')
# X_list = []
# lengths = []
# y_list = []

# sequence_info = []
# for i, (seq_id, seq) in enumerate(sequences):
#     seq_data = seq[feature_cols].ffill().bfill().fillna(0).values
#     X_list.append(seq_data)
#     lengths.append(seq_data.shape[0])
#     sequence_info.append({
#         'sequence_id': seq_id,
#         'subject': seq['subject'].iloc[0],
#         'gesture': seq['gesture'].iloc[0]
#     })

# pad_len = int(np.percentile(lengths, 90))
# print(f"Pad/truncate all sequences to length {pad_len} (90th percentile).")

# seq_df = pd.DataFrame(sequence_info)
# X_array = keras_pad_sequences(
#     X_list,
#     maxlen=pad_len,
#     dtype='float32',
#     padding='post',
#     truncating='post'
# )  # shape: (n_samples, pad_len, total_features)

# y_array = seq_df['gesture'].values  # shape: (n_samples,)

# num_classes = len(np.unique(y_array))
# y_array = np.eye(num_classes)[y_array]  # shape: (n_samples, num_classes)

# # Transpose to (n_samples, features, seq_len) for PyTorch
# X_array = np.transpose(X_array, (0, 2, 1))


# class SequenceDataset(Dataset):
#     def __init__(self, X, y=None):
#         """
#         X: np.ndarray of shape (n_samples, features, seq_len)
#         y: np.ndarray of shape (n_samples, num_classes) or None for test
#         """
#         self.X = torch.from_numpy(X).float()
#         self.y = torch.from_numpy(y).float() if y is not None else None

#     def __len__(self):
#         return self.X.size(0)

#     def __getitem__(self, idx):
#         if self.y is not None:
#             return self.X[idx], self.y[idx]
#         else:
#             return self.X[idx]

## Model definition

In [None]:
class ResidualBlock(nn.Module):
    def __init__(self, in_chns:int, out_chns:int):
        super().__init__()
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
            nn.ReLU(),
            nn.Conv1d(out_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
        )
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1),
                nn.BatchNorm1d(out_chns)
            )

    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return nn.functional.relu(activaition_maps)

class Resnet(nn.Module):
    def __init__(
            self,
            in_channels:int,
            depth:int,
            # n_res_block_per_depth:int,
            mlp_width:int,
            n_class:int,
        ):
        super().__init__()
        chs_per_depth = [in_channels * 2 ** i for i in range(depth)]
        blocks_chns_it = pairwise(chs_per_depth)
        self.res_blocks = [ResidualBlock(in_chns, out_chns) for in_chns, out_chns in blocks_chns_it]
        self.res_blocks = nn.ModuleList(self.res_blocks)
        self.mlp_head = nn.Sequential(
            nn.LazyLinear(mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, n_class),
            nn.Softmax(dim=1),
        )
        
        
    def forward(self, x:Tensor) -> Tensor:
        activation_maps = x
        for res_block in self.res_blocks:
            activation_maps = nn.functional.max_pool1d(res_block(activation_maps), 2)
        # print("res block output shape:", activation_maps.shape)
        out = activation_maps.view(activation_maps.shape[0], -1)
        # print("flatten mlp head input shape:", out.shape)
        out = self.mlp_head(out)
        return out

### Create model function

In [28]:
def mk_model() -> nn.Module:
    nb_in_chans = len(meta_data["feature_cols"])
    return (
        Resnet(
            in_channels=nb_in_chans,
            depth=4,
            mlp_width=256,
            n_class=18
        )
        .to(device)
    )

print("input channels:", len(meta_data["feature_cols"]))

input channels: 66


## Training loop

In [29]:
seed_everything(seed=SEED)

# criterion = soft_cross_entropy
criterion = torch.nn.CrossEntropyLoss()

n_splits = 5
batch_size = 128
gkf = GroupKFold(n_splits=n_splits)

fold_metrics = []
best_fold_metrics = []
best_models = []

fold_patterns = join(dataset_path, "preprocessed_dataset", "fold*")
fold_pths = glob(fold_patterns)[:NB_CROSS_VALIDATIONS]
all_training_metrics = {}


for fold, fold_pth in enumerate(fold_pths):
    print("training:", fold + 1)
    train_dataset = CMIDataset(fold_pth, "train")
    train_loader = DL(train_dataset, BATCH_SIZE, shuffle=True)
    validation_dataset = CMIDataset(fold_pth, "validation")
    validation_loader = DL(validation_dataset, BATCH_SIZE, shuffle=False)
    print(f"\n{'='*50}")
    print(f"Fold {fold + 1}/{n_splits}")
    # print(f"Train subjects: {len(np.unique(seq_df.iloc[train_idx]['subject']))}")
    # print(f"Val subjects: {len(np.unique(seq_df.iloc[val_idx]['subject']))}")
    # print(f"{'='*50}")
    
    # train_dataset = SequenceDataset(X_array[train_idx], y_array[train_idx])
    # val_dataset = SequenceDataset(X_array[val_idx], y_array[val_idx])
    
    # train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    # validation_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    
    seed_everything(seed=SEED + fold)
    model = mk_model()
    
    # Optimizer et scheduler
    # optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    optimizer = torch.optim.AdamW(model.parameters(), STARTING_LR)
    steps_per_epoch = len(train_loader)
    # scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    #     optimizer,
    #     T_0=5 * steps_per_epoch,
    #     T_mult=2,
    #     eta_min=1e-5,
    # )
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=0.002 / 2,
        div_factor=MAX_LR_TO_MIN_DIV_FACTOR,
        epochs=TRAINING_EPOCHS,
        steps_per_epoch=len(train_loader),
    )


    # Early stopping
    best_metric = -np.inf
    best_binary_f1 = -np.inf
    best_macro_f1 = -np.inf
    patience = 15
    epochs_no_improve = 0
    
    for epoch in range(1, TRAINING_EPOCHS + 1):
        # Training phase
        model.train()
        train_loss = 0.0
        total = 0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
    
            # Apply mixup
            # mixed_x, mixed_y = mixup_data(batch_x, batch_y, alpha=0.2)
            mixed_x, mixed_y = batch_x, batch_y #mixup_data(batch_x, batch_y, alpha=0.2)
    
            optimizer.zero_grad()
            outputs = model(mixed_x)
            loss = criterion(outputs, mixed_y)
            loss.backward()
            optimizer.step()
            scheduler.step()
    
            train_loss += loss.item() * batch_x.size(0)
            total += batch_x.size(0)
        train_loss /= total
    
        # Validation phase
        model.eval()
        val_loss = 0.0
        total = 0
        all_true = []
        all_pred = []
        
        with torch.no_grad():
            for batch_x, batch_y in validation_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)
                
                outputs = model(batch_x)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item() * batch_x.size(0)
                total += batch_x.size(0)
                
                # Get predicted class indices
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                # Get true class indices from one-hot
                trues = torch.argmax(batch_y, dim=1).cpu().numpy()
                
                all_true.append(trues)
                all_pred.append(preds)
        
        val_loss /= total
        all_true = np.concatenate(all_true)
        all_pred = np.concatenate(all_pred)
        
        # Compute competition metrics
        # Binary classification: BFRB (1) vs non-BFRB (0)
        binary_true = np.isin(all_true, bfrb_indices).astype(int)
        binary_pred = np.isin(all_pred, bfrb_indices).astype(int)
        binary_f1 = f1_score(binary_true, binary_pred)
        
        # Collapse non-BFRB gestures into a single class
        collapsed_true = np.where(
            np.isin(all_true, bfrb_indices),
            all_true,
            len(bfrb_gestures)  # Single non-BFRB class
        )
        collapsed_pred = np.where(
            np.isin(all_pred, bfrb_indices),
            all_pred,
            len(bfrb_gestures)  # Single non-BFRB class
        )
        
        # Macro F1 on collapsed classes
        macro_f1 = f1_score(collapsed_true, collapsed_pred, average='macro')
        final_metric = (binary_f1 + macro_f1) / 2
        
        print(f"Epoch {epoch:02d}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
        print(f"  Binary F1 = {binary_f1:.4f}, Macro F1 = {macro_f1:.4f}, Final Metric = {final_metric:.4f}")
        
        if final_metric > best_metric:
            best_metric = final_metric
            best_binary_f1 = binary_f1
            best_macro_f1 = macro_f1
            epochs_no_improve = 0
            best_model_state = model.state_dict()
            print(f"  New best metric! Saving model...")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered at epoch {epoch}")
                model.load_state_dict(best_model_state)
                break

    torch.save(best_model_state, f"best_model_fold{fold}.pth")
    best_models.append(best_model_state)

    fold_metrics.append({
        'binary_f1': binary_f1,
        'macro_f1': macro_f1,
        'final_metric': final_metric
    })
    
    best_fold_metrics.append({
        'binary_f1': best_binary_f1,
        'macro_f1': best_macro_f1,
        'final_metric': best_metric
    })
    
    print(f"\nFold {fold + 1} completed.")
    print(f"Final validation metrics - Binary F1: {binary_f1:.4f}, Macro F1: {macro_f1:.4f}, Final: {final_metric:.4f}")
    print(f"Best validation metrics - Binary F1: {best_binary_f1:.4f}, Macro F1: {best_macro_f1:.4f}, Final: {best_metric:.4f}")

print("\n" + "="*50)
print("Cross-Validation Results")
print("="*50)

# Statistiques pour les meilleures métriques
best_binary_f1 = [m['binary_f1'] for m in best_fold_metrics]
best_macro_f1 = [m['macro_f1'] for m in best_fold_metrics]
best_metrics = [m['final_metric'] for m in best_fold_metrics]

print("\nBest Fold-wise Metrics:")
for i, (bf1, mf1, fm) in enumerate(zip(best_binary_f1, best_macro_f1, best_metrics)):
    print(f"Fold {i+1}: Binary F1 = {bf1:.4f}, Macro F1 = {mf1:.4f}, Final = {fm:.4f}")

print("\nGlobal Statistics (Best Metrics):")
print(f"Mean Best Final Metric: {np.mean(best_metrics):.4f} ± {np.std(best_metrics):.4f}")
print(f"Mean Best Binary F1: {np.mean(best_binary_f1):.4f} ± {np.std(best_binary_f1):.4f}")
print(f"Mean Best Macro F1: {np.mean(best_macro_f1):.4f} ± {np.std(best_macro_f1):.4f}")

training: 1

Fold 1/5
Epoch 01: Train Loss = 2.7731, Val Loss = 2.7456
  Binary F1 = 0.8111, Macro F1 = 0.2203, Final Metric = 0.5157
  New best metric! Saving model...
Epoch 02: Train Loss = 2.6407, Val Loss = 2.6868
  Binary F1 = 0.8808, Macro F1 = 0.2593, Final Metric = 0.5700
  New best metric! Saving model...
Epoch 03: Train Loss = 2.5722, Val Loss = 2.6292
  Binary F1 = 0.8989, Macro F1 = 0.3084, Final Metric = 0.6036
  New best metric! Saving model...
Epoch 04: Train Loss = 2.5242, Val Loss = 2.6466
  Binary F1 = 0.8924, Macro F1 = 0.2771, Final Metric = 0.5848
Epoch 05: Train Loss = 2.4938, Val Loss = 2.6073
  Binary F1 = 0.8509, Macro F1 = 0.3249, Final Metric = 0.5879
Epoch 06: Train Loss = 2.4598, Val Loss = 2.6066
  Binary F1 = 0.8776, Macro F1 = 0.2842, Final Metric = 0.5809
Epoch 07: Train Loss = 2.4381, Val Loss = 2.5973
  Binary F1 = 0.8833, Macro F1 = 0.3116, Final Metric = 0.5974
Epoch 08: Train Loss = 2.4205, Val Loss = 2.5958
  Binary F1 = 0.8980, Macro F1 = 0.2983,

## Reloading best model

In [44]:
model_ensemble = []
for fold in range(5):
    model = mk_model().to(device)
    checkpoint = torch.load(f"best_model_fold{fold}.pth", map_location=device)
    model.load_state_dict(checkpoint)
    model.eval()
    model_ensemble.append(model)

  checkpoint = torch.load(f"best_model_fold{fold}.pth", map_location=device)


## Submission

### Define preprocessing function

In [31]:
def get_fillna_val_per_feature_col(df:DF) -> dict:
    return {col: 1.0 if col == 'rot_w' else 0 for col in get_feature_cols(df)}

def imputed_features(df:DF) -> DF:
    # Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
    # So we replace them by NaN and then perform imputing.  
    tof_vals_to_nan = {col: -1.0 for col in df.columns if col.startswith("tof")}

    df[get_feature_cols(df)] = (
        df
        .loc[:, get_feature_cols(df)]
        # df.replace with np.nan sets dtype to floar64 so we set it back to float32
        .replace(tof_vals_to_nan, value=np.nan)
        .astype("float32")
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .ffill()
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .bfill()
        # In case there are only nan in the column in the sequence
        .fillna(get_fillna_val_per_feature_col(df))
    )
    return df

def norm_quat_rotations(df:DF) -> DF:
    df[QUATERNION_COLS] /= np.linalg.norm(df[QUATERNION_COLS], axis=1, keepdims=True)
    return df

def add_linear_acc_cols(df:DF) -> DF:
    # Vectorized version of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Dataset `remove_gravity_from_acc`
    rotations:Rotation = Rotation.from_quat(df[QUATERNION_COLS])
    gravity_sensor_frame = rotations.apply(GRAVITY_WORLD, inverse=True).astype("float32")
    df[LINEAR_ACC_COLS] = df[RAW_ACCELRATION_COLS] - gravity_sensor_frame
    return df

def add_acc_magnitude(df:DF, acc_cols:list[str], acc_mag_col_name:str) -> DF:
    return df.assign(**{acc_mag_col_name: np.linalg.norm(df.loc[:, acc_cols], axis=1)})

def add_quat_angle_mag(df:DF) -> DF:
    return df.assign(quat_rot_mag=np.arccos(df["rot_w"]) * 2)

def add_angular_velocity_features(df:DF) -> DF:
    rotations = Rotation.from_quat(df[QUATERNION_COLS])
    delta_rotations = rotations[1:] * rotations[:-1].inv()
    delta_rot_velocity = delta_rotations.as_rotvec()
    # Add extra line to avoid shape mismatch
    delta_rot_velocity = np.vstack((np.zeros((1, 3)), delta_rot_velocity))
    delta_rot_magnitude = norm(delta_rot_velocity, axis=1, keepdims=True)
    delta_rot_axes = delta_rot_velocity / (delta_rot_magnitude + EPSILON)
    df[DELTA_ROTATION_ANGULAR_VELOCITY_COLS] = delta_rot_velocity
    df[DELTA_ROTATION_AXES_COLS] = delta_rot_axes
    df["delta_rot_mag"] = delta_rot_magnitude.squeeze()

    return df

def rot_euler_angles(df:DF) -> ndarray:
    df[EULER_ANGLES_COLS] = (
        Rotation
        .from_quat(df[QUATERNION_COLS])
        .as_euler("xyz")
        .squeeze()
    )
    return df

def agg_tof_cols_per_sensor(df:DF) -> DF:
    for tof_idx in range(1, 6):
        tof_name = f"tof_{tof_idx}"
        tof_cols = [f"{tof_name}_v{v_idx}" for v_idx in range(64)]
        if any(map(lambda col: col not in df.columns, tof_cols)):
            print(f"Some (or) all ToF {tof_idx} columns are not in the df. Maybe you already ran this cell?")
            continue
        df = (
            df
            # Need to use a dict otherwise the name of the col will be "tof_preffix" instead of the value it contains
            .assign(**{tof_name:df[tof_cols].mean(axis="columns")})
            .drop(columns=tof_cols)
        )
    return df

def add_diff_features(df:DF) -> DF:
    df[[col + "_diff" for col in get_feature_cols(df)]] = (
        df
        .groupby("sequence_id", observed=True)
        [get_feature_cols(df)]
        .diff()
        .fillna(get_fillna_val_per_feature_col(df))
        .values
    )
    return df

def length_normed_sequence_feat_arr(sequence: DF) -> ndarray:
    features = (
        sequence
        .loc[:, meta_data["feature_cols"]]
        .values
    )
    normed_sequence_len = meta_data["pad_seq_len"]
    len_diff = abs(normed_sequence_len - len(features))
    if len(features) < normed_sequence_len:
        padded_features = np.pad(
            features,
            ((len_diff // 2 + len_diff % 2, len_diff // 2), (0, 0)),
        )
        return padded_features
    elif len(features) > normed_sequence_len:
        return features[len_diff // 2:-len_diff // 2]
    else:
        return features

def preprocess_sequence(sequence_df:pl.DataFrame) -> ndarray:
    return (
        sequence_df                     
        .to_pandas()                            # Convert to pandas dataframe.
        .pipe(imputed_features)                 # Impute missing data.
        .pipe(norm_quat_rotations)              # Norm quaternions
        .pipe(add_linear_acc_cols)              # Add gravity free acceleration.
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "raw_acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)                 # Add rotation acc expressed as euler angles.
        .pipe(agg_tof_cols_per_sensor)          # Aggregate ToF columns.
        .pipe(add_diff_features)                # 
        .loc[:, sorted(meta_data["feature_cols"])]      # Retain only the usefull columns a.k.a features.
        .sub(meta_data["mean"])                 # Subtract features by their mean, std norm pt.1.
        .div(meta_data["std"])                  # Divide by Standard deviation, std norm pt.2.
        .pipe(length_normed_sequence_feat_arr)  # get feature ndarray of sequence.
        .T                                      # Transpose to swap channel and X dimensions.
    )

### Define prediction function

In [None]:
# def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
#     """
#     Kaggle evaluation API will call this for each sequence.
#     sequence: polars DataFrame for a single sequence
#     demographics: unused in this model
#     Returns: predicted gesture string
#     """
#     df_seq = sequence.to_pandas()
#     df_demo = demographics.to_pandas()

#     df_seq = df_seq.merge(
#         df_demo,
#         on='subject',
#         how='left',
#         validate='many_to_one',
#     )
#     right_handed_mask = df_seq['handedness'] == 1
#     df_seq.loc[right_handed_mask, imu_cols] = apply_symmetry(df_seq.loc[right_handed_mask, imu_cols])

#     x_tensor = preprocess_sequence(df_seq).to(device)

#     all_outputs = []
#     with torch.no_grad():
#         for model in model_ensemble:
#             outputs = model(x_tensor).softmax(dim=-1)
#             all_outputs.append(outputs)

#     avg_outputs = torch.mean(torch.stack(all_outputs), dim=0)
#     pred_idx = torch.argmax(avg_outputs, dim=1).item()

#     return str(gesture_classes[pred_idx])

def predict(sequence: pl.DataFrame, _: pl.DataFrame) -> str:
    """
    Kaggle evaluation API will call this for each sequence.
    sequence: polars DataFrame for a single sequence
    demographics: unused in this model
    Returns: predicted gesture string
    """
    # df_seq = sequence.to_pandas()
    # df_demo = demographics.to_pandas()

    # df_seq = df_seq.merge(
    #     df_demo,
    #     on='subject',
    #     how='left',
    #     validate='many_to_one',
    # )
    # right_handed_mask = df_seq['handedness'] == 1
    # df_seq.loc[right_handed_mask, imu_cols] = apply_symmetry(df_seq.loc[right_handed_mask, imu_cols])
    # x = torch.unsqueeze(Tensor(x), dim=0).to(device)

    x_tensor = (
        torch.unsqueeze(Tensor(preprocess_sequence(sequence)), dim=0)
        .float()
        .to(device)
    )
    print(x_tensor.shape)

    all_outputs = []
    with torch.no_grad():
        for model in model_ensemble[:1]: # Only take the first one bc it's the only one that takes in the correct input shape
            outputs = model(x_tensor)
            all_outputs.append(outputs)

    avg_outputs = torch.mean(torch.stack(all_outputs), dim=0)
    pred_idx = torch.argmax(avg_outputs, dim=1).item()

    return str(gesture_classes[pred_idx])

### Run inference server

In [None]:
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, 'test.csv'),
            join(competition_dataset_path, 'test_demographics.csv'),
        )
    )
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, 'test.csv'),
            join(competition_dataset_path, 'test_demographics.csv'),
        )
    )

torch.Size([1, 66, 127])
res block output shape: torch.Size([1, 528, 15])
flatten mlp head input shape: torch.Size([1, 7920])
torch.Size([1, 66, 127])
res block output shape: torch.Size([1, 528, 15])
flatten mlp head input shape: torch.Size([1, 7920])
