# Inference notebook

## Setup

### Imports

In [1]:
import os
import json
import warnings
from os.path import join
from tqdm.notebook import tqdm
from itertools import pairwise, product

import torch
import numpy as np
import pandas as pd
import polars as pl
from numpy import ndarray
from torch import nn, Tensor
from numpy.linalg import norm
from pandas import DataFrame as DF
from scipy.spatial.transform import Rotation
# from kagglehub import competition_download, dataset_download, model_download
import kagglehub
metric_package = kagglehub.package_import('wasupandceacar/cmi-metric', bypass_confirmation=True)

import kaggle_evaluation.cmi_inference_server

### Supress performance warngings

In [2]:
warnings.filterwarnings(
    "ignore",
    message=(
        "DataFrame is highly fragmented.  This is usually the result of "
        "calling `frame.insert` many times.*"
    ),
    category=pd.errors.PerformanceWarning,
)

### Define device

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Config

In [4]:
QUATERNION_COLS = ['rot_w', 'rot_x', 'rot_y', 'rot_z']
GRAVITY_WORLD = np.array([0, 0, 9.81], "float32")
RAW_ACCELRATION_COLS = ["acc_x", "acc_y", "acc_z"]
LINEAR_ACC_COLS = ["gravity_free_" + col for col in RAW_ACCELRATION_COLS]
COMPETITION_HANDLE = "cmi-detect-behavior-with-sensor-data"
CATEGORY_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
META_DATA_COLUMNS = [
    'row_id',
    'sequence_type',
    'sequence_id',
    'sequence_counter',
    'subject',
    'orientation',
    'behavior',
    'phase',
    'gesture',
]
DATASET_DF_DTYPES = {
    "acc_x": "float32", "acc_y": "float32", "acc_z": "float32",
    "thm_1":"float32", "thm_2":"float32", "thm_3":"float32", "thm_4":"float32", "thm_5":"float32",
    "sequence_counter": "int32",
    **{col: "category" for col in CATEGORY_COLUMNS},
    **{f"tof_{i_1}_v{i_2}": "float32" for i_1, i_2 in product(range(1, 5), range(64))},
}
PREPROCESSED_DATASET_HANDLE = "mauroabidalcarrer/prepocessed-cmi-2025"
# The quantile of the sequences len used to pad/truncate during preprocessing
SEQUENCE_NORMED_LEN_QUANTILE = 0.95
# SAMPLING_FREQUENCY = 10 #Hz
N_FOLDS = 5
VALIDATION_FRACTION = 0.2
TARGET_NAMES = [
    "Above ear - pull hair",
    "Cheek - pinch skin",
    "Eyebrow - pull hair",
    "Eyelash - pull hair",
    "Feel around in tray and pull out an object",
    "Forehead - pull hairline",
    "Forehead - scratch",
    "Neck - pinch skin",
    "Neck - scratch",
    "Text on phone",
    "Wave hello",
    "Write name in air",
    "Write name on leg",
    "Drink from bottle/cup",
    "Pinch knee/leg skin",
    "Pull air toward your face",
    "Scratch knee/leg skin",
    "Glasses on/off"
]
EPSILON=1e-8
DELTA_ROTATION_ANGULAR_VELOCITY_COLS = ["angular_vel_x", "angular_vel_y", "angular_vel_z"]
DELTA_ROTATION_AXES_COLS = ["rotation_axis_x", "rotation_axis_y", "rotation_axis_z"]
EULER_ANGLES_COLS = ["euler_x", "euler_y", "euler_z"]

### Define function to get the feature columns
Feature columns change over time so it's better to have a function to get them than manually update a variable every time we add/remove features.

In [5]:
def get_feature_cols(df:DF) -> list[str]:
    return list(set(df.columns) - set(META_DATA_COLUMNS) - set(TARGET_NAMES))

### Load dataset meta data

In [6]:
meta_data_path = kagglehub.dataset_download(
    handle="mauroabidalcarrer/prepocessed-cmi-2025",
    path="preprocessed_dataset/full_dataset_meta_data.json"
)
with open(meta_data_path, "r") as fp:
    meta_data = json.load(fp)

### Define function to get the feature columns
Feature columns change over time so it's better to have a function to get them than manually update a variable every time we add/remove features.

In [7]:
def get_feature_cols(df:DF) -> list[str]:
    return list(set(df.columns) - set(META_DATA_COLUMNS) - set(TARGET_NAMES))

## Preprocessing

### Define preprocessing function

In [8]:
def get_fillna_val_per_feature_col(df:DF) -> dict:
    return {col: 1.0 if col == 'rot_w' else 0 for col in get_feature_cols(df)}

def imputed_features(df:DF) -> DF:
    # Missing ToF values are already imputed by -1 which is inconvinient since we want all missing values to be NaN.    
    # So we replace them by NaN and then perform imputing.  
    tof_vals_to_nan = {col: -1.0 for col in df.columns if col.startswith("tof")}
    # fillna_val_per_col = {col: 1.0 if col == 'rot_w' else 0 for col in df.columns}
    #print(get_fillna_val_per_feature_col(df))
    df[get_feature_cols(df)] = (
        df
        .loc[:, get_feature_cols(df)]
        # df.replace with np.nan sets dtype to floar64 so we set it back to float32
        .replace(tof_vals_to_nan, value=np.nan)
        .astype("float32")
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .ffill()
        .groupby(df["sequence_id"], observed=True, as_index=False)
        .bfill()
        # In case there are only nan in the column in the sequence
        .fillna(get_fillna_val_per_feature_col(df))
    )
    return df

def norm_quat_rotations(df:DF) -> DF:
    df[QUATERNION_COLS] /= np.linalg.norm(df[QUATERNION_COLS], axis=1, keepdims=True)
    return df

def add_linear_acc_cols(df:DF) -> DF:
    # Vectorized version of https://www.kaggle.com/code/wasupandceacar/lb-0-82-5fold-single-bert-model#Dataset `remove_gravity_from_acc`
    rotations:Rotation = Rotation.from_quat(df[QUATERNION_COLS])
    gravity_sensor_frame = rotations.apply(GRAVITY_WORLD, inverse=True).astype("float32")
    df[LINEAR_ACC_COLS] = df[RAW_ACCELRATION_COLS] - gravity_sensor_frame
    return df

def add_acc_magnitude(df:DF, acc_cols:list[str], acc_mag_col_name:str) -> DF:
    return df.assign(**{acc_mag_col_name: np.linalg.norm(df.loc[:, acc_cols], axis=1)})

def add_quat_angle_mag(df:DF) -> DF:
    return df.assign(quat_rot_mag=np.arccos(df["rot_w"]) * 2)

def add_angular_velocity_features(df:DF) -> DF:
    rotations = Rotation.from_quat(df[QUATERNION_COLS])
    delta_rotations = rotations[1:] * rotations[:-1].inv()
    delta_rot_velocity = delta_rotations.as_rotvec()
    # Add extra line to avoid shape mismatch
    delta_rot_velocity = np.vstack((np.zeros((1, 3)), delta_rot_velocity))
    delta_rot_magnitude = norm(delta_rot_velocity, axis=1, keepdims=True)
    delta_rot_axes = delta_rot_velocity / (delta_rot_magnitude + EPSILON)
    df[DELTA_ROTATION_ANGULAR_VELOCITY_COLS] = delta_rot_velocity
    df[DELTA_ROTATION_AXES_COLS] = delta_rot_axes
    df["delta_rot_mag"] = delta_rot_magnitude.squeeze()

    return df

def rot_euler_angles(df:DF) -> ndarray:
    df[EULER_ANGLES_COLS] = (
        Rotation
        .from_quat(df[QUATERNION_COLS])
        .as_euler("xyz")
        .squeeze()
    )
    return df

def agg_tof_cols_per_sensor(df:DF) -> DF:
    for tof_idx in range(1, 6):
        tof_name = f"tof_{tof_idx}"
        tof_cols = [f"{tof_name}_v{v_idx}" for v_idx in range(64)]
        if any(map(lambda col: col not in df.columns, tof_cols)):
            print(f"Some (or) all ToF {tof_idx} columns are not in the df. Maybe you already ran this cell?")
            continue
        df = (
            df
            # Need to use a dict otherwise the name of the col will be "tof_preffix" instead of the value it contains
            .assign(**{tof_name:df[tof_cols].mean(axis="columns")})
            .drop(columns=tof_cols)
        )
    return df

def add_diff_features(df:DF) -> DF:
    df[[col + "_diff" for col in get_feature_cols(df)]] = (
        df
        .groupby("sequence_id", observed=True)
        [get_feature_cols(df)]
        .diff()
        .fillna(get_fillna_val_per_feature_col(df))
        .values
    )
    return df

def length_normed_sequence_feat_arr(sequence: DF) -> ndarray:
    features = (
        sequence
        .loc[:, meta_data["feature_cols"]]
        .values
    )
    normed_sequence_len = meta_data["pad_seq_len"]
    len_diff = abs(normed_sequence_len - len(features))
    if len(features) < normed_sequence_len:
        padded_features = np.pad(
            features,
            ((len_diff // 2 + len_diff % 2, len_diff // 2), (0, 0)),
        )
        return padded_features
    elif len(features) > normed_sequence_len:
        return features[len_diff // 2:-len_diff // 2]
    else:
        return features

def preprocess_sequence(sequence_df:pl.DataFrame) -> ndarray:
    return (
        sequence_df                     
        .to_pandas()                            # Convert to pandas dataframe.
        .pipe(imputed_features)                 # Impute missing data.
        .pipe(norm_quat_rotations)              # Norm quaternions
        .pipe(add_linear_acc_cols)              # Add gravity free acceleration.
        .pipe(add_acc_magnitude, RAW_ACCELRATION_COLS, "raw_acc_mag")
        .pipe(add_acc_magnitude, LINEAR_ACC_COLS, "linear_acc_mag")
        .pipe(add_quat_angle_mag)
        .pipe(add_angular_velocity_features)
        .pipe(rot_euler_angles)                 # Add rotation acc expressed as euler angles.
        .pipe(agg_tof_cols_per_sensor)          # Aggregate ToF columns.
        .pipe(add_diff_features)                # 
        .loc[:, meta_data["feature_cols"]]      # Retain only the usefull columns a.k.a features.
        .sub(meta_data["mean"])                 # Subtract features by their mean, std norm pt.1.
        .div(meta_data["std"])                  # Divide by Standard deviation, std norm pt.2.
        .pipe(length_normed_sequence_feat_arr)  # get feature ndarray of sequence.
        .T                                      # Transpose to swap channel and X dimensions.
    )

## Load model

### Define model

In [9]:
class ResidualBlock(nn.Module):
    def __init__(self, in_chns:int, out_chns:int):
        super().__init__()
        self.blocks = nn.Sequential(
            nn.Conv1d(in_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
            nn.ReLU(),
            nn.Conv1d(out_chns, out_chns, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_chns),
        )
        if in_chns == out_chns:
            self.skip_connection = nn.Identity() 
        else:
            # TODO: set bias to False ?
            self.skip_connection = nn.Sequential(
                nn.Conv1d(in_chns, out_chns, 1),
                nn.BatchNorm1d(out_chns)
            )

    def forward(self, x:Tensor) -> Tensor:
        activaition_maps = self.skip_connection(x) + self.blocks(x)
        return nn.functional.relu(activaition_maps)

class Resnet(nn.Module):
    def __init__(
            self,
            in_channels:int,
            depth:int,
            # n_res_block_per_depth:int,
            mlp_width:int,
            n_class:int,
        ):
        super().__init__()
        chs_per_depth = [in_channels * 2 ** i for i in range(depth)]
        blocks_chns_it = pairwise(chs_per_depth)
        self.res_blocks = [ResidualBlock(in_chns, out_chns) for in_chns, out_chns in blocks_chns_it]
        self.res_blocks = nn.ModuleList(self.res_blocks)
        self.mlp_head = nn.Sequential(
            nn.LazyLinear(mlp_width),
            nn.ReLU(),
            nn.Linear(mlp_width, n_class),
            nn.Softmax(dim=1),
        )
        
        
    def forward(self, x:Tensor) -> Tensor:
        activation_maps = x
        for res_block in self.res_blocks:
            activation_maps = nn.functional.max_pool1d(res_block(activation_maps), 2)
        out = activation_maps.view(activation_maps.shape[0], -1)
        out = self.mlp_head(out)
        return out



In [10]:
def mk_model() -> nn.Module:
    nb_in_chans = len(meta_data["feature_cols"])
    return (
        Resnet(
            in_channels=nb_in_chans,
            depth=4,
            mlp_width=256,
            n_class=18
        )
        .to(device)
    )

print("input channels:", len(meta_data["feature_cols"]))

input channels: 66


### Load model weights

In [11]:
model_state_parent_dir = kagglehub.model_download("mauroabidalcarrer/cmi-resnet/pyTorch/more_imu_features_epoch_37")
model_state_filename = os.listdir(model_state_parent_dir)[0]
model_state_path = join(model_state_parent_dir, model_state_filename)
model_weights_state_dict = torch.load(model_state_path, weights_only=True, map_location=device)["model"]
model = mk_model()
model.load_state_dict(model_weights_state_dict)

<All keys matched successfully>

In [12]:
model

Resnet(
  (res_blocks): ModuleList(
    (0): ResidualBlock(
      (blocks): Sequential(
        (0): Conv1d(66, 132, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(132, 132, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip_connection): Sequential(
        (0): Conv1d(66, 132, kernel_size=(1,), stride=(1,))
        (1): BatchNorm1d(132, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): ResidualBlock(
      (blocks): Sequential(
        (0): Conv1d(132, 264, kernel_size=(3,), stride=(1,), padding=(1,))
        (1): BatchNorm1d(264, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv1d(264, 264, kernel_size=(3,), stride=(1,), padding=(1,))
        (4): BatchNorm1d(2

## Perform inference

In [13]:
def predict(sequence_df:pl.DataFrame, _:pl.DataFrame) -> str:
    x = preprocess_sequence(sequence_df)
    x = torch.unsqueeze(Tensor(x), dim=0).to(device)
    y_pred = (
        model(x)
        .max(dim=1)[1]
        .cpu()
        .numpy()
        .squeeze()
    )
    y_pred_str = meta_data["target_names"][y_pred]

    return y_pred_str

In [20]:
def evaluate_with_competition_metrics(predict_func:callable, train_df:DF, nb_train_seq_to_test:None) -> float:
    train_seq_grp = train_df.groupby(by="sequence_id")
    competition_metric = metric_package.Metric()
    nb_train_seq_to_test = nb_train_seq_to_test if nb_train_seq_to_test else len(train_seq_grp)
    for seq_idx, (seq_id, seq) in tqdm(enumerate(train_seq_grp), total=nb_train_seq_to_test):
        remove_non_imu_features = seq_idx % 2 == 0
        if remove_non_imu_features:
            non_imu_features = [col for col in seq.columns if col.startswith(("thm", "tof"))]
            seq.loc[:, non_imu_features] = 0
        x_seq = seq.drop(columns=["gesture"])
        x_seq = pl.DataFrame(seq)
        y_pred = predict_func(x_seq, None)
        competition_metric.add(y_pred, seq["gesture"].iloc[0])
        if seq_idx >= nb_train_seq_to_test:
            break

    return competition_metric.score()

In [21]:
def local_evaluation(server, predict_func:callable):
    competition_dataset_path = kagglehub.competition_download("cmi-detect-behavior-with-sensor-data")
    test_df = pl.read_csv(join(competition_dataset_path, "test.csv"))
    print("testing the capacity of the predict function on test sequences:")
    for seq_id, seq in test_df.group_by("sequence_id"):
        print(predict(seq, None))
    print("Evaluating the score of the TRAINING dataset:")
    train_df = pd.read_csv(join(competition_dataset_path, "train.csv"))
    train_score = evaluate_with_competition_metrics(predict_func, train_df, 2000)
    print("train score:", train_score)


In [None]:
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    competition_dataset_path = kagglehub.competition_download("cmi-detect-behavior-with-sensor-data")
    local_evaluation(inference_server, predict)
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, "test.csv"),
            join(competition_dataset_path, "test_demographics.csv"),
        )
    )
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)
    inference_server.run_local_gateway(
        data_paths=(
            join(competition_dataset_path, "train.csv"),
            join(competition_dataset_path, "train_demographics.csv"),
        )
    )

testing the capacity of the predict function on test sequences:
Forehead - pull hairline
Feel around in tray and pull out an object
Evaluating the score of the TRAINING dataset:


  0%|          | 0/2000 [00:00<?, ?it/s]

train score: 0.5


                This exceeds the startup time limit of 900 seconds that the gateway will enforce
                during the rerun on the hidden test set. Start the server before performing any time consuming steps.


KeyboardInterrupt: 

I0000 00:00:1753027382.537351   16309 chttp2_transport.cc:1182] ipv6:%5B::1%5D:60053: Got goaway [2] err=UNAVAILABLE:GOAWAY received; Error code: 2; Debug Text: Cancelling all calls {grpc_status:14, http2_error:2, created_time:"2025-07-20T16:03:02.53725003+00:00"}
