# Introduction

This is an ensemble of hard and soft label models. The Kaggle notebook is available [here](https://www.kaggle.com/code/naresh/cmi-behavior-detection-84-submission?scriptVersionId=257282787).

# Setup Environment

## Configurations

In [1]:
import torch

from pathlib import Path
from torch import nn
from dataclasses import dataclass, field
from typing import List, Dict, Callable
from core_utilities import set_seed

@dataclass
class Config:
    """Notebook-wide settings: seed, Kaggle input paths, file names and feature flags.

    ``device`` carries no annotation, so it is a plain class attribute evaluated
    once at class-definition time; it is not a dataclass field and therefore is
    absent from ``__init__`` and ``repr``.
    """
    seed: int = 42
    eps: float = 1e-8

    # Kaggle input locations
    root: Path = Path('/kaggle/input/cmi-detect-behavior-with-sensor-data/')
    ckpt_root: Path = Path('/kaggle/input/cmi-behavior-detection/lightning_logs/version_0/checkpoints')
    data_root: Path = Path('/kaggle/input/cmi-behavior-detection-synthetic-dataset')

    # Data groups (train, test, ...)
    data_groups: dict[str, str] = field(default_factory=lambda: {
        'train': 'train.csv',
        'test': 'test.csv',
    })

    # Demographics data
    demographics: dict[str, str] = field(default_factory=lambda: {
        'train': 'train_demographics.csv',
        'test': 'test_demographics.csv',
    })

    # Split ratio
    split_ratio: float = 0.2

    # Torch Configuration (class attribute, see class docstring)
    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

    # Feature Flags: a flag is enabled by being present in this list.
    features: List[str] = field(default_factory=lambda: [
        ## Data Processing
        # 'data.linear_acc',
        # 'data.linear_acc.overwrite',

        ## Model
        'model.summary',

        ## Training
        # 'train',
        # 'train.metrics',
        # 'checkpoint',

        ## Competition Metrics
        # 'competition.metrics.valid',
        'competition.metrics.test',

    ])

    def data_limit(self) -> 'int | None':
        """Return 3 when ``is_interactive()`` is truthy, otherwise ``None``.

        NOTE(review): ``is_interactive`` is neither defined nor imported in the
        visible part of this file; it must be provided elsewhere before this
        method is called.
        """
        return 3 if is_interactive() else None

# Build the notebook-wide configuration, then pass its seed to set_seed (imported
# from core_utilities; presumably seeds the RNGs in use — confirm in that module).
config = Config()
# print(f'{config=}')
set_seed(config.seed)

In [2]:
from dataclasses import dataclass, field
from typing import List

@dataclass
class DataConfig:
    """Sequence sizing plus the mapping from column-group names to column names."""
    slice_len: int = 64
    sequence_len: int = 103

    # Classes
    num_classes: int = 18

    # Group name -> ordered list of column names belonging to that group.
    column_to_str: dict[str, List[str]] = field(default_factory=lambda: {
        ## Metadata columns
        'metadata': ['row_id', 'sequence_id', 'sequence_counter', 'subject'],
        'metadata_d': ['subject'],

        ## Data Columns
        'acc': ['acc_x', 'acc_y', 'acc_z'],
        'rot': ['rot_w', 'rot_x', 'rot_y', 'rot_z'],
        'thm': [f'thm_{v}' for v in range(1, 6)],
        'tof': (
            [f'tof_1_v{v}' for v in range(64)]
            + [f'tof_2_v{v}' for v in range(64)]
            + [f'tof_3_v{v}' for v in range(64)]
            + [f'tof_4_v{v}' for v in range(64)]
        ),
        ## Demographics Columns
        'd': ['adult_child', 'handedness'],

        ## Label Columns
        'label': ['gesture'],

        ## Generated Columns
        'linear_acc': [f'linear_{s}' for s in ('acc_x', 'acc_y', 'acc_z')],
    })

    def columns(self, hint=('acc', 'rot', 'tof', 'd')):
        """Return a new flat list of the column names for the requested groups.

        Args:
            hint: A single group name, or an iterable of group names. Groups
                are expanded in the order given. Note that the default omits
                the ``'thm'`` group.

        Returns:
            A freshly built list, so callers may mutate it freely.

        Raises:
            KeyError: If a group name is not a key of ``column_to_str``.
        """
        # A lone name (including str subclasses) is treated as a one-item list.
        if isinstance(hint, str):
            hint = [hint]

        return [column for group in hint for column in self.column_to_str[group]]

    def sequence_columns(self, generated=()):
        """Return the acc, rot, thm, tof and d columns followed by ``generated`` groups.

        Args:
            generated: Extra group name(s) to append: a single name or any
                iterable of names.
        """
        if isinstance(generated, str):
            generated = [generated]

        return self.columns(['acc', 'rot', 'thm', 'tof', 'd', *generated])
        
# Override the default sequence_len (103) so sequence and slice lengths are both 64.
data_config = DataConfig(slice_len=64, sequence_len=64)

# Echo the resolved configuration.
print(f'{data_config=}')

data_config=DataConfig(slice_len=64, sequence_len=64, num_classes=18, column_to_str={'metadata': ['row_id', 'sequence_id', 'sequence_counter', 'subject'], 'metadata_d': ['subject'], 'acc': ['acc_x', 'acc_y', 'acc_z'], 'rot': ['rot_w', 'rot_x', 'rot_y', 'rot_z'], 'thm': ['thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5'], 'tof': ['tof_1_v0', 'tof_1_v1', 'tof_1_v2', 'tof_1_v3', 'tof_1_v4', 'tof_1_v5', 'tof_1_v6', 'tof_1_v7', 'tof_1_v8', 'tof_1_v9', 'tof_1_v10', 'tof_1_v11', 'tof_1_v12', 'tof_1_v13', 'tof_1_v14', 'tof_1_v15', 'tof_1_v16', 'tof_1_v17', 'tof_1_v18', 'tof_1_v19', 'tof_1_v20', 'tof_1_v21', 'tof_1_v22', 'tof_1_v23', 'tof_1_v24', 'tof_1_v25', 'tof_1_v26', 'tof_1_v27', 'tof_1_v28', 'tof_1_v29', 'tof_1_v30', 'tof_1_v31', 'tof_1_v32', 'tof_1_v33', 'tof_1_v34', 'tof_1_v35', 'tof_1_v36', 'tof_1_v37', 'tof_1_v38', 'tof_1_v39', 'tof_1_v40', 'tof_1_v41', 'tof_1_v42', 'tof_1_v43', 'tof_1_v44', 'tof_1_v45', 'tof_1_v46', 'tof_1_v47', 'tof_1_v48', 'tof_1_v49', 'tof_1_v50', 'tof_1_v51', 'tof_1_v5

# Model

In [3]:
from pathlib import Path
from dataclasses import dataclass, field
from collections.abc import Callable

@dataclass
class ModelConfig:
    """Constructor arguments shared by the models built in this notebook."""
    model: str = 'BDLstmModel'

    # Required configuration
    input_size: int = 64
    out_channels: int = 18

    # Input split configuration: in_splits[i] channels are routed to in_encoders[i].
    in_splits: list[int] = field(default_factory=lambda: [3, 4, 5, 256, 2])
    in_encoders: list[str] = field(default_factory=lambda: ['acc', 'rot', 'thm', 'tof', 'dem'])

    # Input slice configuration, empty by default.
    # Example entry: 'acc_rot': [slice(7)]
    slice_encoders: dict[str, tuple[int]] = field(default_factory=dict)

    # Optional configuration
    rnn_channels: int = 64
    squeeze_channels: int = 128

    # Optional layers
    with_noise: bool = False

    @property
    def in_channels(self) -> int:
        """Total number of input channels, i.e. the sum of all split widths."""
        total = 0
        for width in self.in_splits:
            total += width
        return total

    def input_shape(self) -> tuple[int, int]:
        """Return ``(input_size, in_channels)``."""
        size, channels = self.input_size, self.in_channels
        return (size, channels)

# Both the model's input_size and its RNN width are taken from the data sequence
# length; squeeze_channels is lowered from its default of 128 to 32.
model_config = ModelConfig(
    input_size=data_config.sequence_len,
    rnn_channels=data_config.sequence_len,
    squeeze_channels=32,
)
print(f'{model_config=}')
print(f'{model_config.input_shape()=}')

model_config=ModelConfig(model='BDLstmModel', input_size=64, out_channels=18, in_splits=[3, 4, 5, 256, 2], in_encoders=['acc', 'rot', 'thm', 'tof', 'dem'], slice_encoders={}, rnn_channels=64, squeeze_channels=32, with_noise=False)
model_config.input_shape()=(64, 270)


In [4]:
import torch

import torch.nn.functional as F
import torch.nn as nn

from torchvision.transforms.v2 import GaussianNoise
from collections import OrderedDict, defaultdict
from torchinfo import summary
from torch_layers import BDConvBlock, BDLinearBlock, ChannelSeBlock, ConvNormActivation, ConvBlock1d
from torch_layers import Lambda

class BDLstmModel(nn.Module):
    """Sequence classifier: per-group conv encoders feeding parallel bidirectional RNNs.

    The input is split along dim 1 according to ``in_splits`` and every group is
    routed to the encoder named in ``in_encoders``. The encoder outputs are
    concatenated with a convolution stack applied to the whole input
    (``conv_m``), passed through LSTM, GRU and ReLU-RNN branches, joined on the
    last dimension, self-attended and classified from the last position.

    With the notebook's default configuration the printed model summary shows a
    ``[16, 270, 64]`` input producing a ``[16, 18]`` output.
    """

    name = 'lstm-model'

    def __init__(
        self,
        in_channels,
        out_channels,

        in_splits,
        in_encoders,
        slice_encoders,

        conv_configs=((512, 3, .1), (768, 5, .1), (1024, 3, .1)),
        linear_configs=((512, 128, .3),),
        squeeze_channels=128,
        rnn_channels=128,
        with_noise=False,
    ):
        """Build all encoders, branches and the classification head.

        Args:
            in_channels: Channel count of the full input (input width of ``conv_m``).
            out_channels: Number of output logits.
            in_splits: Sizes used to split the input along dim 1.
            in_encoders: Encoder name for each split; a repeated name merges
                the corresponding splits before encoding.
            slice_encoders: Mapping of encoder name to a list of dim-1 indexers.
                Names must exist in ``self.encoders``.
            conv_configs: Sequence of ``(out_channels, kernel, dropout)`` for
                the large convolution stacks.
            linear_configs: Sequence of ``(in, out, dropout)`` for the linear
                backbone; the ``in`` of the last entry is overwritten with the
                joined branch width.
            squeeze_channels: Forwarded to every ``BDConvBlock``.
            rnn_channels: Hidden size of each recurrent branch.
            with_noise: Add a fourth, non-recurrent noise branch.
        """
        super().__init__()

        # Input args
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.in_splits = in_splits
        self.in_encoders = in_encoders
        self.slice_encoders = slice_encoders

        # Private list copies: the last linear config is rewritten below and
        # must never leak into the caller's (or the default) sequence.
        self.conv_configs = list(conv_configs)
        self.linear_configs = list(linear_configs)
        self.squeeze_channels = squeeze_channels
        self.with_noise = with_noise

        # Derived args
        small_conv_configs = [(64, 3, .2), (128, 3, .2)]
        noise_channels = 16
        conv_out_channels = self.conv_configs[-1][0]      # width of the large stacks
        small_out_channels = small_conv_configs[-1][0]    # width of the small stacks
        # acc, rot and conv_m use the large stack; thm, tof and dem the small one.
        encoders_out_channels = conv_out_channels*3 + small_out_channels*3
        rnn_args = (encoders_out_channels, rnn_channels)
        rnn_kwargs = dict(bidirectional=True, batch_first=True)

        # Adjusted args: three bidirectional RNNs contribute 2*rnn_channels each;
        # the optional noise branch adds its own fixed width.
        branches_out_channels = rnn_channels*6 + (noise_channels if with_noise else 0)
        _, last_out_channels, last_dropout = self.linear_configs[-1]
        self.linear_configs[-1] = (branches_out_channels, last_out_channels, last_dropout)

        # Layers
        self.encoders = nn.ModuleDict(OrderedDict([
            ('acc', self.make_bdconv_module(3, self.conv_configs, padding='same')),
            ('rot', self.make_bdconv_module(4, self.conv_configs, padding='same')),
            ('thm', self.make_small_conv_module(5, small_conv_configs, padding='same', bias=False)),
            ('tof', self.make_small_conv_module(256, small_conv_configs, padding='same', bias=False)),
            ('dem', self.make_small_conv_module(2, small_conv_configs, padding='same', bias=False)),
        ]))
        self.conv_m = self.make_bdconv_module(in_channels, self.conv_configs, padding='same')

        # All branches, each paired with (pre_fns, post_fns) applied around it.
        branches = [
            nn.LSTM(*rnn_args, **rnn_kwargs),
            nn.GRU(*rnn_args, **rnn_kwargs),
            nn.RNN(*rnn_args, nonlinearity='relu', **rnn_kwargs),
        ]
        branch_fns = [
            ([], []),
            # The GRU sees the input flipped along dim 1 and its output is flipped back.
            ([torch.fliplr], [torch.fliplr]),
            ([], []),
        ]

        # Noise branch
        if with_noise:
            branches.append(nn.Sequential(
                GaussianNoise(),
                nn.Linear(encoders_out_channels, noise_channels),
                nn.ReLU(inplace=True),
            ))
            branch_fns.append(([], []))

        self.branches = nn.ModuleList(branches)
        self.branch_fns = branch_fns

        self.dropout_m = nn.Dropout(.2)

        self.linear_backbone = nn.Sequential(
            OrderedDict(
                map(
                    self.make_linear_block,
                    enumerate(self.linear_configs)
                ),
            )
        )

        self.linear = nn.Linear(self.linear_configs[-1][1], self.out_channels)

    def apply_one_branch(self, branch_m, x, branch_fns=((), ())):
        """Run one branch, wrapped by its pre- and post-processing functions.

        Args:
            branch_m: Callable branch. Recurrent modules return a tuple whose
                first item is the output sequence; other branches may return
                the tensor directly.
            x: Branch input.
            branch_fns: ``(pre_fns, post_fns)`` pair of callables sequences.
        """
        pre_fns, post_fns = branch_fns

        # Apply branch input prep functions
        x = self.apply_fns(x, pre_fns)

        # Keep only the output sequence when the branch also returns a state.
        out = branch_m(x)
        x = out[0] if isinstance(out, tuple) else out

        # Apply branch post-process functions
        return self.apply_fns(x, post_fns)

    def apply_fns(self, x, fns):
        """Apply ``fns`` to ``x`` in order and return the final value."""
        for fn in fns:
            x = fn(x)

        return x

    def apply_branches(self, x):
        """Feed the same ``x`` to every branch and return the list of outputs."""
        return [
            self.apply_one_branch(branch_m, x, fns)
            for branch_m, fns in zip(self.branches, self.branch_fns)
        ]

    def apply_encoders(self, x):
        """Split ``x`` on dim 1, encode each named group once, and concatenate on dim 1.

        Splits that share an encoder name are concatenated (in input order)
        before being encoded. Groups are emitted in order of first appearance
        of their name in ``in_encoders``.
        """
        # 1. Split inputs
        x_splits = x.split(self.in_splits, dim=1)

        # 2. Group splits by encoder
        x_grouped = defaultdict(list)
        for x_split, e_name in zip(x_splits, self.in_encoders):
            x_grouped[e_name].append(x_split)

        # 3. Merge each group and apply its encoder. Iterating the groups (not
        #    in_encoders) keeps name and tensor aligned when a name repeats.
        xs = []
        for e_name, parts in x_grouped.items():
            x_in = parts[0] if len(parts) == 1 else torch.concat(parts, dim=1)
            xs.append(self.encoders[e_name](x_in))

        return torch.concat(xs, dim=1)

    def apply_slice_encoders(self, x):
        """Encode the dim-1 slices configured in ``slice_encoders`` and concatenate them.

        Must only be called with a non-empty ``slice_encoders`` (``forward``
        guards this), since concatenating an empty list fails.
        """
        xs = []
        for e_name, slices in self.slice_encoders.items():
            # 1. Gather this encoder's slices into one tensor
            x_slice = torch.concat([x[:, s, :] for s in slices], dim=1)

            # 2. Apply the encoder
            xs.append(self.encoders[e_name](x_slice))

        return torch.concat(xs, dim=1)

    def forward(self, x):
        """Compute class logits for a batch.

        Args:
            x: Input whose dim 1 is split by ``in_splits`` (the notebook summary
                uses shape ``[16, 270, 64]``).

        Returns:
            Output of the final linear layer for the last position.
        """
        # Save input
        inp = x

        # -> Apply encoders
        x_enc = self.apply_encoders(x)

        # -> Apply slice_encoders
        if self.slice_encoders:
            x_slice_enc = self.apply_slice_encoders(x)

            ## -> Merge encoders
            x = torch.concat([x_enc, x_slice_enc], dim=1)
        else:
            x = x_enc

        # -> Add the convolution over the full, unsplit input
        x = torch.concat([x, self.conv_m(inp)], dim=1)

        # -> Apply branches (they take dims 1 and 2 swapped)
        xs = self.apply_branches(x.permute(0, 2, 1))

        # -> Join the branches on the last dimension
        x = torch.concat(xs, dim=2)

        # -> Apply Dropout
        x = self.dropout_m(x)

        # -> Self-attention (query = key = value)
        x = F.scaled_dot_product_attention(x, x, x)

        # -> Apply linear backbone and pick the last element for classification
        x = self.linear_backbone(x)
        x = self.linear(x[..., -1, :])

        return x

    def make_small_conv_module(self, in_channels, conv_configs, **kwargs):
        """Chain ``ConvBlock1d`` layers, each followed by dropout unless its rate is ``None``.

        Args:
            in_channels: Input width of the first block.
            conv_configs: Sequence of ``(out_channels, kernel, dropout)``.
            **kwargs: Forwarded to every ``ConvBlock1d``.
        """
        layers = []
        conv_in = in_channels
        for conv_out, kernel, dropout in conv_configs:
            layers.append(ConvBlock1d(conv_in, conv_out, kernel, **kwargs))

            if dropout is not None:
                layers.append(nn.Dropout(dropout))

            # The next block consumes this block's output width
            conv_in = conv_out

        return nn.Sequential(*layers)

    def make_bdconv_module(self, in_channels, conv_configs, **kwargs):
        """Chain ``BDConvBlock`` layers named ``bdconv_<index>``.

        Args:
            in_channels: Input width of the first block.
            conv_configs: Sequence of configs whose first item is the block's
                output width; each config is unpacked into ``BDConvBlock``.
            **kwargs: Forwarded to every ``BDConvBlock``.
        """
        blocks = []
        conv_in = in_channels
        for conv_idx, config in enumerate(conv_configs):
            blocks.append(BDConvBlock(
                f'bdconv_{conv_idx}', conv_in, *config,
                squeeze_channels=self.squeeze_channels, **kwargs,
            ))
            conv_in = config[0]

        return nn.Sequential(*blocks)

    def make_linear_block(self, args):
        """Turn an ``(index, config)`` pair into a ``(name, BDLinearBlock)`` pair."""
        idx, config = args
        block_name = f'block_{idx}'
        block = BDLinearBlock(block_name, *config, bias=False)

        return block_name, block
        
# Instantiate the model from model_config and move it to the configured device.
model = BDLstmModel(
    in_channels=model_config.in_channels,
    out_channels=model_config.out_channels,
    
    in_splits=model_config.in_splits,
    in_encoders=model_config.in_encoders,
    slice_encoders=model_config.slice_encoders,
    
    squeeze_channels=model_config.squeeze_channels,
    rnn_channels=model_config.rnn_channels,
    
    with_noise=model_config.with_noise,
).to(config.device)

# Print a layer summary (torchinfo) for a batch of 16 when the flag is enabled.
if 'model.summary' in config.features:
    print(
        summary(
            model=model, 
            input_size=(16, model_config.in_channels, model_config.input_size),
            col_names=["input_size", "output_size", "num_params", "trainable"],
            col_width=20,
            row_settings=["var_names"],
        )
    )

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
BDLstmModel (BDLstmModel)                                    [16, 270, 64]        [16, 18]             --                   True
├─ModuleDict (encoders)                                      --                   --                   --                   True
│    └─Sequential (acc)                                      [16, 3, 64]          [16, 1024, 64]       --                   True
│    │    └─BDConvBlock (0)                                  [16, 3, 64]          [16, 512, 64]        38,912               True
│    │    └─BDConvBlock (1)                                  [16, 512, 64]        [16, 768, 64]        2,017,536            True
│    │    └─BDConvBlock (2)                                  [16, 768, 64]        [16, 1024, 64]       2,427,904            True
│    └─Sequential (rot)                                      [16, 4, 64]          [16, 1024,

In [5]:
import torch

import torch.nn.functional as F
import torch.nn as nn

from torchvision.transforms.v2 import GaussianNoise
from collections import OrderedDict, defaultdict
from torchinfo import summary
from torch_layers import BDConvBlock, BDLinearBlock, ChannelSeBlock, ConvNormActivation, ConvBlock1d
from torch_layers import Lambda

class HardLabelModel(nn.Module):
    """Sequence classifier: per-group conv encoders feeding bidirectional LSTM and GRU branches.

    The input is split along dim 1 according to ``in_splits`` and every group is
    routed to the encoder named in ``in_encoders``; optional slice encoders are
    appended. The concatenated features go through the LSTM and GRU branches
    (plus an optional noise branch), are joined on the last dimension,
    self-attended and classified from the last position.

    With the notebook's default configuration the printed model summary shows a
    ``[16, 270, 64]`` input producing a ``[16, 18]`` output.
    """

    name = 'lstm-model'

    def __init__(
        self,
        in_channels,
        out_channels,

        in_splits,
        in_encoders,
        slice_encoders,

        conv_configs=((512, 3, .1), (768, 5, .1), (1024, 3, .1)),
        linear_configs=((512, 128, .3),),
        squeeze_channels=128,
        rnn_channels=128,
        with_noise=False,
    ):
        """Build all encoders, branches and the classification head.

        Args:
            in_channels: Stored on the instance; not otherwise used here.
            out_channels: Number of output logits.
            in_splits: Sizes used to split the input along dim 1.
            in_encoders: Encoder name for each split; a repeated name merges
                the corresponding splits before encoding. Including
                ``'linear_acc'`` creates that optional encoder.
            slice_encoders: Mapping of encoder name to a list of dim-1
                indexers. Including ``'acc_rot'`` creates that optional encoder.
            conv_configs: Sequence of ``(out_channels, kernel, dropout)`` for
                the large convolution stacks.
            linear_configs: Sequence of ``(in, out, dropout)`` for the linear
                backbone; the ``in`` of the last entry is overwritten with the
                joined branch width.
            squeeze_channels: Forwarded to every ``BDConvBlock``.
            rnn_channels: Hidden size of each recurrent branch.
            with_noise: Add a non-recurrent noise branch.
        """
        super().__init__()

        # Input args
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.in_splits = in_splits
        self.in_encoders = in_encoders
        self.slice_encoders = slice_encoders

        # Private list copies: the last linear config is rewritten below and
        # must never leak into the caller's (or the default) sequence.
        self.conv_configs = list(conv_configs)
        self.linear_configs = list(linear_configs)
        self.squeeze_channels = squeeze_channels
        self.with_noise = with_noise

        # Optional encoders
        use_linear_acc = 'linear_acc' in in_encoders
        use_acc_rot = 'acc_rot' in slice_encoders

        # Derived args
        small_conv_configs = [(64, 3, .2), (128, 3, .2)]
        noise_channels = 16
        small_out_channels = small_conv_configs[-1][0]
        self.conv_out_channels = self.conv_configs[-1][0] # Output channels of the last conv block
        # acc and rot use the large stack; thm, tof and dem the small one. Each
        # optional encoder widens the features handed to the RNN branches.
        self.encoders_out_channels = (
            self.conv_out_channels*2
            + small_out_channels*3
            + (small_out_channels if use_linear_acc else 0)
            + (self.conv_out_channels if use_acc_rot else 0)
        )

        # Adjusted args: two bidirectional RNNs contribute 2*rnn_channels each;
        # the optional noise branch adds its own fixed width.
        branches_out_channels = rnn_channels*4 + (noise_channels if with_noise else 0)
        _, last_out_channels, last_dropout = self.linear_configs[-1]
        self.linear_configs[-1] = (branches_out_channels, last_out_channels, last_dropout)

        # Layers
        encoders = [
            ('acc', self.make_bdconv_module(3, self.conv_configs, padding='same')),
            ('rot', self.make_bdconv_module(4, self.conv_configs, padding='same')),
            ('thm', self.make_small_conv_module(5, small_conv_configs, padding='same', bias=False)),
            ('tof', self.make_small_conv_module(256, small_conv_configs, padding='same', bias=False)),
            ('dem', self.make_small_conv_module(2, small_conv_configs, padding='same', bias=False)),
        ]
        if use_linear_acc:
            encoders.append((
                'linear_acc',
                self.make_small_conv_module(3, small_conv_configs, padding='same', bias=False),
            ))
        if use_acc_rot:
            encoders.append((
                'acc_rot',
                self.make_bdconv_module(7, self.conv_configs, padding='same'),
            ))
        self.encoders = nn.ModuleDict(OrderedDict(encoders))

        self.lstm_m = nn.LSTM(
            self.encoders_out_channels,
            rnn_channels,
            bidirectional=True,
            batch_first=True,
        )
        self.gru_m = nn.GRU(
            self.encoders_out_channels,
            rnn_channels,
            bidirectional=True,
            batch_first=True,
        )
        if with_noise:
            self.noise_m = nn.Sequential(
                GaussianNoise(),
                nn.Linear(self.encoders_out_channels, noise_channels),
                nn.ReLU(inplace=True),
            )

        self.dropout_m = nn.Dropout(.4)

        self.linear_backbone = nn.Sequential(
            OrderedDict(
                map(
                    self.make_linear_block,
                    enumerate(self.linear_configs)
                ),
            )
        )

        self.linear = nn.Linear(self.linear_configs[-1][1], self.out_channels)

    def apply_one_branch(self, branch_m, x, prep_fns=()):
        """Run one branch after applying its input preparation functions.

        Args:
            branch_m: Callable branch. Recurrent modules return a tuple whose
                first item is the output sequence; other branches may return
                the tensor directly.
            x: Branch input.
            prep_fns: Callables applied to ``x`` in order before the branch.
        """
        # Apply branch input prep functions
        x = self.apply_prep_fns(x, prep_fns)

        # Keep only the output sequence when the branch also returns a state.
        out = branch_m(x)

        return out[0] if isinstance(out, tuple) else out

    def apply_prep_fns(self, x, fns):
        """Apply ``fns`` to ``x`` in order and return the final value."""
        for fn in fns:
            x = fn(x)

        return x

    def apply_branches(self, x):
        """Feed the same ``x`` to every active branch and return the list of outputs."""
        branches = [self.lstm_m, self.gru_m]
        if self.with_noise:
            branches.append(self.noise_m)

        return [self.apply_one_branch(branch_m, x) for branch_m in branches]

    def apply_encoders(self, x):
        """Split ``x`` on dim 1, encode each named group once, and concatenate on dim 1.

        Splits that share an encoder name are concatenated (in input order)
        before being encoded. Groups are emitted in order of first appearance
        of their name in ``in_encoders``.
        """
        # 1. Split inputs
        x_splits = x.split(self.in_splits, dim=1)

        # 2. Group splits by encoder
        x_grouped = defaultdict(list)
        for x_split, e_name in zip(x_splits, self.in_encoders):
            x_grouped[e_name].append(x_split)

        # 3. Merge each group and apply its encoder. Iterating the groups (not
        #    in_encoders) keeps name and tensor aligned when a name repeats.
        xs = []
        for e_name, parts in x_grouped.items():
            x_in = parts[0] if len(parts) == 1 else torch.concat(parts, dim=1)
            xs.append(self.encoders[e_name](x_in))

        return torch.concat(xs, dim=1)

    def apply_slice_encoders(self, x):
        """Encode the dim-1 slices configured in ``slice_encoders`` and concatenate them.

        Must only be called with a non-empty ``slice_encoders`` (``forward``
        guards this), since concatenating an empty list fails.
        """
        xs = []
        for e_name, slices in self.slice_encoders.items():
            # 1. Gather this encoder's slices into one tensor
            x_slice = torch.concat([x[:, s, :] for s in slices], dim=1)

            # 2. Apply the encoder
            xs.append(self.encoders[e_name](x_slice))

        return torch.concat(xs, dim=1)

    def forward(self, x):
        """Compute class logits for a batch.

        Args:
            x: Input whose dim 1 is split by ``in_splits`` (the notebook summary
                uses shape ``[16, 270, 64]``).

        Returns:
            Output of the final linear layer for the last position.
        """
        # -> Apply encoders
        x_enc = self.apply_encoders(x)

        # -> Apply slice_encoders
        if self.slice_encoders:
            x_slice_enc = self.apply_slice_encoders(x)

            ## -> Merge encoders
            x = torch.concat([x_enc, x_slice_enc], dim=1)
        else:
            x = x_enc

        # -> Apply branches (they take dims 1 and 2 swapped)
        xs = self.apply_branches(x.permute(0, 2, 1))

        # -> Join the branches on the last dimension
        x = torch.concat(xs, dim=2)

        # -> Apply Dropout
        x = self.dropout_m(x)

        # -> Self-attention (query = key = value)
        x = F.scaled_dot_product_attention(x, x, x)

        # -> Apply linear backbone and pick the last element for classification
        x = self.linear_backbone(x)
        x = self.linear(x[..., -1, :])

        return x

    def make_small_conv_module(self, in_channels, conv_configs, **kwargs):
        """Chain ``ConvBlock1d`` layers, each followed by dropout unless its rate is ``None``.

        Args:
            in_channels: Input width of the first block.
            conv_configs: Sequence of ``(out_channels, kernel, dropout)``.
            **kwargs: Forwarded to every ``ConvBlock1d``.
        """
        layers = []
        conv_in = in_channels
        for conv_out, kernel, dropout in conv_configs:
            layers.append(ConvBlock1d(conv_in, conv_out, kernel, **kwargs))

            if dropout is not None:
                layers.append(nn.Dropout(dropout))

            # The next block consumes this block's output width
            conv_in = conv_out

        return nn.Sequential(*layers)

    def make_bdconv_module(self, in_channels, conv_configs, **kwargs):
        """Chain ``BDConvBlock`` layers named ``bdconv_<index>``.

        Args:
            in_channels: Input width of the first block.
            conv_configs: Sequence of configs whose first item is the block's
                output width; each config is unpacked into ``BDConvBlock``.
            **kwargs: Forwarded to every ``BDConvBlock``.
        """
        blocks = []
        conv_in = in_channels
        for conv_idx, config in enumerate(conv_configs):
            blocks.append(BDConvBlock(
                f'bdconv_{conv_idx}', conv_in, *config,
                squeeze_channels=self.squeeze_channels, **kwargs,
            ))
            conv_in = config[0]

        return nn.Sequential(*blocks)

    def make_linear_block(self, args):
        """Turn an ``(index, config)`` pair into a ``(name, BDLinearBlock)`` pair."""
        idx, config = args
        block_name = f'block_{idx}'
        block = BDLinearBlock(block_name, *config, bias=False)

        return block_name, block
        
# Instantiate the hard-label model from model_config, with no slice encoders.
# NOTE(review): no .to(config.device) is applied here — confirm the model is
# moved to the target device before inference.
hard_label_model = HardLabelModel(
    in_channels=model_config.in_channels,
    out_channels=model_config.out_channels,
    
    in_splits=model_config.in_splits,
    in_encoders=model_config.in_encoders,
    slice_encoders={},
    
    squeeze_channels=model_config.squeeze_channels,
    rnn_channels=model_config.rnn_channels,
    
    with_noise=model_config.with_noise,
)

# Print a layer summary (torchinfo) for a batch of 16 when the flag is enabled.
if 'model.summary' in config.features:
    print(
        summary(
            model=hard_label_model, 
            input_size=(16, model_config.in_channels, model_config.input_size),
            col_names=["input_size", "output_size", "num_params", "trainable"],
            col_width=20,
            row_settings=["var_names"],
        )
    )

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
HardLabelModel (HardLabelModel)                              [16, 270, 64]        [16, 18]             --                   True
├─ModuleDict (encoders)                                      --                   --                   --                   True
│    └─Sequential (acc)                                      [16, 3, 64]          [16, 1024, 64]       --                   True
│    │    └─BDConvBlock (0)                                  [16, 3, 64]          [16, 512, 64]        38,912               True
│    │    └─BDConvBlock (1)                                  [16, 512, 64]        [16, 768, 64]        2,017,536            True
│    │    └─BDConvBlock (2)                                  [16, 768, 64]        [16, 1024, 64]       2,427,904            True
│    └─Sequential (rot)                                      [16, 4, 64]          [16, 1024,

# Training

## Configuration

In [6]:
from dataclasses import dataclass, field
from typing import List, Dict, Callable, Union
from pathlib import Path

@dataclass
class OptimizerConfig:
    """Optimizer, scheduler and regularization hyper-parameters.

    The code that consumes these fields is not part of this section; the
    grouping comments below are the only indication of their intended use.
    """
    name: str = 'adam'
    lr: float = 7E-4
    
    # presumably the number of non-improving epochs tolerated — TODO confirm
    early_stopping: int = 15

    # Scheduler
    patience: int = 3
    lr_decay: float = 0.65
    min_lr: float = 3E-5
    
    momentum: float = 0.9

    # Regularization
    weight_decay: float = 3E-3

@dataclass
class TrainConfig:
    """Settings for one training or validation run (loop size, optimizer, device, checkpoint)."""
    name: str = 'train'
    num_classes: int = 18
        
    batch_size: int = 256
    epochs: int = 150
        
    # A fresh OptimizerConfig is created for every TrainConfig instance.
    optimizer: OptimizerConfig = field(default_factory=OptimizerConfig)
    early_stop: bool = False

    # NOTE(review): annotated as str, but this notebook passes config.device
    # (a torch.device) for both the train and valid configurations.
    device: str = 'cpu'
    dataloader_workers: int = 3

    # None means no checkpoint path is configured.
    checkpoint: Union[Path, None] = None
    load_best_ckpt: bool = False

    # Collater
    items_to_mix: int = 2
    mixup_prob: float = 0.4

# Training configuration: class count follows the model, with a checkpoint path
# of 'weights.pt'.
train_config = TrainConfig(
    name='train',
    num_classes=model_config.out_channels,
    device=config.device,
    checkpoint=Path('weights.pt')
)
# Validation configuration: same settings but no checkpoint path.
valid_config = TrainConfig(
    name='valid',
    num_classes=model_config.out_channels,
    device=config.device,
)

print(f'{train_config=}')
print(f'{valid_config=}')

train_config=TrainConfig(name='train', num_classes=18, batch_size=256, epochs=150, optimizer=OptimizerConfig(name='adam', lr=0.0007, early_stopping=15, patience=3, lr_decay=0.65, min_lr=3e-05, momentum=0.9, weight_decay=0.003), early_stop=False, device=device(type='cpu'), dataloader_workers=3, checkpoint=PosixPath('weights.pt'), load_best_ckpt=False, items_to_mix=2, mixup_prob=0.4)
valid_config=TrainConfig(name='valid', num_classes=18, batch_size=256, epochs=150, optimizer=OptimizerConfig(name='adam', lr=0.0007, early_stopping=15, patience=3, lr_decay=0.65, min_lr=3e-05, momentum=0.9, weight_decay=0.003), early_stop=False, device=device(type='cpu'), dataloader_workers=3, checkpoint=None, load_best_ckpt=False, items_to_mix=2, mixup_prob=0.4)


In [7]:
import torch

# Optionally restore `model` from a fixed Kaggle input path. Only runs when the
# 'checkpoint' feature flag is enabled (it is commented out in Config by default).
if 'checkpoint' in config.features:
    ckpt_path = Path('/kaggle/input/cmi-behavior-detection-lstm/weights.pt')
    print(f'Load:: {ckpt_path=}')
    model.load_state_dict(
        torch.load(
            ckpt_path,
            # weights_only=True restricts unpickling to tensors and plain containers.
            weights_only=True,
            map_location=torch.device(config.device)
        )
    )

## Evaluation

# Competition Metrics

* We use the competition metric implementation from: https://www.kaggle.com/code/richolson/cmi-2025-metric-copy-for-import

## Test

In [8]:
import warnings
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from scipy.signal import savgol_filter

def get_sequence_prep_fn(config, data_config, sm_window=None):
    """Build a function that cleans and standardizes a single sequence.

    Args:
        config: Global configuration. Not read by this function; kept in the
            signature so existing callers keep working.
        data_config: Object exposing ``sequence_columns()`` (the columns to
            keep) and ``columns('tof')`` (the time-of-flight columns; a falsy
            result skips the ToF repair step).
        sm_window: Window length passed to ``savgol_filter`` (polynomial
            order 3, applied along axis 0). A falsy value disables smoothing.

    Returns:
        A callable ``fn(sequence_data)`` that takes a pandas DataFrame and
        returns the array produced by ``StandardScaler().fit_transform``,
        smoothed when ``sm_window`` is set.
    """
    def fn(sequence_data):
        ## Work on a copy restricted to the configured columns
        data = sequence_data[data_config.sequence_columns()].copy()

        ## Fix tof data
        tof_columns = data_config.columns('tof')
        if tof_columns:
            ### Get tof values as a writable float copy: NaN cannot be stored
            ### in an integer array, and `.values` may be a read-only view
            tof_values = data[tof_columns].to_numpy(
                dtype=float, copy=True, na_value=np.nan
            )

            ### Replace -1 with NaN
            tof_values[tof_values == -1.] = np.nan

            ### Compute per-column mean while skipping NaNs; an all-NaN
            ### column warns and yields NaN, which nan_to_num turns into 0
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                tof_mean = np.nan_to_num(np.nanmean(tof_values, axis=0))

            ### Replace NaN with the column means
            tof_values = np.where(np.isnan(tof_values), tof_mean, tof_values)

            ### Update the DataFrame
            data[tof_columns] = tof_values

        ## Fill remaining NaN: forward fill, then backward fill, then the
        ## column mean (0 for columns that are entirely NaN)
        means = data.mean().fillna(0)
        data = data.ffill().bfill().fillna(means.to_dict())

        ## Data scaling (fitted on this sequence only)
        data = StandardScaler().fit_transform(data)

        ## Data smoothening
        if sm_window:
            data = savgol_filter(data, sm_window, 3, axis=0)

        return data

    return fn

In [9]:
import torch

def load_models(model_weights, device):
    """Load saved weights into each model and return the models as a list.

    Args:
        model_weights: Mapping of a display name to a ``(model, path)`` pair,
            where ``path`` points to a saved state dict for ``model``.
        device: Device (or device string) used as ``map_location`` when
            reading each state dict.

    Returns:
        The model objects, in the mapping's iteration order. Each model is
        updated in place through ``load_state_dict``.
    """
    loaded = []

    for name, (net, p) in model_weights.items():
        print(f'Loading [({name})]:: {p=}')

        # Read the state dict with the restricted (weights-only) loader
        state_dict = torch.load(
            p,
            weights_only=True,
            map_location=torch.device(device),
        )
        net.load_state_dict(state_dict)
        loaded.append(net)

    return loaded

In [10]:
import os
import torch
import kaggle_evaluation.cmi_inference_server

import polars as pl
import pandas as pd

from transforms import FixLength, Transpose, ToType, ToTensor, Resize, Clip
from torch_evaluation import get_predict_fn, get_multi_predict_fn, get_slices_fn, get_slices_predict_fn
from core_utilities import make_header, load_pkl
from pathlib import Path

# Sequence Builder
test_sequence_prep_fn = get_sequence_prep_fn(config, data_config)

# Class names
class_names = load_pkl(config.data_root / 'class_names.pkl')

# Model Weights
model_weights = {
    'mixup': (
        model,
        Path('/kaggle/input/cmi-behavior-detection-84/weights.pt')
    ),
    'hard-labels': (
        hard_label_model,
        Path('/kaggle/input/cmi-behavior-detection-lstm/weights.pt')
    ),
}

# Load all models
models = load_models(model_weights, config.device)

# Multi-slice test transforms
# base_predict_fn = get_predict_fn(model, config.device)
base_predict_fn = get_multi_predict_fn(models, config.device)
slices_fn = get_slices_fn(
    slice_dim=0,
    slice_size=data_config.slice_len,
    stride=16,
    transforms=[Clip(limits=(-3.5, 2.5)), FixLength(data_config.sequence_len)]
)

predict_fn = get_slices_predict_fn(
    base_predict_fn,
    slices_fn,
    transforms=[
        Transpose(dims=(0, 2, 1)),
        ToType(),
        ToTensor(),
    ])

@torch.inference_mode()
def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture for one sequence.

    Relies on the module-level ``data_config``, ``test_sequence_prep_fn``,
    ``predict_fn`` and ``class_names``.

    Args:
        sequence: Sensor readings of a single sequence.
        demographics: Demographics rows, joined to the sequence on 'subject'.

    Returns:
        The entry of ``class_names`` at the predicted class index.
    """
    ## Convert data
    sensor_groups = ['metadata', 'acc', 'rot', 'thm', 'tof']
    demographic_groups = ['metadata_d', 'd']
    sensor_frame = sequence.to_pandas()[data_config.columns(sensor_groups)]
    demographics_frame = demographics.to_pandas()[data_config.columns(demographic_groups)]

    ## Join demographics onto the readings, then keep the model input columns
    model_columns = data_config.sequence_columns()
    merged = pd.merge(sensor_frame, demographics_frame, on='subject')
    features = merged[model_columns]

    ## Normalize
    X = test_sequence_prep_fn(features)

    ## Compute probabilities
    probs = predict_fn(X)

    ## Compute predicted class
    class_index = torch.argmax(probs, dim=1)
    y_label = np.squeeze(class_index.cpu().numpy())

    ## Compute gesture
    return class_names[y_label]

# Run the competition inference server when the 'competition.metrics.test'
# feature flag is enabled.
if 'competition.metrics.test' in config.features:
    # The server is given `predict` as its prediction callback.
    inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

    # If KAGGLE_IS_COMPETITION_RERUN is set in the environment, serve;
    # otherwise run the local gateway against the test CSV files below.
    if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
        inference_server.serve()
    else:
        inference_server.run_local_gateway(
            data_paths=(
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
                '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
            )
        )
        # Show the resulting submission file.
        # NOTE(review): presumably written by run_local_gateway; `display`
        # is not imported here, so it is assumed to come from the notebook
        # environment - confirm.
        display(pd.read_parquet('submission.parquet'))

# if 'competition.metrics.test' in config.features:
#     print(make_header('Valid Gestures'))
#     evaluate_score(X_test, None, label_encoders[0].classes_, model, valid_config)

Loading [(mixup)]:: p=PosixPath('/kaggle/input/cmi-behavior-detection-84/weights.pt')
Loading [(hard-labels)]:: p=PosixPath('/kaggle/input/cmi-behavior-detection-lstm/weights.pt')


Unnamed: 0,sequence_id,gesture
0,SEQ_000001,Neck - scratch
1,SEQ_000011,Eyelash - pull hair
