In [24]:
import os
from typing import Literal, Optional

import torch
from torch import nn
import pandas as pd
import plotly.express as px
from pandas import DataFrame as DF
from kagglehub import competition_download

from config import COMPETITION_HANDLE, DATASET_DF_DTYPES, SAMPLING_FREQUENCY

In [25]:
class CMIdataset():
    """In-memory view of one split of the competition CSV.

    The CSV of the requested split is fetched with ``competition_download``
    and loaded into ``self.df``, with rows ordered by ``sequence_id`` and then
    ``sequence_counter``.

    Args:
        split: Which CSV to load, ``"train"`` or ``"test"``.
        nb_sequences_to_load: When given, only the rows of the first
            ``nb_sequences_to_load`` distinct ``sequence_id`` values (in the
            sorted order) are kept. ``None`` keeps every sequence.

    Raises:
        ValueError: If ``split`` is not one of the two known splits, or if
            ``nb_sequences_to_load`` is negative.
    """

    def __init__(self, split:Literal["train", "test"], nb_sequences_to_load:Optional[int]=None):
        super().__init__()
        # Validate every argument before touching the network / file system.
        if split not in ("train", "test"):
            raise ValueError(f'"split" argument should be either "train" or "test", got {split}')
        if nb_sequences_to_load is not None and nb_sequences_to_load < 0:
            raise ValueError(
                f'"nb_sequences_to_load" argument should be None or a non-negative integer, '
                f'got {nb_sequences_to_load}'
            )
        csv_file_path = competition_download(COMPETITION_HANDLE, path=f"{split}.csv")
        sorted_df = (
            pd.read_csv(csv_file_path, dtype=DATASET_DF_DTYPES)
            .sort_values(["sequence_id", "sequence_counter"])
        )
        self.df = self._keep_first_sequences(sorted_df, nb_sequences_to_load)

    @staticmethod
    def _keep_first_sequences(df: pd.DataFrame, nb_sequences: Optional[int]) -> pd.DataFrame:
        """Return the rows of the first ``nb_sequences`` sequences of ``df``.

        "First" follows the order in which ``sequence_id`` values appear in
        ``df``. ``None`` returns ``df`` itself, untouched.
        """
        if nb_sequences is None:
            return df
        kept_ids = df["sequence_id"].drop_duplicates().head(nb_sequences)
        # Copy so that columns can later be added to the subset without
        # pandas treating it as a view of the full frame.
        return df.loc[df["sequence_id"].isin(kept_ids)].copy()

    def __len__(self):
        """Number of rows held in ``self.df`` (one row per sample, not per sequence)."""
        return len(self.df)

In [26]:
# Load the "train" split: the constructor fetches train.csv through
# competition_download and reads it into `train_dataset.df`.
train_dataset = CMIdataset("train")

In [27]:
# List every column name of the loaded training frame.
train_dataset.df.columns.to_list()

['row_id',
 'sequence_type',
 'sequence_id',
 'sequence_counter',
 'subject',
 'orientation',
 'behavior',
 'phase',
 'gesture',
 'acc_x',
 'acc_y',
 'acc_z',
 'rot_w',
 'rot_x',
 'rot_y',
 'rot_z',
 'thm_1',
 'thm_2',
 'thm_3',
 'thm_4',
 'thm_5',
 'tof_1_v0',
 'tof_1_v1',
 'tof_1_v2',
 'tof_1_v3',
 'tof_1_v4',
 'tof_1_v5',
 'tof_1_v6',
 'tof_1_v7',
 'tof_1_v8',
 'tof_1_v9',
 'tof_1_v10',
 'tof_1_v11',
 'tof_1_v12',
 'tof_1_v13',
 'tof_1_v14',
 'tof_1_v15',
 'tof_1_v16',
 'tof_1_v17',
 'tof_1_v18',
 'tof_1_v19',
 'tof_1_v20',
 'tof_1_v21',
 'tof_1_v22',
 'tof_1_v23',
 'tof_1_v24',
 'tof_1_v25',
 'tof_1_v26',
 'tof_1_v27',
 'tof_1_v28',
 'tof_1_v29',
 'tof_1_v30',
 'tof_1_v31',
 'tof_1_v32',
 'tof_1_v33',
 'tof_1_v34',
 'tof_1_v35',
 'tof_1_v36',
 'tof_1_v37',
 'tof_1_v38',
 'tof_1_v39',
 'tof_1_v40',
 'tof_1_v41',
 'tof_1_v42',
 'tof_1_v43',
 'tof_1_v44',
 'tof_1_v45',
 'tof_1_v46',
 'tof_1_v47',
 'tof_1_v48',
 'tof_1_v49',
 'tof_1_v50',
 'tof_1_v51',
 'tof_1_v52',
 'tof_1_v53',
 'tof

In [28]:
# Distinct values found in the "gesture" column.
train_dataset.df["gesture"].unique().tolist()

['Cheek - pinch skin',
 'Forehead - pull hairline',
 'Write name on leg',
 'Feel around in tray and pull out an object',
 'Neck - scratch',
 'Neck - pinch skin',
 'Eyelash - pull hair',
 'Eyebrow - pull hair',
 'Forehead - scratch',
 'Above ear - pull hair',
 'Wave hello',
 'Write name in air',
 'Text on phone',
 'Pull air toward your face',
 'Pinch knee/leg skin',
 'Scratch knee/leg skin',
 'Drink from bottle/cup',
 'Glasses on/off']

In [29]:
# Display all rows of a single sequence, selected by its sequence_id.
train_dataset.df.query("sequence_id == 'SEQ_000007'")

Unnamed: 0,row_id,sequence_type,sequence_id,sequence_counter,subject,orientation,behavior,phase,gesture,acc_x,...,tof_5_v54,tof_5_v55,tof_5_v56,tof_5_v57,tof_5_v58,tof_5_v59,tof_5_v60,tof_5_v61,tof_5_v62,tof_5_v63
0,SEQ_000007_000000,Target,SEQ_000007,0,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.683594,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,SEQ_000007_000001,Target,SEQ_000007,1,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.949219,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,SEQ_000007_000002,Target,SEQ_000007,2,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,5.722656,...,-1.0,-1.0,112.0,119.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,SEQ_000007_000003,Target,SEQ_000007,3,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,6.601562,...,-1.0,-1.0,101.0,111.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,SEQ_000007_000004,Target,SEQ_000007,4,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,5.566406,...,-1.0,-1.0,101.0,109.0,125.0,-1.0,-1.0,-1.0,-1.0,-1.0
5,SEQ_000007_000005,Target,SEQ_000007,5,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,3.996094,...,-1.0,-1.0,118.0,114.0,119.0,-1.0,-1.0,-1.0,-1.0,-1.0
6,SEQ_000007_000006,Target,SEQ_000007,6,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,4.035156,...,-1.0,-1.0,104.0,118.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
7,SEQ_000007_000007,Target,SEQ_000007,7,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,3.730469,...,-1.0,-1.0,105.0,119.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
8,SEQ_000007_000008,Target,SEQ_000007,8,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,4.535156,...,-1.0,-1.0,103.0,122.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
9,SEQ_000007_000009,Target,SEQ_000007,9,SUBJ_059520,Seated Lean Non Dom - FACE DOWN,Relaxes and moves hand to target location,Transition,Cheek - pinch skin,3.921875,...,-1.0,-1.0,104.0,123.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [30]:
# Integrate the accelerometer signal twice (acceleration -> speed -> position)
# for each axis. The running sums are taken per `sequence_id`, so the value
# accumulated over one sequence never carries over into the next one.
# NOTE(review): every sample is scaled by SAMPLING_FREQUENCY before summing; a
# time integral would normally use the sample period (1 / frequency) - confirm
# what the constant in `config` actually holds.
for axis in ("x", "y", "z"):
    train_dataset.df[f"speed_{axis}"] = (
        train_dataset.df[f"acc_{axis}"]
        .mul(SAMPLING_FREQUENCY)
        .groupby(train_dataset.df["sequence_id"])
        .cumsum()
    )
    train_dataset.df[f"position_{axis}"] = (
        train_dataset.df[f"speed_{axis}"]
        .mul(SAMPLING_FREQUENCY)
        .groupby(train_dataset.df["sequence_id"])
        .cumsum()
    )

# 3D trajectory of one sequence, one colour per phase.
px.line_3d(
    train_dataset.df.query("sequence_id == 'SEQ_000007'"),
    "position_x",
    "position_y",
    "position_z",
    color="phase",
    height=750,
)

In [None]:
def compute_axis_speed_and_position(
    df: DF,
    axis:Literal["x", "y", "z"],
    integration_step:Optional[float]=None,
) -> DF:
    """Add per-sequence ``speed_<axis>`` and ``position_<axis>`` columns to ``df``.

    ``speed_<axis>`` is the running sum, restarted for every ``sequence_id``,
    of ``acc_<axis> * integration_step``. ``position_<axis>`` is the running
    sum, again restarted for every ``sequence_id``, of
    ``speed_<axis> * integration_step``.

    Args:
        df: Frame holding at least the ``sequence_id`` and ``acc_<axis>``
            columns. It is modified in place.
        axis: Suffix of the acceleration column to integrate.
        integration_step: Factor applied to each sample before it is
            accumulated. Defaults to ``SAMPLING_FREQUENCY``.
            NOTE(review): a time integral normally uses the sample period
            (1 / frequency) - confirm what ``SAMPLING_FREQUENCY`` holds.

    Returns:
        The same ``df`` object, with the two columns added (or overwritten).
    """
    if integration_step is None:
        integration_step = SAMPLING_FREQUENCY

    sequence_ids = df["sequence_id"]
    speed = (
        df[f"acc_{axis}"]
        .mul(integration_step)
        .groupby(sequence_ids)
        .cumsum()
    )
    # Position is the integral of the speed, not of the raw acceleration.
    position = (
        speed
        .mul(integration_step)
        .groupby(sequence_ids)
        .cumsum()
    )

    df[f"speed_{axis}"] = speed
    df[f"position_{axis}"] = position
    return df

def compute_speed_and_position(df: DF) -> DF:
    """Run ``compute_axis_speed_and_position`` for the x, y and z axes in turn.

    Each call receives the frame returned by the previous one; the frame
    returned for the last axis is the result.
    """
    enriched = df
    for axis_name in ("x", "y", "z"):
        enriched = compute_axis_speed_and_position(enriched, axis_name)
    return enriched

# Add the speed / position columns to the training frame. The helper writes
# into the frame it receives and returns it, so `test_df` and
# `train_dataset.df` are the same object.
test_df = train_dataset.df.pipe(compute_speed_and_position)

# 3D trajectory of one sequence, one colour per phase.
px.line_3d(
    data_frame=test_df.query("sequence_id == 'SEQ_000007'"),
    x="position_x",
    y="position_y",
    z="position_z",
    color="phase",
    height=750,
)



