In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv
/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv
/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv
/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/cmi_inference_server.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/cmi_gateway.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/__init__.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/core/templates.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/core/base_gateway.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/core/relay.py
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/core/kaggle_evaluation.proto
/kaggle/input/cmi-detect-behavior-with-sensor-data/kaggle_evaluation/core/__init__.py
/kaggle/input/cmi-detect-behav

In [2]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

# 1. Load training data
train = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv')
train_demo = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv')
merged = pd.merge(train, train_demo, on='subject', how='left')

# 2. ToF Imputation
tof_cols = [col for col in merged.columns if col.startswith('tof')]
merged[tof_cols] = merged[tof_cols].replace(-1, np.nan)
medians = np.nanmedian(merged[tof_cols].values, axis=0)
merged[tof_cols] = np.where(np.isnan(merged[tof_cols]), medians, merged[tof_cols])

# 3. ToF Aggregates
def add_tof_aggregates(df, tof_cols):
    return pd.concat([
        df.reset_index(drop=True),
        pd.DataFrame({
            'tof_mean': df[tof_cols].mean(axis=1),
            'tof_std': df[tof_cols].std(axis=1),
            'tof_min': df[tof_cols].min(axis=1),
            'tof_max': df[tof_cols].max(axis=1),
            'tof_median': df[tof_cols].median(axis=1),
        }, index=df.index)
    ], axis=1)

merged = add_tof_aggregates(merged, tof_cols)

# 4. Feature setup
features = [
    'acc_x', 'acc_y', 'acc_z',
    'rot_w', 'rot_x', 'rot_y', 'rot_z',
    'thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5',
    'adult_child', 'age', 'sex', 'handedness',
    'height_cm', 'shoulder_to_wrist_cm', 'elbow_to_wrist_cm',
    'tof_mean', 'tof_std', 'tof_min', 'tof_max', 'tof_median'
]

X = merged[features]
y = merged['gesture']

# 5. Encode gesture
gesture_le = LabelEncoder()
y_encoded = gesture_le.fit_transform(y)

# 6. Train model
model = XGBClassifier(
    eval_metric='mlogloss',
    use_label_encoder=False,
    n_jobs=-1,
    verbosity=0,
    random_state=42
)
model.fit(X, y_encoded)

# 7. Predict on test set and write submission.parquet
test = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv')
test_demo = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv')

results = []

for seq_id, seq_df in test.groupby("sequence_id"):
    subject_id = seq_df["subject"].iloc[0]
    demo_row = test_demo[test_demo["subject"] == subject_id].iloc[0]

    seq_df = seq_df.copy()
    for col in tof_cols:
        seq_df[col] = seq_df[col].replace(-1, np.nan)
    med_vals = pd.Series(medians, index=tof_cols)
    seq_df[tof_cols] = seq_df[tof_cols].fillna(med_vals)

    seq_df["tof_mean"] = seq_df[tof_cols].mean(axis=1)
    seq_df["tof_std"] = seq_df[tof_cols].std(axis=1)
    seq_df["tof_min"] = seq_df[tof_cols].min(axis=1)
    seq_df["tof_max"] = seq_df[tof_cols].max(axis=1)
    seq_df["tof_median"] = seq_df[tof_cols].median(axis=1)

    last_row = seq_df.iloc[-1][[
        'acc_x', 'acc_y', 'acc_z',
        'rot_w', 'rot_x', 'rot_y', 'rot_z',
        'thm_1', 'thm_2', 'thm_3', 'thm_4', 'thm_5',
        'tof_mean', 'tof_std', 'tof_min', 'tof_max', 'tof_median'
    ]]

    demo_feats = demo_row[['adult_child', 'age', 'sex', 'handedness', 'height_cm', 'shoulder_to_wrist_cm', 'elbow_to_wrist_cm']]
    full_features = pd.concat([last_row, demo_feats]).values.reshape(1, -1)

    pred_label = model.predict(full_features)[0]
    pred_gesture = gesture_le.inverse_transform([pred_label])[0]

    row_id = seq_df["row_id"].iloc[-1]
    results.append({"row_id": row_id, "gesture": pred_gesture})

submission_df = pd.DataFrame(results)

# ✅ Save to submission.parquet for Kaggle
submission_df.to_parquet("submission.parquet", index=False)


  seq_df["tof_mean"] = seq_df[tof_cols].mean(axis=1)
  seq_df["tof_std"] = seq_df[tof_cols].std(axis=1)
  seq_df["tof_min"] = seq_df[tof_cols].min(axis=1)
  seq_df["tof_max"] = seq_df[tof_cols].max(axis=1)
  seq_df["tof_median"] = seq_df[tof_cols].median(axis=1)
  seq_df["tof_mean"] = seq_df[tof_cols].mean(axis=1)
  seq_df["tof_std"] = seq_df[tof_cols].std(axis=1)
  seq_df["tof_min"] = seq_df[tof_cols].min(axis=1)
  seq_df["tof_max"] = seq_df[tof_cols].max(axis=1)
  seq_df["tof_median"] = seq_df[tof_cols].median(axis=1)
