In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the sensor recordings and the per-subject demographics table.
train_path = '/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv'
demographics_path = '/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv'

train = pd.read_csv(train_path)
demographics = pd.read_csv(demographics_path)

In [None]:
# Preview the first five rows of the sensor table.
train.head(n=5)

In [None]:
# Preview the first rows of the demographics table.
demographics.head()

In [None]:
# Basic Info
# Summarise the size of `train` (the frame loaded from train.csv) and its total NaN count.
print("Train shape:", train.shape)
print("\nTrain columns:", len(train.columns))
print("Missing values in train:", train.isnull().sum().sum())

In [None]:
# Gesture Distribution
# Count each sequence once (one row per sequence_id), keeping gestures in order of first appearance.
one_row_per_sequence = train.drop_duplicates("sequence_id")
gesture_order = train['gesture'].dropna().unique()

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=one_row_per_sequence, x='gesture', order=gesture_order, ax=ax)
ax.set_title("Gesture Distribution (Train)")
plt.setp(ax.get_xticklabels(), rotation=90)
fig.tight_layout()
plt.show()

In [None]:
# Demographic Analysis
# Merge for demographic EDA: attach each subject's demographics to every sensor row.
train_demo = train.merge(demographics, on='subject', how='left')

# Age Distribution
# The merged frame repeats a subject's age on every one of their sensor rows, so
# plotting it directly would weight each participant by how many rows they
# recorded. Keep one row per subject so the histogram counts participants.
subject_ages = train_demo.drop_duplicates('subject')['age']
sns.histplot(subject_ages, kde=True)
plt.title("Age Distribution")
plt.show()

In [None]:
# Participant height per gesture, one observation per sequence.
height_per_sequence = train_demo.drop_duplicates('sequence_id')

fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=height_per_sequence, x='gesture', y='height_cm', ax=ax)
ax.set_title("Participant Height by Gesture")
plt.setp(ax.get_xticklabels(), rotation=90)
fig.tight_layout()
plt.show()

In [None]:
# Use the first sequence_id that appears in the training data as the running example.
first_sequence_id = train['sequence_id'].unique()[0]
sample_seq = train[train['sequence_id'] == first_sequence_id]

# Overlay the three accelerometer channels against the within-sequence counter.
fig, ax = plt.subplots(figsize=(15, 5))
for channel in ('acc_x', 'acc_y', 'acc_z'):
    ax.plot(sample_seq['sequence_counter'], sample_seq[channel], label=channel)

ax.set_title(f"Accelerometer Time Series - Sequence ID {sample_seq['sequence_id'].iloc[0]}")
ax.set_xlabel("Sequence Counter")
ax.set_ylabel("Acceleration (m/s²)")
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Trace the sample sequence through (acc_x, acc_y, acc_z) space as a 3D line.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1, projection='3d')

acc_channels = [sample_seq[name] for name in ('acc_x', 'acc_y', 'acc_z')]
ax.plot(*acc_channels, color='blue')

ax.set_title("3D Accelerometer Trajectory")
for set_label, text in ((ax.set_xlabel, "acc_x"), (ax.set_ylabel, "acc_y"), (ax.set_zlabel, "acc_z")):
    set_label(text)

plt.tight_layout()
plt.show()

In [None]:
# Every column whose name begins with 'thm', plotted against the frame index of the sample sequence.
thm_cols = [name for name in train.columns if name[:3] == 'thm']

ax = sample_seq[thm_cols].plot(figsize=(12, 6), title='Thermopile Sensor Time Series')
ax.set_xlabel("Time")
ax.set_ylabel("Temperature (°C)")
plt.show()

In [None]:
# Show one frame of ToF sensor 1 as an 8x8 heatmap.
# Aim for row 50 but clamp to the last row, so sequences shorter than 51 rows
# do not raise IndexError.
frame_idx = min(50, len(sample_seq) - 1)
tof_grid = sample_seq.iloc[frame_idx]

# The 64 readings tof_1_v0..tof_1_v63 are laid out row-major into an 8x8 grid.
tof1 = [tof_grid[f"tof_1_v{i}"] for i in range(64)]
tof1_grid = np.array(tof1).reshape(8, 8)

plt.figure(figsize=(6, 5))
sns.heatmap(tof1_grid, cmap="viridis", annot=False, cbar=True)
plt.title(f"ToF Sensor 1 (Frame at Step {frame_idx})")
plt.show()

In [None]:
# Render the 8x8 ToF frame as an interactive 3D surface.
grid_axis = np.arange(8)
x, y = np.meshgrid(grid_axis, grid_axis)
z = tof1_grid

surface = go.Surface(z=z, x=x, y=y)
fig = go.Figure(data=[surface])
fig.update_layout(
    title="3D ToF Surface - Sensor 1",
    autosize=False,
    width=500,
    height=500,
    margin={"l": 65, "r": 50, "b": 65, "t": 90},
)
fig.show()

In [None]:
from scipy.spatial.transform import Rotation as R

# SciPy's Rotation.from_quat expects scalar-last quaternions (x, y, z, w), so
# the columns are selected in that order rather than (w, x, y, z).
rot = sample_seq[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
euler_angles = R.from_quat(rot).as_euler('xyz', degrees=True)

# One line per Euler component, indexed by row position within the sequence.
plt.figure(figsize=(12, 5))
plt.plot(euler_angles)
plt.title("Euler Angles Converted from Quaternion")
plt.legend(['Roll', 'Pitch', 'Yaw'])
plt.xlabel("Time Step")
plt.ylabel("Degrees")
plt.show()

In [None]:
# Work on an independent copy of the sample sequence from here on.
sample_seq = sample_seq.copy()

# acc_x over the sequence, with one coloured line per value of `behavior`.
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(data=sample_seq, x='sequence_counter', y='acc_x', hue='behavior', ax=ax)
ax.set_title("acc_x Over Time by Phase")
plt.show()

In [None]:
# Analyze differences between behavior phases
# NOTE(review): "IMU" columns are assumed to be the accelerometer (acc_*) and
# rotation quaternion (rot_*) channels - confirm against the dataset description.
imu_cols = [col for col in train.columns if col.startswith(('acc_', 'rot_'))]

phase_analysis = {}
# dropna(): a NaN behavior would match no rows and make avg_duration divide by zero.
for behavior in train['behavior'].dropna().unique():
    phase_data = train[train['behavior'] == behavior]

    phase_analysis[behavior] = {
        # Number of sensor rows labelled with this behavior.
        'count': len(phase_data),
        # Mean number of rows per sequence that contains this behavior.
        'avg_duration': len(phase_data) / phase_data['sequence_id'].nunique(),
        # Mean of the per-channel standard deviations across the IMU columns.
        'imu_activity': phase_data[imu_cols].std().mean()
    }

phase_df = pd.DataFrame(phase_analysis).T
print("Behavior phase analysis:")
print(phase_df)

In [None]:
def plot_fft(signal, title="FFT", sample_spacing=1.0):
    """Plot the single-sided amplitude spectrum of a 1-D real signal.

    Parameters
    ----------
    signal : array-like
        Samples of the signal, assumed to be evenly spaced.
    title : str
        Figure title.
    sample_spacing : float
        Spacing between consecutive samples. The default of 1.0 expresses
        frequency in cycles per sample; pass 1 / sampling_rate to get Hz.
    """
    samples = np.asarray(signal, dtype=float)
    if samples.size < 2:
        print(f"{title}: need at least 2 samples to compute an FFT")
        return

    # Subtract the mean so the DC bin does not dwarf the rest of the spectrum.
    amplitude = np.abs(np.fft.rfft(samples - samples.mean())) / samples.size
    freqs = np.fft.rfftfreq(samples.size, d=sample_spacing)

    plt.figure(figsize=(12, 5))
    plt.plot(freqs, amplitude)
    plt.title(title)
    plt.xlabel("Frequency (cycles per sample)" if sample_spacing == 1.0 else "Frequency (Hz)")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.show()


# FFT of acc_x for sample_seq
plot_fft(sample_seq['acc_x'].values, title="FFT of acc_x (Single Sequence)")

In [None]:
# Spectrum of the y-axis accelerometer channel for the sample sequence.
acc_y_signal = sample_seq['acc_y'].values
plot_fft(acc_y_signal, title="FFT of acc_y (Single Sequence)")

In [None]:
# Spectrum of the z-axis accelerometer channel for the sample sequence.
acc_z_signal = sample_seq['acc_z'].values
plot_fft(acc_z_signal, title="FFT of acc_z (Single Sequence)")

In [None]:
def extract_features(df, sensor_cols):
    """Summarise each sequence with per-column statistics.

    Groups ``df`` by ``sequence_id`` and computes mean, std, min, max and skew
    for every column in ``sensor_cols``. The two-level column index is
    flattened to ``<column>_<stat>``; the group key has an empty second level
    and therefore comes out as ``sequence_id_``.
    """
    stats = ['mean', 'std', 'min', 'max', 'skew']
    per_sequence = df.groupby("sequence_id")[sensor_cols].agg(stats)
    features = per_sequence.reset_index()
    features.columns = [f"{column}_{stat}".strip() for column, stat in features.columns.values]
    return features

# Per-sequence summary statistics for the three accelerometer channels.
sensor_cols = ['acc_x', 'acc_y', 'acc_z']
acc_features = extract_features(train, sensor_cols)

# Attach the gesture label of each sequence. The flattened key column is named
# 'sequence_id_', hence the asymmetric join keys.
gesture_map = train[['sequence_id', 'gesture']].drop_duplicates()
acc_features = acc_features.merge(
    gesture_map,
    left_on='sequence_id_',
    right_on='sequence_id',
    how='left',
)

# Distribution of the per-sequence acc_x mean for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_x_mean', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Mean acc_x per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of the per-sequence acc_x skewness for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_x_skew', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Skewed acc_x per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of the per-sequence acc_y mean for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_y_mean', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Mean acc_y per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of the per-sequence acc_y skewness for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_y_skew', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Skewed acc_y per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of the per-sequence acc_z mean for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_z_mean', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Mean acc_z per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Distribution of the per-sequence acc_z skewness for each gesture.
fig, ax = plt.subplots(figsize=(14, 6))
sns.boxplot(data=acc_features, x='gesture', y='acc_z_skew', ax=ax)
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_title("Skewed acc_z per Gesture")
fig.tight_layout()
plt.show()

In [None]:
# Number of distinct sequences per (subject, gesture) pair, laid out as a
# subject x gesture table; pairs that never occur are filled with 0.
sequences_per_pair = train.groupby(['subject', 'gesture'])['sequence_id'].nunique()
gesture_subject = sequences_per_pair.unstack(fill_value=0)

fig, ax = plt.subplots(figsize=(14, 14))
sns.heatmap(gesture_subject, annot=True, fmt='d', cmap="Blues", ax=ax)
ax.set_title("Gesture Count per Subject")
ax.set_xlabel("Gesture")
ax.set_ylabel("Subject")
fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML

fig, ax = plt.subplots(figsize=(5, 5))

# Column names of the 64 ToF sensor-1 readings, built once instead of on every frame.
tof1_cols = [f"tof_1_v{j}" for j in range(64)]

def animate(i):
    """Redraw the axes with ToF sensor 1 of the i-th row of sample_seq as an 8x8 heatmap."""
    ax.clear()
    frame = sample_seq.iloc[i]
    tof_data = frame[tof1_cols].to_numpy(dtype=float).reshape(8, 8)
    # Fixed colour limits keep the colour scale comparable between frames.
    sns.heatmap(tof_data, ax=ax, cbar=False, vmin=0, vmax=254, cmap='viridis')
    ax.set_title(f"ToF Sensor 1 - Frame {i}")

# Animate at most the first 60 rows of the sequence, 200 ms per frame.
ani = animation.FuncAnimation(fig, animate, frames=min(60, len(sample_seq)), interval=200)

# Render to HTML first, then close the figure so the notebook does not also
# emit a static copy of it below the animation (and the figure is released).
ani_html = ani.to_jshtml()
plt.close(fig)

# Display in notebook
HTML(ani_html)

In [None]:
# Share of ToF readings equal to -1 in each sequence.
tof_cols = [name for name in train.columns if name.startswith("tof_")]


def _minus_one_share(block):
    """Mean over columns of the per-column fraction of cells equal to -1."""
    return (block == -1).mean().mean()


sensor_completeness = (
    train.groupby("sequence_id")[tof_cols]
    .apply(_minus_one_share)
    .reset_index(name='tof_missing_ratio')
)

fig, ax = plt.subplots()
ax.hist(sensor_completeness['tof_missing_ratio'], bins=30)
ax.set_title("Distribution of Missing Ratio in ToF Sensors")
ax.set_xlabel("Missing Ratio")
ax.set_ylabel("Count of Sequences")
plt.show()