In [184]:
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

DATA_ROOT = Path("../data")
all_batches = []

for batch_dir in sorted(DATA_ROOT.glob("batch_*")):
    data = pd.read_csv(batch_dir / "WristMotion.csv").sort_values("time")
    labels = pd.read_csv(batch_dir / "Annotation.csv").sort_values("time")

    data["batch_id"] = batch_dir.name
    labels["label"] = labels["text"].map({"no": 0, "yes": 1})

    aligned = pd.merge_asof(
        data, labels[["time", "label"]], on="time", direction="backward"
    )

    aligned = aligned.dropna(subset=["label"])
    aligned["label"] = aligned["label"].astype(int)

    all_batches.append(aligned)

final_df = pd.concat(all_batches, ignore_index=True)


In [185]:
import numpy as np

FEATURES = [
    "accelerationX",
    "accelerationY",
    "accelerationZ",
    "rotationRateX",
    "rotationRateY",
    "rotationRateZ",
    "accelerationMagnitude",
]

WINDOW_SIZE = 200
STEP_SIZE = 50

X, y, window_batch_ids = [], [], []

for batch_id, batch_df in final_df.groupby("batch_id"):
    batch_df = batch_df.reset_index(drop=True)

    for start in range(0, len(batch_df) - WINDOW_SIZE, STEP_SIZE):
        window = batch_df.iloc[start : start + WINDOW_SIZE]

        label = int(window["label"].mean() >= 0.5)

        X.append(window[FEATURES].values)
        y.append(label)
        window_batch_ids.append(batch_id)

X = np.array(X)
y = np.array(y)
window_batch_ids = np.array(window_batch_ids)

print(X.shape, y.shape)


def extract_features(X):
    feats = []
    for window in X:
        mean = window.mean(axis=0)
        std = window.std(axis=0)
        maxv = window.max(axis=0)
        energy = np.sum(window**2, axis=0)
        feats.append(np.concatenate([mean, std, maxv, energy]))
    return np.array(feats)


X_feat = extract_features(X)


(1687, 200, 7) (1687,)


In [186]:
test_batch = "batch_02"  # example

train_idx = window_batch_ids != test_batch
test_idx = window_batch_ids == test_batch

X_train = X_feat[train_idx]
y_train = y[train_idx]

X_test = X_feat[test_idx]
y_test = y[test_idx]

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1071, 28) (1071,)
(616, 28) (616,)


In [187]:
print("Train batches:", np.unique(window_batch_ids[train_idx]))
print("Test batches:", np.unique(window_batch_ids[test_idx]))


Train batches: ['batch_01' 'batch_03' 'batch_04']
Test batches: ['batch_02']


In [188]:
model = RandomForestClassifier(
    n_estimators=400, max_depth=8, class_weight="balanced", random_state=42
)

model.fit(X_train, y_train)

probs = model.predict(X_test)
THRESHOLD = 0.6  # tune this
y_pred_raw = (probs >= THRESHOLD).astype(int)


# Temporal smoothing (MAJOR PRECISION BOOST)
# Majority vote over last N windows
def smooth_predictions(preds, window=3):
    smoothed = []
    for i in range(len(preds)):
        start = max(0, i - window + 1)
        smoothed.append(int(np.mean(preds[start : i + 1]) >= 0.5))
    return np.array(smoothed)


y_pred = smooth_predictions(y_pred_raw, window=3)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       456
           1       0.72      0.72      0.72       160

    accuracy                           0.85       616
   macro avg       0.81      0.81      0.81       616
weighted avg       0.85      0.85      0.85       616

Confusion Matrix:
[[410  46]
 [ 44 116]]


In [102]:
importances = model.feature_importances_
feature_names = [f"{f}_mean" for f in FEATURES] + [f"{f}_std" for f in FEATURES]

for name, val in sorted(zip(feature_names, importances), key=lambda x: -x[1]):
    print(f"{name}: {val:.3f}")


accelerationX_mean: 0.092
rotationRateZ_mean: 0.059
rotationRateX_std: 0.056
rotationRateY_std: 0.049
accelerationZ_mean: 0.047
accelerationY_std: 0.040
accelerationX_std: 0.036
accelerationZ_std: 0.034
rotationRateZ_std: 0.029
rotationRateX_mean: 0.018
accelerationY_mean: 0.015
rotationRateY_mean: 0.011
accelerationMagnitude_mean: 0.000
accelerationMagnitude_std: 0.000
