In [2]:
!pip install -q scikeras joblib

In [33]:
import os
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, LSTM,
    Dense, Dropout, BatchNormalization
)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [4]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("niloy333/kuhar")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/niloy333/kuhar?dataset_version_number=1...


100%|██████████| 460M/460M [00:08<00:00, 56.7MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/niloy333/kuhar/versions/1


In [6]:
BASE_PATH = "/root/.cache/kagglehub/datasets/niloy333/kuhar/versions/1/2.Trimmed_interpolated_data"

In [16]:
CLASS_MAP = {
    "Walk": ["11.Walk", "12.Walk-backwards", "13.Walk-circle"],
    "Stairs": ["15.Stair-up", "16.Stair-down"],
    "Static": ["0.Stand", "1.Sit", "5.Lay"],
    "Transitions": ["4.Stand-sit", "6.Lay-stand"],
    "Exercise": ["8.Jump", "9.Push-up", "10.Sit-up", "14.Run"]
}


In [17]:
activity_to_group = {}
for group, acts in CLASS_MAP.items():
    for act in acts:
        activity_to_group[act] = group


In [18]:
X = []
y = []

for folder in sorted(os.listdir(BASE_PATH)):
    folder_path = os.path.join(BASE_PATH, folder)

    if folder not in activity_to_group:
        continue

    label = activity_to_group[folder]

    for file in os.listdir(folder_path):
        if file.endswith(".csv"):
            df = pd.read_csv(os.path.join(folder_path, file))
            df = df.select_dtypes(include=[np.number])
            X.append(df.values)
            y.append(label)


In [19]:
TIMESTEPS = 128   # good balance
X = np.array([sample[:TIMESTEPS] for sample in X if sample.shape[0] >= TIMESTEPS])
y = y[:len(X)]

print("X shape:", X.shape)  # (samples, timesteps, channels)


X shape: (1647, 128, 8)


In [32]:
num_samples, timesteps, channels = X.shape

X_reshaped = X.reshape(-1, channels)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reshaped)

X = X_scaled.reshape(num_samples, timesteps, channels)


In [27]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)

num_classes = y_cat.shape[1]
print("Classes:", le.classes_)


Classes: ['Exercise' 'Stairs' 'Static' 'Transitions' 'Walk']


In [29]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)


In [30]:
model = Sequential([
    Conv1D(64, kernel_size=3, activation="relu", input_shape=(TIMESTEPS, channels)),
    BatchNormalization(),
    MaxPooling1D(2),

    Conv1D(128, kernel_size=3, activation="relu"),
    BatchNormalization(),
    MaxPooling1D(2),

    LSTM(64),
    Dropout(0.4),

    Dense(64, activation="relu"),
    Dropout(0.4),

    Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [34]:
callbacks = [
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
    ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3)
]

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=40,
    batch_size=32,
    callbacks=callbacks
)

Epoch 1/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 72ms/step - accuracy: 0.4258 - loss: 1.3445 - val_accuracy: 0.5394 - val_loss: 1.3285 - learning_rate: 0.0010
Epoch 2/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.6697 - loss: 0.9096 - val_accuracy: 0.6364 - val_loss: 1.2710 - learning_rate: 0.0010
Epoch 3/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - accuracy: 0.7667 - loss: 0.6672 - val_accuracy: 0.6152 - val_loss: 1.3370 - learning_rate: 0.0010
Epoch 4/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 43ms/step - accuracy: 0.7802 - loss: 0.5640 - val_accuracy: 0.6091 - val_loss: 1.1746 - learning_rate: 0.0010
Epoch 5/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.8168 - loss: 0.5157 - val_accuracy: 0.6394 - val_loss: 0.7814 - learning_rate: 0.0010
Epoch 6/40
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 

In [35]:
y_pred = model.predict(X_test)
y_pred_cls = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print("Accuracy:", accuracy_score(y_true, y_pred_cls))
print(classification_report(y_true, y_pred_cls, target_names=le.classes_))


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 101ms/step
Accuracy: 0.8787878787878788
              precision    recall  f1-score   support

    Exercise       0.92      0.90      0.91       102
      Stairs       0.67      0.64      0.65        22
      Static       0.86      1.00      0.92        54
 Transitions       0.91      0.82      0.86        97
        Walk       0.86      0.91      0.88        55

    accuracy                           0.88       330
   macro avg       0.84      0.85      0.85       330
weighted avg       0.88      0.88      0.88       330



In [36]:
model.save("kuhar_cnn_lstm_fixed.h5")

import pickle
with open("kuhar_label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)

with open("kuhar_scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)




In [38]:
from google.colab import files
files.download("kuhar_label_encoder.pkl")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>