In [1]:
# =========================================================
#  EMG + IMU Gesture Training Pipeline
#  CNN vs LSTM (Research Comparison)
#  FULL CLEAN VERSION — NO PATCHING REQUIRED
# =========================================================

!pip install -q tensorflow scikit-learn joblib tqdm

import os, zipfile, glob, joblib
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

import tensorflow as tf
from tensorflow.keras import layers, models

# =========================================================
# CONFIG
# =========================================================

DATASET_ZIP = "/content/v2.zip"   # upload your zip here
EXTRACT_DIR = "/content"
DATASET_DIR = "/content/v2"

SEQ_LEN = 200
FEATURES = ["emg1","emg2","emg3","ax","ay","az","gx","gy","gz"]

# =========================================================
# EXTRACT DATASET
# =========================================================

if not os.path.exists(DATASET_DIR):
    with zipfile.ZipFile(DATASET_ZIP, 'r') as z:
        z.extractall(EXTRACT_DIR)

print("Dataset root:", DATASET_DIR)

# =========================================================
# PAD FUNCTION
# =========================================================

def pad_sequence(x):
    if len(x) > SEQ_LEN:
        return x[:SEQ_LEN]
    pad = np.zeros((SEQ_LEN-len(x), x.shape[1]))
    return np.vstack([x, pad])

# =========================================================
# LOAD DATA (ROBUST VERSION)
# =========================================================

X, y = [], []

files = glob.glob(DATASET_DIR + "/*/*/*.txt")
print("Total files found:", len(files))

for file in tqdm(files):

    try:
        # auto detect separator (comma/space/tab)
        df = pd.read_csv(file, sep=None, engine="python")

        # skip broken files
        if df.shape[1] < 10:
            continue

        # assign columns
        df.columns = ["t","emg1","emg2","emg3","ax","ay","az","gx","gy","gz"]

        seq = pad_sequence(df[FEATURES].values)

        gesture = file.split("/")[-3]   # folder name is label

        X.append(seq)
        y.append(gesture)

    except:
        pass


X = np.array(X)
y = np.array(y)

print("Loaded samples:", len(X))
print("Data shape:", X.shape)

assert len(X) > 0, "❌ No samples loaded. Check dataset path or format."

# =========================================================
# NORMALIZE
# =========================================================

scaler = StandardScaler()

X_2d = X.reshape(-1, X.shape[-1])
X_2d = scaler.fit_transform(X_2d)
X = X_2d.reshape(X.shape)

joblib.dump(scaler, "scaler.pkl")

# =========================================================
# ENCODE LABELS
# =========================================================

le = LabelEncoder()
y = le.fit_transform(y)

joblib.dump(le, "label_map.pkl")

print("Classes:", le.classes_)

# =========================================================
# SPLIT
# =========================================================

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print("Train:", X_train.shape)
print("Val:", X_val.shape)
print("Test:", X_test.shape)

# =========================================================
# ================= CNN MODEL =============================
# =========================================================

cnn_model = models.Sequential([
    layers.Conv1D(64,3,activation='relu',input_shape=(SEQ_LEN,9)),
    layers.MaxPooling1D(2),

    layers.Conv1D(128,3,activation='relu'),
    layers.MaxPooling1D(2),

    layers.Conv1D(256,3,activation='relu'),
    layers.GlobalAveragePooling1D(),

    layers.Dense(128,activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(len(le.classes_),activation='softmax')
])

cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nTraining CNN model...")
cnn_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1
)

cnn_pred = np.argmax(cnn_model.predict(X_test), axis=1)
cnn_acc = accuracy_score(y_test, cnn_pred)

print("\nCNN Accuracy:", cnn_acc)
print(classification_report(y_test, cnn_pred))

cnn_model.save("cnn_model.h5")


# =========================================================
# ================= LSTM MODEL ============================
# =========================================================

lstm_model = models.Sequential([
    layers.LSTM(128, return_sequences=True, input_shape=(SEQ_LEN,9)),
    layers.Dropout(0.3),

    layers.LSTM(128),
    layers.Dropout(0.3),

    layers.Dense(128,activation='relu'),
    layers.Dense(len(le.classes_),activation='softmax')
])

lstm_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("\nTraining LSTM model...")
lstm_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    verbose=1
)

lstm_pred = np.argmax(lstm_model.predict(X_test), axis=1)
lstm_acc = accuracy_score(y_test, lstm_pred)

print("\nLSTM Accuracy:", lstm_acc)
print(classification_report(y_test, lstm_pred))

lstm_model.save("lstm_model.h5")


# =========================================================
# FINAL COMPARISON
# =========================================================

print("\n==============================")
print("FINAL ACCURACY COMPARISON")
print("==============================")
print("CNN  :", cnn_acc)
print("LSTM :", lstm_acc)
print("==============================")


Dataset root: /content/v2
Total files found: 1000


100%|██████████| 1000/1000 [00:08<00:00, 116.81it/s]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Loaded samples: 722
Data shape: (722, 200, 9)
Classes: ['ada' 'awidinawa' 'boru' 'hawasa' 'hodai' 'irida' 'narakai' 'pata'
 'saduda' 'udasana']
Train: (505, 200, 9)
Val: (108, 200, 9)
Test: (109, 200, 9)

Training CNN model...
Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 56ms/step - accuracy: 0.1648 - loss: 2.2767 - val_accuracy: 0.3241 - val_loss: 2.0374
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.3511 - loss: 1.9632 - val_accuracy: 0.3889 - val_loss: 1.7004
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.3950 - loss: 1.6515 - val_accuracy: 0.4259 - val_loss: 1.6026
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.4373 - loss: 1.5277 - val_accuracy: 0.4722 - val_loss: 1.5360
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.5066 - loss: 1.3666 - val_accu




CNN Accuracy: 0.6697247706422018
              precision    recall  f1-score   support

           0       0.41      0.75      0.53        12
           1       0.62      0.67      0.64        12
           2       1.00      0.55      0.71        11
           3       0.80      0.36      0.50        11
           4       0.89      0.67      0.76        12
           5       0.62      0.62      0.62         8
           6       0.56      0.56      0.56         9
           7       0.92      1.00      0.96        11
           8       0.80      0.67      0.73        12
           9       0.60      0.82      0.69        11

    accuracy                           0.67       109
   macro avg       0.72      0.67      0.67       109
weighted avg       0.73      0.67      0.67       109


Training LSTM model...
Epoch 1/20


  super().__init__(**kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 496ms/step - accuracy: 0.1914 - loss: 2.2378 - val_accuracy: 0.3241 - val_loss: 1.9185
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 543ms/step - accuracy: 0.3563 - loss: 1.8084 - val_accuracy: 0.3704 - val_loss: 1.7293
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 542ms/step - accuracy: 0.4434 - loss: 1.5343 - val_accuracy: 0.3981 - val_loss: 1.6009
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 483ms/step - accuracy: 0.5041 - loss: 1.3691 - val_accuracy: 0.4444 - val_loss: 1.5941
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 537ms/step - accuracy: 0.5273 - loss: 1.2526 - val_accuracy: 0.4815 - val_loss: 1.4775
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 539ms/step - accuracy: 0.5936 - loss: 1.1640 - val_accuracy: 0.5370 - val_loss: 1.3743
Epoch 7/20
[1m16/16[0m [32m━━━━━━━




LSTM Accuracy: 0.6146788990825688
              precision    recall  f1-score   support

           0       0.50      0.58      0.54        12
           1       0.67      0.50      0.57        12
           2       0.73      0.73      0.73        11
           3       0.36      0.36      0.36        11
           4       0.78      0.58      0.67        12
           5       1.00      0.62      0.77         8
           6       0.45      0.56      0.50         9
           7       0.90      0.82      0.86        11
           8       0.70      0.58      0.64        12
           9       0.47      0.82      0.60        11

    accuracy                           0.61       109
   macro avg       0.66      0.62      0.62       109
weighted avg       0.65      0.61      0.62       109


FINAL ACCURACY COMPARISON
CNN  : 0.6697247706422018
LSTM : 0.6146788990825688
