In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle, os

import tensorflow as tf
from tensorflow import keras

from keras.models import Sequential
from keras.layers import LSTM, Dense, GRU, Dropout, Bidirectional

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.svm import *


In [3]:
VERSION = 'v1'

In [4]:
df = pd.read_csv('../../dataset/csv/dataset_v1.csv')
df.head(3)


Unnamed: 0,class,x1,y1,z1,v1,x2,y2,z2,v2,x3,...,y74,z74,v74,x75,y75,z75,v75,face_d75,face_s75,hand_s75
0,celinguk,0.452516,0.177898,-0.904605,0.999882,0.51665,0.137372,-0.673349,0.999341,0.422173,...,0.609458,-0.498302,0.967,0.330829,0.609458,-0.498302,0.967,2.0,0.0,0.0
1,celinguk,0.687073,0.188956,-0.612002,0.996062,0.648438,0.158911,-0.388706,0.987777,0.60119,...,0.525487,-0.142604,0.941713,0.311903,0.525487,-0.142604,0.941713,0.0,0.0,0.0
2,celinguk,0.48578,0.217672,-0.78086,0.99651,0.578979,0.16856,-0.718592,0.986905,0.490544,...,0.577168,-0.192743,0.966556,0.330957,0.577168,-0.192743,0.966556,2.0,0.0,0.0


In [5]:
df['class'].value_counts()



class
celinguk       50
diam           50
nodong         50
tutupmuka      50
tutuptangan    50
Name: count, dtype: int64

In [6]:
X = df.drop('class', axis=1) # features
y = df['class'] # target value

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1234)

print(f"Train set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
y_test

Train set size: 212
Test set size: 38


202    tutuptangan
94            diam
31        celinguk
246    tutuptangan
215    tutuptangan
6         celinguk
91            diam
73            diam
129         nodong
5         celinguk
136         nodong
111         nodong
161      tutupmuka
74            diam
48        celinguk
65            diam
7         celinguk
218    tutuptangan
32        celinguk
63            diam
132         nodong
97            diam
51            diam
122         nodong
9         celinguk
145         nodong
110         nodong
196      tutupmuka
52            diam
238    tutuptangan
29        celinguk
131         nodong
125         nodong
141         nodong
235    tutuptangan
164      tutupmuka
245    tutuptangan
95            diam
Name: class, dtype: object

# Training Model : LSTM SINGLE

In [7]:
# Separate features (X) and labels (y)
X_raw = df.iloc[:, 1:].values
y_raw = df['class'].values

# Encode Labels (for multi-class classification)
# Assuming 'diam' is just one class and there might be others in a full dataset
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_raw)
N_CLASSES = len(label_encoder.classes_)

# Scale Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_raw)

In [8]:
# Reshape for LSTM (Single Timestep) ---
# Format: [samples, timesteps, features]
N_SAMPLES = X_scaled.shape[0]
N_FEATURES_PER_SAMPLE = X_scaled.shape[1]
N_TIMESTEPS = 1 # Each row is treated as one observation with one timestep

X_reshaped = X_scaled.reshape(N_SAMPLES, N_TIMESTEPS, N_FEATURES_PER_SAMPLE)

In [9]:
if N_CLASSES > 2:
    y_final = tf.keras.utils.to_categorical(y_encoded, num_classes=N_CLASSES)
    loss_fn = 'categorical_crossentropy'
    output_activation = 'softmax'
else:
    # If it's effectively binary (N_CLASSES=2), we still use sparse_categorical_crossentropy 
    # since we kept y_encoded as integer labels
    y_final = y_encoded
    loss_fn = 'sparse_categorical_crossentropy'
    # If N_CLASSES=2, the Dense layer should have 1 unit with 'sigmoid' for binary_crossentropy
    # or N_CLASSES units with 'softmax' for categorical_crossentropy. 
    # Since we use sparse_categorical_crossentropy with integer labels, N_CLASSES with 'softmax' is safest.
    output_activation = 'softmax'

In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    X_reshaped, y_final, test_size=0.2, random_state=42, stratify=y_final
)

In [11]:
lstm_model = Sequential([
    # Single LSTM Layer (no return_sequences since it's the last recurrent layer)
    LSTM(units=128, activation='relu', input_shape=(N_TIMESTEPS, N_FEATURES_PER_SAMPLE)),
    Dropout(0.3),
    # Output Dense layer
    Dense(units=N_CLASSES, activation=output_activation)
])

I0000 00:00:1761546666.822386   74247 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3539 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
  super().__init__(**kwargs)


In [12]:
lstm_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

print(f"\nModel Input Shape: {X_train.shape[1:]}")
print(f"Model Output Classes: {N_CLASSES}")
print("-" * 30)
lstm_model.summary()
print("-" * 30)

# Training (using the split training data)
print("Starting Model Training...")
lstm_model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=4,
    verbose=0
)
print("Model Training Complete.")
print("-" * 30)


Model Input Shape: (1, 345)
Model Output Classes: 5
------------------------------


------------------------------
Starting Model Training...


2025-10-27 14:31:11.172316: I external/local_xla/xla/service/service.cc:163] XLA service 0x7690f8003c50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-10-27 14:31:11.172379: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4050 Laptop GPU, Compute Capability 8.9
2025-10-27 14:31:11.287535: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-10-27 14:31:11.628126: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91002
I0000 00:00:1761546673.873871   74726 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Model Training Complete.
------------------------------


In [14]:
# Since TensorFlow models don't pickle reliably, we save the trained 
# Keras model separately and then pickle a dictionary containing the necessary 
# preprocessing objects.

PICKLE_FILENAME = f'lstm_model_{VERSION}.pkl'
KERAS_MODEL_FILENAME = f'../../model/trained/lstm_s/single_lstm_weights_{VERSION}.keras'

# 8a. Save the Keras model in its native format
lstm_model.save(KERAS_MODEL_FILENAME)

# 8b. Create a dictionary of necessary artifacts
artifacts = {
    'scaler': scaler,
    'label_encoder': label_encoder,
    'model_filename': KERAS_MODEL_FILENAME,
    'input_shape': X_train.shape[1:],
    'num_classes': N_CLASSES
}

# 8c. Pickle the artifacts dictionary
with open(f'../../model/trained/lstm_s/{PICKLE_FILENAME}', 'wb') as file:
    pickle.dump(artifacts, file)

print(f"Preprocessing artifacts (Scaler, Encoder) saved to: {PICKLE_FILENAME}")
print(f"Trained Keras model weights saved to: {KERAS_MODEL_FILENAME}")
print("\nTo load and use this model, you'll need both files and the TensorFlow code to load the .keras file.")

Preprocessing artifacts (Scaler, Encoder) saved to: lstm_model_v1.pkl
Trained Keras model weights saved to: ../../model/trained/lstm_s/single_lstm_weights_v1.keras

To load and use this model, you'll need both files and the TensorFlow code to load the .keras file.


# BILSTM : Training Model

In [15]:
bilstm_model = Sequential([
    # Bi-LSTM Layer: Wrap LSTM in Bidirectional
    Bidirectional(LSTM(
        units=128,          # Number of units for EACH direction (Forward and Backward)
        activation='relu'
    ), input_shape=(N_TIMESTEPS, N_FEATURES_PER_SAMPLE)),
    
    Dropout(0.3),
    
    # Dense layers for classification
    Dense(units=64, activation='relu'),
    Dense(units=N_CLASSES, activation=output_activation)
])

  super().__init__(**kwargs)


In [16]:
bilstm_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

print(f"Model Input Shape: {X_train.shape[1:]}")
print(f"Model Output Classes: {N_CLASSES}")
print("-" * 50)
bilstm_model.summary()
print("-" * 50)

print("Starting BiLSTM Model Training...")
# Train the model
bilstm_model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=4,
    validation_data=(X_test, y_test),
    verbose=0
)
print("Model Training Complete.")
print("-" * 50)

# Evaluate the model
loss, accuracy = bilstm_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")
print("-" * 50)

Model Input Shape: (1, 345)
Model Output Classes: 5
--------------------------------------------------


--------------------------------------------------
Starting BiLSTM Model Training...
Model Training Complete.
--------------------------------------------------


2025-10-27 14:32:22.929258: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-10-27 14:32:25.432033: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


Test Accuracy: 98.00%
--------------------------------------------------


In [17]:

PICKLE_FILENAME = f'bi_lstm_model_{VERSION}.pkl'
KERAS_MODEL_FILENAME = f'bi_lstm_weights_{VERSION}.keras'# Recommended native Keras format

# 4a. Save the Keras model in its native format
bilstm_model.save(f'../../model/trained/lstm_bi/{KERAS_MODEL_FILENAME}')

# 4b. Create a dictionary of necessary artifacts (Scaler, Encoder, Model Path)
artifacts = {
    'scaler': scaler,
    'label_encoder': label_encoder,
    'model_filename': KERAS_MODEL_FILENAME,
    'input_shape': X_train.shape[1:],
    'num_classes': N_CLASSES
}

# 4c. Pickle the artifacts dictionary
with open(f'../../model/trained/lstm_bi/{PICKLE_FILENAME}', 'wb') as file:
    pickle.dump(artifacts, file)

print(f"✅ BiLSTM Keras model weights saved to: {KERAS_MODEL_FILENAME}")
print(f"✅ Preprocessing artifacts (Scaler, Encoder) saved to: {PICKLE_FILENAME}")

✅ BiLSTM Keras model weights saved to: bi_lstm_weights_v1.keras
✅ Preprocessing artifacts (Scaler, Encoder) saved to: bi_lstm_model_v1.pkl


# GRU : Training Model

In [18]:
# --- 2. Define the Single-Layer GRU Model ---
gru_model = Sequential([
    # GRU Layer - No Bidirectional wrapper requested, so it's unidirectional.
    GRU(
        units=128,          # Number of units in the GRU
        activation='tanh',  # GRU default activation
        input_shape=(N_TIMESTEPS, N_FEATURES_PER_SAMPLE)
    ),
    
    Dropout(0.3),
    
    # Dense layers for classification
    Dense(units=64, activation='relu'),
    Dense(units=N_CLASSES, activation=output_activation)
])

In [19]:
gru_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

print(f"Model Input Shape: {X_train.shape[1:]}")
print(f"Model Output Classes: {N_CLASSES}")
print("-" * 50)
gru_model.summary()
print("-" * 50)

print("Starting GRU Model Training...")
# Train the model
gru_model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=4,
    validation_data=(X_test, y_test),
    verbose=0
)
print("Model Training Complete.")
print("-" * 50)

# Evaluate the model
loss, accuracy = gru_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")
print("-" * 50)

Model Input Shape: (1, 345)
Model Output Classes: 5
--------------------------------------------------


--------------------------------------------------
Starting GRU Model Training...
Model Training Complete.
--------------------------------------------------
Test Accuracy: 98.00%
--------------------------------------------------


In [20]:

PICKLE_FILENAME = f'gru_model_{VERSION}.pkl'
KERAS_MODEL_FILENAME = f'gru_weights_{VERSION}.keras'# Recommended native Keras format

# 4a. Save the Keras model in its native format
gru_model.save(f'../../model/trained/gru/{KERAS_MODEL_FILENAME}')

# 4b. Create a dictionary of necessary artifacts (Scaler, Encoder, Model Path)
artifacts = {
    'scaler': scaler,
    'label_encoder': label_encoder,
    'model_filename': KERAS_MODEL_FILENAME,
    'input_shape': X_train.shape[1:],
    'num_classes': N_CLASSES
}

# 4c. Pickle the artifacts dictionary
with open(f'../../model/trained/gru/{PICKLE_FILENAME}', 'wb') as file:
    pickle.dump(artifacts, file)

print(f"✅ GRU Keras model weights saved to: {KERAS_MODEL_FILENAME}")
print(f"✅ Preprocessing artifacts (Scaler, Encoder) saved to: {PICKLE_FILENAME}")

✅ GRU Keras model weights saved to: gru_weights_v1.keras
✅ Preprocessing artifacts (Scaler, Encoder) saved to: gru_model_v1.pkl


# NN : Training Model

In [29]:
# --- Configuration ---

# Label Encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_raw)
N_CLASSES = len(label_encoder.classes_)

# Scaling (Crucial for Dense layers)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_raw)

N_FEATURES_PER_SAMPLE = X_scaled.shape[1]
N_SAMPLES = X_scaled.shape[0]

# Note: MLP/Dense layers expect 2D data: [samples, features]
# We use X_scaled directly, NOT the 3D X_reshaped used for LSTMs/GRUs.
X_flat = X_scaled

# Final Output Preparation (Using sparse labels with softmax for consistency)
y_final = y_encoded
loss_fn = 'sparse_categorical_crossentropy'
output_activation = 'softmax'

# Split Data
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y_final, test_size=0.2, random_state=42, stratify=y_final
)

In [30]:
nn_model = Sequential([
    # Input layer and first Hidden Dense Layer
    Dense(units=256, activation='relu', input_shape=(N_FEATURES_PER_SAMPLE,)),
    Dropout(0.3),
    
    # Second Hidden Dense Layer
    Dense(units=128, activation='relu'),
    Dropout(0.3),
    
    # Output Dense layer (units = number of classes, activation = softmax for classification)
    Dense(units=N_CLASSES, activation=output_activation)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [31]:
nn_model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy']
)

print(f"Model Input Shape (NN/MLP): {X_train.shape[1]}")
print(f"Model Output Classes: {N_CLASSES}")
print("-" * 50)
nn_model.summary()
print("-" * 50)

print("Starting MLP Model Training...")
# Train the model
nn_model.fit(
    X_train,
    y_train,
    epochs=20, # Use more epochs than RNNs, as simple layers train faster
    batch_size=4,
    validation_data=(X_test, y_test),
    verbose=0
)
print("Model Training Complete.")
print("-" * 50)

# Evaluate the model
loss, accuracy = nn_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {accuracy*100:.2f}%")
print("-" * 50)

Model Input Shape (NN/MLP): 345
Model Output Classes: 5
--------------------------------------------------


--------------------------------------------------
Starting MLP Model Training...






Model Training Complete.
--------------------------------------------------


2025-10-27 14:38:01.711913: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.

2025-10-27 14:38:03.684063: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.


Test Accuracy: 96.00%
--------------------------------------------------


In [33]:

PICKLE_FILENAME = f'nn_model_{VERSION}.pkl'
KERAS_MODEL_FILENAME = f'nn_weights_{VERSION}.keras'# Recommended native Keras format

# 4a. Save the Keras model in its native format
nn_model.save(f'../../model/trained/nn/{KERAS_MODEL_FILENAME}')

# 4b. Create a dictionary of necessary artifacts (Scaler, Encoder, Model Path)
artifacts = {
    'scaler': scaler,
    'label_encoder': label_encoder,
    'model_filename': KERAS_MODEL_FILENAME,
    'input_shape': X_train.shape[1:],
    'num_classes': N_CLASSES
}

# 4c. Pickle the artifacts dictionary
with open(f'../../model/trained/nn/{PICKLE_FILENAME}', 'wb') as file:
    pickle.dump(artifacts, file)

print(f"✅ NN Keras model weights saved to: {KERAS_MODEL_FILENAME}")
print(f"✅ Preprocessing artifacts (Scaler, Encoder) saved to: {PICKLE_FILENAME}")

✅ NN Keras model weights saved to: nn_weights_v1.keras
✅ Preprocessing artifacts (Scaler, Encoder) saved to: nn_model_v1.pkl
