# 1D CNN for sEMG 17 premade gestures recognition v1.0
## 1. Import Libraries and Verify TF GPU

In [2]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras import layers, models, callbacks
from datetime import datetime
from tensorflow.python.client import device_lib
import h5py


2025-01-27 23:30:26.011987: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-27 23:30:26.026517: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-27 23:30:26.030520: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-27 23:30:26.042920: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
# Ensure TensorFlow uses GPU (forcing for WSL2)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    print("Num GPUs Available: ", len(physical_devices))
    print("GPU: ", physical_devices)
    print("\nExtra Verification Info. GPU: \n\n", device_lib.list_local_devices())
    print("\n\nGPU is available. Forcing GPU usage...")
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    print("No GPU found. Exiting...")
    exit(1)



Num GPUs Available:  1
GPU:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Extra Verification Info. GPU: 

 [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15815881975965083771
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5834276864
locality {
  bus_id: 1
  links {
  }
}
incarnation: 16543966995509941902
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3070 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


GPU is available. Forcing GPU usage...


I0000 00:00:1738038633.573591   22025 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738038633.651678   22025 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738038633.651765   22025 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738038633.659309   22025 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738038633.659443   22025 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

## 2. Pre-Process Pt 2

In [None]:
# Function to load HDF5-based .mat files with memory-efficient processing
# IF IN WSL 2 PLS RUN THIS CELL's CODE IN NATIVE WINDOWS, MUCH MUCH FASTER EVEN IF YOU CHANGE MEMORY LIMITS IN WSL2

def load_hdf5_mat_lazy(file_path, variable_name):
    with h5py.File(file_path, 'r') as f:
        dataset = f[variable_name]
        shape = dataset.shape  # Get shape without loading entire data
        dtype = dataset.dtype  # Get datatype
        
        # Read data in chunks to save memory and convert to numpy
        data = np.empty(shape, dtype=dtype)
        dataset.read_direct(data)  # Read directly to avoid memory duplication
        
        # MATLAB stores as column-major, so transpose to row-major (NumPy default)
        data = np.transpose(data)
        
        # Replace NaN values with 0.0 (handling in-place to save memory)
        np.nan_to_num(data, copy=False, nan=0.0)
        
    return data

# Load labels with flattening
def load_labels_lazy(file_path, variable_name):
    with h5py.File(file_path, 'r') as f:
        dataset = f[variable_name]
        data = np.empty(dataset.shape, dtype=dataset.dtype)
        dataset.read_direct(data)
    return data.flatten()

# Load training and testing datasets lazily
dataset = None
train_data = load_hdf5_mat_lazy('train_data.mat', 'train_data_reshaped')
np.save('train_data.npy', train_data)
print("Train Data Loaded. Shape:", train_data.shape)  # Expected (340646, 409, 8)
del dataset
del train_data

dataset = None
test_data = load_hdf5_mat_lazy('test_data.mat', 'test_data_reshaped')
np.save('test_data.npy', test_data)
print("Test Data Loaded. Shape:", test_data.shape)    # Expected (170323, 409, 8)
del dataset
del test_data

dataset = None
train_labels = load_labels_lazy('train_labels.mat', 'train_labels')
test_labels = load_labels_lazy('test_labels.mat', 'test_labels')

# Convert labels from 1-17 to 0-16 (for TensorFlow sparse categorical crossentropy)
train_labels -= 1
test_labels -= 1

np.save('train_labels.npy', train_labels)
np.save('test_labels.npy', test_labels)

# Clear memory by deleting unnecessary variables
del dataset
del train_labels, test_labels

print("Data processed and saved as NumPy files successfully.")


In [9]:
# Load training data
X_train = np.load('./Training_And_Validation/train_data.npy')  # Shape: (340646, 409, 8)
y_train = np.load('./Training_And_Validation/train_labels.npy')  # Shape: (340646,)

# Load testing data
X_test = np.load('./Training_And_Validation/test_data.npy')  # Shape: (170323, 409, 8)
y_test = np.load('./Training_And_Validation/test_labels.npy')  # Shape: (170323,)

print(X_train.shape, y_train.shape)

print(f"Training data shape: {X_train.shape}, Labels: {y_train.shape}")
print(f"Testing data shape: {X_test.shape}, Labels: {y_test.shape}")


(340646, 409, 8) (340646,)
Training data shape: (340646, 409, 8), Labels: (340646,)
Testing data shape: (170323, 409, 8), Labels: (170323,)


In [None]:
print("Number of NaNs in X_train:", np.isnan(X_train).sum())


import numpy as np

def find_first_nan(arr):
    # Find indices where NaNs exist
    nan_indices = np.argwhere(np.isnan(arr))
    
    if len(nan_indices) == 0:
        print("No NaNs found in the dataset.")
        return None

    # Get the first occurrence
    first_nan_index = nan_indices[0]
    window_idx, row_idx, channel_idx = first_nan_index

    print(f"First NaN found at:")
    print(f" - Window index: {window_idx}")
    print(f" - Row (time step) index: {row_idx}")
    print(f" - Channel index: {channel_idx}")
    
    # Return the first NaN value for verification
    return window_idx, row_idx, channel_idx, arr[window_idx, row_idx, channel_idx]

# Check in X_train
print("Checking X_train for NaNs...")
result = find_first_nan(X_train)

if result:
    win_idx, row_idx, ch_idx, nan_value = result
    print(f"Value at [{win_idx}, {row_idx}, {ch_idx}] = {nan_value}")

# Check in X_test
print("Checking X_test for NaNs...")
result = find_first_nan(X_test)

if result:
    win_idx, row_idx, ch_idx, nan_value = result
    print(f"Value at [{win_idx}, {row_idx}, {ch_idx}] = {nan_value}")


In [None]:
X_train[6,294,2] #after running NaN replacement

In [None]:
X_test[6,294,2]

In [None]:
y_test[40076]

In [None]:
np.unique(y_train)


In [None]:
np.unique(y_test)

## 3. Load Dataset

In [4]:
# Force dataset operations to run on the CPU (GPU VRAM not enough for dataset and we're using tensorflow, we onyl want to use GPU for model wieghts and params and stuff not for data)
with tf.device('/CPU:0'):
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    del X_train, y_train  # Free memory after creating the dataset

    # Save the dataset
    train_dataset.save('train_dataset')
    del train_dataset

I0000 00:00:1738033895.854882     624 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738033895.855165     624 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738033895.855220     624 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738033895.855944     624 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-01-27 22:11:35.856096: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.

In [None]:
with tf.device('/CPU:0'):
    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    del X_test, y_test
    # Save the test dataset
    test_dataset.save('test_dataset')
    del test_dataset

I0000 00:00:1738032660.484818     648 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738032660.488544     648 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738032660.488625     648 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1738032660.494688     648 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-01-27 21:51:00.497223: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0.  Your kernel may not have been built with NUMA support.

In [7]:
BATCH_SIZE = 17
SHUFFLE_BUFFER_SIZE = 5000

# Load the training dataset
train_dataset = tf.data.Dataset.load('train_dataset')

# Load the test dataset
test_dataset = tf.data.Dataset.load('test_dataset')

In [7]:
# seeing if data still correct structure and stuff
# Specify the index of the window and the value inside it
target_window_idx = 6
sample_idx = 294
channel_idx = 2

# Iterate through the dataset to find the specific window
for i, (window, label) in enumerate(test_dataset):
    if i == target_window_idx:
        # Convert the tensor to NumPy for easy indexing
        window_np = window.numpy()
        label_np = label.numpy()

        # Access the specific value
        specific_value = window_np[sample_idx, channel_idx]
        print(f"Value at X_test[{target_window_idx},{sample_idx},{channel_idx}]: {specific_value}")
        print(f"Label for this window: {label_np}")
        break


Value at X_test[6,294,2]: -0.03252993960541661
Label for this window: 0.0


In [8]:
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

## Define 1D CNN Model Architecture

In [9]:
# Define 1D CNN model
model = models.Sequential([
    layers.Input(shape=(409, 8)),  # Explicit input layer
    layers.Conv1D(filters=32, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=64, kernel_size=5, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=128, kernel_size=5, activation='relu'),
    layers.GlobalAveragePooling1D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(17, activation='softmax')
])



In [10]:

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [11]:
model.summary()

In [12]:
# Set up TensorBoard logging
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [13]:
# Train the model
model.fit(train_dataset, epochs=20, validation_data=test_dataset,
    callbacks=[tensorboard_callback],
    verbose=1,
)

Epoch 1/20


I0000 00:00:1738038746.081389   22305 service.cc:146] XLA service 0x7f179c004a60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1738038746.081436   22305 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce RTX 3070 Ti Laptop GPU, Compute Capability 8.6
2025-01-27 23:32:26.135912: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-01-27 23:32:26.401032: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m   18/20038[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:06[0m 6ms/step - accuracy: 0.7814 - loss: 2.2119     

I0000 00:00:1738038750.232305   22305 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 7ms/step - accuracy: 0.9053 - loss: 0.2735 - val_accuracy: 0.0890 - val_loss: 14.4017
Epoch 2/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 7ms/step - accuracy: 0.8941 - loss: 0.3446 - val_accuracy: 0.0988 - val_loss: 16.6994
Epoch 3/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 7ms/step - accuracy: 0.9015 - loss: 0.3304 - val_accuracy: 0.1066 - val_loss: 17.1145
Epoch 4/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 7ms/step - accuracy: 0.8974 - loss: 0.3373 - val_accuracy: 0.0994 - val_loss: 17.1077
Epoch 5/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 7ms/step - accuracy: 0.9050 - loss: 0.3051 - val_accuracy: 0.1008 - val_loss: 17.6268
Epoch 6/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 7ms/step - accuracy: 0.9060 - loss: 0.2979 - val_accuracy: 0.1038 - val_loss: 17.54

2025-01-27 23:56:19.249178: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:7: Filling up shuffle buffer (this may take a while): 931 of 5000


[1m   14/20038[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m4:33[0m 14ms/step - accuracy: 0.0381 - loss: 19.6079       

2025-01-27 23:56:08.836085: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 7ms/step - accuracy: 0.8975 - loss: 0.3529 - val_accuracy: 0.0980 - val_loss: 25.1768
Epoch 12/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 7ms/step - accuracy: 0.8793 - loss: 0.4010 - val_accuracy: 0.1022 - val_loss: 30.3458
Epoch 13/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 7ms/step - accuracy: 0.8815 - loss: 0.4485 - val_accuracy: 0.1048 - val_loss: 37.4317
Epoch 14/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 7ms/step - accuracy: 0.8569 - loss: 0.5620 - val_accuracy: 0.0836 - val_loss: 15.6019
Epoch 15/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 7ms/step - accuracy: 0.8319 - loss: 0.5318 - val_accuracy: 0.1109 - val_loss: 30.1636
Epoch 16/20
[1m20038/20038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 7ms/step - accuracy: 0.8507 - loss: 0.5439 - val_accuracy: 0.0845 - val_loss: 

<keras.src.callbacks.history.History at 0x7f189a363f80>

In [16]:
model.summary()

# Later stuff

In [14]:
# Save the trained model
model.save('emg_gesture_cnn_model.h5')
print("Model saved as 'emg_gesture_cnn_model.h5'")

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

# Launch TensorBoard command (for WSL2 use the correct path)
print("Run the following command to start TensorBoard:")
print(f"tensorboard --logdir=logs/fit --bind_all")



Model saved as 'emg_gesture_cnn_model.h5'
[1m10019/10019[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 3ms/step - accuracy: 0.0055 - loss: 4.7496   
Test Accuracy: 5.88%
Run the following command to start TensorBoard:
tensorboard --logdir=logs/fit --bind_all
