In [None]:
import pandas as pd

### Import Normalized Dataset (Unzip it first)

In [None]:
# Paths of every recorded CSV, grouped by gesture.
# Each gesture was recorded in one or more numbered sessions; the files are
# named '<gesture>_<session>.csv' with sessions counted from 1.
file_paths = [
    f'collected dataset/{gesture}_{session}.csv'
    for gesture, session_count in (
        ('fist', 2),
        ('hand_open', 1),
        ('index', 4),
        ('index_middle', 4),
        ('index_middle_thumb', 1),
        ('index_thumb', 1),
        ('peace', 1),
        ('pinch', 1),
        ('thumbs_down', 1),
        ('thumbs_pinky', 3),
        ('thumbs_up', 1),
    )
    for session in range(1, session_count + 1)
]

#### Basic Preprocessing

In [None]:
# Read every recording listed in `file_paths` into its own DataFrame.
csv_data = [pd.read_csv(file) for file in file_paths]

# Concatenate all DataFrames into a single DataFrame with a fresh 0..n-1 index.
data = pd.concat(csv_data, ignore_index=True)

# Shuffle the rows. The shuffle is seeded because the later train/test/val
# split selects rows by position with a fixed random_state; an unseeded
# shuffle here would put different rows at those positions on every run and
# make the splits (and every reported accuracy) non-reproducible.
data = data.sample(frac=1, random_state=12).reset_index(drop=True)

In [None]:
# Discard every row that has at least one missing value; `data` itself is
# left untouched so this cell can be re-run safely.
df = data.dropna(axis=0, how='any')

# Show (rows, columns) of the cleaned frame.
df.shape

#### Replace string labels with numbers & remove column names

In [None]:
# Integer class id assigned to each gesture label found in the last column.
string_to_numeric = {'index': 0, 'index_middle': 1, 'index_thumb': 2, 'index_middle_thumb' : 3, 'peace': 4, 'hand_open' : 5, 'fist': 6, 'pinch' : 7, 'thumbs_up' : 8, 'thumbs_down': 9 , 'thumbs_pinky' : 10}

# Every column except the last is used as a feature; the last column is the
# gesture label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Fail loudly on a label without a class id instead of letting a raw string
# slip through into the training targets.
unknown_labels = set(y.unique()) - set(string_to_numeric)
if unknown_labels:
    raise ValueError(f"Labels without a numeric mapping: {sorted(map(str, unknown_labels))}")

# Build a new integer Series rather than replacing in place on a slice of
# `df`: this avoids the chained-assignment warning and guarantees an int64
# dtype (in-place `replace` on an object column may leave the dtype as object).
y = y.map(string_to_numeric).astype('int64')

# remove column names (keep only the underlying NumPy array)
X = X.values

X.shape

### Train, test and validation split

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: keep 70% of the rows for training and hold out the remaining 30%.
X_train, X_test_val, y_train, y_test_val = train_test_split(
    X, y,
    test_size=0.3,
    random_state=12,
)

# Step 2: cut the held-out 30% in half, giving a test set and a validation
# set of roughly 15% of the data each.
X_test, X_val, y_test, y_val = train_test_split(
    X_test_val, y_test_val,
    test_size=0.5,
    random_state=12,
)

# Report the size of each split.
for split_label, split_features in (
    ("Training data shape:", X_train),
    ("Testing data shape:", X_test),
    ("Validation data shape:", X_val),
):
    print(split_label, split_features.shape)

### Feature Engineering using PCA

In [None]:
from sklearn.decomposition import PCA

# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share (here 99%) of the variance.
pca = PCA(n_components=0.99)

# Fit the projection on the training split only, then apply the already
# fitted projection to the test and validation splits.
X_train_pca = pca.fit_transform(X_train)
X_test_pca, X_val_pca = pca.transform(X_test), pca.transform(X_val)

print("Training data shape after PCA:", X_train_pca.shape)
# Number of rows in the projected test split.
print(len(X_test_pca))

In [None]:
# Persist the fitted PCA object to ../utils/pca.pkl so the same projection
# can be loaded again later.
import pickle

with open('../utils/pca.pkl', 'wb') as pca_file:
    pickle.dump(pca, pca_file)

### Training and testing non-TensorFlow models

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from xgboost import XGBClassifier

# Hyperparameter tuned models
#
# Stored as a list of (name, estimator) pairs so the models are always
# trained, evaluated and printed in the order written here (a set gives no
# ordering guarantee). Every entry must end with a comma: without one after
# the XGBoost tuple, Python would try to call that tuple with the next one
# and raise "TypeError: 'tuple' object is not callable".

my_models = [
    ("KNN", KNeighborsClassifier(algorithm = 'auto', n_neighbors = 3)),
    ("Decision Tree", DecisionTreeClassifier(criterion = 'entropy', max_depth = 30, min_samples_split = 2, splitter = 'best')),
    ("Random Forest", RandomForestClassifier(criterion = 'entropy', max_depth = 20, n_estimators = 50)),
    ("XGBoost", XGBClassifier(learning_rate=0.2, max_depth=6, n_estimators= 50)),
    ("MLP", MLPClassifier(hidden_layer_sizes=(128,64), activation='relu', solver='adam', max_iter=1000, early_stopping=True, n_iter_no_change=3)),
]

In [None]:
from sklearn.metrics import confusion_matrix

# Fit each candidate on the PCA-projected training data, then report its
# accuracy on the training, validation and test splits.
for name, model in my_models:
    print("Training", name)
    model.fit(X_train_pca, y_train)

    for score_label, features, targets in (
        ("Training Accuracy:", X_train_pca, y_train),
        ("Validation Accuracy:", X_val_pca, y_val),
        ("Testing Accuracy:", X_test_pca, y_test),
    ):
        print(score_label, model.score(features, targets))

    # Blank line between models.
    print()

In [None]:
import time

# For every fitted model: time one predict call over the whole test split,
# then print its confusion matrix and test accuracy.
for name, model in my_models:
    print("Model:", name)

    # Only the predict call itself is inside the timed region.
    start = time.time()
    y_pred = model.predict(X_test_pca)
    elapsed = time.time() - start
    print("Time taken for prediction:", elapsed)

    # Confusion matrix followed by one blank line.
    cm = confusion_matrix(y_test, y_pred)
    print(cm, end="\n\n")

    # Accuracy followed by two blank lines to separate the models.
    print("Testing Accuracy:", model.score(X_test_pca, y_test))
    print("\n")

#### Save chosen model from the above models

In [None]:
# Pickle only the estimator registered under the name "MLP"; every other
# entry in `my_models` is skipped.
import pickle

for name, model in my_models:
    if name != "MLP":
        continue
    with open('../models/gesture_detector_mlp.pkl', 'wb') as model_file:
        pickle.dump(model, model_file)

### Training and testing TensorFlow model

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Size the output layer from the label encoding rather than from the labels
# that happen to be present in the data: if an intermediate class were
# missing, counting unique labels would give too few output units and the
# higher class ids would fall outside the softmax range expected by
# sparse categorical cross-entropy.
num_states = len(string_to_numeric)

# Class ids actually present in the data (sanity check).
print(y.unique())

# Small fully connected classifier on top of the PCA features:
# input -> 64 ReLU units -> softmax over the gesture classes.
tf_model = models.Sequential([
    layers.Input(shape=(X_train_pca.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_states, activation='softmax'),
])

# Compile the model. The sparse loss takes integer class ids directly, so the
# targets do not need to be one-hot encoded.
tf_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once the validation loss has failed to improve for 2 consecutive
# epochs, and roll the weights back to the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)

# Train for at most 50 epochs, evaluating on the validation split after each
# epoch so the early-stopping callback can monitor it.
history = tf_model.fit(
    x=X_train_pca,
    y=y_train,
    validation_data=(X_val_pca, y_val),
    epochs=50,
    callbacks=[early_stopping],
)

In [None]:
# draw confusion matrix
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import time 
# Time a single forward pass of the network over the whole test split.
start = time.time()
class_scores = tf_model(X_test_pca)
end = time.time()
print("Time taken for prediction:", end-start)

# The predicted class is the index of the largest softmax output in each row.
y_pred = np.argmax(class_scores, axis=1)

cm = confusion_matrix(y_test, y_pred)
print(cm)

# Accuracy = correctly classified samples (matrix diagonal) / all samples.
print("\nAccuracy:", cm.trace() / cm.sum())

#### Save TensorFlow model

In [None]:
# Write the trained Keras model to disk in the single-file .keras format.
tf_model.save(filepath='../models/tf_model.keras')