In [1]:
import pandas as pd

In [2]:
# List of CSV file paths
file_paths = [
    'Collected Dataset/fist.csv',
    'Collected Dataset/hand_open.csv',
    'Collected Dataset/index_middle_thumb.csv',
    'Collected Dataset/index_middle.csv',
    'Collected Dataset/index_thumb.csv',
    'Collected Dataset/index.csv',
    'Collected Dataset/pinch.csv',
    'Collected Dataset/peace.csv',
    'Collected Dataset/thumbs_pinky.csv',
    'Collected Dataset/thumbs_down.csv',
    'Collected Dataset/thumbs_up.csv'
]


In [3]:
csv_data = [pd.read_csv(file) for file in file_paths]

# Concatenate all DataFrames into a single DataFrame
data = pd.concat(csv_data, ignore_index=True)

# Shuffle the DataFrame
data = data.sample(frac=1).reset_index(drop=True)

In [4]:


string_to_numeric = {'index': 0, 'index_middle': 1, 'index_thumb': 2, 'index_middle_thumb' : 3, 'peace': 4, 'hand_open' : 5, 'fist': 6, 'pinch' : 7, 'thumbs_up' : 8, 'thumbs_down': 9 , 'thumbs_pinky' : 10}

# string_to_numeric = {'index': 0, 'index_middle': 1, 'index_thumb': 2, 'index_middle_thumb' : 3, 'fist': 4, 'hand_open' : 5}

X = data.iloc[:, :-1]
y = data.iloc[:, -1]

y.replace(string_to_numeric, inplace=True)

X.sample(1)

# remove column names
X = X.values

X.shape

  y.replace(string_to_numeric, inplace=True)


(88549, 63)

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=0.3, random_state=12)

X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=12)

print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
print("Validation data shape:", X_val.shape)

Training data shape: (61984, 63)
Testing data shape: (13282, 63)
Validation data shape: (13283, 63)


In [6]:
# apply pca

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

pca = PCA(n_components=0.95)

X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
X_val_pca = pca.transform(X_val)

print("Training data shape after PCA:", X_train_pca.shape)

Training data shape after PCA: (61984, 13)


In [94]:
# save pca model
import pickle

with open('../utils/pca.pkl', 'wb') as f:
    pickle.dump(pca, f)

In [7]:
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
# from sklearn.neural_network import MLPClassifier
# from sklearn.svm import SVC
from xgboost import XGBClassifier

# Hyperparameter tuned models

my_models = {
#     ("KNN", KNeighborsClassifier(algorithm = 'auto', n_neighbors = 3)),
#     ("Decision Tree", DecisionTreeClassifier(criterion = 'entropy', max_depth = None)),
    ("Random Forest", RandomForestClassifier(criterion = 'entropy', max_depth = 20, n_estimators = 200)),
#     ("Logistic Regression", LogisticRegression(C = 10, solver = 'newton-cg')),
#     ("MLP", MLPClassifier(activation = 'tanh', hidden_layer_sizes = (64, 128), solver = 'adam')),
#     ("SVM", SVC(C=10, gamma='scale', kernel='poly')),
    ("XGBoost", XGBClassifier(learning_rate=0.1, max_depth=6, n_estimators=200))
}

In [9]:
for name, model in my_models:
    print("Training", name)
    model.fit(X_train_pca, y_train)
    print("Training Accuracy:", model.score(X_train_pca, y_train))
    print("Validation Accuracy:", model.score(X_val_pca, y_val))
    print("Testing Accuracy:", model.score(X_test_pca, y_test))
    print()

Training XGBoost
Training Accuracy: 1.0
Validation Accuracy: 0.9996988632086126
Testing Accuracy: 0.9996988405360638

Training Random Forest
Training Accuracy: 1.0
Validation Accuracy: 0.9997741474064594
Testing Accuracy: 0.9997741304020479



# Models

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models

num_states = len(y.unique())

print(y.unique())

tf_model = models.Sequential([
    # layers.Input(shape=(X.shape[1],)),  # Input layer specifying the input shape
    layers.Input(shape=(X_train_pca.shape[1],)),  # Input layer specifying the input shape
    tf.keras.layers.Dense(64, activation= 'relu'), 
    tf.keras.layers.Dense(128, activation= 'relu'), 
    tf.keras.layers.Dense(num_states, activation= 'softmax')
])
                                
# Compile the model
tf_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

[ 7  9  1  5  4  2  8  6 10  3  0]


In [11]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Train the model with early stopping
history = tf_model.fit(
    # X_train, y_train,
    X_train_pca, y_train,
    epochs=10,
    # validation_data=(X_val, y_val),
    validation_data=(X_val_pca, y_val),
    callbacks=[early_stopping]
)

Epoch 1/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8805 - loss: 0.4803 - val_accuracy: 0.9940 - val_loss: 0.0251
Epoch 2/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9941 - loss: 0.0229 - val_accuracy: 0.9954 - val_loss: 0.0150
Epoch 3/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9954 - loss: 0.0156 - val_accuracy: 0.9966 - val_loss: 0.0101
Epoch 4/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9965 - loss: 0.0107 - val_accuracy: 0.9978 - val_loss: 0.0067
Epoch 5/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9972 - loss: 0.0083 - val_accuracy: 0.9979 - val_loss: 0.0062
Epoch 6/10
[1m1937/1937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9975 - loss: 0.0077 - val_accuracy: 0.9988 - val_loss: 0.0038
Epoch 7/10
[1m1

In [12]:
# draw confusion matrix
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import time 
start = time.time()
y_pred = tf_model(X_test_pca)
end = time.time()
print("Time taken for prediction:", end-start)
y_pred = np.argmax(y_pred, axis=1)

cm = confusion_matrix(y_test, y_pred)
print(cm)

print("\nAccuracy:", np.trace(cm) / np.sum(cm))


Time taken for prediction: 0.029997587203979492
[[1291    3    0    0    0    0    1    0    0    0    0]
 [   4 1045    0    0    0    0    0    0    0    0    0]
 [   0    0 1449    2    0    0    0    0    0    0    0]
 [   0    0    8 1362    0    2    0    0    0    0    0]
 [   0    0    0    0 1461    0    0    0    0    0    0]
 [   0    0    0    0    0 1210    0    0    0    0    0]
 [   0    0    0    0    0    0  962    0    0    0    0]
 [   0    0    0    0    0    0    0 1327    0    0    0]
 [   0    0    0    0    0    0    0    0  827    0    0]
 [   0    0    0    0    0    0    0    0    0 1047    0]
 [   0    0    0    0    0    0    0    0    0    0 1281]]

Accuracy: 0.9984942026803192


In [13]:
from sklearn.metrics import confusion_matrix
import time 

for name, model in my_models:
    print("Model:", name)
    start = time.time()
    y_pred = model.predict(X_test_pca)
    end = time.time()
    print("Time taken for prediction:", end-start)
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    print()
    print("Testing Accuracy:", model.score(X_test_pca, y_test))
    print()
    


Model: XGBoost
Time taken for prediction: 0.2619955539703369
[[1295    0    0    0    0    0    0    0    0    0    0]
 [   0 1049    0    0    0    0    0    0    0    0    0]
 [   0    0 1449    1    0    0    0    0    1    0    0]
 [   0    0    2 1370    0    0    0    0    0    0    0]
 [   0    0    0    0 1461    0    0    0    0    0    0]
 [   0    0    0    0    0 1210    0    0    0    0    0]
 [   0    0    0    0    0    0  962    0    0    0    0]
 [   0    0    0    0    0    0    0 1327    0    0    0]
 [   0    0    0    0    0    0    0    0  827    0    0]
 [   0    0    0    0    0    0    0    0    0 1047    0]
 [   0    0    0    0    0    0    0    0    0    0 1281]]

Testing Accuracy: 0.9996988405360638

Model: Random Forest
Time taken for prediction: 0.6000027656555176
[[1295    0    0    0    0    0    0    0    0    0    0]
 [   0 1049    0    0    0    0    0    0    0    0    0]
 [   0    0 1451    0    0    0    0    0    0    0    0]
 [   0    0    3 136

In [13]:
for name, model in my_models:
    # if name == "Random Forest":
    if name == "XGBoost":
        import pickle
        with open('../models/xgboost.pkl', 'wb') as f:
            pickle.dump(model, f)

In [24]:
# Save the model
tf_model.save('../model/tfv4.keras')