In [1]:
import pandas as pd

In [2]:
# List of CSV file paths
file_paths = [
    'Collected Dataset/fist.csv',
    'Collected Dataset/hand_open.csv',
    'Collected Dataset/index_middle_thumb.csv',
    'Collected Dataset/index_middle.csv',
    'Collected Dataset/index_thumb.csv',
    'Collected Dataset/index.csv',
    'Collected Dataset/pinch.csv',
    'Collected Dataset/peace.csv',
    'Collected Dataset/thumbs_pinky.csv',
    'Collected Dataset/thumbs_down.csv',
    'Collected Dataset/thumbs_up.csv'
]


In [3]:
csv_data = [pd.read_csv(file) for file in file_paths]

# Concatenate all DataFrames into a single DataFrame
data = pd.concat(csv_data, ignore_index=True)

# Shuffle the DataFrame
data = data.sample(frac=1).reset_index(drop=True)

In [4]:


string_to_numeric = {'index': 0, 'index_middle': 1, 'index_thumb': 2, 'index_middle_thumb' : 3, 'peace': 4, 'hand_open' : 5, 'fist': 6, 'pinch' : 7, 'thumbs_up' : 8, 'thumbs_down': 9 , 'thumbs_pinky' : 10}

# string_to_numeric = {'index': 0, 'index_middle': 1, 'index_thumb': 2, 'index_middle_thumb' : 3, 'fist': 4, 'hand_open' : 5}

X = data.iloc[:, :-1]
y = data.iloc[:, -1]

y.replace(string_to_numeric, inplace=True)

X.sample(1)

# remove column names
X = X.values

X.shape

  y.replace(string_to_numeric, inplace=True)


(88549, 63)

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=0.3, random_state=12)

X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=12)

print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)
print("Validation data shape:", X_val.shape)

Training data shape: (61984, 63)
Testing data shape: (13282, 63)
Validation data shape: (13283, 63)


In [96]:
# apply pca

from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

pca = PCA(n_components=0.99)

X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
X_val_pca = pca.transform(X_val)

print("Training data shape after PCA:", X_train_pca.shape)

Training data shape after PCA: (61984, 20)


In [115]:
# save pca model
import pickle

with open('../utils/pca.pkl', 'wb') as f:
    pickle.dump(pca, f)

In [97]:
# from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

# Hyperparameter tuned models

my_models = {
#     ("KNN", KNeighborsClassifier(algorithm = 'auto', n_neighbors = 3)),
    ("Decision Tree", DecisionTreeClassifier(criterion = 'entropy', max_depth = 30, min_samples_split = 2, splitter = 'best')),
    # ("Random Forest", RandomForestClassifier(criterion = 'entropy', max_depth = 20, n_estimators = 50)),
    # ("Logistic Regression", LogisticRegression(C = 10, solver = 'newton-cg')),
    # ("XGBoost", XGBClassifier(learning_rate=0.2, max_depth=6, n_estimators= 50))
}

In [98]:
for name, model in my_models:
    print("Training", name)
    model.fit(X_train_pca, y_train)
    print("Training Accuracy:", model.score(X_train_pca, y_train))
    print("Validation Accuracy:", model.score(X_val_pca, y_val))
    print("Testing Accuracy:", model.score(X_test_pca, y_test))
    print()

Training Decision Tree
Training Accuracy: 1.0
Validation Accuracy: 0.9980426108559813
Testing Accuracy: 0.9990212317422075



# Models

In [65]:
import tensorflow as tf
from tensorflow.keras import layers, models

num_states = len(y.unique())

print(y.unique())

tf_model = models.Sequential([
    layers.Input(shape=(X_train_pca.shape[1],)), 
    tf.keras.layers.Dense(24, activation= 'relu'), 
    # tf.keras.layers.Dense(64, activation= 'relu'), 
    # tf.keras.layers.Dense(128, activation= 'relu'), 
    tf.keras.layers.Dense(num_states, activation= 'softmax')
])
                                
# Compile the model
tf_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

[ 8  1  4  9  7  3  0  2 10  5  6]


In [None]:
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Train the model with early stopping
history = tf_model.fit(
    # X_train, y_train,
    X_train_pca, y_train,
    epochs=10,
    # validation_data=(X_val, y_val),
    validation_data=(X_val_pca, y_val),
    callbacks=[early_stopping]
)

In [None]:
# draw confusion matrix
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

import time 
start = time.time()
y_pred = tf_model(X_test_pca)
end = time.time()
print("Time taken for prediction:", end-start)
y_pred = np.argmax(y_pred, axis=1)

cm = confusion_matrix(y_test, y_pred)
print(cm)

print("\nAccuracy:", np.trace(cm) / np.sum(cm))

In [112]:
from sklearn.metrics import confusion_matrix
import time 

for name, model in my_models:
    print("Model:", name)
    start = time.time()
    y_pred = model.predict(X_test_pca)
    end = time.time()
    print("Time taken for prediction:", end-start)
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    print()
    print("Testing Accuracy:", model.score(X_test_pca, y_test))
    print()
    


Model: Decision Tree
Time taken for prediction: 0.0065805912017822266
[[1319    2    0    1    2    0    1    0    0    0    0]
 [   1 1107    0    0    0    0    1    0    0    0    0]
 [   0    0 1456    0    0    0    0    0    0    0    0]
 [   2    0    0 1365    0    0    0    0    0    0    0]
 [   0    2    0    0 1452    0    0    0    0    0    0]
 [   0    0    0    0    0 1247    0    0    0    0    0]
 [   0    0    0    0    0    0  974    0    0    0    0]
 [   0    0    0    0    0    0    0 1214    0    0    0]
 [   0    0    0    0    0    1    0    0  848    0    0]
 [   0    0    0    0    0    0    0    0    0 1042    0]
 [   0    0    0    0    0    0    0    0    0    0 1245]]

Testing Accuracy: 0.9990212317422075



In [114]:
for name, model in my_models:
    # if name == "Random Forest":
    if name == "Decision Tree":
        import pickle
        with open('../models/d_tree.pkl', 'wb') as f:
            pickle.dump(model, f)

In [24]:
# Save the model
tf_model.save('../model/tfv4.keras')