In [1]:
import os
import cv2
import math
import mediapipe as mp
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pickle
import random
from tensorflow import keras
from tensorflow.keras.optimizers import RMSprop
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
# Short aliases for the MediaPipe solution modules.
mp_drawing = mp.solutions.drawing_utils  # MediaPipe drawing utilities module
mp_drawing_styles = mp.solutions.drawing_styles  # MediaPipe drawing styles module
mp_hands = mp.solutions.hands  # hand-landmark solution; getData builds its detector from this


In [2]:
# Root folders of the two image datasets. getImgPathAndLabels treats every
# sub-directory found under a root as one chord class.
DATASET_PATH = "chords/"
TEST_DATASET_PATH = "chords_test_val/"

# Seed every random number generator the notebook draws from so that
# "Restart Kernel -> Run All" reproduces the same class balancing
# (random.sample), shuffling / augmentation noise (np.random) and weight
# initialisation (TensorFlow).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [3]:
def getImgPathAndLabels(dataset_path):
   """Collect the image paths of a dataset, grouped by chord directory.

   Every directory found (recursively) under ``dataset_path`` is treated as one
   chord class, and every file found (recursively) inside it as one of its images.

   Both listings are sorted. ``os.walk`` yields entries in an arbitrary,
   filesystem-dependent order, and callers use the position of a directory in
   the returned list as its integer class id, so without sorting two datasets
   with the same folder names could end up with different class ids.

   Args:
      dataset_path: root directory of the dataset.

   Returns:
      (image_paths, chordTypes): ``chordTypes`` is the sorted list of class
      directory paths (forward slashes only); ``image_paths[i]`` is the sorted
      list of file paths found under ``chordTypes[i]``. Both are empty when
      ``dataset_path`` does not exist or has no sub-directories.
   """
   def listDirsInDir(directory):
      # All sub-directories below `directory`, normalised to forward slashes.
      directories = []
      for root, dirs, _ in os.walk(directory):
         for dir_name in dirs:
            directories.append(os.path.join(root, dir_name).replace("\\", "/"))
      return sorted(directories)

   def listFilesInDirectory(directory):
      # All files below `directory`, normalised to forward slashes.
      files = []
      for root, _, filenames in os.walk(directory):
         for filename in filenames:
            files.append(os.path.join(root, filename).replace("\\", "/"))
      return sorted(files)

   chordTypes = listDirsInDir(dataset_path)
   image_paths = [listFilesInDirectory(chordType) for chordType in chordTypes]
   return image_paths,chordTypes

In [4]:
def getData(img_paths,img_labels):
   """Extract MediaPipe hand landmarks for every image and pair them with class ids.

   Images that cannot be read, or in which no hand is detected, are skipped;
   a one-line summary of the skipped counts is printed when any were skipped.

   Args:
      img_paths: list of lists of image file paths. The position of each inner
         list is used as the integer class id of all its images.
      img_labels: not used by this function; kept so existing callers keep working.

   Returns:
      (x_dataset, y_dataset): ``x_dataset`` is a float32 array of shape
      (N, 21, 3) holding the x, y, z of each landmark of the detected hand;
      ``y_dataset`` is a float32 array of shape (N,) holding the class ids.
   """
   x_dataset = []
   y_dataset = []
   unreadable = 0
   no_hand = 0
   with mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.1) as hands:
      for label_id, specificChord in enumerate(img_paths):
         for file in specificChord:
            image = cv2.imread(file)
            if image is None:
               # cv2.imread returns None (it does not raise) for missing or
               # non-image files; cvtColor would crash on it.
               unreadable += 1
               continue
            # MediaPipe expects RGB, OpenCV loads BGR.
            results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if not results.multi_hand_landmarks:
               # No hand detected in this image.
               no_hand += 1
               continue
            # max_num_hands=1, so there is exactly one detected hand.
            hand_landmarks = results.multi_hand_landmarks[0]
            keypoints = [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
            x_dataset.append(keypoints)
            y_dataset.append(label_id)
   if unreadable or no_hand:
      print(f"getData: skipped {unreadable} unreadable image(s) and {no_hand} image(s) with no detected hand")
   # reshape keeps the (N, 21, 3) layout even when no sample was collected.
   x_dataset = np.array(x_dataset, dtype=np.float32).reshape(-1, 21, 3)
   y_dataset = np.array(y_dataset, dtype=np.float32)
   return x_dataset, y_dataset

In [5]:
def randAndSplit(x_dataset, y_dataset, split_ratio): ### pass the amount of training for split_ratio
   """Shuffle two parallel arrays together and cut them into train / test parts.

   Args:
      x_dataset: array of samples (must support NumPy integer-array indexing).
      y_dataset: array of labels, same length as ``x_dataset``.
      split_ratio: fraction of the samples that goes to the training part.

   Returns:
      (x_train, x_test, y_train, y_test)
   """
   assert len(x_dataset) == len(y_dataset), "Lengths of x_dataset and y_dataset must match"

   n_samples = len(x_dataset)

   # One random permutation shared by both arrays keeps samples and labels aligned.
   order = np.arange(n_samples)
   np.random.shuffle(order)

   # The first `cut` shuffled positions form the training part, the rest the test part.
   cut = int(n_samples * split_ratio)
   train_idx, test_idx = order[:cut], order[cut:]

   return x_dataset[train_idx], x_dataset[test_idx], y_dataset[train_idx], y_dataset[test_idx]

def stratifiedSplit(x_dataset, y_dataset, test_size): 
    """Split samples and labels with ``train_test_split``, stratified on the labels.

    Args:
        x_dataset: samples.
        y_dataset: labels; also passed as the ``stratify`` argument.
        test_size: forwarded unchanged to ``train_test_split``.

    Returns:
        (x_train, x_test, y_train, y_test)
    """
    parts = train_test_split(
        x_dataset,
        y_dataset,
        test_size=test_size,
        stratify=y_dataset,
    )
    return tuple(parts)

def equalize_data(x_dataset, y_dataset):
    """Down-sample every class to the size of the smallest class.

    For each distinct label (in ``np.unique`` order) a random subset of its
    samples is drawn with ``random.sample``; the subset size is the count of
    the least frequent label.

    Args:
        x_dataset: samples, indexable by integer position.
        y_dataset: labels, same length as ``x_dataset``.

    Returns:
        (equalized_x, equalized_y) as NumPy arrays, grouped class by class.
    """
    classes, counts = np.unique(y_dataset, return_counts=True)
    per_class = min(counts)
    labels_arr = np.asarray(y_dataset)

    picked = []
    for cls in classes:
        # Positions of every sample carrying this label, in dataset order.
        candidates = np.flatnonzero(labels_arr == cls).tolist()
        picked += random.sample(candidates, per_class)

    balanced_x = np.array([x_dataset[i] for i in picked])
    balanced_y = np.array([y_dataset[i] for i in picked])
    return balanced_x, balanced_y

In [6]:
def combine_arrays(pre_nudge_array, post_nudge_array):
    """Return the two arrays joined along their first axis, first argument first."""
    return np.concatenate([pre_nudge_array, post_nudge_array], axis=0)

def nudge_array(arr, min_nudge, max_nudge):
    """Return a copy of ``arr`` with independent uniform noise added to every element.

    Args:
        arr: NumPy array to perturb (left unmodified).
        min_nudge: lower bound of the uniform noise.
        max_nudge: upper bound of the uniform noise.

    Returns:
        New array of the same shape as ``arr``.
    """
    # One noise value per element, drawn from the global NumPy RNG.
    return arr + np.random.uniform(min_nudge, max_nudge, arr.shape)

In [7]:
class highAccuracyCallback(tf.keras.callbacks.Callback): # just in case
   """Keras callback that stops training once validation accuracy exceeds a threshold.

   Args:
      threshold: validation accuracy (as a fraction, e.g. 0.90) above which
         training is stopped. Defaults to 0.90.
   """
   def __init__(self, threshold=0.90):
      super().__init__()
      self.threshold = threshold

   def on_epoch_end(self, epoch, logs=None):
      """Check ``val_accuracy`` in ``logs`` after each epoch and stop training if it is high enough."""
      # `logs` may be None, and 'val_accuracy' is absent when no validation data is used.
      val_accuracy = (logs or {}).get('val_accuracy')
      if val_accuracy is not None and val_accuracy > self.threshold:
         print(f"\nModel reached more than {self.threshold * 100:.1f}% validation accuracy. Stopping training")
         self.model.stop_training = True

In [8]:
def create_model(num_classes=7):
  """Build and compile the dense classifier that maps hand landmarks to a chord class.

  Args:
    num_classes: number of output classes (size of the softmax layer).
      Defaults to 7.

  Returns:
    A compiled ``tf.keras`` Sequential model taking input of shape (21, 3)
    and trained with sparse categorical cross-entropy, i.e. integer class ids.
  """
  # NOTE(review): the three consecutive 'linear' Dense layers have no
  # non-linearity between them, so together they are equivalent to a single
  # linear map. The architecture is left unchanged so that weights saved from
  # earlier runs still load.
  model = tf.keras.models.Sequential([
      tf.keras.layers.Flatten(input_shape=(21,3)),
      tf.keras.layers.Dense(512, activation= 'linear'),
      tf.keras.layers.Dense(512, activation= 'linear'),
      tf.keras.layers.Dense(256, activation= 'linear'),
      tf.keras.layers.Dense(256, activation= 'tanh'),
      tf.keras.layers.Dense(128, activation= 'selu'),
      tf.keras.layers.Dense(128, activation= 'selu'),
      tf.keras.layers.Dense(64, activation= 'selu'),
      tf.keras.layers.Dense(num_classes, activation='softmax'),
  ])
  # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
  # releases reject.
  model.compile(loss='sparse_categorical_crossentropy', optimizer=RMSprop(learning_rate=1e-5), metrics=['accuracy'])
  return model

In [9]:
def testModel(loaded_model, x_test, y_test, loaded_labels):
    """Run the model on the test samples and return the confusion matrix.

    Args:
        loaded_model: model exposing ``predict``; its output is expected to have
            one row of class scores per sample.
        x_test: test samples.
        y_test: true class ids of the test samples.
        loaded_labels: list of class labels; only its length is used, to fix
            the matrix to classes ``0 .. len(loaded_labels) - 1``.

    Returns:
        Confusion matrix with rows = actual class, columns = predicted class.
    """
    if len(x_test) == 0:
        preds = np.array([])
    else:
        # One batched predict() call instead of one call per sample: same
        # predicted classes, far less per-call overhead.
        preds = np.argmax(loaded_model.predict(x_test), axis=1)
    conf_matrix = confusion_matrix(y_test, preds, labels=range(len(loaded_labels)))
    return conf_matrix

def confusionMatrixPercentages(conf_matrix, loaded_labels):
    """Print the confusion matrix with each row expressed as percentages of that row.

    Rows are the actual classes, so each cell is the share of that class's
    samples predicted as the column class. A class with no samples (row sum 0)
    is printed as 0.0% across the row instead of producing NaN.

    Args:
        conf_matrix: square confusion matrix (rows = actual, columns = predicted).
        loaded_labels: label shown for each row / column, in matrix order.
    """
    conf_matrix = np.asarray(conf_matrix, dtype=np.float64)
    row_totals = conf_matrix.sum(axis=1, keepdims=True)  # samples per actual class
    # Divide only where the row has samples; other cells keep the 0 from `out`.
    fractions = np.divide(
        conf_matrix, row_totals, out=np.zeros_like(conf_matrix), where=row_totals != 0
    )
    conf_matrix_percent = np.round(fractions * 100, 2)

    # Displaying confusion matrix in percentages
    print("Confusion Matrix in Percentages:")
    print("Actual/Predicted\t", end="")
    for label in loaded_labels:
        print(label, "\t", end="")
    print()

    for i, row in enumerate(conf_matrix_percent):
        print(f"{loaded_labels[i]}\t\t", end="")
        for col in row:
            print(f"{col}%\t", "       ", end="")
        print()

def calculateAccuracy(conf_matrix):
    """Print the overall accuracy derived from a confusion matrix and return it.

    Args:
        conf_matrix: square confusion matrix (rows = actual, columns = predicted).

    Returns:
        Accuracy as a fraction in [0, 1]: correct predictions (the diagonal)
        divided by all samples; 0.0 when the matrix contains no samples.
    """
    correct_predictions = np.trace(conf_matrix)
    total_samples = np.sum(conf_matrix)
    # Guard the empty case so it reports 0 instead of NaN.
    accuracy = correct_predictions / total_samples if total_samples else 0.0
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    return accuracy

def calculateRecall(conf_matrix):
    """Print and return the macro-averaged recall of a confusion matrix.

    Per-class recall is the diagonal entry divided by the row sum (samples that
    actually belong to the class). A class with no samples gets recall 0
    instead of NaN, so it cannot turn the average into NaN.

    Args:
        conf_matrix: square confusion matrix (rows = actual, columns = predicted).

    Returns:
        Unweighted mean of the per-class recalls, as a fraction in [0, 1].
    """
    conf_matrix = np.asarray(conf_matrix, dtype=np.float64)
    support = conf_matrix.sum(axis=1)  # samples per actual class
    recall = np.divide(
        np.diag(conf_matrix), support, out=np.zeros_like(support), where=support != 0
    )
    average_recall = np.mean(recall) if recall.size else 0.0
    print(f"Average Recall: {average_recall * 100:.2f}%")
    return average_recall

def calculatePrecision(conf_matrix):
    """Print and return the macro-averaged precision of a confusion matrix.

    Per-class precision is the diagonal entry divided by the column sum
    (samples predicted as the class). A class that was never predicted gets
    precision 0 instead of NaN, so it cannot turn the average into NaN.

    Args:
        conf_matrix: square confusion matrix (rows = actual, columns = predicted).

    Returns:
        Unweighted mean of the per-class precisions, as a fraction in [0, 1].
    """
    conf_matrix = np.asarray(conf_matrix, dtype=np.float64)
    predicted_counts = conf_matrix.sum(axis=0)  # samples per predicted class
    precision = np.divide(
        np.diag(conf_matrix),
        predicted_counts,
        out=np.zeros_like(predicted_counts),
        where=predicted_counts != 0,
    )
    average_precision = np.mean(precision) if precision.size else 0.0
    print(f"Average Precision: {average_precision * 100:.2f}%")
    return average_precision

def calculatePrecisionRecallAndF1Score(conf_matrix):
    """Print average precision, average recall and the F1 score built from them.

    The F1 value is the harmonic mean of the two averages returned by
    ``calculatePrecision`` and ``calculateRecall`` (which also print their own
    lines); it is not the mean of per-class F1 scores.

    Args:
        conf_matrix: square confusion matrix (rows = actual, columns = predicted).

    Returns:
        The F1 score as a fraction; 0.0 when precision and recall are both 0.
    """
    precision = calculatePrecision(conf_matrix)
    recall = calculateRecall(conf_matrix)
    denominator = precision + recall
    # Both averages being 0 would otherwise give 0/0.
    average_f1_score = 2 * (precision * recall) / denominator if denominator != 0 else 0.0
    print(f"Average F1 Score: {average_f1_score * 100:.2f}%")
    return average_f1_score

In [10]:
def printKptsInConsole(keypoints):
   """Print every entry of ``keypoints`` on its own line as ``kpt <index> <value>``."""
   for idx in range(len(keypoints)):
      print("kpt", idx, keypoints[idx])

def showInPyplot(keypoints):
   """Draw one set of hand keypoints as a 3D skeleton with matplotlib.

   Args:
      keypoints: sequence of points, each indexable as (x, y, z). The
         connection table below addresses indices 0..20.
   """
   # Split the points into one coordinate array per axis.
   xs = np.array([kpt[0] for kpt in keypoints])
   ys = np.array([kpt[1] for kpt in keypoints])
   zs = np.array([kpt[2] for kpt in keypoints])

   ax = plt.axes(projection='3d')
   ax.grid()
   ax.set_title('3D Scatter Plot')

   # Keypoints as red dots.
   plt.plot(xs, ys, zs, 'ro')

   # Pairs of keypoint indices joined by a black line segment, in drawing order.
   bones = (
      (0, 1), (1, 2), (2, 3), (3, 4),
      (0, 5), (5, 6), (6, 7), (7, 8),
      (5, 9), (9, 10), (10, 11), (11, 12),
      (9, 13), (13, 14), (14, 15), (15, 16),
      (13, 17), (17, 18), (18, 19), (19, 20),
      (0, 17),
   )
   for start, end in bones:
      plt.plot([xs[start], xs[end]], [ys[start], ys[end]], [zs[start], zs[end]], 'k-')

   # Axis labels
   ax.set_xlabel('x', labelpad=20)
   ax.set_ylabel('y', labelpad=20)
   ax.set_zlabel('z', labelpad=20)
   # For equal scaling on x, y and z, call plt.axis('equal') before plt.show().
   plt.show()

def plotTrainingHistory(history):
   """Plot training vs. validation accuracy and loss per epoch.

   Args:
      history: object whose ``history`` attribute is a dict containing the
         per-epoch lists 'accuracy', 'val_accuracy', 'loss' and 'val_loss'
         (as returned by ``model.fit`` when validation data is supplied).
   """
   acc = history.history['accuracy']
   val_acc = history.history['val_accuracy']
   loss = history.history['loss']
   val_loss = history.history['val_loss']

   epochs = range(len(acc)) # one x position per recorded epoch

   #------------------------------------------------
   # Training and validation accuracy per epoch
   #------------------------------------------------
   # The series name must be passed as `label=`; as a fourth positional
   # argument matplotlib would treat the string as extra data to plot.
   plt.plot(epochs, acc, 'r', label="Training Accuracy")
   plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
   plt.title('Training and validation accuracy')
   plt.xlabel('Epoch')
   plt.ylabel('Accuracy')
   plt.legend()
   plt.show()
   print("")

   #------------------------------------------------
   # Training and validation loss per epoch
   #------------------------------------------------
   plt.plot(epochs, loss, 'r', label="Training Loss")
   plt.plot(epochs, val_loss, 'b', label="Validation Loss")
   plt.title('Training and validation loss')
   plt.xlabel('Epoch')
   plt.ylabel('Loss')
   plt.legend()
   plt.show()

In [None]:
# Collect image paths per chord directory for the training set and for the
# held-out set (the latter is later split into test and validation parts).
print("Getting images...")
paths,labels = getImgPathAndLabels(DATASET_PATH)
paths_test,labels_test = getImgPathAndLabels(TEST_DATASET_PATH)
# Run hand-landmark extraction on every image. Class ids are the positions of
# the chord directories in `paths` / `paths_test`.
# NOTE(review): the ids of the two datasets only line up if both listings
# enumerate the chord folders in the same order — confirm `labels` and
# `labels_test` agree.
print("Getting KPTS...")
x_dataset,y_dataset = getData(paths,labels)
x_test, y_test = getData(paths_test,labels_test)

In [None]:
print("Generating dataset...")
# Balance both datasets so every class contributes the same number of samples.
x_train, y_train = equalize_data(x_dataset, y_dataset)
x_test, y_test = equalize_data(x_test, y_test)

# Sanity check: container types, then shapes, before the split.
print(*map(type, (x_train, y_train, x_test, y_test)), sep="\n")
print(*(a.shape for a in (x_train, y_train, x_test, y_test)), sep="\n")

# Halve the held-out data into a test part and a validation part.
# NOTE: x_test / y_test are overwritten here, so re-running this cell without
# re-running the loading cell shrinks them again.
x_test, x_val, y_test, y_val = stratifiedSplit(x_test, y_test, test_size=0.5)

# Same check after the split, now including the validation arrays.
print(*map(type, (x_train, y_train, x_test, y_test, x_val, y_val)), sep="\n")

print(*(a.shape for a in (x_train, y_train, x_test, y_test, x_val, y_val)), sep="\n")

In [None]:
# Inspect the keypoints of a single sample. Passing the whole of x_train would
# print one (21, 3) array per "kpt" line for every training sample.
printKptsInConsole(x_train[0])

In [None]:
print("Augmenting dataset...")
# Double the training set: the original samples followed by a copy with uniform
# noise in [-0.05, 0.05] added to every coordinate. Labels are repeated to match.
# NOTE: this cell rebinds x_train / y_train from themselves, so every re-run
# doubles the training set again; re-run the dataset generation cell first.
x_train = combine_arrays(x_train,nudge_array(x_train, min_nudge=-0.05, max_nudge=0.05))
y_train = combine_arrays(y_train,y_train)

# Shapes after augmentation (only the training arrays change).
print(*(a.shape for a in (x_train, y_train, x_test, y_test, x_val, y_val)), sep="\n")

In [None]:
print("Training model...")
callbacks = highAccuracyCallback()
model = create_model()
# The callback was previously created but never handed to fit(), so it had no
# effect; pass it so training can stop early once validation accuracy is high
# enough.
history = model.fit(x=x_train, y=y_train, epochs=300, validation_data=(x_val,y_val), callbacks=[callbacks])
plotTrainingHistory(history)

In [16]:
# Persist the trained model and the class-directory list whose positions are
# the class ids.
model.save("deltaNET.h5")
# `with` closes the file handle even if pickling fails.
with open("labels.dat", "wb") as labels_file:
    pickle.dump(labels, labels_file)

In [17]:
# Reload the artefacts written by the save cell to check that they round-trip.
loaded_model = keras.models.load_model("deltaNET.h5")
# SECURITY: pickle.load can execute arbitrary code; only load a labels.dat
# file produced by this notebook.
with open("labels.dat", "rb") as labels_file:
    loaded_labels = pickle.load(labels_file)

In [None]:
# Confusion matrix of the reloaded model on the test split
# (rows = actual class, columns = predicted class).
conf_matrix = testModel(loaded_model,x_test,y_test,loaded_labels)

In [None]:
# Report the per-class confusion percentages, then overall accuracy, then
# average precision, average recall and the F1 score derived from them.
confusionMatrixPercentages(conf_matrix, loaded_labels)
calculateAccuracy(conf_matrix)
calculatePrecisionRecallAndF1Score(conf_matrix)