# Notebook Purpose 


This notebook builds the hand-gesture classification model, trains it, and evaluates it on a live webcam feed.

# Importing Relevant Libraries

In [10]:
import os

import numpy as np
import tensorflow as tf

import random

# Setting up basic utilities

In [2]:
# Accumulators filled by the loading cell: one landmark vector per sample in
# `data`, and the matching integer class id in `labels`.
data, labels = [], []

In [4]:
# Root folder that holds the saved landmark arrays (one `class_<id>` sub-folder
# per gesture). It is created if missing; an existing folder is left untouched.
save_dir = 'hand_landmarks'
os.makedirs(name=save_dir, exist_ok=True)

# Light data preprocessing

## Loading the dataset collected in the previous notebook

In [5]:
# Load every saved landmark array, one sub-directory per gesture class.
# The accumulators are reset first so that re-running this cell does not
# append the whole dataset a second time.
data = []
labels = []

for label in range(3):
    class_dir = os.path.join(save_dir, 'class_{}'.format(label))
    print(class_dir)
    # os.listdir returns entries in arbitrary order; sorting makes the load
    # order (and therefore any seeded shuffle downstream) deterministic.
    for file_name in sorted(os.listdir(class_dir)):
        file_path = os.path.join(class_dir, file_name)
        # Skip sub-directories (e.g. checkpoint folders) that np.load cannot read.
        if not os.path.isfile(file_path):
            continue
        data.append(np.load(file_path))
        labels.append(label)

hand_landmarks\class_0
hand_landmarks\class_1
hand_landmarks\class_2


In [8]:
# Sanity check: how many samples were loaded in total
sample_count = len(data)
print(sample_count)

6000


# Testing the model with live feed by accessing the webcam

In [None]:
# Live webcam demo: detect a hand with MediaPipe, classify its landmarks with
# the trained network and overlay both the landmarks and the predicted gesture.
#
# NOTE: this cell relies on `model` already being in scope (it is built and
# trained in the "Model Architecture building and training" section).
import cv2
import mediapipe as mp

# Display names indexed by predicted class id. Kept under a dedicated name so
# the numeric training `labels` are not overwritten by running this cell.
gesture_names = ['Rock', 'Paper', 'Scissors']

# Open the webcam here so the cell does not depend on a capture object left
# over from elsewhere (index 0 is OpenCV's default camera).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError('Could not open the webcam (device index 0).')

# Initialize the MediaPipe Hand module
mp_hands_2 = mp.solutions.hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

try:
    while True:
        # Read frames from the webcam; stop when no frame could be grabbed
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input; marking the array read-only while it is
        # processed follows the MediaPipe examples (avoids an extra copy).
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False
        results = mp_hands_2.process(image)

        # Back to a writable BGR image so OpenCV can draw on it
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Check if hand landmarks are detected
        if results.multi_hand_landmarks:
            image_height, image_width, _ = image.shape

            for hand_landmarks in results.multi_hand_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    image, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS)

                # Flatten the landmarks into [x_px, y_px, z, x_px, y_px, z, ...]
                landmark_list = []
                for landmark in hand_landmarks.landmark:
                    landmark_list.extend([landmark.x * image_width,
                                          landmark.y * image_height,
                                          landmark.z])

                # Add the batch dimension expected by model.predict
                landmark_array = np.expand_dims(np.array(landmark_list), axis=0)

                # verbose=0 keeps Keras from printing a progress bar per frame
                prediction = model.predict(landmark_array, verbose=0)
                predicted_class = np.argmax(prediction)
                gesture_label = gesture_names[predicted_class]

                # Draw the label on the same image that carries the landmarks
                cv2.putText(image, gesture_label, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the annotated image (landmarks + predicted label)
        cv2.imshow('Hand Gesture Detection', image)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the webcam and close all windows, even if the loop raised
    cap.release()
    mp_hands_2.close()
    cv2.destroyAllWindows()


# Organizing the dataset for training 

In [11]:
# Shuffle samples and labels together. The data was loaded class by class and
# the test set is taken from the tail, so an unshuffled split would be biased.
# zip() silently truncates to the shorter input, so a length mismatch
# (e.g. `labels` rebound by another cell) is rejected explicitly.
if len(data) == 0 or len(data) != len(labels):
    raise ValueError(
        'data and labels must be non-empty and equally long, got {} samples and {} labels'
        .format(len(data), len(labels)))

dataset = list(zip(data, labels))
# A dedicated, seeded generator makes the shuffle (and the resulting
# train/test split) reproducible without touching the global random state.
random.Random(42).shuffle(dataset)
data_, labels_ = zip(*dataset)


In [12]:
# Peek at the landmark vector of the first shuffled sample
first_features, _first_label = dataset[0]
first_features


array([ 4.27902260e+02,  2.99752808e+02, -4.39823538e-07,  4.01977921e+02,
        2.46460333e+02, -1.21167889e-02,  3.81016426e+02,  1.97910333e+02,
       -3.44027840e-02,  3.55619659e+02,  1.67270265e+02, -5.61827831e-02,
        3.50769043e+02,  1.58199692e+02, -7.25529045e-02,  4.49759483e+02,
        1.58735876e+02, -3.88408229e-02,  3.76228218e+02,  1.44802752e+02,
       -7.24378750e-02,  3.69927979e+02,  1.72313075e+02, -8.39188099e-02,
        3.87392654e+02,  1.82521219e+02, -8.94800723e-02,  4.53388138e+02,
        1.84885211e+02, -5.17292842e-02,  3.62608986e+02,  1.73327923e+02,
       -8.54131877e-02,  3.65556984e+02,  1.99183702e+02, -8.29801708e-02,
        3.85987015e+02,  2.04944401e+02, -7.86534101e-02,  4.46063805e+02,
        2.16021638e+02, -6.65851533e-02,  3.60319557e+02,  2.11438165e+02,
       -9.46287364e-02,  3.67483978e+02,  2.32011738e+02, -7.88755789e-02,
        3.88800621e+02,  2.33573785e+02, -6.60151243e-02,  4.32827950e+02,
        2.49051905e+02, -

In [13]:
# Turn the shuffled tuples into NumPy arrays; note that this rebinds
# `data` and `labels` from Python lists to arrays.
data, labels = (np.array(column) for column in (data_, labels_))


# Partitioning into training and test data

In [14]:
# Hold out the last 200 (already shuffled) samples for testing;
# everything before them is used for training.
holdout = 200
train_landmarks, test_landmarks = data[:-holdout], data[-holdout:]
train_labels, test_labels = labels[:-holdout], labels[-holdout:]


In [1]:
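# Attempt to use an LSTM (disabled experiment, kept for reference).
# NOTE: the reshape below cannot work as written: each sample holds 63 values, not 10 * 63.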


# timesteps = 10  # You can try different values here

# # Reshape the input data to add the timesteps dimension
# num_samples = train_landmarks.shape[0]  # Total number of samples in the training set
# input_data_reshaped = train_landmarks.reshape(num_samples, timesteps, 63)

In [23]:
# (timesteps, features) shape; in the visible cells it is referenced only by
# the commented-out LSTM variant of the model.
seq_len, feature_count = 10, 63
input_shape = (seq_len, feature_count)

# Model Architecture building and training 

In [24]:
# Size constants for the classifier
num_landmarks = 21  # Number of hand landmarks detected by MediaPipe
num_classes = 3  # Number of hand gesture classes (rock, paper, scissors)


# Current model saved as model2/ in the directory.
# NB: model1/ was trained with fewer dataset than model2/ but with the same architecture.


# Feed-forward classifier over the flattened landmark vector
# (three values per landmark), built layer by layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu',
                                input_shape=(num_landmarks * 3,)))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='relu'))
# Softmax head: one probability per gesture class
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))


# Attempted Model for getting better performance

# model = tf.keras.models.Sequential([
#         tf.keras.layers.LSTM(128, return_sequences=True, input_shape=input_shape),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.LSTM(128, return_sequences=True),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.LSTM(64),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.Dense(128, activation='relu'),
#         tf.keras.layers.Dropout(0.5),
#         tf.keras.layers.Dense(num_classes, activation='softmax')
#     ])

# Integer class ids are used as targets, hence the sparse cross-entropy loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training partition
model.fit(train_landmarks, train_labels, epochs=25)

# Score the trained model on the held-out test partition
test_loss, test_acc = model.evaluate(test_landmarks, test_labels)
print('Test accuracy:', test_acc)


Epoch 1/25


ValueError: in user code:

    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\ssffff\anaconda3\envs\rps\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_5" is incompatible with the layer: expected shape=(None, 10, 63), found shape=(None, 63)


In [18]:
# Persist the trained network under ./model2
export_path = "./model2"
model.save(export_path)


INFO:tensorflow:Assets written to: ./model2\assets


In [None]:
# Manual cleanup: release the webcam handle and close any OpenCV windows.
# Handy when the live-feed cell was interrupted before reaching its own cleanup.
cap.release()
cv2.destroyAllWindows()
