In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import pandas as pd
import cv2
import matplotlib.pyplot as plt 
import numpy as np
import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Data Collection


In [2]:
train_dir = 'asl_alphabet_train/asl_alphabet_train'
test_dir  = 'asl_alphabet_test/asl_alphabet_test'

MediaPipe Hands solution in Python commonly used for detecting and tracking hands in images or videos.

- mp_hands: imports the MediaPipe Hands module, for hand detection and tracking.
- mp_drawing : to draw landmarks and connections on images, useful for visualizing the hand landmarks detected by the Hands module.
- static_image_mode=True: 
  -  True : static image. 
  -  False : video stream and would use tracking to improve detection performance across frames.
- max_num_hands=1: This limits the number of hands detected to a maximum of 1.
- min_detection_confidence=0.7: This sets the minimum confidence threshold for the detection to be considered successful. 

In [7]:
mp_hands=mp.solutions.hands
mp_drawing=mp.solutions.drawing_utils

hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.7)

process_image: function takes an image file path as input, processes the image to detect hand landmarks using MediaPipe, and returns the processed image along with the detected landmarks.

 - image = cv2.imread(image_path): This line uses OpenCV to read the image from the specified file path. The image is read in BGR format by default.
 - image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB): This converts the image from BGR to RGB format because the MediaPipe Hands module expects the input image in RGB format.
 - result = hands.process(image_rgb): This line processes the RGB image using the MediaPipe Hands module to detect hand landmarks. The result contains information about the detected landmarks if any hands are detected.

If hand landmarks are detected, they are drawn on the original image, and their coordinates are collected in a list.

In [9]:
def process_image(image_path):
    image= cv2.imread(image_path)
    image_rgb= cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
    result=hands.process(image_rgb)
    
    landmarks=[]
    
    if result.multi_hand_landmarks: # checks if any hand landmarks were detected in the image.
        for hand_landmarks in result.multi_hand_landmarks: #loops through each detected hand
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS) #to draw the detected hand landmarks and their connections on the original image.
            for lm in hand_landmarks.landmark: #loops through each landmark
                landmarks.append([lm.x, lm.y, lm.z])
    return image, landmarks

extract_landmarks : function takes an image file path as input, processes the image to detect hand landmarks, and returns a flattened array of the landmark coordinates. If no landmarks are detected, it returns an array of zeros. 

- The processed image is ignored (denoted by _), and only the landmarks list is captured.

- If landmarks were detected, converts the list of landmarks into a NumPy array and flattens it into a one-dimensional array. 

- Each landmark has 3 coordinates (x, y, z), and there are 21 landmarks, resulting in a flattened array of 63 elements (21 * 3).

- If no landmarks are detected, it returns an array of zeros with a length of 63 (corresponding to 21 landmarks with x, y, and z coordinates).

In [10]:
def extract_landmarks(image_path):
    _, landmarks = process_image(image_path)
    if landmarks:
        return np.array(landmarks).flatten()
    else:
        return np.zeros(63)  # 21 landmarks with x, y, z coordinates

create datasets from directories of images, where each subdirectory corresponds to a class of images.

processe each image to extract hand landmarks using the extract_landmarks function, 

label each set of landmarks according to its class, and saves the data to CSV files for later use. 

In [12]:
def create_dataset(directory):
    data = []
    labels = []
    classes = sorted(os.listdir(directory))
    
    for i, label in enumerate(classes):
        class_dir = os.path.join(directory, label)
        if os.path.isdir(class_dir):
            for image_file in os.listdir(class_dir):
                image_path = os.path.join(class_dir, image_file)
                landmarks = extract_landmarks(image_path)
                data.append(landmarks)
                labels.append(i)
    
    return np.array(data), np.array(labels)

train_dir = 'asl_alphabet_train/asl_alphabet_train'
test_dir = 'asl_alphabet_test/asl_alphabet_test'

train_data, train_labels = create_dataset(train_dir)
test_data, test_labels = create_dataset(test_dir)

# Save to CSV for later use
train_df = pd.DataFrame(train_data)
train_df['label'] = train_labels
train_df.to_csv('train_landmarks.csv', index=False)

test_df = pd.DataFrame(test_data)
test_df['label'] = test_labels
test_df.to_csv('test_landmarks.csv', index=False)



In [None]:
# Load dataset
train_df = pd.read_csv('train_landmarks.csv')
test_df = pd.read_csv('test_landmarks.csv')

In [None]:
# Prepare the data

#Extracts all columns except the last one (which is the label column) from the training DataFrame and converts them to a NumPy array.
train_data = train_df.iloc[:, :-1].values 
#Extracts the label column from the training DataFrame and converts it to a NumPy array.
train_labels = train_df['label'].values 
test_data = test_df.iloc[:, :-1].values
test_labels = test_df['label'].values

In [None]:
# Convert labels to categorical (one-hot encoding)
num_classes = len(np.unique(train_labels))
train_labels = to_categorical(train_labels, num_classes=num_classes)
test_labels = to_categorical(test_labels, num_classes=num_classes)

In [None]:
# Define a simple fully connected neural network
model = Sequential([
    Dense(128, activation='relu', input_shape=(train_data.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

In [14]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Train the model
history = model.fit(train_data, train_labels, epochs=50, batch_size=32, validation_split=0.2)

# Save the model
model.save('asl_landmarks_model.h5')

# Evaluate the model
test_loss, test_acc = model.evaluate(test_data, test_labels)
print(f'Test accuracy: {test_acc}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.2114 - loss: 2.6117 - val_accuracy: 0.0021 - val_loss: 13.1113
Epoch 2/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.5441 - loss: 1.4320 - val_accuracy: 0.0653 - val_loss: 19.2627
Epoch 3/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6067 - loss: 1.2620 - val_accuracy: 0.0651 - val_loss: 24.4291
Epoch 4/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6356 - loss: 1.1913 - val_accuracy: 0.0852 - val_loss: 30.9028
Epoch 5/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6545 - loss: 1.1356 - val_accuracy: 0.0862 - val_loss: 35.0922
Epoch 6/50
[1m2175/2175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.6612 - loss: 1.1089 - val_accuracy: 0.0786 - val_loss: 40.2940
Epoch 7/50



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.3929 - loss: 12.3935
Test accuracy: 0.3928571343421936


In [15]:
model.save('final_model.keras')