# Training a Neural Network Model with Mediapipe Input Data

In [None]:
# Import libs

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from data import *
from keras.models import Sequential
from keras.layers import Dense
from keras.activations import linear, relu, sigmoid


# https://github.com/cvzone/cvzone
from cvzone.HandTrackingModule import HandDetector      # For mediapipe hand-tracking module

# Report the version of the TensorFlow package imported above.
print("TensorFlow version:", tf.__version__)

In [None]:
# Load a small image sample from ./Miniset/ with the load_data helper.
# NOTE(review): labels=['B'] presumably restricts loading to the 'B' class and
# shape=(128, 128) presumably is the resize target - confirm against load_data.
images, labels = load_data(
    path='./Miniset/',
    grayscale=False,
    labels=['B'],
    shape=(128, 128),
)

# Visual sanity check of what was loaded.
show_random_dataset(images, labels)


In [None]:
# Exploration cell: run the hand detector on one sample image and flatten
# everything it reports for the first detected hand into a single list.
detector = HandDetector(detectionCon=0.8, maxHands=2)

# Hand the detector a copy of the first loaded image.
# NOTE(review): presumably because findHands may draw on its input - confirm.
image = images[0].copy()

hands, _ = detector.findHands(image)
print(hands)

flattened_array = []
if hands:
    # hands[0] is a dict (other cells read its 'lmList', 'center', 'bbox' and
    # 'type' entries). Every value is spliced into one flat list.
    # NOTE: extend() on the string-valued 'type' entry appends its characters
    # one by one, and nested landmark lists are appended as lists.
    for landmark in hands[0].values():
        flattened_array.extend(landmark)
else:
    # Without this guard hands[0] raises IndexError when nothing is detected.
    print('No hands detected in the sample image')

print(flattened_array)

In [None]:

# Module-level detector read by extract_multihands_data_with_mediapipe.
# NOTE(review): maxHands=2 presumably lets findHands report up to two hands - confirm in cvzone docs.
detector = HandDetector(detectionCon=0.8, maxHands=2)

# Labels for which a single detected hand is not enough: a frame carrying one
# of these labels is rejected by extract_multihands_data_with_mediapipe when
# only one hand is found.
dual_hands = tuple('ABDEFGHJKMNPQRSTWXYZ')

def extract_from_mp_hands(mp_hands):
    """Turn one detected hand into a flat tuple of normalised 2-D landmarks.

    Args:
        mp_hands: Mapping describing a single hand. The keys read here are
            ``'lmList'`` (iterable of ``(x, y, z)`` triples; ``z`` is ignored),
            ``'center'`` (indexable, ``[0]`` is x and ``[1]`` is y) and
            ``'bbox'`` (four values; the last two are used as width and
            height).

    Returns:
        A flat tuple ``(x0, y0, x1, y1, ...)`` with two values per landmark.
        Each coordinate is shifted so that the smallest x and the smallest y
        become 0, divided by the bounding-box width (x) or height (y) and
        rounded to 3 decimal places.

    Raises:
        ValueError: If ``'lmList'`` is empty (there is no minimum to shift by).
    """
    lmlist = mp_hands['lmList']
    center = mp_hands['center']
    _, _, W, H = mp_hands['bbox']

    # Express every landmark relative to the hand centre. The shift below
    # cancels this offset mathematically; it is kept so results stay
    # numerically identical to earlier extractions.
    data = tuple((x - center[0], y - center[1]) for x, y, _ in lmlist)

    min_x = min(x for x, _ in data)
    min_y = min(y for _, y in data)

    # A zero width or height would raise ZeroDivisionError. Fall back to a
    # divisor of 1 in that case (if the box is derived from these landmarks the
    # shifted coordinates on that axis are all 0 anyway - TODO confirm).
    scale_x = W if W else 1
    scale_y = H if H else 1

    # Note: Instead of dividing x by W and y by H, an alternative
    #       approach could be to divide x and y by sqrt(W*W + H*H) [TO TRY]
    flat = []
    for x, y in data:
        flat.append(round((x - min_x) / scale_x, 3))
        flat.append(round((y - min_y) / scale_y, 3))

    return tuple(flat)


def extract_multihands_data_with_mediapipe(frame, label):
    """Build the combined feature tuple for up to two hands in one frame.

    Runs the module-level ``detector`` on ``frame``. The features of the first
    detected hand are combined with either the features of the second detected
    hand or, when there is only one hand, a block of 42 zeros. The first hand's
    features come first when it is reported as ``'Left'``; otherwise they come
    last.

    Args:
        frame: Image passed to ``detector.findHands``.
        label: Class label of the frame; checked against ``dual_hands``.

    Returns:
        The concatenated feature tuple, or ``None`` when no hand is detected
        or when only one hand is detected for a label listed in ``dual_hands``.
    """
    detected, _ = detector.findHands(frame)
    count = len(detected)

    # Nothing usable: no hand at all, or one hand where two are required.
    if count == 0:
        return None
    if count == 1 and label in dual_hands:
        return None

    first_is_left = detected[0]['type'] == 'Left'

    first = extract_from_mp_hands(detected[0])
    if count > 1:
        second = extract_from_mp_hands(detected[1])
    else:
        # Placeholder for the missing second hand.
        second = (0, ) * 42

    if first_is_left:
        return first + second
    return second + first



In [None]:
# Mediapipe hand detector used by extract_data_with_mediapipe below to find hands in a frame
detector = HandDetector(detectionCon=0.8, maxHands=2)


def extract_data_with_mediapipe(frame):
    """Return the normalised landmark features of the first hand in a frame.

    Runs the module-level ``detector`` on ``frame`` and passes the first
    detected hand to ``extract_from_mp_hands`` (defined in an earlier cell),
    so the single-hand and multi-hand paths share one normalisation routine
    instead of two copies of the same arithmetic.

    Args:
        frame: Image passed straight to ``detector.findHands``.

    Returns:
        The flat tuple produced by ``extract_from_mp_hands`` (two values per
        landmark; 42 values according to the original note on this function),
        or ``None`` when no hand is detected.
    """
    hands, _ = detector.findHands(frame)

    if not hands:
        return None

    return extract_from_mp_hands(hands[0])


def generate_mediapipe_data(path, labels=None, shape=(128, 128)):
    """Extract hand-landmark features per class folder and save them as CSV.

    Every sub-folder of ``path`` is treated as one class. Each ``*.jpg`` file
    in it is read with ``cv2.imread``, resized with ``cv2.resize`` and passed
    to ``extract_multihands_data_with_mediapipe`` together with the folder
    name. The rows for which extraction succeeds are written to
    ``mp_data_isl_<folder>.csv`` in the current working directory, with the
    folder name appended as the last column of every row.

    Args:
        path: Directory whose sub-folders each hold the images of one class.
        labels: Optional collection of folder names to process. ``None``
            processes every sub-folder.
        shape: Size argument handed to ``cv2.resize``.

    Returns:
        None. Results go to CSV files; progress is printed.
    """
    # Keep only directories, optionally restricted to the requested labels.
    folders = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
        and (labels is None or entry in labels)
    )

    print("Folders_Found = ", folders)

    for folder in folders:
        # Only .jpg files are picked up.
        images_path = glob.glob(os.path.join(path, folder, '*.jpg'))

        # One image at a time: read, resize, extract. This avoids holding a
        # whole folder of decoded images in memory.
        rows = []
        for image_path in images_path:
            # By default, the color space of the loaded image is BGR
            # (blue, green, red) rather than the typical RGB order.
            image = cv2.imread(image_path)
            if image is None:
                # imread signals an unreadable file with None; resizing it
                # would raise, so report and skip it.
                print(f'Unreadable image : Folder {folder} | {image_path}')
                continue

            # Resizing images to reduce computational load.
            image = cv2.resize(image, shape)

            mp_data = extract_multihands_data_with_mediapipe(image, folder)
            if mp_data is None:
                print(f'Empty hands at : Folder {folder} | {image_path}')
            else:
                rows.append(mp_data)

        passed = len(rows)

        if not passed:
            # Stacking an empty feature block with the label column fails, so
            # there is nothing to write for this folder.
            print(f'0/{len(images_path)} images extracted from folder {folder}; no .csv written')
            continue

        # One label per extracted row. Repeating the whole folder name (not
        # its characters) keeps multi-character folder names intact.
        label_column = np.array([folder] * passed)[:, np.newaxis]
        combined_data = np.hstack((np.array(rows), label_column))

        file_name = 'mp_data_isl_' + folder + '.csv'
        np.savetxt(file_name, combined_data, delimiter=',', fmt='%s')

        print(f'{passed}/{len(images_path)} images extracted to .csv from folder {folder}')


# Alternative invocation kept for reference: another dataset location,
# restricted to three class folders.
# generate_mediapipe_data('C:/Users/subha/Downloads/archive/ASL_Dataset/Train/',
#                         labels=['M', 'N', 'O'])

# Generate the CSV files for every class folder under ./Dataset/Indian/
generate_mediapipe_data('./Dataset/Indian/')

In [None]:
# Read the per-letter CSV files into two arrays: features and labels.

# Directory that holds one CSV file per letter
directory_path = './mediapipe_dataset/'

# Per-file pieces, joined after the loop
feature_chunks = []
label_chunks = []

# NOTE(review): the names read here ('mediapipe_data_<letter>.csv') differ from
# the 'mp_data_isl_<folder>.csv' names written by generate_mediapipe_data -
# confirm these files are produced elsewhere.
for code_point in range(ord('A'), ord('Z') + 1):
    letter_name = chr(code_point)
    csv_file_path = os.path.join(directory_path, f'mediapipe_data_{letter_name}.csv')

    # The files carry no header row
    frame = pd.read_csv(csv_file_path, header=None)

    # Last column is the label, everything before it is feature data
    feature_chunks.append(frame.iloc[:, :-1].values)
    label_chunks.append(frame.iloc[:, -1].values)

images = np.concatenate(feature_chunks, axis=0)
labels = np.concatenate(label_chunks, axis=0)


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split    # For splitting data into train and test sets.


# Map the class labels to integer ids. The fitted encoder stays available as
# label_encoder for later cells.
label_encoder = LabelEncoder()
label_encoder.fit(labels)
encoded_labels = label_encoder.transform(labels)

# Hold out 20% of the samples as the test set; random_state is fixed at 123
train_data, test_data, train_labels, test_labels = train_test_split(
    images,
    encoded_labels,
    test_size=0.2,
    random_state=123,
)


In [None]:
# Define the model architecture

tf.random.set_seed(1234)  # for consistent results

# Take the input width from the training data instead of hard-coding 42, so
# the model matches whichever feature layout was loaded (the multi-hand
# extractor produces longer rows than the single-hand one).
num_features = train_data.shape[1]

model = Sequential(
    [
        tf.keras.Input(shape=(num_features, )),
        Dense(39, activation='relu'),
        # Linear output = raw scores; the loss used at compile time is
        # configured with from_logits=True.
        # NOTE(review): 36 output units while only the 26 letter files A-Z are
        # loaded above - confirm the intended number of classes.
        Dense(36, activation='linear')

    ], name="sign_recognition_model"
)

model.summary()

In [None]:
# Configure training: sparse categorical cross-entropy on raw scores
# (from_logits=True), Adam with a 0.001 learning rate, accuracy as the metric
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

# Train for 20 epochs; the held-out split doubles as validation data
history = model.fit(
    train_data,
    train_labels,
    epochs=20,
    validation_data=(test_data, test_labels),
)

# Persist the trained model
model.save('isl_model.h5')

In [None]:
# Training curves: loss on the left panel, accuracy on the right,
# each with its validation counterpart

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# (axis, history key, panel title / y-label, legend position)
panels = (
    (ax1, 'loss', 'Loss', 'lower right'),
    (ax2, 'accuracy', 'Accuracy', 'upper right'),
)

for axis, metric, title, legend_loc in panels:
    axis.plot(history.history[metric], label=metric)
    axis.plot(history.history['val_' + metric], label='val_' + metric)
    axis.set_title(title)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(title)
    axis.legend(loc=legend_loc)

# Leave some horizontal room between the two panels
plt.subplots_adjust(wspace=0.3)

plt.show()

In [None]:
# Generating confusion matrix

import seaborn as sns
from sklearn.metrics import confusion_matrix

# Evaluate the model on the test data
predictions = model.predict(test_data)
predictions_categorical = np.argmax(predictions, axis=1)

# Take the axis labels from the fitted encoder rather than a hard-coded A-Z
# list, and pin the matrix to exactly those class ids. Otherwise the matrix
# size depends on which ids happen to occur and can disagree with the number
# of tick labels.
class_names = list(label_encoder.classes_)
class_ids = np.arange(len(class_names))

# The model can emit ids beyond the encoder's classes (it has more output
# units than loaded letters). Such samples are left out of the matrix, so
# report them explicitly.
out_of_range = int(np.sum(predictions_categorical >= len(class_names)))
if out_of_range:
    print(f'{out_of_range} predictions fall outside the known classes and are not shown')

confusion_mtx = confusion_matrix(test_labels, predictions_categorical, labels=class_ids)

# Plot the confusion matrix using seaborn heatmap
sns.heatmap(confusion_mtx, annot=True,
            cmap=plt.cm.Blues, fmt='g',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

print(confusion_mtx)


In [None]:
from sklearn.metrics import f1_score

# Uses test_data and test_labels from the train/test split cell

# Class scores for the held-out samples
predictions = model.predict(test_data)

# Index of the highest score per sample = predicted class id
predictions_categorical = predictions.argmax(axis=1)

# F1 score with average='weighted'
f1 = f1_score(y_true=test_labels, y_pred=predictions_categorical, average='weighted')

print("F1 Score:", f1)


In [None]:
from sklearn.metrics import classification_report

# Predicted class id per test sample (reuses `predictions` from an earlier cell)
predictions_categorical = predictions.argmax(axis=1)

# Build the text report comparing true and predicted class ids
report = classification_report(y_true=test_labels, y_pred=predictions_categorical)

# Show it
print(report)
