In [1]:

# import required packages
import cv2
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator




In [4]:
import cv2
import mediapipe as mp
import numpy as np
import os

def mediapipeDetectHands(image, hands_module, min_detection_confidence=0.5, display=True):
    image_height, image_width, _ = image.shape
    output_image = image.copy()

    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    results = hands_module.process(img_rgb)

    detected_hands = []

    # Set the scaling factor for augmentation
    scaling_factor = 1.5

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            landmarks = []
            for landmark in hand_landmarks.landmark:
                x = int(landmark.x * image_width)
                y = int(landmark.y * image_height)
                landmarks.append((x, y))

            # Get the bounding box of the hand
            bbox = cv2.boundingRect(np.array(landmarks))

            x1, y1, w, h = bbox

            # Augment the bounding box dimensions
            x1 = int(x1 * scaling_factor)
            y1 = int(y1 * scaling_factor)
            w = int(w * scaling_factor)
            h = int(h * scaling_factor)

            # Ensure that the augmented bounding box is within the image boundaries
            x1 = max(0, x1)
            y1 = max(0, y1)
            x2 = min(image_width, x1 + w)
            y2 = min(image_height, y1 + h)

            # Draw the augmented rectangle around the hand
            cv2.rectangle(output_image, pt1=(x1, y1), pt2=(x2, y2), color=(0, 255, 0), thickness=2)

            detected_hands.append(landmarks)

            # Draw the hand landmarks on the output image
            mp.solutions.drawing_utils.draw_landmarks(
                output_image, hand_landmarks, mp.solutions.hands.HAND_CONNECTIONS,
                landmark_drawing_spec=mp.solutions.drawing_styles.get_default_hand_landmarks_style()
            )

    if display:
        cv2.imshow("Output", output_image)
    else:
        return output_image, detected_hands
# Path to the training data directory
DATA_DIR = 'data/train'

# Output directory to save the corrected images
OUTPUT_DIR = 'cropped_data2'

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Initialize the MediaPipe Hands module
mp_hands = mp.solutions.hands
hands_module = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Loop through each class in the training data directory
for class_name in os.listdir(DATA_DIR):
    class_dir = os.path.join(DATA_DIR, class_name)
    output_class_dir = os.path.join(OUTPUT_DIR, class_name)
    os.makedirs(output_class_dir, exist_ok=True)

    # Loop through each image in the class directory
    for img_name in os.listdir(class_dir):
        img_path = os.path.join(class_dir, img_name)
        img = cv2.imread(img_path)

        # Check if the image is read successfully
        if img is not None:
            output_image, detected_hands = mediapipeDetectHands(img, hands_module, display=False)

            # Save the corrected hand images to the output directory
            for i, landmarks in enumerate(detected_hands):
                # Construct the output file name without ".jpg" in the middle
                output_name = f'{img_name}_hand_{i}'
                output_path = os.path.join(output_class_dir, f'{output_name}.jpg')

                # Draw rectangle and save the hand region
                x1, y1, w, h = cv2.boundingRect(np.array(landmarks))

                # Print debug information
                print(f"Image: {img_name}, Hand {i} - Coordinates: x={x1}, y={y1}, w={w}, h={h}")

                # Check if the image region is empty or invalid
                if h > 0 and w > 0 and y1 + h <= img.shape[0] and x1 + w <= img.shape[1]:
                    cv2.imwrite(output_path, img[y1:y1 + h, x1:x1 + w])
                else:
                    print(f"Skipping saving image {output_path} due to empty or invalid region")

# Release the resources used by the hands module
hands_module.close()

# Close OpenCV windows
cv2.destroyAllWindows()


Image: img_100006.jpg, Hand 0 - Coordinates: x=440, y=254, w=61, h=46
Image: img_100041.jpg, Hand 0 - Coordinates: x=525, y=221, w=97, h=85
Image: img_100041.jpg, Hand 1 - Coordinates: x=396, y=223, w=53, h=69
Image: img_100048.jpg, Hand 0 - Coordinates: x=530, y=214, w=83, h=82
Image: img_100139.jpg, Hand 0 - Coordinates: x=460, y=279, w=65, h=65
Image: img_100139.jpg, Hand 1 - Coordinates: x=371, y=221, w=66, h=49
Image: img_100281.jpg, Hand 0 - Coordinates: x=491, y=314, w=69, h=47
Image: img_100328.jpg, Hand 0 - Coordinates: x=485, y=238, w=61, h=70
Image: img_100339.jpg, Hand 0 - Coordinates: x=493, y=180, w=71, h=75
Image: img_10034.jpg, Hand 0 - Coordinates: x=578, y=184, w=69, h=61
Skipping saving image cropped_data2\texting_left\img_10034.jpg_hand_0.jpg due to empty or invalid region
Image: img_100374.jpg, Hand 0 - Coordinates: x=475, y=252, w=67, h=74
Image: img_100412.jpg, Hand 0 - Coordinates: x=458, y=285, w=68, h=46
Image: img_100414.jpg, Hand 0 - Coordinates: x=464, y=26

In [5]:
from sklearn.model_selection import train_test_split
train_data_gen = ImageDataGenerator(
        rescale=1./255 )

# Preprocess all test images
train_generator = train_data_gen.flow_from_directory(
        'cropped_data',
        target_size=(48, 48),
        batch_size=64,
        color_mode="grayscale",
        class_mode='categorical',
    shuffle=True # Set shuffle to True initially
)

# create model structure
hand_model = Sequential()

hand_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
hand_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
hand_model.add(MaxPooling2D(pool_size=(2, 2)))
hand_model.add(Dropout(0.25))

hand_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
hand_model.add(MaxPooling2D(pool_size=(2, 2)))
hand_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
hand_model.add(MaxPooling2D(pool_size=(2, 2)))
hand_model.add(Dropout(0.25))

hand_model.add(Flatten())
hand_model.add(Dense(1024, activation='relu'))
hand_model.add(Dropout(0.5))
hand_model.add(Dense(6, activation='softmax'))

cv2.ocl.setUseOpenCL(False)

hand_model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])

Found 15443 images belonging to 6 classes.




In [6]:

# Train the neural network/model
hand_model_info = hand_model.fit(
        train_generator,
        steps_per_epoch=15443// 64,
        epochs=20,
        validation_data=train_generator ,
        validation_split=.2 ,
        shuffle=True)

Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
# save model structure in jason file
model_json = hand_model.to_json()
with open("handfin_model.json", "w") as json_file:
    json_file.write(model_json)

# save trained model weight in .h5 file
hand_model.save_weights('handfin_model.h5')