In [1]:
import os
import pickle
import random

import cv2
import joblib
import mediapipe as mp
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Dataset root; each sub-directory name is used as the class label.
DATA_DIR = "asl_alphabet_train"

# Accumulators for the extracted feature vectors and their class names.
data, labels = [], []

# Shortcuts into the MediaPipe solutions namespace.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand detector in static-image mode with a 0.3 minimum detection confidence.
hands = mp_hands.Hands(min_detection_confidence=0.3, static_image_mode=True)

# Function to process an image and return hand landmarks
def process_image(image, detector=None):
    """Return a translation-normalised landmark feature vector for one hand.

    Args:
        image: Image passed unchanged to ``detector.process``. Callers in this
            notebook pass an RGB array.
        detector: Object exposing ``process(image)`` whose result carries a
            ``multi_hand_landmarks`` attribute. When ``None`` (the default)
            the module-level ``hands`` detector is used.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` holding the landmarks of the
        first detected hand, each coordinate shifted by that hand's minimum
        x / minimum y. An empty list is returned when no hand is detected.
    """
    if detector is None:
        detector = hands

    results = detector.process(image)
    if not results.multi_hand_landmarks:
        return []

    # Use exactly one hand and normalise it against its *own* minimum.
    # Previously the minima were taken over every detected hand while only the
    # last hand's landmarks were emitted, so a second detection silently
    # shifted the features.
    landmarks = results.multi_hand_landmarks[0].landmark
    if len(landmarks) == 0:
        return []

    xs = [point.x for point in landmarks]
    ys = [point.y for point in landmarks]
    # Compute the offsets once instead of on every loop iteration.
    min_x, min_y = min(xs), min(ys)

    data_aux = []
    for x, y in zip(xs, ys):
        data_aux.append(x - min_x)
        data_aux.append(y - min_y)

    return data_aux

# Iterate over directories in the data folder
MAX_IMAGES_PER_CLASS = 1000  # upper bound on images sampled from each class folder
SAMPLE_SEED = 42             # fixed seed so the same images are drawn on every run
sampler = random.Random(SAMPLE_SEED)

# Sorted listings plus a seeded sampler make the selection reproducible;
# os.listdir alone returns entries in arbitrary order.
for dir_ in sorted(os.listdir(DATA_DIR)):
    class_dir = os.path.join(DATA_DIR, dir_)
    if not os.path.isdir(class_dir):
        # Skip stray files in the dataset root; listing them would raise.
        continue

    img_paths = sorted(os.listdir(class_dir))
    selected_img_paths = sampler.sample(
        img_paths, min(MAX_IMAGES_PER_CLASS, len(img_paths))
    )

    for img_path in selected_img_paths:
        img = cv2.imread(os.path.join(class_dir, img_path))
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; cvtColor would fail on it, so skip the file.
            continue

        # Extract features from the original image and from its horizontal
        # mirror (flip code 1), keeping each one only if a hand was found.
        for candidate in (img, cv2.flip(img, 1)):
            features = process_image(cv2.cvtColor(candidate, cv2.COLOR_BGR2RGB))
            if features:
                data.append(features)
                labels.append(dir_)

data = np.asarray(data)
labels = np.asarray(labels)

In [3]:
# Fit the encoder on the class names, then replace the names with its encoding.
label_encoder = LabelEncoder().fit(labels)
labels = label_encoder.transform(labels)

# Persist the fitted encoder to disk.
joblib.dump(label_encoder, 'label_encoder.pickle')
print("Label encoder saved.")

Label encoder saved.


In [4]:
# Split data into training and testing sets
# A fixed random_state makes the split, the forest, and therefore the reported
# accuracy reproducible across runs.
# NOTE(review): `data` holds both the original and the horizontally flipped
# version of each image, so a random split can put one in train and the other
# in test. The reported accuracy may be optimistic; consider splitting by
# source image.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=RANDOM_STATE,
)

# Initialize and train the RandomForestClassifier model
model = RandomForestClassifier(random_state=RANDOM_STATE)
model.fit(x_train, y_train)

# Make predictions on the test set and calculate accuracy
y_predict = model.predict(x_test)
# accuracy_score expects (y_true, y_pred); the value is the same either way,
# but the documented order keeps this consistent with other metrics.
score = accuracy_score(y_test, y_predict)
print('{} % of accurate.'.format(score * 100))

98.30375196585037 % of accurate.


In [5]:
# Save the trained model to a pickle file
# The classifier is wrapped in a dict, so consumers must read the 'model' key
# after unpickling. Only unpickle this file from a trusted source, because
# pickle can execute arbitrary code on load.
with open('model.pickle', 'wb') as f:
    pickle.dump({'model': model}, f)