In [4]:
import cv2, os, numpy as np
from pathlib import Path
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import mediapipe as mp
import joblib

In [3]:
target_directory = 'FullDataset' 
output_directory = 'TrainingImages'

In [4]:
os.makedirs(output_directory, exist_ok=True)

In [None]:
def transform_to_square(image, final_size):
    # Obter as dimensões da imagem
    height, width = image.shape[:2]

    # Calculate the size of the square
    side_square = max(height, width)

    # Create a new square image with black border
    square_image = np.zeros((side_square, side_square, 3), dtype=np.uint8)

    # Insert the image in the middle of the new square image
    y_offset = (side_square - height) // 2
    x_offset = (side_square - width) // 2
    
    # Convert grayscale image to 3 channels
    color_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    square_image[y_offset:y_offset + height, x_offset:x_offset + width] = color_image

    # Resize to desired final size
    square_image = cv2.resize(square_image, (final_size, final_size), interpolation=cv2.INTER_CUBIC)

    return square_image

for letter_folder in os.listdir(target_directory):
    letter_path = os.path.join(target_directory, letter_folder)
    
    if os.path.isdir(letter_path):
        for image_file in os.listdir(letter_path):
            image_path = os.path.join(letter_path, image_file)
            
            try:
                image = cv2.imread(image_path)
                if image is not None:
                    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    final_image = transform_to_square(gray_image, 512)
                    
                    border_width = 10 
                    image_with_border = cv2.copyMakeBorder(
                        final_image,
                        border_width,
                        border_width,
                        border_width,
                        border_width,
                        cv2.BORDER_CONSTANT,
                        value=[0, 0, 0]  
                    )

                    image_with_border = cv2.resize(image_with_border, (512, 512), interpolation=cv2.INTER_CUBIC)
                                        
                    output_path = os.path.join(output_directory, letter_folder)
                    os.makedirs(output_path, exist_ok=True)
                    
                    gray_image_path = os.path.join(output_path, image_file)
                    cv2.imwrite(gray_image_path, image_with_border)
       
            except Exception as e:
                print(f"Error to open image {image_path} : {e}")


In [3]:
# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1)

# Function to extract hand landmarks
def extract_hand_landmarks(image_path):
    # Load the grayscale image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Convert grayscale image to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    
    # Process the image to find hand landmarks
    results = hands.process(image_rgb)

    if results.multi_hand_landmarks:
        landmarks = results.multi_hand_landmarks[0]
        return [(lm.x, lm.y) for lm in landmarks.landmark]
    return None

# Directory containing ASL images organized by letter
image_directory = 'TrainingImages'
# List to hold data
data = []

# Loop through each subfolder (each letter of the alphabet)
for label in os.listdir(image_directory):
    label_path = os.path.join(image_directory, label)
    
    # Check if it's a directory (skip files in the main folder, if any)
    if os.path.isdir(label_path):
        # Loop through images in each subfolder
        for filename in os.listdir(label_path):
            if filename.endswith('.jpg') or filename.endswith('.png'):
                image_path = os.path.join(label_path, filename)
                landmarks = extract_hand_landmarks(image_path)
                
                if landmarks:
                    # Append the label and landmarks to the data list
                    data.append([label] + [coord for landmark in landmarks for coord in landmark])
    

# Create a DataFrame
columns = ['Label'] + [f'Landmark_{i}_{axis}' for i in range(21) for axis in ['x', 'y']]
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv('asl_landmarks.csv', index=False)


FileNotFoundError: [WinError 3] O sistema não pode encontrar o caminho especificado: 'TrainingImages'

In [None]:
le = LabelEncoder()

# Load data from generated CSV
df = pd.read_csv('asl_landmarks.csv')
df['Label'] = le.fit_transform(df['Label'])

# Separate features (Landmarks) and target (Label)
X = df.drop('Label', axis=1)  # All landmark points are features
y = df['Label']               # The Label column is the target

# Split into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

prev_test = y_pred  # Model predictions
class_test = y_test  # Actual test set classes

print(f"Acurácia: {accuracy}")
print(f"Relatório de Classificação:\n{report}")


In [6]:
def get_img():
    path = Path("../Image")

    for file in path.glob("*"):
        if file.is_file():
            return str(file)
        
    return None

In [9]:
# Get the model and load it
model_file = "model-rf.pkl"
load_model = joblib.load(model_file)

label_file = "label-encoder.pkl"
encoder = joblib.load(label_file)

# Inicialize o MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Function to extract landmarks from a new image and make prediction
def predict_letter(image_path, model):
    
    image = cv2.imread(image_path)
    
    if image is None:
        return None

    # Process image to get the landmarks
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Known if the hand is detected
    if results.multi_hand_landmarks:
        
        # Extract the landmarks
        landmarks = results.multi_hand_landmarks[0]

        # Format the coordinates
        landmark_data = [coord for lm in landmarks.landmark for coord in (lm.x, lm.y)]
        
        # Make prediction with the model
        prediction = model.predict([landmark_data])
        
        predicted_letter = encoder.inverse_transform(prediction)
        
        return predicted_letter[0]  # Retorna a letra prevista
    else:
        return "Error: There's no hand on the image"

image_path = get_img()
predicted_letter = predict_letter(image_path, load_model)
print(f"The predicted letter is: {predicted_letter}")


The predicted letter is: G




In [None]:
# Saving model using Joblib
file_joblib = "model-rf.pkl"
joblib.dump(rf_model, file_joblib, compress=True)

# Loading model using Joblib
loading_model_joblib = joblib.load(file_joblib)
result = loading_model_joblib.score(X_test, y_test)
print(f"Percentual de Acertos {(result*100):.2f}%\n")


Percentual de Acertos 96.52%



In [None]:
file_label_enconder = "label-encoder.pkl"
joblib.dump(le, file_label_enconder)

['label_encoder.pkl']