In [1]:
# -*- coding: utf-8 -*-
"""CV_project.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1-U7aARa5bBUd9Mvd1rOAPp9UnAc8XKgj

## **1) Downloading the 2 datasets that we will compare between**
"""

!pip install kaggle
import os
import subprocess

# Find the kaggle executable path
kaggle_path = subprocess.check_output(['which', 'kaggle']).decode().strip()
print(f"Kaggle executable path: {kaggle_path}")

def download_kaggle_dataset(dataset_name, output_path):
    """Downloads a Kaggle dataset."""
    os.makedirs(output_path, exist_ok=True)
    # Use the updated kaggle path
    command = f"{kaggle_path} datasets download -d {dataset_name} -p {output_path} --unzip"
    subprocess.run(command, shell=True, check=True)
    print(f"Dataset downloaded to: {output_path}")

dataset_name = "grassknoted/asl-alphabet"
output_path = "./Datasets/asl-alphabet"
download_kaggle_dataset(dataset_name, output_path)

"""# 2) Starting in the objectives implementation"""



Kaggle executable path: /usr/local/bin/kaggle
Dataset downloaded to: ./Datasets/asl-alphabet


'# 2) Starting in the objectives implementation'

In [2]:
import os

# Check the contents of the directory where the dataset was downloaded
output_path = './Datasets/asl-alphabet'
print(os.listdir(output_path))


['asl_alphabet_test', 'asl_alphabet_train']


In [3]:
# List the contents of the training dataset
train_path = os.path.join(output_path, 'asl_alphabet_train')
print("Training Subdirectory Contents:")
print(os.listdir(train_path))

# List the contents of the test dataset
test_path = os.path.join(output_path, 'asl_alphabet_test')
print("Test Subdirectory Contents:")
print(os.listdir(test_path))



Training Subdirectory Contents:
['asl_alphabet_train']
Test Subdirectory Contents:
['asl_alphabet_test']


In [4]:
nested_train_path = os.path.join(train_path, 'asl_alphabet_train')
print("Contents of the Nested Training Subdirectory:")
print(os.listdir(nested_train_path))


Contents of the Nested Training Subdirectory:
['P', 'Z', 'D', 'del', 'F', 'I', 'R', 'X', 'K', 'S', 'J', 'nothing', 'T', 'Y', 'space', 'V', 'C', 'B', 'E', 'G', 'U', 'H', 'Q', 'N', 'A', 'M', 'L', 'O', 'W']


In [5]:
# Check Test Dataset Structure
print("\nChecking Test Dataset Structure...")
nested_test_path = os.path.join(test_path, 'asl_alphabet_test')
if os.path.exists(nested_test_path):
    print("Contents of the Nested Test Subdirectory:")
    print(os.listdir(nested_test_path))
    test_path = nested_test_path  # Update to the nested path
else:
    print("Test Subdirectory Contents:")
    print(os.listdir(test_path))



Checking Test Dataset Structure...
Contents of the Nested Test Subdirectory:
['C_test.jpg', 'B_test.jpg', 'J_test.jpg', 'N_test.jpg', 'R_test.jpg', 'V_test.jpg', 'Q_test.jpg', 'L_test.jpg', 'K_test.jpg', 'W_test.jpg', 'nothing_test.jpg', 'D_test.jpg', 'S_test.jpg', 'X_test.jpg', 'space_test.jpg', 'F_test.jpg', 'Y_test.jpg', 'I_test.jpg', 'E_test.jpg', 'H_test.jpg', 'M_test.jpg', 'T_test.jpg', 'G_test.jpg', 'U_test.jpg', 'P_test.jpg', 'Z_test.jpg', 'O_test.jpg', 'A_test.jpg']


In [6]:
import os

# Define the dataset paths
output_path = './Datasets/asl-alphabet'

# Check if the main dataset folder exists
if os.path.exists(output_path):
    print(f"The dataset folder exists at: {output_path}")
else:
    print("Dataset folder does not exist. Please check the download process.")


The dataset folder exists at: ./Datasets/asl-alphabet


In [7]:
# Paths to the subdirectories
train_path = os.path.join(output_path, 'asl_alphabet_train')
test_path = os.path.join(output_path, 'asl_alphabet_test')

# Check if the training folder exists and list its contents
if os.path.exists(train_path):
    print("\nTraining Directory Contents:")
    print(os.listdir(train_path))
else:
    print("Training directory not found.")

# Check if the test folder exists and list its contents
if os.path.exists(test_path):
    print("\nTest Directory Contents:")
    print(os.listdir(test_path))
else:
    print("Test directory not found.")



Training Directory Contents:
['asl_alphabet_train']

Test Directory Contents:
['asl_alphabet_test']


In [8]:
# Check the contents of the directory after unzipping
output_path = './Datasets/asl-alphabet'
print(os.listdir(output_path))


['asl_alphabet_test', 'asl_alphabet_train']


In [9]:
#extract 21 key points (landmarks) for each hand, analyze to know shape and size (not sure if needed)
!pip install mediapipe opencv-python




In [10]:
import mediapipe as mp
import numpy as np
import cv2

In [11]:

# Initialize MediaPipe Hands module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()

# Function to extract hand landmarks
def extract_hand_landmarks(image):
    # Convert image to RGB (MediaPipe requires RGB input)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = hands.process(image_rgb)

    landmarks = []
    if result.multi_hand_landmarks:
        for hand_landmarks in result.multi_hand_landmarks:
            for landmark in hand_landmarks.landmark:
                landmarks.append([landmark.x, landmark.y, landmark.z])  # x, y, z coordinates
    return landmarks

# Function to calculate Euclidean distance between two points
def calculate_distance(point1, point2):
    return np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)

# Extract minimal features (Hand Size, Finger Lengths, Orientation)
def extract_minimal_features(image):
    landmarks = extract_hand_landmarks(image)
    if not landmarks:
        return None  # No hand detected

    # 1. Hand Size (Bounding Box)
    x_coords = [landmark[0] for landmark in landmarks]
    y_coords = [landmark[1] for landmark in landmarks]
    x_min, x_max = min(x_coords), max(x_coords)
    y_min, y_max = min(y_coords), max(y_coords)
    hand_width = x_max - x_min
    hand_height = y_max - y_min

    # 2. Finger Lengths (Wrist to Fingertip)
    wrist = landmarks[0]  # Wrist (landmark 0)
    thumb_tip = landmarks[4]  # Thumb tip (landmark 4)
    index_tip = landmarks[8]  # Index finger tip (landmark 8)
    middle_tip = landmarks[12]  # Middle finger tip (landmark 12)
    ring_tip = landmarks[16]  # Ring finger tip (landmark 16)
    pinky_tip = landmarks[20]  # Pinky finger tip (landmark 20)

    # Calculate lengths of fingers
    thumb_length = calculate_distance(wrist, thumb_tip)
    index_length = calculate_distance(wrist, index_tip)
    middle_length = calculate_distance(wrist, middle_tip)
    ring_length = calculate_distance(wrist, ring_tip)
    pinky_length = calculate_distance(wrist, pinky_tip)

    # 3. Hand Orientation (angle between wrist, index, and middle tip)
    def calculate_angle(p1, p2, p3):
        v1 = np.array([p1[0] - p2[0], p1[1] - p2[1]])
        v2 = np.array([p3[0] - p2[0], p3[1] - p2[1]])
        dot_product = np.dot(v1, v2)
        magnitude_v1 = np.linalg.norm(v1)
        magnitude_v2 = np.linalg.norm(v2)
        cos_theta = dot_product / (magnitude_v1 * magnitude_v2)
        angle = np.arccos(np.clip(cos_theta, -1.0, 1.0))
        return np.degrees(angle)

    # Calculate orientation between wrist, index, and middle fingers
    orientation = calculate_angle(wrist, index_tip, middle_tip)

    # Create and return the features dictionary
    features = {
        "hand_width": hand_width,
        "hand_height": hand_height,
        "thumb_length": thumb_length,
        "index_length": index_length,
        "middle_length": middle_length,
        "ring_length": ring_length,
        "pinky_length": pinky_length,
        "orientation": orientation
    }

    return features



In [15]:
# Import required modules
import os
import cv2
import numpy as np

# Define the training dataset path
train_path = './Datasets/asl-alphabet/asl_alphabet_train/asl_alphabet_train'

# Function to load images and labels in batches
def load_images_and_labels_in_batches(data_path, batch_size=1000, size=(200, 200)):
    """Load and preprocess images in batches."""
    images = []
    labels = []
    batch_images = []
    batch_labels = []

    print(f"Scanning directory: {data_path}")
    for folder_name in os.listdir(data_path):
        folder_path = os.path.join(data_path, folder_name)
        if os.path.isdir(folder_path):
            print(f"Processing folder: {folder_name}")
            for file_name in os.listdir(folder_path):
                file_path = os.path.join(folder_path, file_name)
                if file_name.endswith(('.jpg', '.png', '.jpeg')):  # Ensure valid image files
                    image = cv2.imread(file_path)
                    if image is not None:
                        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
                        resized_image = cv2.resize(gray_image, size)         # Resize the image
                        batch_images.append(resized_image)
                        batch_labels.append(folder_name)

                        # When batch size is reached, save the batch and reset
                        if len(batch_images) == batch_size:
                            images.append(np.array(batch_images))
                            labels.append(np.array(batch_labels))
                            batch_images = []
                            batch_labels = []

    # Add any remaining images if the last batch is incomplete
    if batch_images:
        images.append(np.array(batch_images))
        labels.append(np.array(batch_labels))

    return images, labels

# Example: Load training images in batches
batch_size = 500
train_batches, train_label_batches = load_images_and_labels_in_batches(train_path, batch_size)
print(f"Loaded {len(train_batches)} batches of training data.")


Scanning directory: ./Datasets/asl-alphabet/asl_alphabet_train/asl_alphabet_train
Processing folder: P
Processing folder: Z
Processing folder: D
Processing folder: del
Processing folder: F
Processing folder: I
Processing folder: R
Processing folder: X
Processing folder: K
Processing folder: S
Processing folder: J
Processing folder: nothing
Processing folder: T
Processing folder: Y
Processing folder: space
Processing folder: V
Processing folder: C
Processing folder: B
Processing folder: E
Processing folder: G
Processing folder: U
Processing folder: H
Processing folder: Q
Processing folder: N
Processing folder: A
Processing folder: M
Processing folder: L
Processing folder: O
Processing folder: W
Loaded 174 batches of training data.


In [18]:
def preprocess_batches(batches, size=(200, 200)):
    """Preprocess images in batches: Resize and convert to grayscale."""
    processed_batches = []
    for batch in batches:
        processed_batch = []
        for image in batch:
            # Check if the image is already grayscale
            if len(image.shape) == 3:  # 3 channels (RGB/BGR)
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:  # Already grayscale
                gray_image = image

            # Resize the image
            resized_image = cv2.resize(gray_image, size)
            processed_batch.append(resized_image)

        processed_batches.append(np.array(processed_batch))
    return processed_batches


In [19]:
# Number of batches
print(f"Number of Batches: {len(train_batches)}")

# Check the shape of the first batch
print(f"Shape of First Batch (Images): {train_batches[0].shape}")
print(f"Number of Labels in First Batch: {len(train_label_batches[0])}")

# Check a few labels from the first batch
print(f"Labels in First Batch: {train_label_batches[0][:10]}")


Number of Batches: 174
Shape of First Batch (Images): (500, 200, 200)
Number of Labels in First Batch: 500
Labels in First Batch: ['P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P' 'P']


In [20]:
import os
import numpy as np

# Directory to save normalized batches
normalized_batch_dir = "./normalized_batches/"
os.makedirs(normalized_batch_dir, exist_ok=True)

# Normalize and save batches one at a time
for i, batch in enumerate(train_batches):
    # Normalize the batch
    normalized_batch = batch / 255.0

    # Save the batch to disk as a NumPy file
    batch_file = os.path.join(normalized_batch_dir, f"batch_{i}.npy")
    np.save(batch_file, normalized_batch)

    # Clear memory for the processed batch
    del normalized_batch

    print(f"Processed and saved batch {i + 1} of {len(train_batches)}.")


Processed and saved batch 1 of 174.
Processed and saved batch 2 of 174.
Processed and saved batch 3 of 174.
Processed and saved batch 4 of 174.
Processed and saved batch 5 of 174.
Processed and saved batch 6 of 174.
Processed and saved batch 7 of 174.
Processed and saved batch 8 of 174.
Processed and saved batch 9 of 174.
Processed and saved batch 10 of 174.
Processed and saved batch 11 of 174.
Processed and saved batch 12 of 174.
Processed and saved batch 13 of 174.
Processed and saved batch 14 of 174.
Processed and saved batch 15 of 174.
Processed and saved batch 16 of 174.
Processed and saved batch 17 of 174.
Processed and saved batch 18 of 174.
Processed and saved batch 19 of 174.
Processed and saved batch 20 of 174.
Processed and saved batch 21 of 174.
Processed and saved batch 22 of 174.
Processed and saved batch 23 of 174.
Processed and saved batch 24 of 174.
Processed and saved batch 25 of 174.
Processed and saved batch 26 of 174.
Processed and saved batch 27 of 174.
Processed 

In [None]:
# Extract features from all images in batches
def extract_features_from_batches(batches):
    """Extract features for all images in the provided batches."""
    all_features = []
    for batch in batches:
        batch_features = []
        for image in batch:
            features = extract_minimal_features(image)
            if features:  # Only include images where features were successfully extracted
                batch_features.append(features)
        all_features.extend(batch_features)  # Append all features from the batch
    return all_features

# Extract features for training and testing batches
train_features = extract_features_from_batches(train_batches)
test_features = extract_features_from_batches(test_batches)

print(f"Extracted features for {len(train_features)} training images and {len(test_features)} test images.")


In [None]:
import pandas as pd

# Convert features to a DataFrame
train_features_df = pd.DataFrame(train_features)
test_features_df = pd.DataFrame(test_features)

# Check the DataFrame structure
print(train_features_df.head())
print(f"Training Features Shape: {train_features_df.shape}")
print(f"Test Features Shape: {test_features_df.shape}")


In [None]:
from sklearn.preprocessing import LabelEncoder

# Flatten label batches
train_labels = np.hstack(train_label_batches)
test_labels = np.hstack(test_label_batches)

# Encode labels to numerical format
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(train_labels)
y_test = label_encoder.transform(test_labels)

# Check encoded labels
print(f"Encoded Training Labels: {y_train[:10]}")
print(f"Classes: {label_encoder.classes_}")


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Convert DataFrames to NumPy arrays
X_train = train_features_df.to_numpy()
X_test = test_features_df.to_numpy()

# Train Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate the model
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

print(f"Random Forest Test Accuracy: {rf_accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_rf))

# Plot Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_rf)
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title("Confusion Matrix for Random Forest")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
