## Import Libraries

In [18]:
import numpy as np 
import cv2
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
import matplotlib.pyplot as plt
import os


## Create training, validation, and test data sets

In [None]:
img_size = 200
batch_size = 32

# Create training, validation, and test data sets

class ASL():
  def __init__(self):
    self.train_ds = None
    self.val_ds = None
    self.test_ds = None
    self.labels = os.listdir('asl_alphabet/train')
    self.model = None

  def load_data(self):
    self.train_dir = 'asl_alphabet/train'
    self.test_dir = 'asl_alphabet/test'

    self.train_ds = tf.keras.preprocessing.image_dataset_from_directory(
        'asl_alphabet/train',
        image_size=(img_size, img_size),
        subset='training',
        validation_split=0.2,
        batch_size=batch_size,
        seed=123
    )

    self.val_ds = tf.keras.preprocessing.image_dataset_from_directory(
        'asl_alphabet/train',
        image_size=(img_size, img_size),
        subset='validation',
        validation_split=0.2,
        batch_size=batch_size,
        seed=123
    )

    self.test_ds = tf.keras.preprocessing.image_dataset_from_directory(
        'asl_alphabet/test',
        image_size=(img_size, img_size),
        batch_size=batch_size,
        seed=123
    )

    AUTOTUNE = tf.data.AUTOTUNE
    self.train_ds = self.train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    self.val_ds = self.val_ds.cache().prefetch(buffer_size=AUTOTUNE)
    self.test_ds = self.test_ds.cache().prefetch(buffer_size=AUTOTUNE)

  def build_model(self):
    self.model = models.Sequential([
      layers.Input(shape=(200, 200, 3)),
      layers.Rescaling(1./255),
      
      layers.Conv2D(32, (3, 3)),
      layers.LeakyReLU(negative_slope=0.01),
      layers.MaxPooling2D(),

      layers.Conv2D(64, (3, 3)),
      layers.LeakyReLU(negative_slope=0.01),
      layers.MaxPooling2D(),

      layers.Conv2D(128, (3, 3)),
      layers.LeakyReLU(negative_slope=0.01),
      layers.MaxPooling2D(),

      layers.Flatten(),
      layers.Dense(128),
      layers.LeakyReLU(negative_slope=0.01),
      layers.Dropout(0.7),
      layers.Dense(len(self.labels), activation='softmax')
    ])

    self.model.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    self.model.fit(self.train_ds, validation_data=self.val_ds, epochs=3)
    self.model.evaluate(self.test_ds)
    self.model.save('asl_cnn_model.keras')



In [20]:
asl = ASL()
asl.load_data()
asl.build_model()


Found 84000 files belonging to 28 classes.
Using 67200 files for training.
Found 84000 files belonging to 28 classes.
Using 16800 files for validation.
Found 28 files belonging to 28 classes.
Epoch 1/10
[1m  62/2100[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:53[0m 85ms/step - accuracy: 0.0393 - loss: 3.9709

KeyboardInterrupt: 

## Real Time Recognition

In [17]:
model = tf.keras.models.load_model('asl_cnn_model.keras')
labels = asl.labels

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils

# Start webcam
cap = cv2.VideoCapture(0)
print("📷 Press 'q' to quit.")

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get bounding box of hand
            h, w, _ = frame.shape
            x_coords = [lm.x for lm in hand_landmarks.landmark]
            y_coords = [lm.y for lm in hand_landmarks.landmark]
            padding = 70
            x_min = max(int(min(x_coords) * w) - padding, 0)
            x_max = min(int(max(x_coords) * w) + padding, w)
            y_min = max(int(min(y_coords) * h) - padding, 0)
            y_max = min(int(max(y_coords) * h) + padding, h)

            # Clamp values
            x_min = max(x_min, 0)
            y_min = max(y_min, 0)
            x_max = min(x_max, w)
            y_max = min(y_max, h)

            # Extract hand ROI
            hand_img = frame[y_min:y_max, x_min:x_max]
            if hand_img.size == 0:
                continue  # skip if invalid crop

            # Preprocess for model
            hand_resized = cv2.resize(hand_img, (img_size, img_size))
            hand_input = hand_resized.astype("float32") / 255.0
            hand_input = np.expand_dims(hand_input, axis=0)

            cv2.imshow("Hand Input", hand_resized)


            # Predict
            prediction = model.predict(hand_input)
            pred_idx = np.argmax(prediction[0])
            pred_label = labels[pred_idx]
            confidence = prediction[0][pred_idx]

            print("Prediction vector:", prediction[0])
            print("Predicted vector size:", prediction.shape)


            # Draw landmarks & label
            mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            cv2.putText(frame, f'{pred_label} ({confidence:.3f})', (x_min, y_min - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (255, 255, 0), 2)

    cv2.imshow("ASL Recognizer", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

I0000 00:00:1747520371.418248 3593349 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M1 Pro
W0000 00:00:1747520371.426421 3659467 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1747520371.434703 3659467 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


📷 Press 'q' to quit.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
Prediction vector: [9.17215832e-03 8.10374968e-06 9.33553679e-10 2.45381883e-12
 1.32249985e-02 2.22705445e-08 6.08098708e-06 3.64925477e-08
 5.96974918e-04 3.84655315e-03 2.23765582e-07 3.65116222e-08
 8.26567650e-01 1.70865394e-02 2.11840324e-06 2.19735540e-07
 2.93096576e-07 1.19758290e-04 1.02994204e-01 6.67487257e-05
 5.26901340e-06 3.53953010e-06 3.99967455e-08 2.58420892e-02
 3.36900703e-05 4.97810049e-07 4.21888253e-04 3.48188621e-07
 4.10152579e-09]
Predicted vector size: (1, 29)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Prediction vector: [8.8917920e-03 7.4235340e-06 8.3557644e-10 2.0848930e-12 1.2856483e-02
 2.0726890e-08 5.6621684e-06 3.3392332e-08 5.7407503e-04 3.7907804e-03
 2.0704617e-07 3.3598432e-08 8.2854825e-01 1.6993964e-02 1.9792324e-06
 2.0570975e-07 2.7544709e-07 1.1343296e-04 1.0254392e-01 6.4282380e-05
 4.8816064e-06 3.3180190e-06 3.6976470

## Static Recognition

In [16]:
import cv2
import numpy as np
import tensorflow as tf

# Load the trained model
model = tf.keras.models.load_model('asl_cnn_model.keras')

# Load the labels
labels = os.listdir('asl_alphabet/train')  # Ensure this matches your training setup

letter = 'A'
# Load the image
image_path = f'asl_alphabet/test/{letter}/{letter}_test.jpg'
img = cv2.imread(image_path)

# Preprocess the image
img_resized = cv2.resize(img, (img_size, img_size))  # Resize to model input size
img_input = img_resized.astype('float32') / 255.0  # Normalize pixel values
img_input = np.expand_dims(img_input, axis=0)  # Add batch dimension

# Predict the class
prediction = model.predict(img_input)
pred_idx = np.argmax(prediction[0])  # Get the index of the highest probability
pred_label = labels[pred_idx]  # Map index to label
confidence = prediction[0][pred_idx]  # Get confidence score

# Print the result
print(f"Predicted Class: {pred_label}, Confidence: {confidence:.3f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
Predicted Class: H, Confidence: 0.848


In [78]:
img = cv2.imread('asl_alphabet/test/A/A_test.jpg')
img_resized = cv2.resize(img, (img_size, img_size))
img_normalized = img_resized.astype('float32') / 255.0
img_input = np.expand_dims(img_normalized, axis=0)