In [None]:
import time
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
from tensorflow.keras import regularizers
from tensorflow.keras.applications.vgg16 import preprocess_input  # VGG16 preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, confusion_matrix
import arabic_reshaper
from PIL import ImageFont, ImageDraw, Image
import matplotlib.pyplot as plt
import seaborn as sns
from bidi.algorithm import get_display


In [None]:
# Spatial size every image is resized to; also used as the input size of the
# VGG16 base built in create_vgg_model.
IMG_SIZE = (224, 224)
# Dataset root folder: load_dataset treats each sub-folder name as a class label.
dataset_path = 'RGB ArSL dataset'

In [None]:
def load_dataset(folder, img_size=IMG_SIZE):
    """Load every readable image under ``folder`` into memory.

    ``folder`` is expected to contain one sub-directory per class; the
    sub-directory name becomes the label of every image inside it.
    Entries that are not directories, and files OpenCV cannot decode, are
    skipped.

    Args:
        folder: Path of the dataset root directory.
        img_size: Target size passed to ``cv2.resize`` for every image.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays holding the resized RGB
        images and the class-folder name of each image, in matching order.
    """
    images = []
    labels = []
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting fixes the sample order, so a seeded split performed on the
    # result is reproducible across runs and machines.
    for label in sorted(os.listdir(folder)):
        label_folder = os.path.join(folder, label)
        if not os.path.isdir(label_folder):
            continue  # stray file next to the class folders
        for img_file in sorted(os.listdir(label_folder)):
            img = cv2.imread(os.path.join(label_folder, img_file))
            if img is None:
                continue  # not an image or unreadable
            # cv2.imread yields BGR; store RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(cv2.resize(img, img_size))
            labels.append(label)
    return np.array(images), np.array(labels)

In [None]:
X, y = load_dataset(dataset_path)
# Fail fast with a clear message instead of an obscure error in a later cell
# when the folder contains no readable images.
assert len(X) > 0, f"No images could be loaded from '{dataset_path}'"


In [None]:
# Map each class-folder name to an integer id, then one-hot encode the ids
# for the categorical cross-entropy loss the model is compiled with.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)
y_categorical = to_categorical(y_encoded)

In [None]:
# Persist the class names in encoder order; the real-time section reloads this
# file to map predicted indices back to labels.
np.save('label_classes.npy', label_encoder.classes_)


In [None]:
# Index -> class-folder name, in the encoder's class order.
label_map = dict(enumerate(label_encoder.classes_))
print(f"Original Label Map: {label_map}")

In [None]:
# Class-folder name -> text emitted when that sign is recognised.
# 'masafa' and 'mash' do not map to letters: they map to the control tokens
# '<space>' and '<delete>', which the real-time loop interprets as
# "append a space" and "remove the last captured character".
english_to_arabic = {
    'Ain': 'ع', 'Al': 'ال', 'Alef': 'ا', 'Beh': 'ب', 'Dad': 'ض', 'Dal': 'د',
    'Feh': 'ف', 'Ghain': 'غ', 'Hah': 'ح', 'Heh': 'ه', 'Jeem': 'ج', 'Kaf': 'ك',
    'Khah': 'خ', 'Laa': 'لا', 'Lam': 'ل',
    'masafa': '<space>',
    'mash': '<delete>',
    'Meem': 'م', 'Noon': 'ن', 'Qaf': 'ق',
    'Reh': 'ر', 'Sad': 'ص', 'Seen': 'س', 'Sheen': 'ش', 'Tah': 'ط', 'Teh': 'ت',
    'Teh_Marbuta': 'ة', 'Thal': 'ذ', 'Theh': 'ث', 'Waw': 'و', 'Yeh': 'ي',
    'Zah': 'ظ', 'Zain': 'ز'
}


In [None]:
# Index -> display text. Class names without an entry in english_to_arabic
# are kept unchanged.
label_map = dict(enumerate(
    english_to_arabic.get(class_name, class_name)
    for class_name in label_encoder.classes_
))
print(f"Arabic Label Map: {label_map}")

In [None]:
# VGG16 preprocessing must be applied exactly once. The freshly loaded images
# are an integer array and preprocess_input returns floats, so the dtype check
# makes this cell safe to re-run without preprocessing the data a second time.
if np.issubdtype(X.dtype, np.integer):
    X = preprocess_input(X)

In [None]:
# Stratify on the integer labels so every sign keeps the same share in the
# training and validation sets. Stratification needs at least two samples per
# class, so fall back to a plain random split otherwise.
_class_counts = np.bincount(y_encoded)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded if _class_counts.min() >= 2 else None,
)

In [None]:
def create_vgg_model(num_classes):
    """Build the classifier: a frozen ImageNet VGG16 base plus a dense head.

    The head is global average pooling followed by three Dense/BatchNorm/
    Dropout stages (512, 256 and 128 units) and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled ``Sequential`` model whose first layer is the VGG16 base.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    # Keep the pretrained convolutional weights fixed; only the head trains.
    backbone.trainable = False

    classifier = Sequential()
    classifier.add(backbone)
    classifier.add(GlobalAveragePooling2D())

    # First stage is the only one with L2 weight decay and the stronger dropout.
    classifier.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    classifier.add(BatchNormalization())
    classifier.add(Dropout(0.5))

    # Two narrower stages with identical structure.
    for units in (256, 128):
        classifier.add(Dense(units, activation='relu'))
        classifier.add(BatchNormalization())
        classifier.add(Dropout(0.3))

    classifier.add(Dense(num_classes, activation='softmax'))
    return classifier

In [None]:
# One output unit per class found in the dataset.
num_classes = len(label_encoder.classes_)
model = create_vgg_model(num_classes)
# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [None]:
# Mild on-the-fly augmentation for the training batches: small rotations,
# shifts, shear, zoom and brightness changes.
# NOTE(review): horizontal_flip mirrors the hand - confirm that a mirrored sign
# is still a valid example of the same class for this dataset.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    brightness_range=[0.9, 1.1],
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [None]:
# Balanced class weights computed from the training labels only.
y_train_int = np.argmax(y_train, axis=1)
present_classes = np.unique(y_train_int)
class_weights_array = compute_class_weight('balanced', classes=present_classes, y=y_train_int)
# Key each weight by its real class id rather than by its position in the
# array: if a class is absent from the training split, positions and ids no
# longer coincide and enumerate() would attach weights to the wrong classes.
# Classes without training samples get a neutral weight of 1.0 so the mapping
# still covers every output unit.
class_weights = dict.fromkeys(range(y_train.shape[1]), 1.0)
class_weights.update(zip(present_classes.tolist(), class_weights_array.tolist()))
print("Class weights:", class_weights)

In [None]:
# All three callbacks watch the validation loss.
callbacks = [
    # Stop after 5 epochs without improvement and restore the best weights.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # Write a checkpoint file only when the validation loss improves.
    tf.keras.callbacks.ModelCheckpoint('best_asl_vgg_model.keras', monitor='val_loss', save_best_only=True),
    # Halve the learning rate after 3 epochs without improvement.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
]

In [None]:
# Phase 1: train only the dense head (create_vgg_model freezes the VGG16 base).
# Augmentation is applied to the training batches only; validation uses the
# X_val / y_val arrays as they are.
initial_epochs = 30
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    epochs=initial_epochs,
    class_weight=class_weights,
    callbacks=callbacks
)


In [None]:
# create_vgg_model puts the VGG16 base first in the Sequential model.
base_model = model.layers[0]
# List the base's layers with their indices to help choose a fine-tuning cut-off.
print("\nVGG16 Base Model Layers:")
for idx, layer in enumerate(base_model.layers):
    print(f"Layer {idx}: {layer.name}")

In [None]:
# Number of trailing VGG16 layers to fine-tune. The convolutional base has 19
# layers, so the cut-off must be smaller than that: a larger value (such as 30)
# makes the slice below empty and leaves the whole network trainable. 4 selects
# the last convolutional block (block5_conv1..block5_pool); raise it to unfreeze
# earlier blocks as well.
N_FINE_TUNE_LAYERS = 4
assert 0 < N_FINE_TUNE_LAYERS < len(base_model.layers), "cut-off must leave some layers frozen"

base_model.trainable = True
for layer in base_model.layers[:-N_FINE_TUNE_LAYERS]:
    layer.trainable = False
print("\nFine-tuning trainable layers:")
for layer in base_model.layers:
    if layer.trainable:
        print(layer.name)

In [None]:
# Phase 2: re-compile after changing the `trainable` flags, using a learning
# rate 10x smaller than in phase 1.
model.compile(optimizer=Adam(learning_rate=1e-5), loss='categorical_crossentropy', metrics=['accuracy'])
fine_tune_epochs = 25
total_epochs = initial_epochs + fine_tune_epochs
# Epoch numbering continues from phase 1: training starts at `initial_epochs`
# and runs up to `total_epochs`. The same callback list as in phase 1 is reused.
history_fine = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    epochs=total_epochs,
    initial_epoch=initial_epochs,
    class_weight=class_weights,
    callbacks=callbacks
)

In [None]:
# Save the fine-tuned model; the evaluation and real-time sections reload this file.
model.save('asl_vgg_model.keras')

In [None]:
# Score the model on the 20% validation split held out by train_test_split.
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)
print(f'\nValidation Accuracy: {val_acc * 100:.2f}%')

# Load the saved model for evaluation

In [None]:
# NOTE(review): this is the same folder as `dataset_path`, so the "test" set
# below contains the images the model was trained on and the reported test
# accuracy will be optimistic. Point this at a held-out folder if one exists.
test_dataset_path = 'RGB ArSL dataset'

In [None]:
# Reuses load_dataset, so test images get the same RGB conversion and resize
# as the training images.
X_test, y_test = load_dataset(test_dataset_path)

In [None]:
# VGG16 preprocessing must be applied exactly once. The freshly loaded images
# are an integer array and preprocess_input returns floats, so the dtype check
# makes this cell safe to re-run without preprocessing the data a second time.
if np.issubdtype(X_test.dtype, np.integer):
    X_test = preprocess_input(X_test)


In [None]:
# Encode with the encoder fitted on the training labels so class indices match
# the model's output units. num_classes fixes the one-hot width explicitly.
y_test_encoded = label_encoder.transform(y_test)
y_test_cat = to_categorical(y_test_encoded, num_classes=num_classes)

In [None]:
# Reload the model file written by model.save after fine-tuning.
model = load_model('asl_vgg_model.keras')

In [None]:
# Loss and accuracy of the reloaded model on the test arrays.
test_loss, test_acc = model.evaluate(X_test, y_test_cat, verbose=2)
print(f'Test Accuracy: {test_acc * 100:.2f}%')

In [None]:
# Per-class metrics and confusion matrix for the test set.
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test_cat, axis=1)

class_names = label_encoder.classes_
# Pass the full list of class ids explicitly. Otherwise scikit-learn infers the
# classes from the ids that happen to occur in y_true / y_pred, and a class
# missing from both makes the report fail against `target_names` and shifts the
# confusion-matrix rows and columns relative to the tick labels.
class_ids = np.arange(len(class_names))

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion Matrix')
plt.show()

# Load the saved model for real-time inference

In [None]:
# Reload the trained model and the class names saved next to it.
model = load_model('asl_vgg_model.keras')
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which
# can execute code. Only load a label_classes.npy that this notebook produced;
# if the saved array is a plain string array the flag may be unnecessary - confirm.
label_classes = np.load('label_classes.npy', allow_pickle=True)

In [None]:
# Put the stored class order back into the encoder, then rebuild the
# index -> display-text map used by the real-time loop in a single pass.
label_encoder.classes_ = label_classes
label_map = {
    idx: english_to_arabic.get(class_name, class_name)
    for idx, class_name in enumerate(label_encoder.classes_)
}
print("Loaded Label Map for Inference:", label_map)

In [None]:
# Open capture device 0.
cap = cv2.VideoCapture(0)
# Characters committed so far (letters and spaces), in order.
captured_letters = []
# Label seen on the previous frame and the number of consecutive matching frames.
last_predicted_label = None
frames_with_same_letter = 0
# Consecutive matching frames required before a label is committed.
cooldown_threshold = 7
# Seconds without activity after which the loop prints the sentence and stops.
idle_timeout = 15
last_activity_time = time.time()

def format_arabic_text(letters):
    """Concatenate ``letters`` and pass the result through ``arabic_reshaper.reshape``.

    Args:
        letters: Iterable of strings (captured characters, in order).

    Returns:
        The reshaped string produced by ``arabic_reshaper``.
    """
    joined = ''.join(letters)
    return arabic_reshaper.reshape(joined)

def draw_text(frame, text, position, font_path="arial.ttf", font_size=35):
    """Draw ``text`` onto ``frame`` with a TrueType font and return the result.

    PIL is used for rendering so that a TrueType font can be chosen. The font
    is loaded once per ``(font_path, font_size)`` pair and cached on the
    function, because this is called for every video frame.

    Args:
        frame: Image array; it is passed to ``Image.fromarray`` unchanged.
        text: String to draw.
        position: ``(x, y)`` position handed to ``ImageDraw.text``.
        font_path: TrueType font file to use.
        font_size: Font size handed to ``ImageFont.truetype``.

    Returns:
        A new NumPy array with the text drawn on it.

    Raises:
        OSError: If the font file cannot be opened.
    """
    font_cache = draw_text.__dict__.setdefault("_font_cache", {})
    cache_key = (font_path, font_size)
    font = font_cache.get(cache_key)
    if font is None:
        font = ImageFont.truetype(font_path, font_size)
        font_cache[cache_key] = font

    img_pil = Image.fromarray(frame)
    # (0, 255, 0) is green whether the channels are ordered RGB or BGR, so no
    # colour conversion is needed around the PIL round-trip.
    ImageDraw.Draw(img_pil).text(position, text, font=font, fill=(0, 255, 0))
    return np.array(img_pil)

# Real-time loop: classify the centre square of each frame and commit a label
# once it has been stable for `cooldown_threshold` consecutive frames. The loop
# ends on 'q', on a failed frame read, or after `idle_timeout` seconds in
# which no sign was recognised.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Centred square region of interest, half the shorter frame side wide.
        height, width, _ = frame.shape
        roi_size = min(height, width) // 2
        center_x, center_y = width // 2, height // 2
        start_x = max(center_x - roi_size // 2, 0)
        end_x = start_x + roi_size
        start_y = max(center_y - roi_size // 2, 0)
        end_y = start_y + roi_size
        roi = frame[start_y:end_y, start_x:end_x]

        # Build the model input BEFORE drawing the guide rectangle: `roi` is a
        # view into `frame`, so drawing first would put the green border into
        # the pixels fed to the network.
        # OpenCV delivers BGR frames, whereas the training images were
        # converted to RGB before preprocess_input; convert here as well so
        # the channel order matches what the model was trained on.
        roi_rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        roi_resized = cv2.resize(roi_rgb, IMG_SIZE)
        roi_resized = preprocess_input(roi_resized)
        roi_resized = np.expand_dims(roi_resized, axis=0)

        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)

        prediction = model.predict(roi_resized, verbose=0)
        confidence = np.max(prediction)
        predicted_id = np.argmax(prediction)
        predicted_label = label_map.get(predicted_id, '')
        if confidence < 0.7:
            predicted_label = None  # too uncertain: treat as "no sign"

        if predicted_label == last_predicted_label:
            frames_with_same_letter += 1
        else:
            frames_with_same_letter = 0

        if frames_with_same_letter >= cooldown_threshold:
            if predicted_label == '<space>':
                captured_letters.append(' ')
            elif predicted_label == '<delete>' and captured_letters:
                captured_letters.pop()
            elif predicted_label and predicted_label not in ['<space>', '<delete>']:
                captured_letters.append(predicted_label)
            # Only a recognised sign counts as activity. A stable run of
            # "no sign" frames also reaches this branch and must not reset the
            # idle timer, otherwise the timeout could never fire while nothing
            # is being shown to the camera.
            if predicted_label:
                last_activity_time = time.time()
            # Restart the count so a held sign has to become stable again
            # before it is committed a second time.
            last_predicted_label = None
            frames_with_same_letter = 0
        else:
            last_predicted_label = predicted_label

        if time.time() - last_activity_time > idle_timeout:
            print("Final Sentence:", format_arabic_text(captured_letters))
            break

        sentence = format_arabic_text(captured_letters)
        frame = draw_text(frame, sentence, (10, 30))
        cv2.imshow('ASL Recognition - VGG', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("Final Sentence:", format_arabic_text(captured_letters))
            break
finally:
    # Always free the camera and close the window, even if a frame fails to
    # process (for example a missing font file or a prediction error).
    cap.release()
    cv2.destroyAllWindows()