<a href="https://colab.research.google.com/github/Raneemmohamed/1/blob/main/cnn-lstm%20sign%20language%20recognation%20project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"ranemelbasuony","key":"cdd0d306a045fb7262f32df2625425b6"}'}

In [2]:
!pip install kaggle




In [3]:
import os
import shutil

os.makedirs("/root/.kaggle/", exist_ok=True)

shutil.move("kaggle.json", "/root/.kaggle/")

'/root/.kaggle/kaggle.json'

In [4]:
!chmod 600 ~/.kaggle/kaggle.json


In [5]:
!kaggle datasets download -d mohamedsaeed823/arslvideodataset

Dataset URL: https://www.kaggle.com/datasets/mohamedsaeed823/arslvideodataset
License(s): CC0-1.0


In [6]:
import zipfile

dataset_zip = "arslvideodataset.zip"
with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
    zip_ref.extractall("/content/dataset")

print("extract done: /content/dataset")

extract done: /content/dataset


In [7]:
import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Flatten, Dense, Dropout, LayerNormalization, LSTM, TimeDistributed, BatchNormalization, Bidirectional, MultiHeadAttention
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [8]:
def extract_frames(video_path, num_frames=30, target_size=(64, 64)):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_idxs = np.linspace(0, total_frames - 1, num_frames, dtype=int)

    for idx in frame_idxs:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, target_size)
        frames.append(frame)
    cap.release()

    while len(frames) < num_frames:
        frames.append(np.zeros((target_size[0], target_size[1], 3), dtype=np.uint8))

    return np.array(frames)


In [9]:
from sklearn.preprocessing import LabelEncoder

def load_videos_from_directory(base_path):
    X, y = [], []
    class_labels = sorted(os.listdir(base_path))

    for label in class_labels:
        label_path = os.path.join(base_path, label)
        if not os.path.isdir(label_path):
            continue

        for video_name in os.listdir(label_path):
            video_path = os.path.join(label_path, video_name)
            if video_path.endswith(('.mp4', '.avi', '.mov')):
                frames = extract_frames(video_path)
                X.append(frames)
                y.append(label)

    return np.array(X), np.array(y), class_labels

train_path = "/content/dataset/train"
val_path = "/content/dataset/val"

X_train, y_train, class_labels = load_videos_from_directory(train_path)
X_val, y_val, _ = load_videos_from_directory(val_path)

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

num_classes = len(class_labels)
y_train_onehot = tf.keras.utils.to_categorical(y_train_encoded, num_classes)
y_val_onehot = tf.keras.utils.to_categorical(y_val_encoded, num_classes)

In [73]:
import albumentations as A
import numpy as np

transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

def augment_frames(frames):
    return np.stack([transform(image=frame)["image"] for frame in frames], axis=0).astype(np.float32)

X_train_augmented = np.stack([augment_frames(video) for video in X_train], axis=0).astype(np.float32)
X_val_augmented = np.stack([augment_frames(video) for video in X_val], axis=0).astype(np.float32)

print("Fixed shape - X_train:", X_train_augmented.shape)
print("Fixed shape - X_val:", X_val_augmented.shape)


Fixed shape - X_train: (479, 30, 64, 64, 3)
Fixed shape - X_val: (72, 30, 64, 64, 3)


In [69]:
X_val = X_val[..., :3]  # Take only the first 3 channels
print("New shape of X_val:", X_val.shape)

New shape of X_val: (72, 30, 64, 64, 3)


In [90]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import TimeDistributed, GlobalAveragePooling2D, Bidirectional, GRU, Dense, GlobalAveragePooling1D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.layers import MultiHeadAttention

num_classes = 10

base_cnn = ResNet50(weights="imagenet", include_top=False, input_shape=(64, 64, 3))
for layer in base_cnn.layers[-20:]:
    layer.trainable = True

inputs = tf.keras.Input(shape=(30, 64, 64, 3))
cnn_features = TimeDistributed(base_cnn)(inputs)
cnn_features = TimeDistributed(GlobalAveragePooling2D())(cnn_features)
cnn_features = BatchNormalization()(cnn_features)

lstm = Bidirectional(GRU(256, return_sequences=True, dropout=0.4, recurrent_dropout=0.3, kernel_regularizer=tf.keras.regularizers.l2(1e-4)))(cnn_features)
num_classes=12
attention_output = MultiHeadAttention(num_heads=4, key_dim=128)(lstm, lstm)
attention_output = Dropout(0.3)(attention_output)
lstm_output = GlobalAveragePooling1D()(attention_output)
lstm_output = BatchNormalization()(lstm_output)
out = Dense(num_classes, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(1e-4))(lstm_output)

model = tf.keras.Model(inputs, out)
model.compile(optimizer=AdamW(learning_rate=5e-4, weight_decay=1e-4), loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()


In [91]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

lr_scheduler = ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6, verbose=1
)

early_stopping = EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True, verbose=1
)

checkpoint = ModelCheckpoint(
    "best_model.h5", monitor="val_loss", save_best_only=True, verbose=1
)

callbacks = [lr_scheduler, early_stopping, checkpoint]




In [92]:
model.fit(
    X_train_augmented, y_train,
    validation_data=(X_val_augmented, y_val),
    epochs=50,
    batch_size=32,
    callbacks=callbacks
)


Epoch 1/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2169 - loss: 3.6080
Epoch 1: val_loss improved from inf to 2.71802, saving model to best_model.h5




[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m616s[0m 7s/step - accuracy: 0.2222 - loss: 3.5928 - val_accuracy: 0.0833 - val_loss: 2.7180 - learning_rate: 5.0000e-04
Epoch 2/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.6041 - loss: 1.9440
Epoch 2: val_loss did not improve from 2.71802
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.6062 - loss: 1.9229 - val_accuracy: 0.0833 - val_loss: 2.7269 - learning_rate: 5.0000e-04
Epoch 3/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7737 - loss: 0.8901
Epoch 3: val_loss did not improve from 2.71802
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - accuracy: 0.7759 - loss: 0.8866 - val_accuracy: 0.0833 - val_loss: 2.7441 - learning_rate: 5.0000e-04
Epoch 4/50
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x7c3078d7c650>