In [None]:
!mkdir -p sign_language_data
!wget -q https://github.com/loicmarie/sign-language-alphabet-recognizer/archive/master.zip
!unzip -q master.zip
!mv sign-language-alphabet-recognizer-master/dataset/* sign_language_data/
!rm -rf sign-language-alphabet-recognizer-master master.zip

mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/A' to 'sign_language_data/A': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/B' to 'sign_language_data/B': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/C' to 'sign_language_data/C': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/D' to 'sign_language_data/D': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/del' to 'sign_language_data/del': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/E' to 'sign_language_data/E': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/F' to 'sign_language_data/F': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/G' to 'sign_language_data/G': Directory not empty
mv: cannot move 'sign-language-alphabet-recognizer-master/dataset/H'

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import mediapipe as mp
from IPython.display import display, Javascript, HTML
from google.colab.output import eval_js
from base64 import b64decode
import time
import shutil

class SignLanguageDetector:
    """Hand-sign classifier: MediaPipe locates the hand, MobileNetV2 labels it.

    The class names are the sub-directory names of ``sign_language_data``
    (sorted), and the classifier head has one softmax unit per class.
    """

    def __init__(self, model_path=None):
        """Set up the hand tracker, the class list and the Keras model.

        Args:
            model_path: Optional path to a saved Keras model. When it exists
                the model is loaded from it; otherwise a new model is built.
        """
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,
            max_num_hands=1,
            min_detection_confidence=0.7,
            min_tracking_confidence=0.5
        )
        self.mp_draw = mp.solutions.drawing_utils

        # Only directories are classes; stray files (e.g. a README or
        # .DS_Store) would otherwise inflate the class count and shift labels.
        data_dir = 'sign_language_data'
        if os.path.isdir(data_dir):
            self.classes = sorted(
                entry for entry in os.listdir(data_dir)
                if os.path.isdir(os.path.join(data_dir, entry))
            )
        else:
            self.classes = []

        self.image_size = (224, 224)

        if model_path and os.path.exists(model_path):
            print(f"Loading existing model from {model_path}")
            self.model = load_model(model_path)
        else:
            print("Building new model")
            self.model = self.build_model()

    def create_reduced_dataset(self, source_dir, target_dir, samples_per_class=150):
        """Copy a random subset of each class from ``source_dir`` to ``target_dir``.

        ``target_dir`` is deleted and recreated on every call.

        Args:
            source_dir: Directory holding one sub-directory per class.
            target_dir: Destination directory (wiped first).
            samples_per_class: Upper bound on files copied per class; classes
                with fewer files are copied in full.

        Raises:
            ValueError: If ``source_dir`` and ``target_dir`` are the same
                directory (wiping the target would destroy the source).
        """
        if os.path.abspath(source_dir) == os.path.abspath(target_dir):
            raise ValueError("target_dir must differ from source_dir")

        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        os.makedirs(target_dir)

        for class_name in self.classes:
            source_class_dir = os.path.join(source_dir, class_name)
            target_class_dir = os.path.join(target_dir, class_name)
            os.makedirs(target_class_dir)

            # Regular files only (copy2 cannot copy a directory); sorted so
            # the sample is reproducible when NumPy's global seed is set.
            files = sorted(
                name for name in os.listdir(source_class_dir)
                if os.path.isfile(os.path.join(source_class_dir, name))
            )
            selected_files = np.random.choice(files, min(samples_per_class, len(files)), replace=False)

            for file_name in selected_files:
                shutil.copy2(
                    os.path.join(source_class_dir, file_name),
                    os.path.join(target_class_dir, file_name)
                )

        print(f"Created reduced dataset with {samples_per_class} samples per class")

    def build_model(self):
        """Build and compile a frozen MobileNetV2 backbone with a dense head.

        Returns:
            A compiled Keras ``Model`` with ``len(self.classes)`` softmax outputs.

        Raises:
            ValueError: If no classes were discovered, since the output layer
                would have zero units.
        """
        # Checked first so the failure happens before any weights are fetched.
        if not self.classes:
            raise ValueError(
                "No classes found in 'sign_language_data'; cannot size the output layer"
            )

        base_model = MobileNetV2(
            input_shape=(224, 224, 3),
            include_top=False,
            weights='imagenet'
        )
        # Transfer learning: only the new head below is trained.
        base_model.trainable = False

        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(512, activation='relu')(x)
        x = Dropout(0.3)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.3)(x)
        outputs = Dense(len(self.classes), activation='softmax')(x)

        model = Model(inputs=base_model.input, outputs=outputs)
        model.compile(
            optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    def train_model(self, train_dir, validation_split=0.2, epochs=20):
        """Train the model on images under ``train_dir``.

        Args:
            train_dir: Directory with one sub-directory per class.
            validation_split: Fraction of images held out for validation.
            epochs: Maximum number of epochs (early stopping may end sooner).

        Returns:
            The Keras ``History`` object returned by ``fit``.
        """
        print(f"Starting model training with dataset from: {train_dir}")

        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            horizontal_flip=True,
            validation_split=validation_split,
            fill_mode='nearest'
        )
        # Validation images are only rescaled: augmenting them makes val_loss
        # noisy, and val_loss drives both callbacks below. The same
        # validation_split keeps the train/validation partition identical.
        validation_datagen = ImageDataGenerator(
            rescale=1./255,
            validation_split=validation_split
        )

        # Pin the label order to self.classes so the index -> name lookup in
        # predict() matches the order the network was trained with.
        class_order = self.classes or None

        train_generator = train_datagen.flow_from_directory(
            train_dir,
            target_size=self.image_size,
            batch_size=32,
            class_mode='categorical',
            classes=class_order,
            subset='training',
            shuffle=True
        )

        validation_generator = validation_datagen.flow_from_directory(
            train_dir,
            target_size=self.image_size,
            batch_size=32,
            class_mode='categorical',
            classes=class_order,
            subset='validation',
            shuffle=False
        )

        history = self.model.fit(
            train_generator,
            validation_data=validation_generator,
            epochs=epochs,
            callbacks=[
                tf.keras.callbacks.EarlyStopping(
                    monitor='val_loss',
                    patience=3,
                    restore_best_weights=True
                ),
                tf.keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.2,
                    patience=2,
                    min_lr=1e-6
                )
            ]
        )

        return history

    def save_model(self, filepath):
        """Save the current model to ``filepath`` and report the location."""
        self.model.save(filepath)
        print(f"Model saved to {filepath}")

    def preprocess_frame(self, frame):
        """Locate the hand in a BGR frame and return a model-ready crop.

        The landmark overlay is drawn on ``frame`` in place, but the crop is
        taken from a separate RGB copy so the network sees clean pixels in
        the same channel order as its training images.

        Args:
            frame: BGR image array (OpenCV channel order), or ``None``.

        Returns:
            ``(hand_region, True)`` where ``hand_region`` is an RGB float
            array resized to ``self.image_size`` and scaled to [0, 1], or
            ``(None, False)`` when there is no usable frame or no hand.
        """
        if frame is None or frame.size == 0:
            return None, False

        # cvtColor returns a new array, so drawing on `frame` below never
        # touches the pixels that are cropped for the classifier.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = self.hands.process(rgb_frame)

        if not results.multi_hand_landmarks:
            return None, False

        for hand_landmarks in results.multi_hand_landmarks:
            self.mp_draw.draw_landmarks(
                frame,
                hand_landmarks,
                self.mp_hands.HAND_CONNECTIONS
            )

        h, w, _ = frame.shape
        landmarks = results.multi_hand_landmarks[0].landmark
        x_coords = [int(lm.x * w) for lm in landmarks]
        y_coords = [int(lm.y * h) for lm in landmarks]

        # Clamp every corner to the frame: a landmark that falls outside the
        # image would otherwise give a negative bound, which NumPy slicing
        # interprets as counting from the end.
        padding = 40
        x1 = min(w, max(0, min(x_coords) - padding))
        y1 = min(h, max(0, min(y_coords) - padding))
        x2 = max(0, min(w, max(x_coords) + padding))
        y2 = max(0, min(h, max(y_coords) + padding))

        hand_region = rgb_frame[y1:y2, x1:x2]
        if hand_region.size > 0:
            hand_region = cv2.resize(hand_region, self.image_size)
            hand_region = hand_region / 255.0
            return hand_region, True

        return None, False

    def predict(self, frame):
        """Classify the hand sign visible in ``frame``.

        Args:
            frame: BGR image array passed on to ``preprocess_frame``.

        Returns:
            ``(label, confidence)`` with ``confidence`` as a Python float, or
            ``(None, 0.0)`` when no hand is detected. If the predicted index
            has no entry in ``self.classes`` the index itself is returned as
            a string.
        """
        processed_frame, hand_detected = self.preprocess_frame(frame)

        if not hand_detected or processed_frame is None:
            return None, 0.0

        input_data = np.expand_dims(processed_frame, axis=0)
        prediction = self.model.predict(input_data, verbose=0)
        class_index = int(np.argmax(prediction))
        confidence = float(np.max(prediction))

        # A model loaded from disk without the dataset directory present has
        # no class names; fall back to the raw index rather than IndexError.
        if class_index < len(self.classes):
            predicted_class = self.classes[class_index]
        else:
            predicted_class = str(class_index)
        return predicted_class, confidence

def train_and_save_model():
    """Build a detector, train it on an 80-image-per-class subset, and save it.

    Returns:
        The training history returned by ``SignLanguageDetector.train_model``.
    """
    reduced_dir = 'reduced_sign_language_data1'
    sign_detector = SignLanguageDetector()

    # Work on a small random subset so training stays quick.
    sign_detector.create_reduced_dataset(
        source_dir='sign_language_data',
        target_dir=reduced_dir,
        samples_per_class=80,
    )

    print("Training model...")
    training_history = sign_detector.train_model(reduced_dir, epochs=20)
    sign_detector.save_model('sign_language_model1.h5')

    return training_history

def inference_only():
    """Run live sign prediction on the browser video stream.

    Loads the detector from ``sign_language_model1.h5``, then repeatedly
    pulls a frame through ``stream_frame`` (JavaScript), predicts the sign
    and pushes a label back to the page. The loop ends when the stream
    reports an empty image, or after too many consecutive failures.

    Raises:
        NameError: If the notebook helpers ``get_video_stream`` or
            ``js_to_image`` have not been defined before this is called.
    """
    import json  # local import: only needed to quote the label for JavaScript

    # Fail before the (slow) model load, with a message that names the fix,
    # rather than with a bare NameError halfway through start-up.
    missing = [name for name in ('get_video_stream', 'js_to_image')
               if name not in globals()]
    if missing:
        raise NameError(
            "inference_only() needs these notebook helpers to be defined first: "
            + ", ".join(missing)
        )

    detector = SignLanguageDetector(model_path='sign_language_model1.h5')

    print("Starting video stream... Click on video to stop")
    # NOTE(review): the return value is not used here; kept referenced in case
    # the helper returns a handle that must stay alive - confirm against its definition.
    video_stream = get_video_stream()

    # A persistent failure (dead stream, broken helper) used to spin forever
    # printing the same error; give up after this many failures in a row.
    max_consecutive_errors = 10
    consecutive_errors = 0

    while True:
        try:
            frame_data = eval_js('stream_frame("", "")')
            # NOTE(review): an empty/None reply is treated as end-of-stream too;
            # presumably stream_frame returns a falsy value on shutdown - confirm.
            if not frame_data or frame_data['image'] == '':
                break

            frame = js_to_image(frame_data['image'])
            predicted_sign, confidence = detector.predict(frame)

            if predicted_sign:
                label = f'Sign: {predicted_sign} ({confidence:.2f})'
                # json.dumps yields a correctly quoted/escaped JS string
                # literal even if a class name contains quotes or backslashes.
                eval_js(f'stream_frame({json.dumps(label)}, "")')
            else:
                eval_js('stream_frame("No hand detected", "")')

            consecutive_errors = 0

        except Exception as e:
            consecutive_errors += 1
            print(f"Frame processing error: {str(e)}")
            if consecutive_errors >= max_consecutive_errors:
                print(f"Stopping after {consecutive_errors} consecutive frame errors")
                break

if __name__ == "__main__":
    # Train only when the saved model is missing. The filename must match the
    # one train_and_save_model() writes and inference_only() loads
    # ('sign_language_model1.h5'); checking a different name retrains on
    # every run.
    if not os.path.exists('sign_language_model1.h5'):
        print("Training new model...")
        train_and_save_model()

    print("Starting inference...")
    inference_only()

Training new model...
Building new model
Created reduced dataset with 80 samples per class
Training model...
Starting model training with dataset from: reduced_sign_language_data1
Found 1856 images belonging to 29 classes.
Found 464 images belonging to 29 classes.
Epoch 1/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 654ms/step - accuracy: 0.0790 - loss: 3.3913 - val_accuracy: 0.2802 - val_loss: 2.5154 - learning_rate: 0.0010
Epoch 2/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 516ms/step - accuracy: 0.3737 - loss: 2.1427 - val_accuracy: 0.4978 - val_loss: 1.8299 - learning_rate: 0.0010
Epoch 3/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 543ms/step - accuracy: 0.5558 - loss: 1.4369 - val_accuracy: 0.5000 - val_loss: 1.5473 - learning_rate: 0.0010
Epoch 4/20
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 512ms/step - accuracy: 0.6086 - loss: 1.2193 - val_accuracy: 0.6207 - val_loss: 1.3882 - learning_



Model saved to sign_language_model1.h5
Starting inference...
Loading existing model from sign_language_model1.h5




Starting video stream... Click on video to stop


NameError: name 'get_video_stream' is not defined

In [None]:
from tensorflow.keras.models import load_model

# Location of the model file that this cell reloads.
model_path = "/content/sign_language_model1.h5"

try:
    # Deserialize the saved model, then print its layer table as a sanity check.
    model = load_model(model_path)
    model.summary()
except Exception as load_error:
    # Report the problem instead of raising. If load_model itself failed,
    # this cell leaves `model` unassigned.
    print("Error loading model:", load_error)




In [None]:
# Show the input shape the loaded model expects, so the preprocessing applied
# to images before model.predict() can be checked against it.
print("Model Input Shape:", model.input_shape)


Model Input Shape: (None, 224, 224, 3)


In [None]:
import os

import cv2
import numpy as np

# Directory whose class sub-folders the model was trained on; it is also the
# source of the label names below.
dataset_dir = "/content/reduced_sign_language_data1"
image_path = os.path.join(dataset_dir, "S", "S1207.jpg")

image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV decodes to BGR, while Keras' directory loader feeds RGB images by
# default; convert so the channel order matches what the model was trained on.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

image = cv2.resize(image, (224, 224))

# Same 1/255 scaling as used during training.
image = image / 255.0

# Add the batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
image = np.expand_dims(image, axis=0)

prediction = model.predict(image)
print("Prediction:", prediction)

predicted_class = np.argmax(prediction)

# The model has one output per class folder (29 here, not just the 26
# letters), indexed in sorted folder-name order. Deriving the labels from the
# folders keeps every index valid; a hard-coded A-Z list raises IndexError for
# the non-letter classes.
class_labels = sorted(
    name for name in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, name))
)

# Get the predicted label
predicted_label = class_labels[predicted_class]

print(f"Predicted Class: {predicted_class} ({predicted_label})")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Prediction: [[1.8165882e-03 1.0024331e-02 8.9924596e-04 2.1543726e-04 1.6586596e-01
  6.9982000e-04 1.4684184e-03 7.0171704e-04 2.5500709e-02 1.6990203e-02
  8.2535349e-05 5.3459115e-04 1.2466473e-02 3.8058630e-03 3.6430403e-03
  8.4844261e-02 1.0461976e-01 2.4063441e-03 4.1684324e-01 2.7212767e-02
  1.9051476e-03 4.1682334e-03 1.8127653e-03 3.7212979e-02 1.0643373e-02
  4.7550380e-02 6.2574865e-03 9.4118286e-03 3.9668192e-04]]
Predicted Class: 18 (S)
