In [None]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet169, DenseNet201
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import cv2
import os

# Root folder of the dataset; expected to hold one subdirectory per class.
DATASET_DIR = "/Users/aryabhattacharyya/Downloads/asl_dataset"  # Replace with the actual dataset folder path

# Input resolution (the standard DenseNet input size) and mini-batch size.
IMG_HEIGHT = 224
IMG_WIDTH = 224
BATCH_SIZE = 32

# Files used to persist the initially trained model and the fine-tuned model.
MODEL_SAVE_PATH = "sign_language_densenet169_model.h5"
FINETUNED_MODEL_PATH = "sign_language_densenet169_finetuned_model.h5"

# Fraction of every class that is held out for validation.
VALIDATION_SPLIT = 0.2

# Training-time preprocessing: rescale pixels to [0, 1] and apply random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=VALIDATION_SPLIT,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation-time preprocessing: rescale only. Validation images must not be
# randomly augmented, otherwise the reported validation metrics are measured
# on distorted inputs and fluctuate from epoch to epoch. The same
# validation_split is used so that the 'training' and 'validation' subsets
# remain complementary partitions of the directory listing.
val_data_gen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=VALIDATION_SPLIT
)

# Augmented training batches (the remaining 80% of the data)
train_generator = data_gen.flow_from_directory(
    DATASET_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training'
)

# Un-augmented validation batches (the held-out 20% of the data)
validation_generator = val_data_gen.flow_from_directory(
    DATASET_DIR,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation'
)

# Fail early if either subset is empty, which usually means the dataset
# directory does not contain one subdirectory per class.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError("No data found. Please ensure that the dataset is structured correctly with subdirectories for each class.")

# Backbone selection: "169" -> DenseNet169, "201" -> DenseNet201
use_densenet = "169"  # Change to "201" for DenseNet201

# Look up the requested backbone constructor; unknown versions are rejected.
densenet_cls = {"169": DenseNet169, "201": DenseNet201}.get(use_densenet)
if densenet_cls is None:
    raise ValueError("Invalid DenseNet version. Choose '169' or '201'.")

# ImageNet-pretrained feature extractor without its original classifier head
base_model = densenet_cls(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Keep the backbone weights fixed while the new head is trained
base_model.trainable = False

# Classification head: global pooling -> 256-unit ReLU layer -> dropout -> softmax
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(256, activation='relu')(pooled)
x = Dropout(0.5)(hidden)  # Dropout for regularization
# One softmax unit per class discovered by the training generator
output = Dense(train_generator.num_classes, activation='softmax')(x)

# Assemble backbone and head into a single trainable model
model = Model(inputs=base_model.input, outputs=output)

# Reuse a previously saved model when one exists to avoid retraining.
# The fine-tuned model is the final artifact of this script, so it is
# preferred over the initially trained model when both files are present.
if os.path.exists(FINETUNED_MODEL_PATH):
    print(f"Loading pre-trained model from {FINETUNED_MODEL_PATH}")
    model = tf.keras.models.load_model(FINETUNED_MODEL_PATH)
elif os.path.exists(MODEL_SAVE_PATH):
    print(f"Loading pre-trained model from {MODEL_SAVE_PATH}")
    model = tf.keras.models.load_model(MODEL_SAVE_PATH)
else:
    # Stage 1: train only the new classification head (backbone is frozen)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    EPOCHS = 10
    try:
        history = model.fit(
            train_generator,
            epochs=EPOCHS,
            validation_data=validation_generator
        )

        # Save the trained model
        model.save(MODEL_SAVE_PATH)
    except Exception as e:
        # Report the failure, then abort: continuing would fine-tune (and
        # later save) a model whose head was never successfully trained.
        print(f"Error during training: {e}")
        raise

    # Stage 2 (fine-tuning): unfreeze the backbone, then re-freeze its first
    # 400 layers so only the later layers are updated.
    base_model.trainable = True
    for layer in base_model.layers[:400]:
        layer.trainable = False

    # Changes to `trainable` only take effect after recompiling; a much lower
    # learning rate is used for the fine-tuning stage.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Fine-tune the model
    fine_tune_epochs = 10
    history_fine = model.fit(
        train_generator,
        epochs=fine_tune_epochs,
        validation_data=validation_generator
    )

    # Save the fine-tuned model
    model.save(FINETUNED_MODEL_PATH)

# Real-time prediction function
def predict_real_time():
    """Classify webcam frames with the module-level ``model`` and show the result.

    Frames are read from camera 0, preprocessed the same way as the training
    images (RGB channel order, resized to the model input size, scaled to
    [0, 1]) and the predicted class label is drawn on the displayed frame.
    The loop ends when a frame cannot be read or when 'q' is pressed. The
    camera and all OpenCV windows are released even if an error occurs.
    """
    # Order labels by class index so that position i matches model output i.
    class_labels = [
        label
        for label, _ in sorted(train_generator.class_indices.items(),
                               key=lambda item: item[1])
    ]
    cap = cv2.VideoCapture(0)  # Use the webcam for real-time video capture
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Unable to read frame.")
                break

            # OpenCV delivers BGR frames, whereas the Keras directory loader
            # used for training yields RGB images by default; convert so the
            # channel order matches what the model was trained on.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # cv2.resize expects the target size as (width, height).
            img = cv2.resize(rgb_frame, (IMG_WIDTH, IMG_HEIGHT))
            # Scale to [0, 1] and add the batch dimension.
            img_array = np.expand_dims(img.astype(np.float32) / 255.0, axis=0)

            # verbose=0 suppresses the per-call progress bar, which would
            # otherwise be printed once for every frame.
            prediction = model.predict(img_array, verbose=0)
            class_index = int(np.argmax(prediction))
            predicted_label = class_labels[class_index]

            # Display prediction on the original (BGR) frame
            cv2.putText(frame, f"Prediction: {predicted_label}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            cv2.imshow('Sign Language Recognition', frame)

            # Break on 'q' key
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close windows, even on an exception.
        cap.release()
        cv2.destroyAllWindows()

# Start real-time webcam prediction (comment out this line to disable it)
predict_real_time()




Found 4024 images belonging to 37 classes.
Found 1006 images belonging to 37 classes.
Loading pre-trained model from sign_language_densenet169_model.h5




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


2024-11-21 20:12:28.719 Python[60015:4474784] +[IMKClient subclass]: chose IMKClient_Modern
2024-11-21 20:12:28.719 Python[60015:4474784] +[IMKInputSession subclass]: chose IMKInputSession_Modern


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67