<a href="https://colab.research.google.com/github/MohammedZaid-AI/SignLink/blob/main/SignLink.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Rescaling
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import os

# --- Configuration ---
# Set all your paths and parameters here

# 1. Paths in Google Drive
DRIVE_ZIP_PATH = '/content/drive/MyDrive/SignLink 2.0/archive (1).zip'
MODEL_SAVE_DIR = '/content/drive/MyDrive/SignLink 2.0/MyModel'

# 2. Local Colab Paths (Temporary & Fast)
# We unzip the data here for fast I/O
LOCAL_DATA_DIR = '/content/dataset'

# 3. Model & Training Parameters
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 20

In [2]:
from google.colab import drive
import shutil

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Clean up old data and unzip to Colab's local disk
print(f"Preparing local directory: {LOCAL_DATA_DIR}")
# Remove old data if it exists, ensuring a fresh unzip
if os.path.exists(LOCAL_DATA_DIR):
    shutil.rmtree(LOCAL_DATA_DIR)
os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

# Create the model save directory in your Drive
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

print(f"Unzipping dataset from Drive to {LOCAL_DATA_DIR}...")
# Unzip quietly from your Drive to the fast local Colab disk
!unzip -q "{DRIVE_ZIP_PATH}" -d "{LOCAL_DATA_DIR}"

print("Unzipping complete. Data is ready.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Preparing local directory: /content/dataset
Unzipping dataset from Drive to /content/dataset...
Unzipping complete. Data is ready.


In [11]:
# --- 4. Load and Preprocess Data ---
DATA_DIR = "/content/dataset/images"

print(f"Loading data from: {DATA_DIR}")

# --- Load Training Data (80% of the images) ---
train_dataset = image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# --- Load Validation Data (20% of the images) ---
validation_dataset = image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Get class names and count
class_names = train_dataset.class_names
NUM_CLASSES = len(class_names)
print(f"\nFound {NUM_CLASSES} classes: {class_names}")

# --- Configure dataset for performance ---
AUTOTUNE = tf.data.AUTOTUNE

# CHANGED: We removed .cache() from both lines
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)



# --- ADD THIS CODE TO THE END OF CELL 3 ---

print("\n--- DEBUGGING CHECK ---")
try:
    train_batches = tf.data.experimental.cardinality(train_dataset)
    val_batches = tf.data.experimental.cardinality(validation_dataset)

    print(f"Number of batches in train_dataset: {train_batches.numpy()}")
    print(f"Number of batches in validation_dataset: {val_batches.numpy()}")

    if val_batches.numpy() == 0:
        print("\nERROR: Your validation dataset is EMPTY!")
        print("This is the reason for the 'KeyError'.")
        print("This might be because your dataset is too small or all files were assigned to 'train'.")
    else:
        print("\nSUCCESS: Your validation dataset is correctly loaded.")
except Exception as e:
    print(f"\nAn error occurred while checking datasets: {e}")

Loading data from: /content/dataset/images
Found 196621 files belonging to 110 classes.
Using 157297 files for training.
Found 196621 files belonging to 110 classes.
Using 39324 files for validation.

Found 110 classes: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'All_Gone', 'Alone', 'Baby', 'Beside', 'Book', 'Bowl', 'Bridge', 'Call', 'Camp', 'Cartridge', 'Flower', 'Fond', 'Glove', 'H', 'Hang', 'High', 'House', 'How_Many', 'I want Food', 'I_m Good', 'IorMe', 'J', 'Man', 'Marry', 'Meat', 'Medal', 'Mid_Day', 'Middle', 'Money', 'Moon', 'Mother', 'Opposite', 'Pain', 'Prisoner', 'Ring', 'Rose', 'See', 'Short', 'Stop', 'Superior', 'Theif', 'There is Gun', 'Thick', 'Thin', 'Tobacco', 'Up', 'V', 'Victory', 'Watch', 'Write', 'a', 'aboard', 'afraid', 'agree', 'assistance', 'b', 'bad', 'become', 'c', 'college', 'd', 'del', 'doctor', 'e', 'f', 'friend', 'from', 'g', 'i', 'k', 'l', 'm', 'n', 'not fine', 'nothing', 'o', 'ok fine', 'p', 'pray', 'q', 'r', 's', 'secondary', 'skin', 'small', 'spa

In [4]:
def build_model(input_shape, num_classes):
    """Creates and returns a new CNN model."""
    model = Sequential([
        Rescaling(1./255, input_shape=input_shape),

        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Flatten(),

        Dense(256, activation='relu'),
        Dropout(0.5),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# --- Build the model ---
print("\nBuilding the CNN model...")
INPUT_SHAPE = (IMG_SIZE[0], IMG_SIZE[1], 3)
model = build_model(INPUT_SHAPE, NUM_CLASSES)
model.summary()


Building the CNN model...


  super().__init__(**kwargs)


In [8]:
# --- 7. Train the Model ---
import os
# Create a checkpoint to save the best model during training
# This is your safety net against Colab crashes
checkpoint_path = os.path.join(MODEL_SAVE_DIR, "best_model.h5")
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=False,
    monitor='val_accuracy',  # Save the model with the best validation accuracy
    mode='max',
    save_best_only=True,     # Only save if it's better than the previous best
    verbose=1
)

print("\n--- Starting Model Training ---")

history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=EPOCHS,
    callbacks=[checkpoint_callback]  # Pass in the checkpoint callback
)

print("\n--- Training Complete ---")
print(f"The best model was saved to {checkpoint_path}")


--- Starting Model Training ---
Epoch 1/20
[1m4913/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9797 - loss: 0.0770
Epoch 1: val_accuracy improved from -inf to 0.98957, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 28ms/step - accuracy: 0.9797 - loss: 0.0770 - val_accuracy: 0.9896 - val_loss: 0.0484
Epoch 2/20
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.9806 - loss: 0.0757
Epoch 2: val_accuracy did not improve from 0.98957
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 28ms/step - accuracy: 0.9806 - loss: 0.0757 - val_accuracy: 0.9891 - val_loss: 0.0664
Epoch 3/20
[1m4915/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9811 - loss: 0.0710
Epoch 3: val_accuracy improved from 0.98957 to 0.98978, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 28ms/step - accuracy: 0.9811 - loss: 0.0710 - val_accuracy: 0.9898 - val_loss: 0.0572
Epoch 4/20
[1m4915/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9815 - loss: 0.0707
Epoch 4: val_accuracy improved from 0.98978 to 0.99031, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 29ms/step - accuracy: 0.9815 - loss: 0.0707 - val_accuracy: 0.9903 - val_loss: 0.0547
Epoch 5/20
[1m4914/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9817 - loss: 0.0730
Epoch 5: val_accuracy did not improve from 0.99031
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 28ms/step - accuracy: 0.9817 - loss: 0.0730 - val_accuracy: 0.9889 - val_loss: 0.0652
Epoch 6/20
[1m4914/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9819 - loss: 0.0707
Epoch 6: val_accuracy did not improve from 0.99031
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 28ms/step - accuracy: 0.9819 - loss: 0.0707 - val_accuracy: 0.9897 - val_loss: 0.0670
Epoch 7/20
[1m4914/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.982



[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 28ms/step - accuracy: 0.9828 - loss: 0.0683 - val_accuracy: 0.9904 - val_loss: 0.0597
Epoch 8/20
[1m4915/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9823 - loss: 0.0699
Epoch 8: val_accuracy did not improve from 0.99041
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 28ms/step - accuracy: 0.9823 - loss: 0.0699 - val_accuracy: 0.9887 - val_loss: 0.0662
Epoch 9/20
[1m4915/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9825 - loss: 0.0688
Epoch 9: val_accuracy improved from 0.99041 to 0.99049, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 28ms/step - accuracy: 0.9825 - loss: 0.0688 - val_accuracy: 0.9905 - val_loss: 0.0572
Epoch 10/20
[1m4914/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - accuracy: 0.9832 - loss: 0.0665
Epoch 10: val_accuracy improved from 0.99049 to 0.99107, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 29ms/step - accuracy: 0.9832 - loss: 0.0665 - val_accuracy: 0.9911 - val_loss: 0.0687
Epoch 11/20
[1m4915/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9843 - loss: 0.0654
Epoch 11: val_accuracy did not improve from 0.99107
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 28ms/step - accuracy: 0.9843 - loss: 0.0654 - val_accuracy: 0.9899 - val_loss: 0.0664
Epoch 12/20
[1m4913/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9837 - loss: 0.0674
Epoch 12: val_accuracy improved from 0.99107 to 0.99125, saving model to /content/drive/MyDrive/SignLink 2.0/MyModel/best_model.h5




[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 28ms/step - accuracy: 0.9837 - loss: 0.0674 - val_accuracy: 0.9913 - val_loss: 0.0642
Epoch 13/20
[1m4913/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 24ms/step - accuracy: 0.9836 - loss: 0.0707
Epoch 13: val_accuracy did not improve from 0.99125
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 32ms/step - accuracy: 0.9836 - loss: 0.0707 - val_accuracy: 0.9906 - val_loss: 0.0614
Epoch 14/20
[1m4914/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 0.9842 - loss: 0.0689
Epoch 14: val_accuracy did not improve from 0.99125
[1m4916/4916[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 31ms/step - accuracy: 0.9842 - loss: 0.0689 - val_accuracy: 0.9902 - val_loss: 0.0661
Epoch 15/20
[1m4913/4916[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - accuracy: 

In [10]:















def plot_training_history(history):
    """Plots the accuracy and loss from the training history."""
    acc = history.history['accuracy']
    val_acc = history.history['validation_accuracy']
    loss = history.history['loss']
    val_loss = history.history['validation_loss']
    epochs_range = range(len(acc))

    plt.figure(figsize=(14, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')

    plt.show()

# --- 9. Plot Training Results ---
plot_training_history(history)

# --- Save the final epoch model ---
final_model_path = os.path.join(MODEL_SAVE_DIR, 'final_epoch_model.h5')
model.save(final_model_path)
print(f"Final epoch model saved to {final_model_path}")
print("You can now download the 'best_model.h5' file for your Streamlit app.")

KeyError: 'validation_accuracy'