In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Rescaling
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import os

# --- Configuration ---
# Every path and hyperparameter used by the notebook is defined in this one place.

# 1. Google Drive locations (persist between Colab sessions)
DRIVE_PROJECT_DIR = '/content/drive/MyDrive/SignLink 2.0'
DRIVE_ZIP_PATH = f'{DRIVE_PROJECT_DIR}/archive (1).zip'   # dataset archive
MODEL_SAVE_DIR = f'{DRIVE_PROJECT_DIR}/MyModel'           # saved models go here

# 2. Local Colab scratch space (temporary, but fast I/O).
#    The dataset archive is extracted into this directory.
LOCAL_DATA_DIR = '/content/dataset'

# 3. Model & training hyperparameters
IMG_SIZE = (128, 128)   # passed as image_size when the datasets are loaded
BATCH_SIZE = 32         # images per batch
EPOCHS = 20             # number of training epochs


In [None]:
from google.colab import drive
import shutil
import subprocess

# 1. Mount Google Drive so the dataset archive and the model directory are reachable
drive.mount('/content/drive')

# 2. Fail fast if the archive is missing. This is checked *before* anything is
#    deleted, so a wrong path never wipes a previously extracted dataset.
if not os.path.isfile(DRIVE_ZIP_PATH):
    raise FileNotFoundError(f"Dataset archive not found: {DRIVE_ZIP_PATH}")

# 3. Start from an empty local directory so files left over from an earlier
#    run cannot mix with the fresh extraction.
print(f"Preparing local directory: {LOCAL_DATA_DIR}")
if os.path.exists(LOCAL_DATA_DIR):
    shutil.rmtree(LOCAL_DATA_DIR)
os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

# Create the model save directory in your Drive
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# 4. Extract the archive from Drive onto the local Colab disk.
print(f"Unzipping dataset from Drive to {LOCAL_DATA_DIR}...")
# An argument list (no shell) passes the path through unmodified even though it
# contains spaces and parentheses. "-q" keeps unzip quiet.
unzip_result = subprocess.run(
    ["unzip", "-q", DRIVE_ZIP_PATH, "-d", LOCAL_DATA_DIR],
    capture_output=True,
    text=True,
)
# unzip exit status: 0 = success, 1 = completed with warnings, >1 = failure.
if unzip_result.returncode > 1:
    raise RuntimeError(
        f"unzip failed with exit code {unzip_result.returncode}:\n{unzip_result.stderr}"
    )
if unzip_result.returncode == 1:
    print(f"unzip finished with warnings:\n{unzip_result.stderr}")

print("Unzipping complete. Data is ready.")

MessageError: Error: credential propagation was unsuccessful

In [None]:
# --- Load and Preprocess Data ---
# The class folders live in an "images" directory inside the extracted dataset.
# Deriving the path from LOCAL_DATA_DIR keeps it in sync with the configuration.
DATA_DIR = os.path.join(LOCAL_DATA_DIR, "images")

print(f"Loading data from: {DATA_DIR}")

# Arguments shared by both subsets. validation_split and seed MUST be identical
# in the two calls; otherwise the training and validation subsets can overlap.
split_kwargs = dict(
    validation_split=0.2,       # 80/20 train/validation split
    seed=123,                   # fixed seed -> reproducible split
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',   # one-hot labels, matching categorical_crossentropy
)

# --- Training subset (80% of the images) ---
train_dataset = image_dataset_from_directory(
    DATA_DIR, subset="training", **split_kwargs
)

# --- Validation subset (the remaining 20%) ---
validation_dataset = image_dataset_from_directory(
    DATA_DIR, subset="validation", **split_kwargs
)

# Read the class names before applying further dataset transformations:
# the transformed dataset objects do not expose `class_names`.
class_names = train_dataset.class_names
NUM_CLASSES = len(class_names)
print(f"\nFound {NUM_CLASSES} classes: {class_names}")

# Configure the datasets for performance.
# NOTE: no in-memory .cache() here. The decoded images are float32, so caching
# the ~157k training images at 128x128x3 would need roughly 30 GB of RAM and
# exhaust a Colab runtime. Prefetching alone overlaps loading with training.
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
validation_dataset = validation_dataset.prefetch(buffer_size=AUTOTUNE)

Loading data from: /content/dataset/images
Found 196621 files belonging to 110 classes.
Using 157297 files for training.
Found 196621 files belonging to 110 classes.
Using 39324 files for validation.

Found 110 classes: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'All_Gone', 'Alone', 'Baby', 'Beside', 'Book', 'Bowl', 'Bridge', 'Call', 'Camp', 'Cartridge', 'Flower', 'Fond', 'Glove', 'H', 'Hang', 'High', 'House', 'How_Many', 'I want Food', 'I_m Good', 'IorMe', 'J', 'Man', 'Marry', 'Meat', 'Medal', 'Mid_Day', 'Middle', 'Money', 'Moon', 'Mother', 'Opposite', 'Pain', 'Prisoner', 'Ring', 'Rose', 'See', 'Short', 'Stop', 'Superior', 'Theif', 'There is Gun', 'Thick', 'Thin', 'Tobacco', 'Up', 'V', 'Victory', 'Watch', 'Write', 'a', 'aboard', 'afraid', 'agree', 'assistance', 'b', 'bad', 'become', 'c', 'college', 'd', 'del', 'doctor', 'e', 'f', 'friend', 'from', 'g', 'i', 'k', 'l', 'm', 'n', 'not fine', 'nothing', 'o', 'ok fine', 'p', 'pray', 'q', 'r', 's', 'secondary', 'skin', 'small', 'spa

In [None]:
def build_model(input_shape, num_classes):
    """Create and compile a new CNN image classifier.

    Architecture: pixel rescaling, three Conv2D + MaxPooling2D stages
    (32, 64, 128 filters), then Flatten -> Dense(256) -> Dropout(0.5) ->
    softmax output.

    Args:
        input_shape: Shape of a single input image, without the batch
            dimension (the notebook passes IMG_SIZE plus a channel count of 3).
        num_classes: Number of output classes, i.e. units in the softmax layer.

    Returns:
        A compiled Sequential model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first layer, which is deprecated in Keras 3 and emits a UserWarning.
        tf.keras.Input(shape=input_shape),

        # Scale raw pixel values by 1/255
        Rescaling(1./255),

        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),

        Flatten(),

        Dense(256, activation='relu'),
        Dropout(0.5),

        Dense(num_classes, activation='softmax')
    ])

    # categorical_crossentropy expects one-hot encoded labels
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# --- Build the model ---
print("\nBuilding the CNN model...")
# Model input shape = the two image dimensions followed by 3 channels
INPUT_SHAPE = (*IMG_SIZE[:2], 3)
model = build_model(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES)
model.summary()


Building the CNN model...


  super().__init__(**kwargs)


In [None]:
# --- Train the Model ---
# The best model seen so far is written into MODEL_SAVE_DIR during training,
# so progress survives a Colab crash. (`os` comes from the import cell at the top.)
checkpoint_path = os.path.join(MODEL_SAVE_DIR, "best_model.h5")

# Save the full model (not just the weights), and only when validation
# accuracy improves on the previous best.
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)
training_callbacks = [checkpoint_callback]

print("\n--- Starting Model Training ---")

history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=validation_dataset,
    callbacks=training_callbacks,
)

print("\n--- Training Complete ---")
print(f"The best model was saved to {checkpoint_path}")

NameError: name 'MODEL_SAVE_DIR' is not defined

In [None]:
def plot_training_history(history):
    """Plot training/validation accuracy and loss per epoch, side by side.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            metric lists, such as the value returned by ``model.fit``. It must
            contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.

    Raises:
        KeyError: If any of the four metrics is missing, e.g. when the model
            was trained without validation data or without the accuracy metric.
    """
    metrics = history.history
    acc = metrics['accuracy']
    # Keras prefixes validation metrics with 'val_', not 'validation_'
    val_acc = metrics['val_accuracy']
    loss = metrics['loss']
    val_loss = metrics['val_loss']
    epochs_range = range(len(acc))

    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: accuracy
    acc_ax.plot(epochs_range, acc, label='Training Accuracy')
    acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend(loc='lower right')
    acc_ax.set_title('Training and Validation Accuracy')

    # Right panel: loss
    loss_ax.plot(epochs_range, loss, label='Training Loss')
    loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend(loc='upper right')
    loss_ax.set_title('Training and Validation Loss')

    plt.show()

# --- Plot Training Results ---
plot_training_history(history)

# --- Save the final epoch model ---
# This stores the model as it stands after the last epoch, next to the
# checkpointed best model in MODEL_SAVE_DIR.
final_model_path = os.path.join(MODEL_SAVE_DIR, 'final_epoch_model.h5')
model.save(final_model_path)

for message in (
    f"Final epoch model saved to {final_model_path}",
    "You can now download the 'best_model.h5' file for your Streamlit app.",
):
    print(message)