In [None]:
!pip install tensorflow
!pip install numpy


In [None]:
from google.colab import drive

# Expose Google Drive to this runtime under /content/drive.
drive.mount("/content/drive")

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers, callbacks

# ===============================
# CONFIG
# ===============================
# Adjust these paths as per your Colab setup
!cp -r /content/drive/MyDrive/Urdu_letter_handwriting_dataset /content/datasets_local
DATASET_PATH  = "/content/datasets_local"
SAVE_PATH = "/content/drive/MyDrive/vgg1_urdu_custom.keras" # Using .keras format
IMG_SIZE = (128, 128)
BATCH_SIZE = 64
VAL_SPLIT = 0.2
SEED = 42

In [None]:


# ===============================
# DATA PREPROCESSING & LOADING
# ===============================
def preprocess(image, label):
    """Rescale and invert an image (or batch of images); pass the label through.

    The image is cast to float32, divided by 255 and then subtracted from 1.
    """
    scaled = tf.cast(image, tf.float32) / 255.0
    # Inversion turns a light background with dark strokes into a dark background
    # with bright strokes, so the letter itself carries the high activations.
    # (Assumes the source images are dark ink on a light background.)
    inverted = 1.0 - scaled
    return inverted, label

# Arguments common to both splits. The training and validation loaders must share
# the same seed and validation_split so that the two subsets do not overlap.
_split_kwargs = dict(
    labels="inferred",
    label_mode="int",
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    color_mode="grayscale",
    shuffle=True,
    seed=SEED,
    validation_split=VAL_SPLIT,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    DATASET_PATH, subset="training", **_split_kwargs
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    DATASET_PATH, subset="validation", **_split_kwargs
)

# Apply the rescale/invert step to every batch and overlap input work with training.
train_ds = train_ds.map(preprocess).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.map(preprocess).prefetch(buffer_size=tf.data.AUTOTUNE)

# Light geometric augmentation: small random rotations, zooms and shifts only,
# so the shape of each letter stays recognisable.
data_augmentation = models.Sequential(
    [
        layers.RandomRotation(factor=0.05),
        layers.RandomZoom(height_factor=0.05),
        layers.RandomTranslation(height_factor=0.05, width_factor=0.05),
    ]
)

# =======================================
# CUSTOM VGG-STYLE ARCHITECTURE (From Scratch)
# =======================================
def _conv_bn_relu(x, filters):
    """Apply one 3x3 'same' convolution, batch normalisation and ReLU to `x`."""
    # The conv bias is dropped because BatchNormalization supplies its own offset.
    x = layers.Conv2D(filters, (3, 3), padding="same", use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    return layers.ReLU()(x)


def _vgg_block(x, filters, num_convs, dropout_rate):
    """Stack `num_convs` conv units, then 2x2 max-pooling and dropout."""
    for _ in range(num_convs):
        x = _conv_bn_relu(x, filters)
    x = layers.MaxPooling2D((2, 2))(x)
    return layers.Dropout(dropout_rate)(x)


def build_custom_vgg(num_classes):
    """Build and compile the VGG-style classifier for single-channel letter images.

    Args:
        num_classes: Number of output units of the final softmax layer.

    Returns:
        A compiled Keras model named "Urdu_VGG_Custom" that takes inputs of shape
        (IMG_SIZE[0], IMG_SIZE[1], 1) and is trained with sparse integer labels.
    """
    inputs = layers.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 1))

    # Augmentation inside the model
    x = data_augmentation(inputs)

    # (filters, number of conv units, dropout rate) for each block. Width and
    # dropout both grow with depth:
    #   Block 1: fine details (edges/strokes)
    #   Block 2: shapes (curves/dots)
    #   Block 3: complex features (letters)
    #   Block 4: deep features (single conv unit)
    block_specs = (
        (32, 2, 0.2),
        (64, 2, 0.25),
        (128, 2, 0.3),
        (256, 1, 0.4),
    )
    for filters, num_convs, dropout_rate in block_specs:
        x = _vgg_block(x, filters, num_convs, dropout_rate)

    # --- Classifier ---
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation="relu")(x)
    x = layers.Dropout(0.5)(x) # Heavy dropout before final layer
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    model = models.Model(inputs, outputs, name="Urdu_VGG_Custom")

    # Plain Adam is used here. (An earlier comment said AdamW, but the optimizer
    # was never switched; changing it would alter training results.)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

# =======================================
# TRAINING
# =======================================

# Get number of classes dynamically.
# Only sub-directories are class folders; stray files in the dataset root
# (e.g. a README or .DS_Store) must not be counted as classes.
num_classes = sum(
    1
    for entry in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, entry))
)
if num_classes < 2:
    raise ValueError(
        f"Expected at least 2 class folders in {DATASET_PATH}, found {num_classes}"
    )
print(f"Detected {num_classes} Classes")

model = build_custom_vgg(num_classes)
model.summary()

# Halve the learning rate whenever val_loss has not improved for 3 epochs,
# down to a floor of 1e-6.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1
)

# Stop after 8 epochs without val_loss improvement and roll back to the best weights.
# The patience is longer than reduce_lr's so the LR can drop before training stops.
early_stop = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=8,
    restore_best_weights=True,
    verbose=1
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=40, # upper bound; early stopping usually ends training sooner
    callbacks=[early_stop, reduce_lr]
)

# Save in modern keras format
model.save(SAVE_PATH)
print("Model saved at:", SAVE_PATH)

Load and test model

In [None]:
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn

In [None]:
import html
import io
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from google.colab import drive
from google.colab import files
from IPython.display import display, HTML
from PIL import Image

# =======================================================================
# ⚡ RESUMABLE SETUP CELL ⚡
# Run this every time you reconnect the Colab session.
# =======================================================================

# 1. Re-mount Google Drive (the model file below lives on Drive)
print("1. Re-Mounting Google Drive...")
drive.mount('/content/drive')

# =======================================================================
# 0. CONFIGURATION (IMPORTANT: CHECK THESE PATHS!)
# =======================================================================
# Path where your best model was saved
MODEL_PATH = '/content/drive/MyDrive/vgg1_urdu_custom.keras'

# Class labels, looked up by the index of the model's highest output. This literal holds 37 entries.
# NOTE(review): the order must match the label order used at training time (presumably the sorted dataset folder names) - confirm.
CLASS_NAMES = ['ا', 'آ', 'ب', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ل', 'م', 'ن', 'و', 'ٹ', 'پ', 'چ', 'ڈ', 'ڑ', 'ژ', 'ک', 'گ', 'ہ', 'ی', 'ے']

IMG_SIZE = (128, 128) # Must match the training size
# =======================================================================


# =======================================================================
# 1. LOAD THE TRAINED MODEL
# =======================================================================
print(f"\n--- 1. Loading model from: {MODEL_PATH} ---")
try:
    model = tf.keras.models.load_model(MODEL_PATH)
except Exception as e:
    print("ERROR: Could not load the model. Please check the MODEL_PATH variable.")
    print(f"Details: {e}")
    # Stop here. Continuing would hit a NameError later or, worse, silently
    # reuse a stale `model` left in the kernel by an earlier cell.
    raise
else:
    print("Model loaded successfully!")
    print("Best model validation accuracy (from log): ~99.24%")

    # A label list that is shorter or longer than the softmax output means
    # predictions would be mislabelled, so surface the mismatch immediately.
    num_outputs = model.output_shape[-1]
    if num_outputs != len(CLASS_NAMES):
        print(
            f"WARNING: model has {num_outputs} outputs but CLASS_NAMES has "
            f"{len(CLASS_NAMES)} entries; predicted labels may be wrong."
        )


# =======================================================================
# 2. IMAGE PREPROCESSING FUNCTION
# =======================================================================
def preprocess_image_from_bytes(image_bytes):
    """Decode one image from raw bytes and prepare it as model input.

    The image is converted to 8-bit grayscale, resized to IMG_SIZE, scaled by
    1/255 and inverted, mirroring the rescale/invert step applied during training.

    Args:
        image_bytes: Encoded image file content (anything PIL can open).

    Returns:
        A tuple ``(model_input, raw)``: ``model_input`` is a float32 array of
        shape (1, IMG_SIZE[0], IMG_SIZE[1], 1) holding the inverted image, and
        ``raw`` is the non-inverted (IMG_SIZE[0], IMG_SIZE[1]) array for
        plotting. Returns ``(None, None)`` if the image cannot be processed.
    """
    try:
        img_pil = Image.open(io.BytesIO(image_bytes)).convert('L')
        # IMG_SIZE is (height, width), but PIL's resize expects (width, height).
        # NOTE(review): PIL's default resampling filter may differ from the one
        # used by the training loader - confirm if accuracy on uploads looks off.
        img_resized = img_pil.resize((IMG_SIZE[1], IMG_SIZE[0]))

        # Convert to NumPy array and normalize
        img_array_raw = np.array(img_resized, dtype=np.float32) / 255.0

        # Add channel and batch axes: (H, W) -> (1, H, W, 1)
        img_preprocessed = img_array_raw[np.newaxis, ..., np.newaxis]

        # Invert colors
        img_preprocessed_inverted = 1.0 - img_preprocessed

        return img_preprocessed_inverted, img_array_raw

    except Exception as e:
        # Best effort: report the problem and let the caller skip this file.
        print(f"  [ERROR] Failed to process image: {e}")
        return None, None


# =======================================================================
# 3. UPLOAD FILES AND BATCH PREDICTION
# =======================================================================
print("\n--- 2. Uploading Test Images (You can select multiple files) ---")
# files.upload() opens a file picker; the result maps each filename to its bytes.
uploaded_files = files.upload()

if not uploaded_files:
    print("No images were uploaded. Exiting prediction.")
else:
    num_uploaded = len(uploaded_files)
    print(f"Found {num_uploaded} images to predict.")

    # One dict per successfully classified image, consumed by the summary table.
    results = []

    # One row per upload: raw input on the left, model input on the right.
    # squeeze=False keeps `axes` two-dimensional even for a single upload, so
    # axes[i] is always a (left, right) pair.
    fig, axes = plt.subplots(
        num_uploaded, 2, figsize=(10, 5 * num_uploaded), squeeze=False
    )

    # Iterate over the uploaded dictionary (key=filename, value=bytes)
    for i, (filename, image_bytes) in enumerate(uploaded_files.items()):
        ax_raw, ax_proc = axes[i]
        # Hide the axes up front so a row whose image fails to decode stays
        # blank instead of showing empty tick marks.
        ax_raw.axis('off')
        ax_proc.axis('off')

        # Preprocess the image from its byte content
        img_input, img_raw = preprocess_image_from_bytes(image_bytes)

        if img_input is None:
            ax_raw.set_title(f"Input: {filename} (unreadable)", fontsize=10, color='red')
            continue

        # Predict
        predictions = model.predict(img_input, verbose=0)

        # Get result details
        predicted_class_index = int(np.argmax(predictions[0]))
        confidence = float(predictions[0][predicted_class_index]) * 100
        # Guard against a label list shorter than the model's output layer.
        if predicted_class_index < len(CLASS_NAMES):
            predicted_char = CLASS_NAMES[predicted_class_index]
        else:
            predicted_char = f"class #{predicted_class_index}"

        results.append({
            'File': filename,
            'Prediction': predicted_char,
            'Confidence': confidence
        })

        # --- Plotting the result ---
        # 1. Original Image Plot
        ax_raw.imshow(img_raw, cmap='gray', vmin=0, vmax=1)
        ax_raw.set_title(f"Input: {filename}", fontsize=10)

        # 2. Processed Image Plot with Prediction
        ax_proc.imshow(img_input.squeeze(), cmap='gray', vmin=0, vmax=1)
        ax_proc.set_title(f"Prediction: {predicted_char} ({confidence:.1f}%)",
                          fontsize=10, color='blue')

    plt.tight_layout()
    plt.show()

    # =======================================================================
    # 4. FINAL SUMMARY TABLE
    # =======================================================================
    print("\n--- 4. Batch Prediction Summary ---")

    # Build the rows first and join once. Filenames come from the user's
    # upload, so they are escaped before being placed into HTML.
    table_rows = []
    for res in results:
        # Highlight low confidence predictions (below 90%)
        color = 'darkorange' if res['Confidence'] < 90.0 else 'black'
        table_rows.append(
            f"<tr><td>{html.escape(str(res['File']))}</td>"
            f"<td style='font-size: 1.5em;'>{html.escape(res['Prediction'])}</td>"
            f"<td style='color:{color};'>{res['Confidence']:.2f}%</td></tr>"
        )

    html_table = (
        "<table><thead><tr><th>File Name</th><th>Prediction</th><th>Confidence</th></tr></thead><tbody>"
        + "".join(table_rows)
        + "</tbody></table>"
    )
    display(HTML(html_table))