<a href="https://colab.research.google.com/github/Meetra21/Contour__Gradient_pipeline/blob/main/Contour__Gradient_pipeline_for_Letters_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# If you need these in Colab, uncomment:
# !pip install --quiet tensorflow tensorflow-datasets opencv-python scikit-learn tqdm

import cv2
import time
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# -----------------------------
# 1) Boundary+Gradient features
# -----------------------------
def _auto_canny_edges_uint8(img_uint8, sigma=0.33):
    """
    Median-driven auto-threshold Canny for one uint8 image.

    The hysteresis thresholds are (1 - sigma) * median and
    (1 + sigma) * median, clamped to [0, 255] and truncated to int.
    Returns the edge map produced by cv2.Canny (uint8 {0,255}).

    NOTE: when the image median is 0 (e.g. a mostly-black image),
    both thresholds evaluate to 0.
    """
    median = np.median(img_uint8)

    # Band of +/- sigma around the median, kept inside the uint8 range
    low_raw = median * (1.0 - sigma)
    high_raw = median * (1.0 + sigma)
    low_thr = int(max(0, low_raw))
    high_thr = int(min(255, high_raw))

    return cv2.Canny(img_uint8, low_thr, high_thr, L2gradient=True)

def compute_boundary_gradients(images, use_canny=True):
    """
    Central-difference image gradients restricted to a boundary mask.

    Parameters
    ----------
    images : array-like, shape (N, H, W)
        Grayscale images, uint8 or float. If the batch maximum exceeds 1.0
        the whole batch is divided by 255; otherwise values are used as-is.
    use_canny : bool
        True  -> boundary mask from auto-thresholded Canny (one call per image).
        False -> boundary mask from a 3x3 morphological gradient
                 (dilation > erosion).

    Returns
    -------
    grads : np.ndarray, float32, shape (N, 2, H, W)
        grads[:, 0] is gx (difference along the last axis) and grads[:, 1]
        is gy (difference along the middle axis). Pixels outside the mask
        are 0, and each image's (gx, gy) field is divided by its L2 norm
        (plus 1e-6, so an image without boundary pixels stays all-zero).

    Raises
    ------
    ValueError
        If `images` is not 3-dimensional.
    """
    images = np.asarray(images)
    if images.ndim != 3:
        raise ValueError(
            f"images must have shape (N, H, W); got {images.shape}"
        )
    N, H, W = images.shape

    # Nothing to process: return an empty result of the documented shape
    # (np.max on a zero-size array would otherwise raise).
    if images.size == 0:
        return np.zeros((N, 2, H, W), dtype=np.float32)

    # Normalize to [0,1] float32; astype copies, so the caller's array
    # is never modified by the in-place division.
    imgs = images.astype(np.float32)
    if imgs.max() > 1.0:
        imgs /= 255.0

    # Pad once for vectorized central differences; 'edge' padding makes the
    # border pixels fall back to half of a one-sided difference.
    p = np.pad(imgs, ((0, 0), (1, 1), (1, 1)), mode='edge')
    gx = (p[:, 1:-1, 2:] - p[:, 1:-1, :-2]) * 0.5
    gy = (p[:, 2:, 1:-1] - p[:, :-2, 1:-1]) * 0.5

    # uint8 0..255 copy for the OpenCV calls; clip so out-of-range float
    # input cannot wrap around in the cast.
    imgs_u8 = np.clip(imgs * 255.0 + 0.5, 0.0, 255.0).astype(np.uint8)

    if use_canny:
        # Canny works on one image at a time, hence the loop
        edges = np.empty((N, H, W), dtype=np.uint8)
        for i, img_u8 in enumerate(imgs_u8):
            edges[i] = _auto_canny_edges_uint8(img_u8)
        mask = (edges > 0).astype(np.float32)
    else:
        # fallback: morphological gradient (dilation - erosion) as boundary proxy
        kernel = np.ones((3, 3), np.uint8)
        dil = np.stack([cv2.dilate(im, kernel, iterations=1) for im in imgs_u8], axis=0)
        ero = np.stack([cv2.erode(im, kernel, iterations=1) for im in imgs_u8], axis=0)
        # dil >= ero everywhere, so this equals (dil - ero) > 0 without
        # relying on uint8 subtraction
        mask = (dil > ero).astype(np.float32)

    # Mask gradients to boundary only
    gx *= mask
    gy *= mask

    # Per-image L2 normalization to reduce scale variance between images;
    # eps avoids division by zero when an image has no boundary pixels.
    eps = 1e-6
    norm = np.sqrt((gx**2 + gy**2).sum(axis=(1, 2), keepdims=True)) + eps
    gx /= norm
    gy /= norm

    # Stack as (N, 2, H, W); the data is already float32, so avoid a second copy
    grads = np.stack([gx, gy], axis=1).astype(np.float32, copy=False)
    return grads

# -----------------------------
# 2) Load EMNIST Letters
# -----------------------------
# as_supervised=True yields (image, label) pairs; with_info=True also
# returns the dataset metadata object.
(ds_train, ds_test), ds_info = tfds.load(
    'emnist/letters',
    split=['train', 'test'],
    as_supervised=True,
    with_info=True
)

# -----------------------------
# 3) Convert to NumPy arrays
# -----------------------------
# The official train/test splits are pooled here; a fresh stratified split
# is made afterwards. Iterating in large batches avoids a Python-level loop
# over every single example while producing the same arrays in the same order.
image_batches = []
label_batches = []
for img_batch, lbl_batch in tfds.as_numpy(ds_train.concatenate(ds_test).batch(4096)):
    # assumes images arrive as (B, 28, 28, 1) uint8 -- drop the channel axis
    image_batches.append(np.squeeze(img_batch, axis=-1))
    label_batches.append(lbl_batch)
all_images = np.concatenate(image_batches, axis=0)                      # (N,28,28)
all_labels = (np.concatenate(label_batches, axis=0) - 1).astype(np.int32)  # labels 1–26 -> 0–25

# -----------------------------
# 4) Stratified 80/20 split
# -----------------------------
# stratify keeps the per-class proportions equal in both parts;
# the fixed random_state makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    all_images,
    all_labels,
    stratify=all_labels,
    train_size=0.8,
    random_state=42,
)

# -----------------------------
# 5) Boundary+Central-Diff grads
# -----------------------------
# Same feature extractor for both parts, train first and then test.
grad_train, grad_test = (
    compute_boundary_gradients(subset, use_canny=True)
    for subset in (X_train, X_test)
)

# -----------------------------
# 6) Flatten + standardize
# -----------------------------
# One row of 2*28*28 values per image
X_train_feat = grad_train.reshape(-1, 2*28*28)
X_test_feat  = grad_test.reshape(-1, 2*28*28)

# Feature standardization (z-score) helps MLP convergence.
# Statistics come from the training set only and are reused for the test set.
mean = X_train_feat.mean(axis=0, keepdims=True)
std  = X_train_feat.std(axis=0, keepdims=True)
# A feature that is (near-)constant on the training set would otherwise be
# divided by ~1e-6, blowing any non-zero test value up by ~1e6.
# Use unit scale for those features instead.
std[std < 1e-6] = 1.0
X_train_feat = (X_train_feat - mean) / std
X_test_feat  = (X_test_feat  - mean) / std

# -----------------------------
# 7) MLP model (lean & fast)
# -----------------------------
# Three hidden stages of Dense -> BatchNorm -> ReLU -> Dropout with
# shrinking width and dropout rate, then a 26-way softmax.
mlp_layers = [layers.Input(shape=(2*28*28,))]
for units, drop_rate in ((768, 0.4), (384, 0.3), (192, 0.2)):
    mlp_layers.extend([
        layers.Dense(units),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(drop_rate),
    ])
mlp_layers.append(layers.Dense(26, activation='softmax'))

model = models.Sequential(mlp_layers)

# Sparse loss: targets are integer class ids, not one-hot vectors
adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# -----------------------------
# 8) Callbacks
# -----------------------------
# Early stopping watches validation accuracy (patience 5) and restores
# the best weights seen.
es = callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# LR schedule watches validation loss (patience 3): multiply the learning
# rate by 0.5 on a plateau, never going below 1e-6.
rlr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# -----------------------------
# 9) Train
# -----------------------------
# perf_counter is monotonic, so the measured duration cannot be distorted
# by system clock adjustments during a long training run.
t0 = time.perf_counter()
history = model.fit(
    X_train_feat, y_train,
    validation_split=0.1,       # held out from the training features for the callbacks
    epochs=50,                  # upper bound; early stopping may end sooner
    batch_size=256,             # a bit larger batch (features are small)
    callbacks=[es, rlr],
    verbose=2
)
train_time = time.perf_counter() - t0

# -----------------------------
# 10) Evaluate
# -----------------------------
# Timed with the monotonic perf_counter; the interval covers the whole
# evaluate() call (forward pass plus loss/metric computation).
t1 = time.perf_counter()
test_loss, test_acc = model.evaluate(X_test_feat, y_test, verbose=0)
infer_time = time.perf_counter() - t1

# -----------------------------
# 11) Report
# -----------------------------
# Summary: accuracy as a percentage, wall times in seconds
n_test_samples = X_test_feat.shape[0]
report_lines = (
    "\nEMNIST Letters Boundary-Masked Central-Gradient MLP",
    f"Test Accuracy   : {test_acc*100:.2f}%",
    f"Training Time   : {train_time:.1f}s",
    f"Inference Time  : {infer_time:.3f}s for {n_test_samples} samples",
)
for report_line in report_lines:
    print(report_line)
