In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import to_categorical

import pickle

# Seed the NumPy and TensorFlow global RNGs so repeated runs are comparable
tf.random.set_seed(42)
np.random.seed(42)

# Report the runtime environment (library version and GPU visibility)
print(f"TensorFlow: {tf.__version__}")
print("GPU Available:", bool(tf.config.list_physical_devices('GPU')))


# Project locations, all relative to the notebook's working directory
DATA_DIR = Path('..') / 'data'
MODELS_DIR = Path('..') / 'models'
OUTPUTS_DIR = Path('..') / 'outputs'


# Make sure the folders this notebook writes to exist before anything is saved
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(OUTPUTS_DIR, exist_ok=True)

In [None]:
# ============================================================================
# STEP 1: DATA LOADING
# ============================================================================

def load_gtsrb_dataset(data_dir=DATA_DIR):
    """
    Load the GTSRB training images from a class-per-folder directory tree.

    Expected structure:
    data/
    └── Train/
        ├── 0/
        ├── 1/
        └── ... (up to 42)

    Download: https://www.kaggle.com/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign

    Only sub-directories of ``Train`` whose name is a non-negative integer are
    treated as classes; anything else (for example ``.ipynb_checkpoints``) is
    ignored. Class folders are visited in numeric order and the files inside
    each folder in sorted order, so the returned sequence does not depend on
    the filesystem's listing order.

    Args:
        data_dir: Directory that contains the ``Train`` folder.

    Returns:
        tuple: ``(images, labels)`` where ``images`` is a list of RGB arrays at
        their original size (not resized) and ``labels`` is a 1-D NumPy array
        of integer class ids aligned with ``images``.

    Raises:
        FileNotFoundError: If ``<data_dir>/Train`` does not exist, or if no
            readable image was found beneath it.
    """
    print("\n" + "="*70)
    print("LOADING  DATASET")
    print("="*70)

    image_exts = ('.png', '.jpg', '.jpeg', '.ppm')
    images = []
    labels = []

    train_path = Path(data_dir) / 'Train'
    if not train_path.exists():
        raise FileNotFoundError(
            f"Dataset not found at {train_path}!\n"
            "Please download GTSRB and extract so that 'data/Train/<class>/*' exists."
        )

    # Keep only numerically named folders: int() below would raise on stray
    # directories such as '.ipynb_checkpoints'. Sort by numeric value so that
    # class '2' comes before class '10'.
    classes = sorted(
        (d for d in os.listdir(train_path)
         if d.isdecimal() and (train_path / d).is_dir()),
        key=int,
    )
    print(f"Found {len(classes)} classes")

    skipped = 0
    for class_num in classes:
        class_id = int(class_num)
        class_path = train_path / class_num
        # Sorted file names make the sample order deterministic across machines.
        class_images = sorted(
            f for f in os.listdir(class_path) if f.lower().endswith(image_exts)
        )

        for img_name in class_images:
            img = cv2.imread(str(class_path / img_name))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                skipped += 1
                continue
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            labels.append(class_id)

        if class_id % 10 == 0:
            print(f"  Loaded class {class_num}...")

    if skipped:
        print(f"  Skipped {skipped} unreadable image file(s)")

    if not images:
        # Fail with an actionable message instead of an IndexError on images[0].
        raise FileNotFoundError(
            f"No readable images found under {train_path}!\n"
            "Expected image files (.png/.jpg/.jpeg/.ppm) inside numeric class folders."
        )

    labels = np.array(labels)
    print(f"\n✓ Successfully loaded {len(images)} images")
    print(f"✓ Example image shape: {images[0].shape} (will be resized)")
    print(f"✓ Number of classes: {len(np.unique(labels))}")
    return images, labels

In [None]:
# ============================================================================
# STEP 2: DATA PREPROCESSING
# ============================================================================

def preprocess_data(images, labels, img_size=32):
    """
    Preprocess images for MLP training.

    Each image is resized to ``img_size`` x ``img_size``, scaled to the
    [0, 1] range as float32, and flattened to a 1-D feature vector. Labels are
    one-hot encoded.

    Args:
        images: Sequence of image arrays accepted by ``cv2.resize``.
        labels: Integer class ids, one per image.
            NOTE(review): assumed to be non-negative integers — confirm with caller.
        img_size: Side length in pixels of the square resized image.

    Returns:
        tuple: ``(flattened_images, labels_categorical, n_classes,
        normalized_images)`` — the flattened float32 features, the one-hot
        label matrix, the number of one-hot columns, and the normalized images
        before flattening.

    Raises:
        ValueError: If ``images`` is empty or its length differs from
            ``labels``.
    """
    print("\n" + "="*70)
    print("PREPROCESSING DATA")
    print("="*70)

    labels = np.asarray(labels)
    if len(images) == 0:
        raise ValueError("No images to preprocess")
    if len(images) != len(labels):
        raise ValueError(
            f"Number of images ({len(images)}) does not match number of labels ({len(labels)})"
        )

    print(f"Resizing images to {img_size}x{img_size}...")
    resized_images = np.array([cv2.resize(img, (img_size, img_size)) for img in images])

    print("Normalizing pixel values...")
    normalized_images = resized_images.astype('float32') / 255.0

    print("Flattening images...")
    flattened_images = normalized_images.reshape(len(normalized_images), -1)

    # Size the one-hot encoding by the largest class id rather than by the
    # number of distinct ids: if a class is missing from the data, the distinct
    # count is smaller than max id + 1 and to_categorical would fail. The two
    # values are identical when ids are contiguous from 0.
    n_classes = int(labels.max()) + 1
    labels_categorical = to_categorical(labels, n_classes)

    print(f"\n✓ Preprocessed shape: {flattened_images.shape}")
    print(f"✓ Each image: {img_size}x{img_size}x3 = {flattened_images.shape[1]} features")
    print(f"✓ Labels shape: {labels_categorical.shape}")
    return flattened_images, labels_categorical, n_classes, normalized_images

In [None]:
# ============================================================================
# STEP 3: EXPLORATORY DATA ANALYSIS
# ============================================================================

def visualize_samples(images, labels, n_samples=20):
    """
    Display random sample images from the dataset and save the figure.

    The grid is sized from the number of samples actually shown (up to five
    columns), so any ``n_samples`` works; with the default of 20 the layout is
    4 rows x 5 columns. If fewer than ``n_samples`` images are available, all
    of them are shown. The figure is written to ``OUTPUTS_DIR / 'samples.png'``.

    Args:
        images: Indexable collection of images accepted by ``Axes.imshow``.
        labels: Per-image labels, either scalar class ids or one-hot vectors
            (vectors are reduced with ``argmax`` for the title).
        n_samples: Maximum number of images to draw.

    Raises:
        ValueError: If there is nothing to display (no images, or
            ``n_samples`` is not positive).
    """
    print("\nVisualizing sample images...")

    # Never ask for more samples than exist: sampling without replacement
    # would raise otherwise.
    n_show = min(n_samples, len(images))
    if n_show <= 0:
        raise ValueError("No images to visualize")

    n_cols = min(5, n_show)
    n_rows = -(-n_show // n_cols)  # ceiling division
    # squeeze=False keeps `axes` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(3 * n_cols, 3 * n_rows),
                             squeeze=False)
    fig.suptitle('Sample Traffic Signs from Dataset', fontsize=16, fontweight='bold')

    indices = np.random.choice(len(images), n_show, replace=False)
    for ax, idx in zip(axes.flat, indices):
        ax.imshow(images[idx])
        label = np.argmax(labels[idx]) if np.ndim(labels[idx]) > 0 else labels[idx]
        ax.set_title(f'Class: {label}', fontsize=11)

    # Hide the frames of every cell, including unused ones in the last row.
    for ax in axes.flat:
        ax.axis('off')

    plt.tight_layout()
    plt.savefig(OUTPUTS_DIR / 'samples.png', dpi=150, bbox_inches='tight')
    plt.show()

def plot_class_distribution(labels):
    """
    Draw, save and display a bar chart of the number of samples per class.

    ``labels`` may hold class ids directly or one-hot rows (more than one
    dimension), in which case each row is reduced with ``argmax``. The chart is
    written to ``OUTPUTS_DIR / 'class_distribution.png'`` and the minimum,
    maximum and mean per-class counts are printed.
    """
    print("\nPlotting class distribution...")
    is_one_hot = len(getattr(labels, "shape", ())) > 1
    class_ids = np.argmax(labels, axis=1) if is_one_hot else labels

    fig, ax = plt.subplots(figsize=(16, 5))
    unique, counts = np.unique(class_ids, return_counts=True)
    ax.bar(unique, counts, color='steelblue', edgecolor='black', alpha=0.7)
    ax.set_xlabel('Traffic Sign Class', fontsize=13)
    ax.set_ylabel('Number of Images', fontsize=13)
    ax.set_title('Distribution of Traffic Sign Classes in Dataset',
                 fontsize=15, fontweight='bold')
    ax.set_xticks(unique)
    ax.grid(axis='y', alpha=0.3)

    # Annotate every third bar with its count, lifted slightly above the bar top.
    for cls, cnt in zip(unique[::3], counts[::3]):
        lift = max(5, int(0.01 * counts.max()))
        ax.text(cls, cnt + lift, str(cnt), ha='center', fontsize=8)

    fig.tight_layout()
    fig.savefig(OUTPUTS_DIR / 'class_distribution.png', dpi=150, bbox_inches='tight')
    plt.show()

    print("\n✓ Class distribution plotted")
    print(f"  Min samples per class: {counts.min()}")
    print(f"  Max samples per class: {counts.max()}")
    print(f"  Average samples per class: {counts.mean():.0f}")

In [None]:
# ============================================================================
# STEP 4: BUILD MLP MODEL
# ============================================================================

def build_mlp_model(input_shape, n_classes):
    """
    Assemble and compile the Multi-Layer Perceptron classifier.

    Architecture: 512 → 256 → 128 → n_classes. Every hidden Dense layer uses
    ReLU and is followed by BatchNormalization and Dropout (0.3, 0.3, 0.2);
    the output layer is a softmax over ``n_classes``. The model is compiled
    with Adam (learning rate 0.001), categorical cross-entropy loss and the
    accuracy metric.

    Args:
        input_shape: Number of input features per sample (an int; it is
            wrapped into a 1-tuple for the first layer).
        n_classes: Number of units in the softmax output layer.

    Returns:
        The compiled Sequential model.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("BUILDING MLP MODEL")
    print(rule)

    # (units, dropout rate) for each hidden block, in order
    hidden_blocks = [(512, 0.3), (256, 0.3), (128, 0.2)]

    stack = []
    for position, (units, drop_rate) in enumerate(hidden_blocks, start=1):
        # Only the first layer declares the size of the flattened input.
        first_layer_kwargs = {'input_shape': (input_shape,)} if position == 1 else {}
        stack.append(Dense(units, activation='relu',
                           name=f'hidden_layer_{position}', **first_layer_kwargs))
        stack.append(BatchNormalization())
        stack.append(Dropout(drop_rate))

    stack.append(Dense(n_classes, activation='softmax', name='output_layer'))

    model = Sequential(stack, name='Traffic_Sign_MLP')
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )
    print("\n✓ Model built successfully")
    return model