In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import to_categorical

import pickle

# Reproducibility: a single seed shared by NumPy and TensorFlow so that it
# only has to be changed in one place.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Environment report: TensorFlow version and whether any GPU device is visible.
print(f"TensorFlow: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")


# Project directories, resolved relative to the notebook's working directory.
DATA_DIR    = Path('../data')
MODELS_DIR  = Path('../models')
OUTPUTS_DIR = Path('../outputs')

# Create the model/output folders up front (no error if they already exist).
# DATA_DIR is deliberately not created here: it must already hold the dataset.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# ============================================================================
# STEP 1: DATA LOADING
# ============================================================================

def load_gtsrb_dataset(data_dir=DATA_DIR):
    """
    Load the GTSRB training images and their integer class labels.

    Expected structure:
    data/
    └── Train/
        ├── 0/
        ├── 1/
        └── ... (up to 42)

    Download: https://www.kaggle.com/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign

    Args:
        data_dir: Directory that contains the 'Train' folder.
            Defaults to DATA_DIR.

    Returns:
        (images, labels): `images` is a list of RGB image arrays at their
        original size (no resizing happens here); `labels` is a NumPy array
        holding one integer class id per image, in the same order.
        Samples are ordered by numeric class id, then by file name.

    Raises:
        FileNotFoundError: If the 'Train' folder does not exist, or if no
            readable image was found underneath it.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.ppm')

    print("\n" + "="*70)
    print("LOADING  DATASET")
    print("="*70)

    train_path = Path(data_dir) / 'Train'
    if not train_path.exists():
        raise FileNotFoundError(
            f"Dataset not found at {train_path}!\n"
            "Please download GTSRB and extract so that 'data/Train/<class>/*' exists."
        )

    # Class folders are named after their integer id. Folders with any other
    # name (e.g. '.ipynb_checkpoints') are skipped instead of crashing int().
    # key=int gives 0, 1, 2, ... rather than the string order '0', '1', '10', ...
    classes = sorted(
        (d for d in os.listdir(train_path)
         if (train_path / d).is_dir() and d.isdecimal()),
        key=int,
    )
    print(f"Found {len(classes)} classes")

    images = []
    labels = []
    unreadable = 0  # files with an image extension that cv2 could not decode

    for class_num in classes:
        class_id = int(class_num)
        class_path = train_path / class_num

        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore any later seeded split) identical across machines.
        image_names = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(image_extensions)
        )

        for img_name in image_names:
            img = cv2.imread(str(class_path / img_name))
            if img is None:
                # imread signals an unreadable file by returning None.
                unreadable += 1
                continue
            # OpenCV decodes to BGR; convert to the RGB order used downstream.
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            labels.append(class_id)

        if class_id % 10 == 0:
            print(f"  Loaded class {class_num}...")

    if not images:
        # Fail with a clear message instead of an IndexError on images[0].
        raise FileNotFoundError(
            f"No readable images found under {train_path}!\n"
            "Please download GTSRB and extract so that 'data/Train/<class>/*' exists."
        )

    labels = np.array(labels)
    print(f"\n✓ Successfully loaded {len(images)} images")
    if unreadable:
        print(f"⚠ Skipped {unreadable} unreadable image file(s)")
    print(f"✓ Example image shape: {images[0].shape} (will be resized)")
    print(f"✓ Number of classes: {len(np.unique(labels))}")
    return images, labels