**Imports & config**

In [7]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [10]:
import os, glob, random, shutil, json
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

SEED = 42
# Seed NumPy, Python's `random` module and TensorFlow with the same value.
for _seed_fn in (np.random.seed, random.seed, tf.random.set_seed):
    _seed_fn(SEED)

# Dataset location. Set the GTSRB_TRAIN_DIR environment variable to point at the
# training folder on another machine; the fallback is the original local path.
DATA_DIR = os.environ.get(
    "GTSRB_TRAIN_DIR",
    "C:/Users/hp/projects/traffic-sign-recognition/data/GTSRB/Train",
)
IMG_SIZE = (48, 48)  # (height, width) fed to the network; common choice for GTSRB
BATCH_SIZE = 64      # samples per batch produced by the tf.data pipeline
EPOCHS = 25          # presumably the training epoch budget (used outside this section)


**Load file paths + labels**

In [11]:
# Fail fast with a readable message if the dataset folder is missing.
missing_dir_msg = f"Folder not found: {DATA_DIR}"
assert os.path.isdir(DATA_DIR), missing_dir_msg

def list_images_and_labels(root):
    """Collect image paths and integer class labels from a class-per-folder tree.

    Each immediate subdirectory of ``root`` whose name parses as an integer
    (e.g. ``00000``, ``00001``, ...) is treated as one class, and that integer
    is the label. Subdirectories with non-numeric names are skipped instead of
    aborting the scan.

    Args:
        root: Directory that contains one subfolder per class.

    Returns:
        ``(paths, labels)``: two parallel lists. Class folders are visited in
        sorted order and files are sorted within each class, so the listing
        (and any seeded split made from it) does not depend on the order in
        which the filesystem enumerates entries.
    """
    patterns = ("*.ppm", "*.png", "*.jpg")
    class_dirs = sorted(
        d for d in glob.glob(os.path.join(root, "*")) if os.path.isdir(d)
    )
    paths, labels = [], []
    for class_dir in class_dirs:
        try:
            label = int(os.path.basename(class_dir))
        except ValueError:
            # Not a class folder (e.g. a stray "notes" directory); ignore it.
            continue
        class_files = []
        for pattern in patterns:
            class_files.extend(glob.glob(os.path.join(class_dir, pattern)))
        # glob returns entries in arbitrary order; sort for reproducibility.
        class_files.sort()
        paths.extend(class_files)
        labels.extend([label] * len(class_files))
    return paths, labels

all_paths, all_labels = list_images_and_labels(DATA_DIR)
assert all_paths, f"No images found under {DATA_DIR}"
# Labels are the folder ids and are passed unchanged to a sparse
# cross-entropy loss, so the classifier needs max(label) + 1 outputs.
# Counting distinct labels undercounts whenever a class folder is missing,
# which would leave some label ids outside the softmax range.
n_classes = max(all_labels) + 1
len(all_paths), n_classes


(34889, 40)

**Train/val split**

In [12]:
# Hold out 20% of the files for validation; stratifying on the labels keeps
# the class proportions the same in both subsets.
(train_paths, val_paths,
 train_labels, val_labels) = train_test_split(
    all_paths,
    all_labels,
    test_size=0.2,
    random_state=SEED,
    stratify=all_labels,
)
len(train_paths), len(val_paths)


(27911, 6978)

**TF data pipeline (fast, memory-friendly)**

In [14]:
def preprocess_image(path, label, augment=False):
    """Load one image file and convert it to a model-ready ``(image, label)`` pair.

    Args:
        path: Path of the image file (string or scalar string tensor).
        label: Integer class id for the image.
        augment: When True, apply mild random brightness and contrast jitter.

    Returns:
        ``(img, label)`` where ``img`` is a float32 tensor with 3 channels,
        resized to ``IMG_SIZE`` and scaled to [-1, 1], and ``label`` is cast
        to ``tf.int32``.
    """
    raw = tf.io.read_file(path)
    # NOTE(review): tf.image.decode_image documents BMP/GIF/JPEG/PNG input;
    # .ppm files would likely need converting beforehand - confirm.
    img = tf.image.decode_image(raw, channels=3, expand_animations=False)
    img = tf.image.convert_image_dtype(img, tf.float32)  # uint8 -> float32 in [0, 1]
    img = tf.image.resize(img, IMG_SIZE)
    if augment:
        # No horizontal flip here: a mirrored traffic sign can belong to a
        # different class (turn left/right, keep left/right), so flipping
        # would silently corrupt the labels.
        img = tf.image.random_brightness(img, max_delta=0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
        # The jitter can push values outside [0, 1]; clamp so the rescale
        # below really yields [-1, 1].
        img = tf.clip_by_value(img, 0.0, 1.0)
    img = (img - 0.5) * 2.0  # scale to [-1,1]
    return img, tf.cast(label, tf.int32)

def make_dataset(paths, labels, augment=False, shuffle=True):
    """Build a batched, prefetched ``tf.data`` pipeline from paths and labels.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of integer labels, parallel to ``paths``.
        augment: Forwarded to ``preprocess_image`` for every element.
        shuffle: When True, shuffle the (path, label) pairs before decoding,
            using a buffer of at most 10000 elements seeded with ``SEED``.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` preprocessed
        ``(image, label)`` pairs.
    """
    def _load(path, label):
        return preprocess_image(path, label, augment=augment)

    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        buffer = min(len(paths), 10000)
        dataset = dataset.shuffle(buffer_size=buffer, seed=SEED)
    dataset = dataset.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Training data is shuffled and augmented; validation data keeps its order
# and is left unaugmented.
train_ds = make_dataset(
    train_paths,
    train_labels,
    augment=True,
    shuffle=True,
)
val_ds = make_dataset(
    val_paths,
    val_labels,
    augment=False,
    shuffle=False,
)


**Model (lightweight CNN that works well on GTSRB)**

In [15]:
def build_model(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3), num_classes=43):
    """Build and compile the CNN classifier.

    The network is three convolutional stages with 32, 64 and 128 filters.
    Each stage is a 3x3 'same' convolution, a 3x3 unpadded convolution, max
    pooling and 25% dropout. A 256-unit dense layer with 50% dropout and a
    softmax output follow.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras model compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and an accuracy metric.
    """
    inputs = layers.Input(shape=input_shape)

    features = inputs
    for n_filters in (32, 64, 128):
        features = layers.Conv2D(n_filters, 3, padding='same', activation='relu')(features)
        features = layers.Conv2D(n_filters, 3, activation='relu')(features)
        features = layers.MaxPool2D()(features)
        features = layers.Dropout(0.25)(features)

    # Classification head.
    features = layers.Flatten()(features)
    features = layers.Dense(256, activation='relu')(features)
    features = layers.Dropout(0.5)(features)
    outputs = layers.Dense(num_classes, activation='softmax')(features)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Size the output layer from n_classes (computed from the loaded labels)
# instead of relying on build_model's default of 43.
model = build_model(num_classes=n_classes)
# Print the layer-by-layer summary of the model.
model.summary()
