**Imports & config**

In [1]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os, glob, random, shutil, json
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

SEED = 42
np.random.seed(SEED)
random.seed(SEED)
tf.random.set_seed(SEED)

DATA_DIR = "C:/Users/hp/projects/traffic-sign-recognition/data/GTSRB/Train"
IMG_SIZE = (48, 48)  # common choice for GTSRB
BATCH_SIZE = 64
EPOCHS = 25


**Load file paths + labels**

In [3]:
assert os.path.isdir(DATA_DIR), f"Folder not found: {DATA_DIR}"

def list_images_and_labels(root):
    # Each class is a folder named 00000, 00001, ...
    class_dirs = sorted([d for d in glob.glob(os.path.join(root, "*")) if os.path.isdir(d)])
    paths, labels = [], []
    for class_dir in class_dirs:
        label = int(os.path.basename(class_dir))
        for img_path in glob.glob(os.path.join(class_dir, "*.ppm")) + glob.glob(os.path.join(class_dir, "*.png")) + glob.glob(os.path.join(class_dir, "*.jpg")):
            paths.append(img_path)
            labels.append(label)
    return paths, labels

all_paths, all_labels = list_images_and_labels(DATA_DIR)
n_classes = len(set(all_labels))
len(all_paths), n_classes


(39209, 43)

**Train/val split**

In [4]:
train_paths, val_paths, train_labels, val_labels = train_test_split(
    all_paths, all_labels, test_size=0.2, stratify=all_labels, random_state=SEED
)
len(train_paths), len(val_paths)


(31367, 7842)

**TF data pipeline (fast, memory-friendly)**

In [5]:
def preprocess_image(path, label, augment=False):
    img = tf.io.read_file(path)
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, IMG_SIZE)
    if augment:
        img = tf.image.random_flip_left_right(img)               # mild; some signs are symmetric, be careful
        img = tf.image.random_brightness(img, max_delta=0.1)
        img = tf.image.random_contrast(img, 0.9, 1.1)
    img = (img - 0.5) * 2.0  # scale to [-1,1]
    return img, tf.cast(label, tf.int32)

def make_dataset(paths, labels, augment=False, shuffle=True):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=min(len(paths), 10000), seed=SEED)
    ds = ds.map(lambda p,l: preprocess_image(p,l,augment=augment), num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    return ds

train_ds = make_dataset(train_paths, train_labels, augment=True, shuffle=True)
val_ds   = make_dataset(val_paths, val_labels, augment=False, shuffle=False)


**Model (lightweight CNN that works well on GTSRB)**

In [6]:
import os

TRAIN_DIR = "C:/Users/hp/projects/traffic-sign-recognition/data/GTSRB/Train"
n_classes = len(os.listdir(TRAIN_DIR))
print("Number of classes:", n_classes)

def build_model(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3), num_classes=43):
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(43, 3, padding='same', activation='relu')(inputs)
    x = layers.Conv2D(43, 3, activation='relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(64, 3, activation='relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Conv2D(128, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(128, 3, activation='relu')(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

model = build_model(num_classes=n_classes)
model.summary()


Number of classes: 43


**Callbacks and training**

In [None]:
import os
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Create a folder for saving the model if it doesn’t exist
os.makedirs("../models", exist_ok=True)

# Define callbacks
ckpt = ModelCheckpoint(
    "../models/best_gtsrb.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1
)

early = EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=5,
    restore_best_weights=True,
    verbose=1
)

rlr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=2,
    verbose=1
)

# Set epochs
EPOCHS = 13

# Train the model
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=[ckpt, early, rlr],
    verbose=1
)

# The EarlyStopping callback stores the epoch index where it stopped
# +1 converts it to a human-friendly count (since epochs start at 0)
stopped_epoch = early.stopped_epoch + 1

# Print final training details
print("\nTraining Summary:")
if early.stopped_epoch > 0:
    print(f"Early stopping triggered at epoch {stopped_epoch} out of {EPOCHS}")
else:
    print(f"Training completed all {EPOCHS} epochs (no early stopping).")

#Final validation accuracy
final_val_acc = max(history.history['val_accuracy'])
print(f"Best validation accuracy achieved: {final_val_acc:.2%}")


Epoch 1/13
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 429ms/step - accuracy: 0.2080 - loss: 2.8979  
Epoch 1: val_accuracy improved from None to 0.76524, saving model to ../models/best_gtsrb.h5




[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m239s[0m 475ms/step - accuracy: 0.3913 - loss: 2.1035 - val_accuracy: 0.7652 - val_loss: 0.6705 - learning_rate: 0.0010
Epoch 2/13
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 361ms/step - accuracy: 0.7652 - loss: 0.7062  
Epoch 2: val_accuracy improved from 0.76524 to 0.94950, saving model to ../models/best_gtsrb.h5




[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 386ms/step - accuracy: 0.8145 - loss: 0.5581 - val_accuracy: 0.9495 - val_loss: 0.1829 - learning_rate: 0.0010
Epoch 3/13
[1m490/491[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 332ms/step - accuracy: 0.8961 - loss: 0.3141  
Epoch 3: val_accuracy improved from 0.94950 to 0.96914, saving model to ../models/best_gtsrb.h5




[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 355ms/step - accuracy: 0.9038 - loss: 0.2893 - val_accuracy: 0.9691 - val_loss: 0.1051 - learning_rate: 0.0010
Epoch 4/13
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343ms/step - accuracy: 0.9284 - loss: 0.2154  
Epoch 4: val_accuracy improved from 0.96914 to 0.97067, saving model to ../models/best_gtsrb.h5




[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 369ms/step - accuracy: 0.9311 - loss: 0.2110 - val_accuracy: 0.9707 - val_loss: 0.1012 - learning_rate: 0.0010
Epoch 5/13
[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - accuracy: 0.9447 - loss: 0.1719  
Epoch 5: val_accuracy improved from 0.97067 to 0.98470, saving model to ../models/best_gtsrb.h5




[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 359ms/step - accuracy: 0.9469 - loss: 0.1629 - val_accuracy: 0.9847 - val_loss: 0.0589 - learning_rate: 0.0010
Epoch 6/13
[1m105/491[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m2:04[0m 323ms/step - accuracy: 0.9550 - loss: 0.1399