# Traffic Sign Recognition – Data Exploration & Baseline Model

This notebook covers dataset loading, exploration, preprocessing, baseline CNN training, and evaluation.

Dataset: GTSRB (German Traffic Sign Recognition Benchmark)

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

In [None]:
# Notebook-wide configuration; every later cell reads these names.

# Root folder that holds one sub-folder per class id (relative to the notebook).
DATA_DIR = "../data/raw"

# Images are resized to IMG_SIZE x IMG_SIZE pixels before being stacked.
IMG_SIZE = 32

# Number of class ids iterated when loading, and width of the softmax output.
NUM_CLASSES = 43

In [None]:
# Load every file under DATA_DIR/<class_id>/ as an IMG_SIZE x IMG_SIZE RGB
# array; the folder name supplies the integer label.
images = []
labels = []
missing_classes = []

for class_id in range(NUM_CLASSES):
    class_path = os.path.join(DATA_DIR, str(class_id))
    if not os.path.isdir(class_path):
        # Remember the gap instead of skipping silently, so an incomplete
        # download is visible before training starts.
        missing_classes.append(class_id)
        continue
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the sample order (and therefore the seeded train/test split) vary
    # between machines.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        if not os.path.isfile(img_path):
            continue
        # The context manager closes the underlying file handle promptly;
        # Image.open is lazy and would otherwise keep it open.
        with Image.open(img_path) as raw_img:
            # Force 3 channels so every array has the same shape and matches
            # the (IMG_SIZE, IMG_SIZE, 3) model input, whatever the file's
            # native mode (grayscale, palette, RGBA, ...).
            rgb_img = raw_img.convert('RGB').resize((IMG_SIZE, IMG_SIZE))
            images.append(np.array(rgb_img))
        labels.append(class_id)

if missing_classes:
    print('Warning: no folder found for class ids', missing_classes)

X = np.array(images)
y = np.array(labels)

In [None]:
# Report the dimensions of the stacked image array and the label vector.
for _caption, _array in (('Images:', X), ('Labels:', y)):
    print(_caption, _array.shape)

In [None]:
# Preview up to nine samples in a 3x3 grid.
# Images were loaded in class order, so the first nine all belong to the
# lowest class id; evenly spaced indices give a deterministic spread across
# the dataset instead.
n_show = min(9, len(X))  # avoids an IndexError when fewer than 9 images exist
sample_idx = np.linspace(0, len(X) - 1, num=n_show, dtype=int)

fig, axes = plt.subplots(3, 3, figsize=(6, 6))
for ax in axes.flat:
    # Hide the axes on every panel, including any left empty.
    ax.axis('off')
for ax, idx in zip(axes.flat, sample_idx):
    ax.imshow(X[idx])
    ax.set_title(f'Label: {y[idx]}')
plt.show()

In [None]:
# Scale pixel values to [0, 1] and one-hot encode the labels.
# The results go into NEW names so that X and y keep their raw values:
# re-running this cell no longer divides by 255 a second time or re-encodes
# labels that are already one-hot.
# float32 halves the memory of the default float64 result.
X_scaled = X.astype('float32') / 255.0
y_onehot = to_categorical(y, NUM_CLASSES)

# Hold out 20% of the samples for the final test; the fixed seed keeps the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_onehot, test_size=0.2, random_state=42
)

In [None]:
# Baseline CNN assembled layer by layer: two convolution + max-pooling stages,
# then a dense classifier head with dropout and a softmax over NUM_CLASSES.
model = Sequential()

# Feature extractor.
model.add(Conv2D(32, (3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D((2,2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D((2,2)))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# categorical_crossentropy pairs with the one-hot label encoding.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the baseline; 20% of the training data is set aside by Keras for
# per-epoch validation. The returned History object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

In [None]:
# Score the model on the held-out test split.
y_pred = model.predict(X_test)

# Collapse the predicted probabilities and the one-hot targets to class ids.
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

test_accuracy = accuracy_score(y_true, y_pred_classes)
print('Test Accuracy:', test_accuracy)

In [None]:
# Persist the trained model, creating the output folder first if needed.
model_path = '../models/cnn_baseline.h5'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)