In [None]:
import tensorflow as tf

# Sanity-check the runtime: which TensorFlow build is active and how many
# GPUs it can see.
print(tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [2]:
# Core scientific stack
import numpy as np
import matplotlib.pyplot as plt

# Deep learning framework 
import keras
from keras import Sequential
from keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Evaluation tools
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import seaborn as sns


**Load the Saved Preprocessed Data**

In [None]:
# Single place to point the notebook at the preprocessed GTSRB arrays.
DATA_DIR = "/mnt/g/which one is it/data/GTSRB/processed"

# allow_pickle=False: unpickling can execute arbitrary code, so it is kept off.
# np.load raises ValueError if a file actually contains pickled objects; in that
# case re-save it as a plain numeric array instead of re-enabling pickle.
X_train = np.load(f"{DATA_DIR}/X_train.npy", allow_pickle=False)
y_train = np.load(f"{DATA_DIR}/y_train.npy", allow_pickle=False)
X_val   = np.load(f"{DATA_DIR}/X_val.npy", allow_pickle=False)
y_val   = np.load(f"{DATA_DIR}/y_val.npy", allow_pickle=False)
X_test  = np.load(f"{DATA_DIR}/X_test.npy", allow_pickle=False)
y_test  = np.load(f"{DATA_DIR}/y_test.npy", allow_pickle=False)

# Fail fast if any split has mismatched image/label counts.
assert len(X_train) == len(y_train), "train images/labels length mismatch"
assert len(X_val) == len(y_val), "validation images/labels length mismatch"
assert len(X_test) == len(y_test), "test images/labels length mismatch"

print("Train:", X_train.shape, y_train.shape)
print("Val:", X_val.shape, y_val.shape)
print("Test:", X_test.shape, y_test.shape)

**Define augmentation pipeline + generator**

In [4]:
import albumentations as A
import numpy as np


# Standard geometric transformations.
geometric_ops = [
    A.Affine(
        translate_percent=(-0.1, 0.1),  # shift by up to 10%
        scale=(0.85, 1.15),             # zoom in/out by up to 15%
        rotate=(-15, 15),               # rotate by up to 15 degrees
        p=0.75,
    ),
    A.Perspective(scale=(0.05, 0.1), p=0.2),
]

# Image quality and noise simulations: at most one of these is applied.
# NOTE(review): inside OneOf the per-transform `p` presumably acts as a relative
# selection weight -- confirm against the Albumentations docs.
noise_or_blur = A.OneOf(
    [
        A.GaussNoise(p=0.4),
        A.MotionBlur(blur_limit=(3, 7)),
    ],
    p=0.4,
)

# Color and lighting variations: at most one of these is applied.
color_jitter = A.OneOf(
    [
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
        A.RandomGamma(gamma_limit=(80, 120)),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10),
    ],
    p=0.5,
)

# Simulated real-world conditions: fog, rain, or sun flare.
weather = A.OneOf(
    [
        A.RandomFog(fog_coef_range=(0.1, 0.3), p=1),
        A.RandomRain(drop_length=15, drop_width=1, blur_value=3, p=1),
        A.RandomSunFlare(p=1),
    ],
    p=0.2,
)

# Occlusion simulation: a few small holes filled with 0.
occlusion = A.CoarseDropout(
    num_holes_range=(1, 5),
    hole_height_range=(0.01, 0.03),
    hole_width_range=(0.01, 0.03),
    fill=0,
    p=0.1,
)

# Stages run in this order: geometry -> noise/blur -> color -> weather -> occlusion.
transform = A.Compose([*geometric_ops, noise_or_blur, color_jitter, weather, occlusion])


def augment_generator(X, y, batch_size=32, transform_fn=None):
    """Return an endless generator of shuffled, augmented ``(X_batch, y_batch)`` pairs.

    Every pass over the data draws a fresh random permutation of the sample
    indices and walks it in chunks of ``batch_size``; the last chunk of a pass
    is smaller when ``len(X)`` is not a multiple of ``batch_size``.

    Args:
        X: Indexable collection of images; ``X[j]`` is passed to the
            augmentation as ``image=``.
        y: Indexable collection of labels, same length as ``X``.
        batch_size: Maximum number of samples per yielded batch (>= 1).
        transform_fn: Callable invoked as ``transform_fn(image=img)`` that
            returns a mapping with an ``"image"`` entry. Defaults to the
            module-level ``transform`` pipeline.

    Returns:
        A generator yielding ``(np.ndarray, np.ndarray)`` tuples forever.

    Raises:
        ValueError: If ``X`` is empty, ``X`` and ``y`` differ in length, or
            ``batch_size`` is smaller than 1. Without these checks an empty
            dataset or a negative batch size would make the generator spin
            forever without ever yielding.
    """
    n = len(X)
    if n == 0:
        raise ValueError("augment_generator needs at least one sample; got an empty X.")
    if len(y) != n:
        raise ValueError(f"X and y must have the same length; got {n} and {len(y)}.")
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer; got {batch_size}.")
    if transform_fn is None:
        transform_fn = transform  # notebook-level Albumentations pipeline
    # Validation happens here (eagerly) rather than on the first next() call.
    return _augmented_batches(X, y, batch_size, transform_fn)


def _augmented_batches(X, y, batch_size, transform_fn):
    """Yield augmented batches forever, reshuffling at the start of every pass."""
    n = len(X)
    while True:
        # Shuffle the full dataset once per pass.
        indices = np.random.permutation(n)
        for start in range(0, n, batch_size):
            batch_idx = indices[start:start + batch_size]
            X_batch = [transform_fn(image=X[j])["image"] for j in batch_idx]
            y_batch = [y[j] for j in batch_idx]
            yield np.array(X_batch), np.array(y_batch)


# Endless stream of augmented training batches, 16 samples each.
train_gen = augment_generator(X=X_train, y=y_train, batch_size=16)

**Create The CNN**

In [5]:
import keras

# Set the global policy to use mixed precision.
# This is a process-wide setting: it applies to layers created after this call,
# so it must run before conv_model() builds the network.
keras.mixed_precision.set_global_policy('mixed_float16')

In [6]:
def conv_model(input_shape=(80, 80, 3), num_classes=43):
    """Build the (uncompiled) CNN classifier.

    Architecture: five identical convolutional blocks with 32, 64, 128, 256 and
    512 filters (3x3 conv + ReLU -> batch norm -> 2x2 max-pool -> dropout 0.3),
    followed by global average pooling, a 256-unit ReLU layer with dropout 0.5,
    and a softmax output layer.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of output classes (width of the softmax layer).

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    stack = [keras.layers.Input(shape=input_shape)]

    # Blocks 1-5: each block doubles the filter count and halves the spatial size.
    for filters in (32, 64, 128, 256, 512):
        stack.extend([
            keras.layers.Conv2D(filters, (3,3), activation='relu', padding='same'),
            keras.layers.BatchNormalization(),
            keras.layers.MaxPooling2D((2,2)),
            keras.layers.Dropout(0.3),
        ])

    # GAP + FC
    stack.extend([
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(0.5),
    ])

    # Output. dtype='float32' keeps the softmax (and therefore the loss) in full
    # precision even when the global policy is 'mixed_float16'; a float16
    # softmax is numerically unstable.
    stack.append(keras.layers.Dense(num_classes, activation='softmax', dtype='float32'))

    return Sequential(stack)
    

**Compile the Model and Define Callbacks**

In [None]:
# Build the network and attach optimizer, loss and metric.
# 'categorical_crossentropy' expects one-hot encoded targets.
model = conv_model()
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
)

# Callbacks -- all three watch the validation loss.

# Stop after 6 epochs without improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True)

# Keep only the best model seen so far on disk.
checkpoint = ModelCheckpoint(filepath='best_model.keras', monitor='val_loss', save_best_only=True)

# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6
)

In [None]:
# Print the layer-by-layer overview of the compiled model.
model.summary()

**Train The Model**

In [None]:
# Must match the batch_size that train_gen was created with.
TRAIN_BATCH_SIZE = 16

# Notes on the arguments:
# * `batch_size` is intentionally NOT passed: train_gen already yields batches,
#   and Keras documents that batch_size must not be given for generator input.
#   The array-based validation data gets its batch size via validation_batch_size.
# * steps_per_epoch uses ceiling division so one epoch is exactly one shuffled
#   pass of the generator, including the final partial batch. Floor division
#   would push that leftover batch into the next epoch.
history = model.fit(
    train_gen,
    steps_per_epoch=(len(X_train) + TRAIN_BATCH_SIZE - 1) // TRAIN_BATCH_SIZE,
    epochs=50,
    validation_data=(X_val, y_val),
    validation_batch_size=TRAIN_BATCH_SIZE,
    callbacks=[early_stop, checkpoint, reduce_lr],
    verbose=1
)

**Testing**

In [None]:
# Score the trained model on the held-out test split; the result unpacks
# into the loss followed by the accuracy metric configured at compile time.
test_metrics = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_acc = test_metrics

print('\nTest accuracy:', test_acc)
print('Test loss:', test_loss)

**Plotting Loss and Accuracy**

In [None]:
# One figure per tracked quantity (accuracy first, then loss), each showing
# the training curve against the validation curve.
for history_key, display_name in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.figure(figsize=(10,5))
    plt.plot(history.history[history_key], label=f'Train {display_name}')
    plt.plot(history.history[f'val_{history_key}'], label=f'Validation {display_name}')
    plt.xlabel('Epochs')
    plt.ylabel(display_name)
    plt.title(f'Training vs Validation {display_name}')
    plt.legend()
    plt.show()

**Classification-Report and Confusion Matrix**

In [None]:
# 3️⃣ Confusion Matrix
# y_pred holds one score per class; argmax turns scores and one-hot targets
# into integer class IDs.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Pin the label set to every class ID so the matrix always has one row/column
# per class and the tick labels line up with it. Without `labels`, the matrix
# is built from the union of true and predicted classes, which can differ from
# np.unique(y_true) and misalign the ticks.
class_ids = np.arange(y_test.shape[1])

cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=False, cmap='Blues', xticklabels=class_ids, yticklabels=class_ids)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# 4️⃣ Classification Report
report = classification_report(y_true, y_pred_classes, digits=3)
print("Classification Report:\n", report)

In [None]:
roc_auc_dict = {}
# 5️⃣ ROC Curves and AUC
# One-vs-rest ROC curve per class: column i of the one-hot targets against
# column i of the predicted scores. roc_auc_dict maps class ID -> AUC.
n_classes = y_test.shape[1]

fig, ax = plt.subplots(figsize=(14, 9))
for i in range(n_classes):
    fpr, tpr, _ = roc_curve(y_test[:, i], y_pred[:, i])
    roc_auc = auc(fpr, tpr)
    roc_auc_dict[i] = roc_auc

    ax.plot(fpr, tpr, linewidth=1, label=f'Class {i} (AUC = {roc_auc:.2f})')

# Diagonal reference line: performance of a random classifier.
ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Chance')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('One-vs-Rest ROC Curves per Class')
# The curve labels only become visible once a legend is drawn; it is placed
# outside the axes because there is one entry per class.
ax.legend(loc='center left', bbox_to_anchor=(1.02, 0.5), ncol=2, fontsize='small')
fig.tight_layout()
plt.show()

**Training Summary Table**

In [None]:
import pandas as pd

# Report the epoch with the lowest validation loss rather than the last epoch.
# Early stopping and the checkpoint both track the best `val_loss`, so the
# last epoch's training/validation numbers may describe a different set of
# weights than the ones scored on the test set.
# NOTE(review): this assumes the best weights were actually restored before
# model.evaluate() ran -- confirm for the Keras version in use.
best_epoch = int(np.argmin(history.history['val_loss']))

summary = {
    "Dataset": ["Training", "Validation", "Test"],
    "Accuracy": [
        history.history['accuracy'][best_epoch],      # training acc at best epoch
        history.history['val_accuracy'][best_epoch],  # val acc at best epoch
        test_acc                                       # from model.evaluate()
    ],
    "Loss": [
        history.history['loss'][best_epoch],          # training loss at best epoch
        history.history['val_loss'][best_epoch],      # val loss at best epoch
        test_loss                                      # from model.evaluate()
    ]
}

df_summary = pd.DataFrame(summary)
print(f"Best epoch by val_loss: {best_epoch + 1} of {len(history.history['val_loss'])}")
print(df_summary)


**Per-Class Data Distribution**

In [None]:


# Convert the one-hot labels already in memory back to class IDs. Re-reading
# the .npy files here would only repeat disk I/O and duplicate the absolute
# data paths.
y_train_labels = np.argmax(y_train, axis=1)
y_val_labels   = np.argmax(y_val, axis=1)
y_test_labels  = np.argmax(y_test, axis=1)

# Combine for a full picture
all_labels = np.concatenate([y_train_labels, y_val_labels, y_test_labels])

# Count samples per class; minlength keeps a slot for classes with zero samples.
counts = np.bincount(all_labels, minlength=43)

plt.figure(figsize=(16,6))
plt.bar(range(len(counts)), counts, color="steelblue")
plt.title("GTSRB Dataset – Samples per Class", fontsize=14)
plt.xlabel("Class ID", fontsize=12)
plt.ylabel("Number of Images", fontsize=12)
plt.xticks(range(len(counts)))
plt.show()

print("Class distribution:\n", counts)


As the figure shows, the dataset is strongly imbalanced: some classes have more than 2,000 images while others have fewer than 300. This imbalance likely contributes to the lower recall observed for rare classes in the confusion matrix.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# --- Gather inputs for the three-panel overview ---
history_dict = history.history  # per-epoch metrics recorded by model.fit()

# Validation predictions vs. ground truth, both as integer class IDs
y_val_pred = np.argmax(model.predict(X_val), axis=1)
y_val_true = np.argmax(y_val, axis=1)
cm = confusion_matrix(y_val_true, y_val_pred, labels=range(43))

# Samples per class across all three splits
y_train_labels, y_val_labels, y_test_labels = (
    np.argmax(one_hot, axis=1) for one_hot in (y_train, y_val, y_test)
)
all_labels = np.concatenate([y_train_labels, y_val_labels, y_test_labels])
counts = np.bincount(all_labels, minlength=43)

# --- Draw: accuracy curves | validation confusion matrix | class histogram ---
fig, axes = plt.subplots(1, 3, figsize=(22,6))
ax_curves, ax_cm, ax_dist = axes

# Panel 1: training curves
for key, curve_label in (("accuracy", "Train Acc"), ("val_accuracy", "Val Acc")):
    ax_curves.plot(history_dict[key], label=curve_label)
ax_curves.set_title("Training/Validation Accuracy")
ax_curves.set_xlabel("Epochs")
ax_curves.set_ylabel("Accuracy")
ax_curves.legend()

# Panel 2: confusion matrix heatmap
sns.heatmap(cm, ax=ax_cm, cmap="Blues", cbar=False)
ax_cm.set_title("Confusion Matrix (Validation)")
ax_cm.set_xlabel("Predicted")
ax_cm.set_ylabel("True")

# Panel 3: class distribution
ax_dist.bar(range(43), counts, color="steelblue")
ax_dist.set_title("Samples per Class (Train+Val+Test)")
ax_dist.set_xlabel("Class ID")
ax_dist.set_ylabel("Count")

plt.tight_layout()
plt.show()
