**Step 1: Training Data Loading**


In [None]:
import os
import numpy as np
import matplotlib.image as mpimg

# Path to the classification-ready dataset: one sub-folder per class,
# each named by its integer class ID.
train_data_dir = "/mnt/g/which one is it/data/GTSRB/Final_Training"

x_raw_train = []  # List of image arrays
y_raw_train_list = []  # List of class IDs (int)

print("Loading training data...")
# Loop over class folders. Listings are sorted because os.listdir returns
# entries in arbitrary order, which would make the sample order (and so the
# later seeded split) differ between runs or machines.
for folder in sorted(os.listdir(train_data_dir)):
    class_folder = os.path.join(train_data_dir, folder)
    if not os.path.isdir(class_folder):
        continue  # stray file sitting next to the class folders

    # Only parse the name once we know it is a directory; a non-numeric name
    # is reported and skipped instead of aborting the whole load.
    try:
        class_id = int(folder)
    except ValueError:
        print(f"Skipping non-class folder: {folder}")
        continue

    # Loop over all images of this class
    for img_name in sorted(os.listdir(class_folder)):
        if img_name.endswith(".ppm"):
            img_path = os.path.join(class_folder, img_name)
            x_raw_train.append(mpimg.imread(img_path))
            y_raw_train_list.append(class_id)

print("\n--- Raw Training Data Loading Complete ---")
print(f"Training Samples (raw): {len(x_raw_train)}")


**Step 2: Training Data Resize and Normalisation**

In [None]:
import numpy as np
from skimage.transform import resize
from skimage.color import gray2rgb

# Common output resolution for every dataset split
TARGET_SIZE = (80, 80)  # (height, width)

def preprocess_images(raw_images_list, target_size):
    """Build one float32 array from a list of images of varying size.

    Each image is brought to three channels (2-D arrays are expanded with
    ``gray2rgb``; arrays whose last axis has length 4 lose their fourth
    channel) and then resized to ``target_size``.

    Args:
        raw_images_list: Iterable of image arrays.
        target_size: Output ``(height, width)`` handed to ``resize``.

    Returns:
        ``np.ndarray`` holding the processed images in input order.
    """
    def _prepare(image):
        # Bring the image to a 3-channel layout before resizing
        if image.ndim == 2:
            rgb = gray2rgb(image)
        elif image.shape[-1] == 4:
            rgb = image[:, :, :3]
        else:
            rgb = image
        # resize returns a float image (integer inputs are rescaled to [0, 1]
        # with skimage's default preserve_range=False); store it as float32
        return resize(rgb, target_size, anti_aliasing=True).astype(np.float32)

    return np.array([_prepare(image) for image in raw_images_list])

print("Preprocessing training images...")
# The label vector and the image stack are built independently of each other
y_train = np.array(y_raw_train_list, dtype=np.int32)
x_train = preprocess_images(x_raw_train, TARGET_SIZE)

print("\n--- Train Data Preprocessing Complete ---")
# Report shape and dtype of both arrays
for _label, _array in (("X_train", x_train), ("y_train", y_train)):
    print(f"{_label} shape: {_array.shape}, dtype: {_array.dtype}")

**Step 3: Test Data Loading**

In [None]:
import os
import numpy as np
import matplotlib.image as mpimg
import pandas as pd

# Locations of the test images and of the CSV holding their labels
test_data_dir = r"/mnt/g/which one is it/data/GTSRB/Final_Test/Images"
gt_csv = r"/mnt/g/which one is it/data/GTSRB/Final_Test/GT-final_test.csv"

print("\nLoading test data...")
gt = pd.read_csv(gt_csv, sep=';')

# One image per CSV row, read in row order
x_test_raw = [
    mpimg.imread(os.path.join(test_data_dir, filename))
    for filename in gt['Filename']
]
# Labels taken from the same rows, so they stay aligned with the images
y_test_raw_list = gt['ClassId'].tolist()

print("\n--- Raw Test Data Loading Complete ---")
print(f"Test Samples (raw): {len(x_test_raw)}")


**Step 4: Test Data Resizing and Normalisation**

In [None]:
import numpy as np
from skimage.transform import resize
from skimage.color import gray2rgb

# Reuse preprocess_images and TARGET_SIZE from the training preprocessing
# cell instead of redefining them here: a second copy silently shadows the
# first and lets train/test preprocessing drift apart if only one is edited.

print("Preprocessing test data...")
X_test = preprocess_images(x_test_raw, TARGET_SIZE)
y_test = np.array(y_test_raw_list, dtype=np.int32)

# Every test image must have exactly one label
assert len(X_test) == len(y_test), "test images and labels are misaligned"

print("\n--- Test Data Preprocessing Complete ---")
print(f"X_test shape: {X_test.shape}, dtype: {X_test.dtype}")
print(f"y_test shape: {y_test.shape}, dtype: {y_test.dtype}")

**Step 5: Data Splitting**

In [None]:
from sklearn.model_selection import train_test_split

# Stratified split: 85% train, 15% validation.
# The full label vector is rebuilt from y_raw_train_list rather than read
# from y_train: this cell rebinds y_train to the training subset, so reading
# it back on a re-run would pair all images in x_train with only 85% of the
# labels and fail with an inconsistent-length error.
y_train_full = np.array(y_raw_train_list, dtype=np.int32)

X_train, X_val, y_train, y_val = train_test_split(
    x_train,
    y_train_full,
    test_size=0.15,
    stratify=y_train_full,
    random_state=42
)

# The two subsets must partition the preprocessed training images
assert len(X_train) + len(X_val) == len(x_train)

print("Train:", X_train.shape, y_train.shape)
print("Val:", X_val.shape, y_val.shape)



In [None]:
# Sample counts, printed in the order: train, test, validation
for _split in (X_train, X_test, X_val):
    print(len(_split))

*One-Hot Encoding*

In [None]:
# Convert labels to one-hot encoding for categorical_crossentropy
from keras.utils import to_categorical

NUM_CLASSES = 43  # width of the one-hot vectors (class IDs 0..42)


def _to_one_hot(labels):
    """Return ``labels`` one-hot encoded with ``NUM_CLASSES`` columns.

    Labels that are already 2-D are returned unchanged, so re-running this
    cell does not encode the one-hot matrix a second time (which would
    produce an (N, 43, 43) array).
    """
    if labels.ndim > 1:
        return labels
    return to_categorical(labels, num_classes=NUM_CLASSES)


y_train = _to_one_hot(y_train)
y_val = _to_one_hot(y_val)
y_test = _to_one_hot(y_test)

print("\n--- Training Data Preprocessing and Splitting Complete ---")
print(f"X_train shape: {X_train.shape}, dtype: {X_train.dtype}")
print(f"y_train: {y_train.shape}, dtype: {y_train.dtype}")
print(f"X_val shape: {X_val.shape}, dtype: {X_val.dtype}")
print(f"y_val shape: {y_val.shape}, dtype: {y_val.dtype}")
print(f"y_test shape: {y_test.shape}, dtype: {y_test.dtype}")

**Save The Preprocessed Data**

In [None]:
import os

# Make sure processed folder exists
save_dir = "/mnt/g/which one is it/data/GTSRB/processed"
os.makedirs(save_dir, exist_ok=True)

# Save each array as <name>.npy inside save_dir
arrays_to_save = {
    "X_train": X_train,
    "y_train": y_train,
    "X_val": X_val,
    "y_val": y_val,
    "X_test": X_test,
    "y_test": y_test,
}
for array_name, array in arrays_to_save.items():
    np.save(os.path.join(save_dir, f"{array_name}.npy"), array)

# \u2705 is the check-mark emoji; written as an escape so the message cannot
# be corrupted again by a wrong file encoding.
print("\u2705 Preprocessed data saved to:", save_dir)


In [None]:
# Feature and label shapes for each split
for _title, _features, _labels in (
    ("Train:", X_train, y_train),
    ("Val:", X_val, y_val),
    ("Test:", X_test, y_test),
):
    print(_title, _features.shape, _labels.shape)

In [None]:
# Read the saved training images back to confirm the file is loadable.
# X_train.npy holds a plain numeric array, so pickle support is not needed;
# keeping allow_pickle off avoids unpickling (and possibly executing code
# from) a tampered file.
X_check = np.load(os.path.join(save_dir, "X_train.npy"), allow_pickle=False)
print("X_check shape:", X_check.shape)