In [2]:
pip install tensorflow numpy opencv-python pandas scikit-learn





[notice] A new release of pip is available: 24.2 -> 25.0
[notice] To update, run: C:\Users\Chelt\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip


In [5]:
import os

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_gtsrb_data(root_dir, image_size=(32, 32), num_classes=43,
                    test_size=0.2, random_state=42):
    """
    Load the GTSRB training images, preprocess them and split them into
    stratified train and test subsets.

    Args:
        root_dir: Root directory containing GTSRB dataset
            (should contain 'GTSRB/Final_Training/Images/')
        image_size: (width, height) every image is resized to.
        num_classes: Number of class folders to read. Folders are numbered
            from 0 and zero-padded to five digits ('00000', '00001', ...).
        test_size: Fraction of the samples held out for the test subset
            (forwarded to train_test_split).
        random_state: Seed forwarded to train_test_split so the split is
            reproducible.
    Returns:
        X_train, y_train, X_test, y_test
        The X arrays are float32, scaled to [0, 1], and keep the channel
        order produced by cv2.imread (BGR, not RGB). The y arrays hold the
        integer class ids.
    Raises:
        FileNotFoundError: If a class annotation CSV is missing (raised by
            pandas.read_csv).
        ValueError: If not a single image could be loaded.
    """
    images = []
    labels = []

    # Path to the training images
    training_dir = os.path.join(root_dir, 'GTSRB', 'Final_Training', 'Images')
    target_size = tuple(image_size)  # cv2.resize requires a tuple, accept lists too

    # Loop through each class directory
    for class_id in range(num_classes):
        class_name = format(class_id, '05d')  # Folders are padded with zeros
        class_dir = os.path.join(training_dir, class_name)

        # Read annotations file for this class ('GT-<class>.csv', ';'-separated)
        annotations_file = os.path.join(class_dir, 'GT-{}.csv'.format(class_name))
        annotations = pd.read_csv(annotations_file, sep=';')

        print(f"Loading class {class_id}")

        # Loop through each image listed for the class
        for filename in annotations['Filename']:
            image_path = os.path.join(class_dir, filename)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable file with None instead of
                # raising; report it and keep going (best-effort loading).
                print(f"Failed to load image: {image_path}")
                continue
            images.append(cv2.resize(image, target_size))
            labels.append(class_id)

    # Fail early with a clear message instead of an opaque error from the
    # split below when nothing was loaded.
    if not images:
        raise ValueError(f"No images could be loaded from: {training_dir}")

    # Convert to numpy arrays and normalize pixel values to [0, 1].
    # Building the array as float32 avoids the implicit float64 upcast of
    # `uint8 / 255.0`, halving the memory footprint of the dataset.
    X = np.asarray(images, dtype=np.float32) / 255.0
    y = np.asarray(labels)

    # Split into train and test sets, preserving the class proportions
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    print(f"Training set shape: {X_train.shape}")
    print(f"Test set shape: {X_test.shape}")

    return X_train, y_train, X_test, y_test

# Example usage:
if __name__ == "__main__":
    # Dataset root, relative to the current working directory; it must hold
    # the 'GTSRB/Final_Training/Images' folder tree.
    root_dir = "./Dataset"

    # Load, preprocess and split the data
    X_train, y_train, X_test, y_test = load_gtsrb_data(root_dir)

    # Report the basic facts about the resulting split, one item per line
    print(
        "\nDataset Summary:",
        f"Number of training samples: {len(X_train)}",
        f"Number of test samples: {len(X_test)}",
        f"Image shape: {X_train[0].shape}",
        f"Number of classes: {len(np.unique(y_train))}",
        sep="\n",
    )

Loading class 0
Loading class 1
Loading class 2
Loading class 3
Loading class 4
Loading class 5
Loading class 6
Loading class 7
Loading class 8
Loading class 9
Loading class 10
Loading class 11
Loading class 12
Loading class 13
Loading class 14
Loading class 15
Loading class 16
Loading class 17
Loading class 18
Loading class 19
Loading class 20
Loading class 21
Loading class 22
Loading class 23
Loading class 24
Loading class 25
Loading class 26
Loading class 27
Loading class 28
Loading class 29
Loading class 30
Loading class 31
Loading class 32
Loading class 33
Loading class 34
Loading class 35
Loading class 36
Loading class 37
Loading class 38
Loading class 39
Loading class 40
Loading class 41
Loading class 42
Training set shape: (31367, 32, 32, 3)
Test set shape: (7842, 32, 32, 3)

Dataset Summary:
Number of training samples: 31367
Number of test samples: 7842
Image shape: (32, 32, 3)
Number of classes: 43
