### 1. Import Libraries and Define Constants


In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# --- DEFINE CONSTANTS ---
# Path to the training data directory. It is expected to contain one sub-folder
# per class. Set the TRAFFIC_SIGN_DATA_DIR environment variable to use a
# different location without editing the notebook; otherwise the original local
# path below is used.
DATA_DIR = os.environ.get(
    'TRAFFIC_SIGN_DATA_DIR',
    r'C:\Users\Sreeneel Chavidi\OneDrive\Desktop\.vs code\.vs code\DNA-Proj\dataset-trafficsigns\Indian-Traffic Sign-Dataset\Images',
)

# Fail early with a readable message if the dataset location is wrong.
if not os.path.isdir(DATA_DIR):
    raise FileNotFoundError(f"Dataset directory not found: {DATA_DIR}")

# Define the desired image dimensions
IMG_HEIGHT = 48
IMG_WIDTH = 48

# Number of classes = number of sub-directories of DATA_DIR.
# Plain files that happen to sit next to the class folders are ignored so they
# do not inflate the class count.
NUM_CLASSES = sum(
    os.path.isdir(os.path.join(DATA_DIR, entry))
    for entry in os.listdir(DATA_DIR)
)

print(f"Dataset Path: {DATA_DIR}")
print(f"Image Dimensions: {IMG_HEIGHT}x{IMG_WIDTH}")
print(f"Number of Classes: {NUM_CLASSES}")

KeyboardInterrupt: 

### 2. Load and Preprocess Image Data

In [None]:
print("Loading and preprocessing data... Please wait.")

images = []
labels = []

# Class folders are looked up by name: "0", "1", ..., str(NUM_CLASSES - 1).
for class_id in range(NUM_CLASSES):
    class_path = os.path.join(DATA_DIR, str(class_id))

    # Skip class IDs that have no matching folder
    if not os.path.isdir(class_path):
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/validation split) stable across runs.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        try:
            # Read the image in color (OpenCV loads channels in BGR order)
            image = cv2.imread(img_path)

            # cv2.imread does not raise on unreadable files, it returns None
            if image is None:
                raise ValueError("file could not be read as an image")

            # Resize the image to the desired dimensions
            image_resized = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        except Exception as e:
            # Best effort: report the bad file and keep loading the rest
            print(f"Error loading image {img_path}: {e}")
            continue

        # Append the processed image and its label
        images.append(image_resized)
        labels.append(class_id)

print("Data loading complete.")

if not images:
    raise RuntimeError(f"No images could be loaded from {DATA_DIR}")

# --- DATA CONVERSION AND NORMALIZATION ---

# Convert lists to NumPy arrays for efficient processing.
# float32 is used so the normalized array takes half the memory of the
# float64 array that a plain `uint8_array / 255.0` would produce.
images = np.array(images, dtype=np.float32)
labels = np.array(labels)

# Normalize pixel values from [0, 255] to [0, 1]
images = images / 255.0

print(f"Shape of images array: {images.shape}")
print(f"Shape of labels array: {labels.shape}")

### 3. Exploratory Data Analysis: Visualize Class Distribution

In [None]:
# --- VISUALIZE CLASS DISTRIBUTION ---

# One bin per class, with edges at -0.5, 0.5, ..., NUM_CLASSES - 0.5 so that
# every bar is centred on its integer class ID. Passing an integer bin count
# instead would spread equal-width bins between the smallest and largest label,
# which shifts the bars away from the class IDs on the x-axis.
class_bin_edges = np.arange(NUM_CLASSES + 1) - 0.5

plt.figure(figsize=(14, 6))
plt.hist(labels, bins=class_bin_edges, rwidth=0.8)
plt.title('Distribution of Traffic Sign Classes')
plt.xlabel('Class ID')
plt.ylabel('Number of Images')
plt.show()

### 4. Split Data and One-Hot Encode Labels

In [None]:
# --- SPLIT THE DATA ---
# Hold out 20% of the samples for validation; the remaining 80% are used for training.
split_options = {
    'test_size': 0.2,
    'random_state': 42,  # fixed seed for reproducibility
    'stratify': labels,  # keeps the class distribution similar in both subsets
}
X_train, X_val, y_train, y_val = train_test_split(images, labels, **split_options)

# --- ONE-HOT ENCODE THE LABELS ---
# Integer class IDs become binary class matrices with NUM_CLASSES columns,
# e.g. label 3 with 5 classes -> [0, 0, 0, 1, 0].
y_train, y_val = (to_categorical(y, NUM_CLASSES) for y in (y_train, y_val))

print("\n--- Data Splitting and Encoding Complete ---")
for description, array in (
    ("Training data", X_train),
    ("Training labels", y_train),
    ("Validation data", X_val),
    ("Validation labels", y_val),
):
    print(f"{description} shape: {array.shape}")

### 5. Build and Compile the CNN Model Architecture

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

# --- DEFINE THE CNN MODEL ARCHITECTURE ---
# Two convolution/pooling/dropout blocks followed by a dense classifier head.

model = Sequential()

# First Convolutional Block
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

# Second Convolutional Block
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))

# Flatten the feature maps to a 1D vector
model.add(Flatten())

# Fully Connected Dense Layer
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(rate=0.5))

# Output Layer - one unit per class.
# The labels are one-hot encoded with NUM_CLASSES columns, so the layer width is
# taken from NUM_CLASSES instead of a hard-coded number; a mismatch between the
# two would make categorical cross-entropy fail on incompatible shapes.
model.add(Dense(units=NUM_CLASSES, activation='softmax'))


# --- COMPILE THE MODEL ---
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# --- DISPLAY THE MODEL SUMMARY ---
model.summary()

### 6. Train the Model

In [None]:
# --- TRAIN THE MODEL ---

# Training hyper-parameters.
epochs = 15      # one epoch is a full pass through the entire training data
batch_size = 32  # the model looks at 32 images at a time

print("Starting model training... This will take some time.")

# The validation split is passed along so we can see how well the model generalizes.
fit_options = {
    'batch_size': batch_size,
    'epochs': epochs,
    'validation_data': (X_val, y_val),
}
history = model.fit(X_train, y_train, **fit_options)

print("\nModel training complete!")

### 7. Visualize Training History (Accuracy and Loss)

In [None]:
import matplotlib.pyplot as plt

# --- PLOT THE TRAINING & VALIDATION CURVES ---
# Left panel: accuracy, right panel: loss. Each panel overlays the training
# curve and the validation curve recorded in `history`.
plt.figure(figsize=(12, 5))

panels = [('accuracy', 'Accuracy'), ('loss', 'Loss')]
for position, (metric_key, metric_name) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric_key], label=f'Training {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.title(f'Model {metric_name}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_name)
    plt.legend()

plt.tight_layout()
plt.show()

### 8. Save the Final Trained Model

In [None]:
# Persist the whole trained model in one HDF5 (.h5) file.
model_filename = 'my_traffic_sign_model.h5'
model.save(model_filename)

print(f"Model saved successfully as {model_filename}")