# Self Driving Traffic Sign Detection

Isaiah Jenkins

## Import the required libraries

In [2]:
import tensorflow as tf
from keras import layers, Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import ResNet50

from sklearn.model_selection import train_test_split

import pandas as pd
import os

## 1. About the data

1. a. Description

Throughout this analysis we will explore Udacity's Self Driving dataset. This dataset consists of images for thousands of pedestrians, bikers, cars, and traffic lights. Although traffic light images are underrepresented in the dataset, the focus of the analysis will be based solely on traffic light images.

1. b. Data dictionary, 97,942 labels across 11 classes and 15,000 images, 1,720 null examples (images with no labels).

Class Balance across images (labels in each image)

* car - 64,399 - over represented
* pedestrian - 10,806
* trafficLight-Red - 6,870
* trafficLight-Green - 5,465 - under represented
* truck - 3,623 - under represented
* trafficLight - 2,568 - under represented
* biker - 1,864 - under represented
* trafficLight-RedLeft - 1,751 - under represented
* trafficLight-GreenLeft - 310 - under represented
* trafficLight-Yellow - 272 - under represented
* trafficLight-YellowLeft - 14 - under represented

## 2. Objectives

Throughout this analysis, we will explore and build various deep learning convolutional neural network (CNN) architectures to detect traffic light signs, aiming to optimize accuracy and efficiency. Our objective is to compare different model variations, such as CNNs with different depths, pre-trained models, and data augmentation techniques, to determine the most effective approach. Potential challenges include handling variations in lighting conditions, occlusions, and small object sizes, which may impact detection performance. Additionally, dataset imbalances and misclassifications due to similar-looking traffic signs could introduce biases, requiring careful preprocessing and model tuning.

## 3. Data Exploration, Cleaning and Feature Engineering

* Extract traffic sign & traffic light images from the dataset to help with computational efficiency.
* Resize images to standardize dataset making it computationally efficient making it easier to analyze data.
* Normalize pixel values (0-1 range) to help with generalization.
* Augment data (rotation, brightness shifts, contrast changes) to improve generalization.

### Load in data

In [3]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

!unzip -o -q '/content/gdrive/MyDrive/data.zip' -d '/content/'

Mounted at /content/gdrive


In [4]:
DATASET_PATH = 'data/export'
CSV_PATH = os.path.join(DATASET_PATH, '_annotations.csv')

In [5]:
df = pd.read_csv(CSV_PATH) # load annotations

In [6]:
df = df[df['class'].isin(['trafficLight-Red', 'trafficLight_Yellow', 'trafficLight-Green'])]

In [7]:
file_names = df['filename'].values # image files names
labels = df['class'].values # classes

In [8]:
label_map = {"trafficLight-Red": 0, "trafficLight_Yellow": 1, "trafficLight-Green": 2, }  # Adjust based on dataset
labels = [label_map[label] for label in labels if label in label_map]  # Convert text labels to integers
labels = to_categorical(labels, num_classes=3)  # Convert to one-hot encoding

In [9]:
#Split Data into Train (80%) and Test (20%)
train_files, test_files, train_labels, test_labels = train_test_split(
    file_names, labels, test_size=0.2, random_state=42, stratify=labels, shuffle=True
)

In [10]:
IMG_SIZE = (224, 224)

In [11]:
# Function to Load & Preprocess Images
def load_and_preprocess_image(file_name, label):
    img_path = os.path.join(DATASET_PATH, file_name)  # Update image folder path
    img = load_img(img_path, target_size=IMG_SIZE)  # Load & Resize Image
    img = img_to_array(img) / 255.0  # Convert to NumPy array & Normalize (0-1)
    return img, label

In [12]:
# Create Generators for Train & Test Data
def data_generator(file_list, label_list):
    for f, l in zip(file_list, label_list):
        yield load_and_preprocess_image(f, l)  # Load one image at a time

In [13]:
BATCH_SIZE = 10

In [14]:
# Create a tf.data.Dataset that Loads Images on Demand
train_dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(train_files, train_labels),
    output_signature=(
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),  # Image Shape
        tf.TensorSpec(shape=(3,), dtype=tf.float32)  # One-hot Encoded Label
    )
).batch(BATCH_SIZE).shuffle(1000).prefetch(tf.data.experimental.AUTOTUNE)

test_dataset = tf.data.Dataset.from_generator(
    lambda: data_generator(test_files, test_labels),
    output_signature=(
        tf.TensorSpec(shape=(224, 224, 3), dtype=tf.float32),  # Image Shape
        tf.TensorSpec(shape=(3,), dtype=tf.float32)  # One-hot Encoded Label
    )
).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

## 4. CNN Models

#### ResNet-50

In [16]:
# Layers
num_classes = 3

base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dense(1024, activation='relu')(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [17]:
# Compile
model = Model(inputs=base_model.input, outputs=outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [18]:
# Fit on training set and compare against validation set after each epoch
model.fit(train_dataset, epochs=5, validation_data=test_dataset)

Epoch 1/5
   1961/Unknown [1m344s[0m 131ms/step - accuracy: 0.7057 - loss: 0.6208



[1m1961/1961[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 142ms/step - accuracy: 0.7057 - loss: 0.6207 - val_accuracy: 0.6174 - val_loss: 0.8490
Epoch 2/5
[1m1961/1961[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 126ms/step - accuracy: 0.8758 - loss: 0.3123 - val_accuracy: 0.8472 - val_loss: 0.3760
Epoch 3/5
[1m1961/1961[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 126ms/step - accuracy: 0.9260 - loss: 0.2085 - val_accuracy: 0.8786 - val_loss: 0.5685
Epoch 4/5
[1m1961/1961[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 126ms/step - accuracy: 0.9444 - loss: 0.1595 - val_accuracy: 0.8113 - val_loss: 0.5056
Epoch 5/5
[1m1961/1961[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 126ms/step - accuracy: 0.9556 - loss: 0.1302 - val_accuracy: 0.9211 - val_loss: 0.2447


<keras.src.callbacks.history.History at 0x79f8495f73d0>

In [19]:
# Evaluate - Loss, Accuracy
model.evaluate(test_dataset)

[1m491/491[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 32ms/step - accuracy: 0.9268 - loss: 0.2311




[0.244716078042984, 0.9210687279701233]

In [3]:
# Inception

In [None]:
# Custom CNN 1

### Summary of Models

## 5. Insights and Key Findings

## 6. Next Steps