## Setup

In [16]:
# Import libraries
import cv2
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import tensorflow as tf
# from tensorflow import keras
from tensorflow.data import Dataset # type: ignore
from tensorflow.keras import layers, Sequential # type: ignore
from tensorflow.keras.applications import EfficientNetB3 # type: ignore
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.models import Model # type: ignore
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Report the TensorFlow build configuration, the installed version, and the
# GPUs that TensorFlow can see (physical devices first, then logical ones).
build_info = tf.sysconfig.get_build_info()
print(build_info)
print(f'tensorflow version {tf.__version__}')

physical_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(physical_gpus))
print(physical_gpus)

logical_gpus = tf.config.list_logical_devices('GPU')
print(logical_gpus)

OrderedDict([('cpu_compiler', '/usr/lib/llvm-18/bin/clang'), ('cuda_compute_capabilities', ['sm_60', 'sm_70', 'sm_80', 'sm_89', 'compute_90']), ('cuda_version', '12.5.1'), ('cudnn_version', '9'), ('is_cuda_build', True), ('is_rocm_build', False), ('is_tensorrt_build', False)])
tensorflow version 2.18.0
Num GPUs Available: 1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
[LogicalDevice(name='/device:GPU:0', device_type='GPU')]


I0000 00:00:1741387477.137852  328417 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13499 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4080 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9


In [None]:
# !echo "Downloading files..."
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training1.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training2.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/holdout.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/mini_holdout.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/mini_holdout_answers.csv

## Dataset Preparation

### Upscaling

In [10]:
# Resize every JPEG found under `input_folder` to `target_size` and write the
# result to the same relative location under `output_folder`.

# Define paths
input_folder = "original"
output_folder = "upscaled"
os.makedirs(output_folder, exist_ok=True)  # Create output directory if not exists

# Set target resolution, passed to cv2.resize as (width, height)
target_size = (300, 300)  # Change this as needed (e.g., 300x300 for B3)

# Get all image files (recursive search); sorted so files are processed in a stable order
image_files = sorted(glob(os.path.join(input_folder, "**", "*.jpg"), recursive=True))

resized_count = 0   # Images actually written to disk
failed_files = []   # Inputs that could not be read or written

# Process images
for img_path in image_files:
    # Mirror the input's location (relative to input_folder) under output_folder
    relative_path = os.path.relpath(img_path, input_folder)
    output_path = os.path.join(output_folder, relative_path)

    # Ensure the target directory exists
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # cv2.imread returns None (it does not raise) when a file cannot be decoded
    img = cv2.imread(img_path)
    if img is None:
        print(f"Skipping corrupted file: {img_path}")
        failed_files.append(img_path)
        continue

    img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_LANCZOS4)

    # cv2.imwrite reports failure through its return value, so check it
    if not cv2.imwrite(output_path, img_resized):
        print(f"Failed to write: {output_path}")
        failed_files.append(img_path)
        continue

    resized_count += 1

# Report the number of images actually written, not the number of files found
print(f"Resized {resized_count} images and saved them to {output_folder}")
if failed_files:
    print(f"{len(failed_files)} of {len(image_files)} files could not be processed")

Resized 52040 images and saved them to upscaled


### General

In [11]:
# Configuration for the training pipeline: where the images live, how they are
# batched and resized, and the ordered list of class names.

# Define paths
dataset_path = "upscaled/training"
batch_size = 32
img_size = (100, 100)  # Size each image is resized to when loaded; change if using a different EfficientNet variant

# Class names. get_label() converts a folder name into its position in this
# list, so the order here defines the integer labels.
# class_names = sorted(os.listdir(dataset_path))
class_names = ['Speed_20', 'Speed_30', 'Speed_50', 'Speed_60', 'Speed_70',
               'Speed_80','Speed_Limit_Ends', 'Speed_100', 'Speed_120', 'Overtaking_Prohibited',
               'Overtakeing_Prohibited_Trucks', 'Crossroad_Ahead', 'Priority_Road_Ahead', 'Yield', 'STOP',
               'Entry_Forbidden', 'Trucks_Forbidden', 'No_Entry(one-way traffic)', 'Cars_Prohibited(!)', 'Left_Curve_Ahead',
               'Right_Curve_Ahead', 'Bends_Left_Then_Right', 'Poor_Surface_Ahead', 'Slippery_Surface_Ahead', 'Road_Narrows_On_Right',
               'Roadwork_Ahead', 'Traffic_Light_Ahead', 'Warning_Pedestrians', 'Warning_Children', 'Warning_Bikes',
               'Uncontrolled_Crossroad', 'Deer_Crossing', 'End_Previous_Limitation', 'Turning_Right_Compulsory', 'Turning_Left_Compulsory',
               'Ahead_Only', 'Straight_Or_Right_Mandatory', 'Straight_Or_Left_Mandatory', 'Passing_Right_Compulsory', 'Passing_Left_Compulsory',
               'Roundabout', 'End_Overtaking_Prohibition', 'End_Overtaking_Prohibition_Trucks']
num_classes = len(class_names)

# Get all image file paths (one sub-folder per class). The glob order depends on
# the filesystem, so sort it: a seeded shuffle is only reproducible when its
# input order is the same on every run.
image_paths = sorted(tf.io.gfile.glob(os.path.join(dataset_path, "*", "*.jpg")))  # Adjust extension if needed


In [12]:
def load_and_preprocess_image(image_path):
    """Read a JPEG file and return it as a resized float32 RGB image tensor.

    Pixel values are intentionally left in the [0, 255] range. The Keras
    EfficientNet models include their own input preprocessing and expect raw
    [0, 255] pixels, so dividing by 255 here would feed the network inputs
    that are 255x too small. Any inference pipeline feeding the same model
    must therefore also skip the 1/255 rescale.

    Args:
        image_path: Path of the JPEG file (string or scalar string tensor).

    Returns:
        float32 tensor of shape (img_size[0], img_size[1], 3), values in [0, 255].
    """
    raw_bytes = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)  # Change to decode_png if needed
    # tf.image.resize returns float32, so no explicit cast is needed afterwards
    image = tf.image.resize(image, img_size)
    return image

def get_label(image_path):
    """Return the integer class label encoded in an image's parent folder name.

    The label is the position of the folder name within `class_names`.

    Args:
        image_path: Path of the image (string or scalar string tensor) whose
            second-to-last component is the class folder.

    Returns:
        Scalar int64 tensor: index of the folder name in `class_names`.

    Raises:
        tf.errors.InvalidArgumentError: if the folder name is not in `class_names`.
    """
    parts = tf.strings.split(image_path, os.sep)
    label_str = parts[-2]  # Folder name is the class label
    matches = tf.equal(label_str, class_names)  # One boolean per class name

    # argmax over an all-False vector is 0, so without this check every image
    # from an unrecognised folder would silently be labelled as class 0.
    tf.debugging.Assert(tf.reduce_any(matches), ["Unknown class folder:", label_str])

    return tf.argmax(tf.cast(matches, tf.int32))

In [13]:
# Build the training and validation pipelines from the list of image paths.
#
# The split is made on the (cheap) file paths, using a shuffle that is NOT
# repeated on each iteration. If the shuffle re-ran every epoch, take()/skip()
# would select different images each time, so validation images would leak into
# training. Shuffling paths instead of decoded images also keeps the shuffle
# buffer small.

num_images = len(image_paths)
if num_images == 0:
    raise ValueError(
        f"No images found under {dataset_path!r}; check the dataset path and file extension."
    )


def _path_to_example(path):
    """Turn one image path into an (image, label) training example."""
    return load_and_preprocess_image(path), get_label(path)


# One fixed, seeded permutation of the paths, identical on every iteration
shuffled_paths = tf.data.Dataset.from_tensor_slices(image_paths).shuffle(
    buffer_size=num_images, seed=42, reshuffle_each_iteration=False
)

# Full (image, label) dataset in the fixed shuffled order
dataset = shuffled_paths.map(_path_to_example, num_parallel_calls=tf.data.AUTOTUNE)

train_size = int(0.8 * num_images)  # 80% for training
train_paths = shuffled_paths.take(train_size)
val_paths = shuffled_paths.skip(train_size)

train_ds = (
    train_paths
    # Reorder the training paths every epoch; membership of the split stays fixed
    .shuffle(buffer_size=max(train_size, 1), seed=42)
    .map(_path_to_example, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
val_ds = (
    val_paths
    .map(_path_to_example, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)

## Model Generation

In [17]:
# Random image augmentations for training.
#
# Deliberately omitted:
# - Horizontal flips: several classes differ only by direction (for example
#   'Left_Curve_Ahead' / 'Right_Curve_Ahead' and 'Turning_Left_Compulsory' /
#   'Turning_Right_Compulsory'), so a flipped image would carry the wrong label.
# - Rescaling(1./255): pixel scaling belongs in exactly one place in the
#   pipeline; repeating it here would shrink the inputs a second time.
data_augmentation = Sequential([
    layers.RandomRotation(0.1),  # Factor is a fraction of a full turn: up to +/-10% (about 36 degrees)
    layers.RandomZoom(0.2),  # Random zoom in/out by up to 20%
    layers.RandomContrast(0.2),  # Adjust contrast slightly
])

In [None]:
# Transfer-learning classifier: a frozen, ImageNet-pretrained EfficientNetB3
# feature extractor followed by a small trainable classification head.

# Load EfficientNet with pre-trained ImageNet weights. The input shape is derived
# from img_size so the model always accepts exactly what the data pipeline
# produces; a hard-coded size here can disagree with the dataset and make
# fit() fail with a shape mismatch.
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(*img_size, 3))
base_model.trainable = False  # Freeze base model initially

# Add custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Collapse the spatial feature map to one vector per image
x = Dropout(0.3)(x)  # Prevent overfitting
x = Dense(512, activation='relu')(x)
x = Dropout(0.3)(x)
output_layer = Dense(num_classes, activation='softmax')(x)  # One probability per class

# Define model
model = Model(inputs=base_model.input, outputs=output_layer)

In [None]:
# Compile the classifier and train it, validating on val_ds after every epoch.
adam = tf.keras.optimizers.Adam(learning_rate=0.0005)

model.compile(
    optimizer=adam,
    loss='sparse_categorical_crossentropy',  # Labels are integer class ids, not one-hot vectors
    metrics=['accuracy'],
)

# Start with a small number of epochs; increase if needed
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

## Post-Gen Exploration

In [None]:
# View up to 9 training images and their class labels.
# One batch is pulled from train_ds and converted to NumPy for plotting.
images, labels = next(iter(train_ds))
images = images.numpy()
labels = labels.numpy()

# imshow needs uint8 pixels in [0, 255]; scale up first if the pipeline
# produced values normalised to [0, 1].
if images.max() <= 1.0:
    images = images * 255.0
images = np.clip(images, 0, 255).astype("uint8")

# Use a local count here so the global batch_size setting is not overwritten
num_shown = min(9, len(images))

fig = plt.figure(figsize=(10, 10))
for i in range(num_shown):
    ax = fig.add_subplot(3, 3, i + 1)
    ax.imshow(images[i])
    label_id = int(labels[i])
    ax.set_title(f"{label_id}: {class_names[label_id]}")
    ax.axis("off")

plt.show()

In [None]:
# Build a model...

## Testing the model
Once you have built and trained your model, the next step is to run the mini holdout images through it and see how well your model does at making predictions for images it has never seen before.

Since loading these images and formatting them for the model can be tricky, you may find the following code useful. This code only uses your model to predict the class label for a given image. You'll still need to compare those predictions to the "ground truth" class labels in `mini_holdout_answers.csv` to evaluate how well the model does.

Previously, you were given a file that would check your results. This time you're given the answers to the first mini holdout dataset. You'll need to compare those predictions against the "ground truth" class labels in `mini_holdout_answers.csv` to evaluate how well the model does.

Make sure to use the insights gained from the mini holdout dataset in your executive summary.


```
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
test_dir = '/content/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['mini_holdout'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = model.predict(test_generator)
predictions = [np.argmax(probas) for probas in probabilities]
```



## Mini Holdout Dataset


Once you feel confident, you will need to predict for the full holdout dataset using the following code, and submit your csv file:

```
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
test_dir = '/content/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['holdout'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = model.predict(test_generator)
predictions = [np.argmax(probas) for probas in probabilities]
```