## Setup

In [36]:
# Import libraries
import cv2
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import tensorflow as tf
# from tensorflow import keras
from tensorflow.data import Dataset # type: ignore
from tensorflow.keras import layers, mixed_precision, Sequential # type: ignore
from tensorflow.keras.applications import EfficientNetB3 # type: ignore
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D # type: ignore
from tensorflow.keras.models import Model # type: ignore
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils.class_weight import compute_class_weight

In [3]:
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

mixed_precision.set_global_policy("mixed_float16")
os.environ["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=2 --tf_xla_cpu_global_jit"

tf.config.set_logical_device_configuration(
    gpus[0],
    [tf.config.LogicalDeviceConfiguration(memory_limit=14000)]  # Adjust memory limit (MB) if needed
)


In [None]:
# print(tf.sysconfig.get_build_info())
# print(f'tensorflow version {tf.__version__}')
# print("Num GPUs Available:", len(tf.config.experimental.list_physical_devices('GPU')))
# print(tf.config.list_physical_devices('GPU'))
# print(tf.config.list_logical_devices('GPU'))

In [None]:
# !echo "Downloading files..."
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training1.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training2.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/holdout.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/mini_holdout.zip
# !wget -q https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/mini_holdout_answers.csv

## Dataset Preparation

### Upscaling

In [None]:
### This has been done already, no need to do again.
# # Define paths
# input_folder = "original"
# output_folder = "upscaled"
# os.makedirs(output_folder, exist_ok=True)  # Create output directory if not exists

# # Set target resolution
# target_size = (300, 300)  # Change this as needed (e.g., 300x300 for B3)

# # Get all image files
# image_files = []
# image_files.extend(glob(os.path.join(input_folder, "**", "*.jpg"), recursive=True))

# # Process images
# for img_path in image_files:
#     # Determine relative path inside original folder
#     relative_path = os.path.relpath(img_path, input_folder)

#     # Create corresponding output path
#     output_path = os.path.join(output_folder, relative_path)

#     # Ensure the target directory exists
#     os.makedirs(os.path.dirname(output_path), exist_ok=True)

#     # Load and resize image
#     img = cv2.imread(img_path)
#     if img is None:
#         print(f"Skipping corrupted file: {img_path}")
#         continue

#     img_resized = cv2.resize(img, target_size, interpolation=cv2.INTER_LANCZOS4)

#     # Save the resized image
#     cv2.imwrite(output_path, img_resized)

# print(f"Resized {len(image_files)} images and saved them to {output_folder}")

Resized 52040 images and saved them to upscaled


### General

In [None]:
# Define paths
dataset_path = "upscaled/training"
batch_size = 24
img_size = (300, 300)  # Change if using a different EfficientNet variant

# Get class names from directory structure
class_names = sorted(os.listdir(dataset_path))
# class_names = ['Speed_20', 'Speed_30', 'Speed_50', 'Speed_60', 'Speed_70',
#                'Speed_80','Speed_Limit_Ends', 'Speed_100', 'Speed_120', 'Overtaking_Prohibited',
#                'Overtakeing_Prohibited_Trucks', 'Crossroad_Ahead', 'Priority_Road_Ahead', 'Yield', 'STOP',
#                'Entry_Forbidden', 'Trucks_Forbidden', 'No_Entry(one-way traffic)', 'Cars_Prohibited(!)', 'Left_Curve_Ahead',
#                'Right_Curve_Ahead', 'Bends_Left_Then_Right', 'Poor_Surface_Ahead', 'Slippery_Surface_Ahead', 'Road_Narrows_On_Right',
#                'Roadwork_Ahead', 'Traffic_Light_Ahead', 'Warning_Pedestrians', 'Warning_Children', 'Warning_Bikes',
#                'Uncontrolled_Crossroad', 'Deer_Crossing', 'End_Previous_Limitation', 'Turning_Right_Compulsory', 'Turning_Left_Compulsory',
#                'Ahead_Only', 'Straight_Or_Right_Mandatory', 'Straight_Or_Left_Mandatory', 'Passing_Right_Compulsory', 'Passing_Left_Compulsory',
#                'Roundabout', 'End_Overtaking_Prohibition', 'End_Overtaking_Prohibition_Trucks']
num_classes = len(class_names)

# Get all image file paths
image_paths = tf.io.gfile.glob(os.path.join(dataset_path, "*", "*.jpg"))  # Adjust extension if needed


In [6]:
data_augmentation = Sequential([
    layers.Rescaling(1./255),  # Normalize to [0,1]
    layers.RandomFlip("horizontal"),  # Random horizontal flips
    layers.RandomRotation(0.2),  # Rotate by up to 10%
    layers.RandomZoom(0.3),  # Random zoom
    layers.RandomContrast(0.3),  # Adjust contrast slightly
])

I0000 00:00:1741477937.199273     868 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14000 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4080 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9


In [7]:
def load_and_preprocess_image(image_path):
    try:
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, img_size)
        image = tf.cast(image, tf.float32)
        # Normalize to [-1, 1]
        image = image / 255.0
        return image
    except Exception as e:
        tf.print("Error processing", image_path, ":", e)
        # Return a tensor of zeros (or use tf.data.experimental.ignore_errors() downstream)
        return tf.zeros([*img_size, 3], dtype=tf.float32)

def get_label(image_path):
    parts = tf.strings.split(image_path, os.sep)
    label_str = parts[-2]  # Extract folder name (e.g., "0")
    
    try:
        label = tf.strings.to_number(label_str, out_type=tf.int32)  # Convert safely to integer
    except:
        print(f"Error converting label: {label_str}")
        label = tf.constant(-1, dtype=tf.int32)  # Assign -1 if an error occurs
    
    return label

# def augment(image_path, label):
#     image = load_and_preprocess_image(image_path)  # Your function that loads & resizes images
#     image = data_augmentation(image)  # Apply augmentation
#     return image, label


In [13]:
# # print(get_label(image_paths[0]))  # Test the label extraction
# for image in image_paths:
#     img = cv2.imread(image)
#     print("corrupted" if img is None else "", sep='')  # If True, the image is corrupted

# print(load_and_preprocess_image(image_paths[0]))  # Test the image loading and preprocessing

In [8]:
auto = tf.data.AUTOTUNE
# Convert file paths into a tf.data.Dataset
dataset = tf.data.Dataset.from_tensor_slices(image_paths)
# Apply preprocessing and labeling functions
dataset = dataset.map(lambda x: (load_and_preprocess_image(x), get_label(x)), num_parallel_calls=auto)

# Shuffle and split dataset
dataset = dataset.shuffle(buffer_size=1024, seed=42)

train_size = int(0.8 * len(image_paths))  # 80% for training
train_ds = dataset.take(train_size).batch(batch_size).prefetch(auto)
val_ds = dataset.skip(train_size).batch(batch_size).prefetch(auto)

In [None]:
# # Compute class weights
# class_labels = np.array([get_label(img_path).numpy() for img_path in image_paths])
# class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(class_labels), y=class_labels)
# class_weight_dict = dict(enumerate(class_weights))
# pickle.dump(class_weight_dict, open("class_weight_dict.pkl", "wb"))

In [38]:
class_weight_dict = pickle.load(open("class_weight_dict.pkl", "rb"))

## Model Generation

In [56]:
inputs = tf.keras.Input(shape=(*img_size, 3))

# Apply in-model data augmentation
x = data_augmentation(inputs)

# Load EfficientNet and attach it
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_tensor=x)
# base_model.trainable = False

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.4)(x)
x = Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01))(x)
x = Dropout(0.4)(x)
outputs = Dense(num_classes, activation='softmax')(x)

# Define model explicitly
model = tf.keras.Model(inputs, outputs)

In [57]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002),
              loss='sparse_categorical_crossentropy',  # Integer labels
              metrics=['accuracy'])

In [58]:
tf.keras.backend.clear_session()  # Clear session to avoid clutter from old models
warmup_data = tf.convert_to_tensor(next(iter(train_ds))[0])  # Get one batch
model.predict(warmup_data)  # Run one inference step


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


array([[0.02368503, 0.02124928, 0.02811983, ..., 0.02920237, 0.02133273,
        0.02316666],
       [0.02368473, 0.0212505 , 0.02812148, ..., 0.02920197, 0.02133079,
        0.02316396],
       [0.0236861 , 0.0212493 , 0.02812079, ..., 0.02920163, 0.02133371,
        0.02316653],
       ...,
       [0.02368213, 0.02125245, 0.02811669, ..., 0.02920375, 0.02133189,
        0.02316597],
       [0.02368519, 0.02124977, 0.02812103, ..., 0.0292006 , 0.02133388,
        0.02316515],
       [0.02368525, 0.02124997, 0.02811955, ..., 0.02920253, 0.02132917,
        0.02316592]], dtype=float32)

In [59]:
history = model.fit(
    train_ds,
    epochs=5,  # Start small; increase if needed
    validation_data=val_ds,
    class_weight=class_weight_dict
)

Epoch 1/5


E0000 00:00:1741493278.892461     868 meta_optimizer.cc:966] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_1/block1b_drop_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m1307/1307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m499s[0m 355ms/step - accuracy: 0.6991 - loss: 7.2983 - val_accuracy: 0.0022 - val_loss: 6.1131
Epoch 2/5
[1m1307/1307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m467s[0m 357ms/step - accuracy: 0.8355 - loss: 1.7392 - val_accuracy: 0.0020 - val_loss: 6.4360
Epoch 3/5
[1m1307/1307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m454s[0m 348ms/step - accuracy: 0.8967 - loss: 0.9176 - val_accuracy: 0.0384 - val_loss: 7.9892
Epoch 4/5
[1m1307/1307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 348ms/step - accuracy: 0.9202 - loss: 0.6680 - val_accuracy: 7.6511e-04 - val_loss: 25.9423
Epoch 5/5
[1m1307/1307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m453s[0m 347ms/step - accuracy: 0.9292 - loss: 0.5874 - val_accuracy: 0.0321 - val_loss: 11.6555


In [60]:
model_filepath = "models/en_b3_take3.keras"

In [61]:
model.save(model_filepath)

In [None]:
model = tf.keras.models.load_model(model_filepath)

## Post-Gen Exploration

In [4]:
# View 9 images and their class labels
plt.figure(figsize=(10, 10))
images, labels = next(train_generator)  # Assuming train_generator is a generator
batch_size = images.shape[0]

for i in range(min(9, batch_size)):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow((images[i] * 255).astype("uint8"))
    plt.title(int(labels[i]))
    plt.axis("off")

plt.show()

NameError: name 'train_generator' is not defined

<Figure size 1000x1000 with 0 Axes>

## Mini-Holdout

In [62]:
# Path to new images
mini_path = "upscaled/mini_holdout"  # Change this to your actual folder
mini_files = sorted([os.path.join(mini_path, f) for f in os.listdir(mini_path) if f.endswith((".jpg"))])

# Load and preprocess all new images
mini = np.array([load_and_preprocess_image(img_path) for img_path in mini_files])


In [63]:
print(model.input_shape)
print(mini.shape)

(None, 300, 300, 3)
(201, 300, 300, 3)


In [64]:
# Predict class probabilities
predictions = model.predict(mini)

# Convert probabilities to class labels
predicted_classes = np.argmax(predictions, axis=1)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 73ms/step


In [65]:
for i in range(0, 200, 10):
    print(mini_files[i], predicted_classes[i])

upscaled/mini_holdout/00000.jpg 17
upscaled/mini_holdout/00010.jpg 17
upscaled/mini_holdout/00020.jpg 17
upscaled/mini_holdout/00030.jpg 17
upscaled/mini_holdout/00040.jpg 17
upscaled/mini_holdout/00050.jpg 17
upscaled/mini_holdout/00060.jpg 17
upscaled/mini_holdout/00070.jpg 17
upscaled/mini_holdout/00080.jpg 17
upscaled/mini_holdout/00090.jpg 17
upscaled/mini_holdout/00100.jpg 17
upscaled/mini_holdout/00110.jpg 17
upscaled/mini_holdout/00120.jpg 17
upscaled/mini_holdout/00130.jpg 17
upscaled/mini_holdout/00140.jpg 17
upscaled/mini_holdout/00150.jpg 17
upscaled/mini_holdout/00160.jpg 17
upscaled/mini_holdout/00170.jpg 17
upscaled/mini_holdout/00180.jpg 17
upscaled/mini_holdout/00190.jpg 17


In [66]:
mini_answers = pd.read_csv("mini_holdout_answers.csv")
# Create a mapping from filename → class
filename_to_label = dict(zip(mini_answers["Filename"], mini_answers["ClassId"]))

# Extract true labels in the same order as mini_files
true_labels = [filename_to_label[os.path.basename(f)] for f in mini_files]

# Convert to NumPy array (for compatibility with sklearn)
true_labels = np.array(true_labels)

In [67]:
# Compute metrics
accuracy = accuracy_score(true_labels, predicted_classes)
precision = precision_score(true_labels, predicted_classes, average='weighted')
recall = recall_score(true_labels, predicted_classes, average='weighted')
f1 = f1_score(true_labels, predicted_classes, average='weighted')

# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.0348
Precision: 0.0012
Recall: 0.0348
F1 Score: 0.0023


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Testing the model
Once you have built and trained your model, the next step is to run the mini holdout images through it and see how well your model does at making predictions for images it has never seen before.

Since loading these images and formatting them for the model can be tricky, you may find the following code useful. This code only uses your model to predict the class label for a given image. You'll still need to compare those predictions to the "ground truth" class labels in `mini_holdout_answers.csv` to evaluate how well the model does.

Previously, you were given a file that would check your results. This time you're given the answers to the first mini holdout dataset. You'll need to compare those predictions against the "ground truth" class labels in `mini_holdout_answers.csv` to evaluate how well the model does.

Make sure to use the insights gained from the mini hold out dataset in your executive summary.


```
from tensorflow.keras.preprocessing import image_dataset_from_directory
test_dir = '/content/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['mini_holdout'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = model.predict(test_generator)
predictions = [np.argmax(probas) for probas in probabilities]
```



##Mini Hold out Dataset


Once you feel confident, you will need to predict for the full holdout dataset using the following code, and submit your csv file:

```
from tensorflow.keras.preprocessing import image_dataset_from_directory
test_dir = '/content/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['holdout'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = model.predict(test_generator)
predictions = [np.argmax(probas) for probas in probabilities]
```