The following is the tenth and final model we created; it performs the best on the holdout dataset.

In [None]:
# Note: After you run this cell, the training and test data will be available in
# the file browser. (Click the folder icon on the left to view it)
#
# If you don't see the data after the cell completes, click the refresh button
# in the file browser (folder icon with circular arrow)
#
# The cell can be re-run safely: downloads overwrite earlier copies (-O),
# unzip overwrites without prompting (-o), and the merged training directory
# is rebuilt from scratch.

# First, let's download and unzip the data
!echo "Downloading files..."
!wget -q -O training1.zip https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training1.zip
!wget -q -O training2.zip https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/training2.zip
!wget -q -O test.zip https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/test.zip
!wget -q -O test_classes.csv https://github.com/byui-cse/cse450-course/raw/master/data/roadsigns/test_classes.csv

!echo "Unzipping files..."
!unzip -q -o /content/training1.zip
!unzip -q -o /content/training2.zip
!unzip -q -o /content/test.zip

# Combine the two training directories
!echo "Merging training data..."
# Start from an empty target so `mv` never collides with class folders left by a previous run
!rm -rf /content/training
!mkdir -p /content/training
!mv /content/training1/* /content/training
!mv /content/training2/* /content/training

# Cleanup
!echo "Cleaning up..."
!rmdir /content/training1
!rmdir /content/training2
!rm training1.zip
!rm training2.zip
!rm test.zip

!echo "Data ready."

Downloading files...
Unzipping files...
Merging training data...
Cleaning up...
Data ready.


In [None]:
# Import libraries
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import models, layers
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.utils import class_weight
!pip install -q -U tensorflow_addons
import tensorflow_addons as tfa

[?25l[K     |▎                               | 10 kB 28.4 MB/s eta 0:00:01[K     |▋                               | 20 kB 38.7 MB/s eta 0:00:01[K     |▉                               | 30 kB 24.4 MB/s eta 0:00:01[K     |█▏                              | 40 kB 18.7 MB/s eta 0:00:01[K     |█▌                              | 51 kB 13.5 MB/s eta 0:00:01[K     |█▊                              | 61 kB 14.9 MB/s eta 0:00:01[K     |██                              | 71 kB 13.2 MB/s eta 0:00:01[K     |██▍                             | 81 kB 14.8 MB/s eta 0:00:01[K     |██▋                             | 92 kB 11.8 MB/s eta 0:00:01[K     |███                             | 102 kB 12.8 MB/s eta 0:00:01[K     |███▎                            | 112 kB 12.8 MB/s eta 0:00:01[K     |███▌                            | 122 kB 12.8 MB/s eta 0:00:01[K     |███▉                            | 133 kB 12.8 MB/s eta 0:00:01[K     |████▏                           | 143 kB 12.8 MB/s eta 0:

In [None]:
# from imgaug import augmenters as iaa

# # Dummy set to oversample the under-represented classes and fix the unbalanced data (unfinished experiment, left disabled)
# current_dir = 0
# dir = ''

# datagen = ImageDataGenerator(rotation_range=10, 
#                              zoom_range=0.1, 
#                              brightness_range= 1)

# if len(next(os.walk(dir))[2]) < 800:

#   dummy = 

#   current_dir += 1

#   if current_dir < 10:
#     dir = '/content/training/' + '0000' + str(current_dir) + '/'
#   else: 
#     dir = '/content/training/' + '000' + str(current_dir) + '/'

In [None]:
# Keras' ImageDataGenerator class loads our image data from disk.
# See (https://keras.io/api/preprocessing/image/#imagedatagenerator-class) for details
#
# A couple of things to note:
# 1. A fixed seed is passed so the shuffle order is the same on every run.
# 2. Class names are inferred automatically from the image subdirectory names.
# 3. The training data is split into 80% training, 20% validation.


training_dir = '/content/training/'
image_size = (100, 100)

# Split the training images into training and validation sets.
# An ImageDataGenerator performs the split; it can also preprocess and augment
# the files, which is how `rescale` is applied here.
datagen_options = dict(rescale=1./255, validation_split=.2)
train_datagen = ImageDataGenerator(**datagen_options)
validation_datagen = ImageDataGenerator(**datagen_options)

# Settings common to both directory iterators
flow_options = dict(
        target_size=image_size,
        batch_size=32,
        class_mode='sparse',
        seed=43)

train_generator = train_datagen.flow_from_directory(
        training_dir,
        subset="training",
        shuffle=True,
        **flow_options)
validation_generator = validation_datagen.flow_from_directory(
        training_dir,
        subset="validation",
        **flow_options)



Found 31368 images belonging to 43 classes.
Found 7841 images belonging to 43 classes.


In [None]:
# Inspect the per-image class indices held by the training generator
# (the same array is used further down to compute the class weights).
train_generator.classes

array([ 0,  0,  0, ..., 42, 42, 42], dtype=int32)

In [None]:
# View up to 9 images and their class labels
# A single batch is enough for the preview, so pull one and stop.
images, labels = next(iter(train_generator))

plt.figure(figsize=(10, 10))
for i in range(min(9, len(images))):
    ax = plt.subplot(3, 3, i + 1)
    # The generator rescales pixels by 1/255, and imshow renders float RGB
    # data in [0, 1] directly, so no conversion back to uint8 is needed.
    plt.imshow(images[i])
    plt.title(int(labels[i]))
    plt.axis("off")

In [None]:
# Build a model...
# The input shape and the number of output units are taken from the data
# pipeline so the network stays in sync with `image_size` and with the number
# of class subdirectories found by the training generator.
input_shape = image_size + (3,)  # 3 colour channels (flow_from_directory defaults to RGB)
num_classes = train_generator.num_classes

model = models.Sequential([
    # Block 1: 5x5 convolution with batch norm applied before the activation
    layers.Conv2D(8, (5, 5), input_shape=input_shape),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Blocks 2-3: plain 3x3 convolutions
    layers.Conv2D(16, (3, 3)),
    layers.Activation('relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    layers.Conv2D(16, (3, 3)),
    layers.Activation('relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Blocks 4-5: 3x3 convolutions followed by batch norm
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.2),

    # Classifier head
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])


BATCH_SIZE = 32
EPOCHS = 100
INIT_LR = 0.0005
MAX_LR = 0.01

# Cyclical learning rate between INIT_LR and MAX_LR; the scale function halves
# the amplitude of the cycle each time a new cycle starts.
clr = tfa.optimizers.CyclicalLearningRate(initial_learning_rate=INIT_LR,
    maximal_learning_rate=MAX_LR,
    scale_fn=lambda x: 1/(2.**(x-1)),
    step_size=2 * BATCH_SIZE
)

# Adam driven by the cyclical schedule is the only optimizer the model uses.
optimizer = tf.keras.optimizers.Adam(clr)

model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# model.summary()

# Balanced class weights counter the uneven number of images per class.
class_ids = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
    class_weight = 'balanced',
    classes = class_ids,
    y = train_generator.classes
)

# Key every weight by its actual class id instead of relying on position.
class_weights = {int(class_id): float(weight)
                 for class_id, weight in zip(class_ids, balanced_weights)}

# Stop when val_accuracy reaches the threshold (after a minimum number of epochs)
class MyThresholdCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy reaches a target value.

    Args:
        threshold: `val_accuracy` value at or above which training stops.
        min_epochs: Lowest epoch index (as passed by Keras to
            `on_epoch_end`) at which stopping is allowed. Defaults to 10,
            the value that used to be hard-coded.
    """

    def __init__(self, threshold, min_epochs=10):
        super().__init__()
        self.threshold = threshold
        self.min_epochs = min_epochs

    def on_epoch_end(self, epoch, logs=None):
        """Flag the model to stop when the threshold is met at this epoch."""
        # `logs` may be None, and `val_accuracy` is absent when no validation
        # data or accuracy metric is configured; skip the check in that case.
        val_acc = (logs or {}).get("val_accuracy")
        if val_acc is None:
            return
        if val_acc >= self.threshold and epoch >= self.min_epochs:
            self.model.stop_training = True
callback = MyThresholdCallback(threshold=0.97)

# `batch_size` is deliberately not passed: the generators already yield
# batches (their size is set in `flow_from_directory`), and Keras documents
# that `batch_size` must not be specified for generator inputs.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=[callback])

In [None]:
# Per-epoch metrics recorded by `model.fit`
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(acc) + 1)

# One figure per metric: training values as dots, validation values as a line
curves = [
    (acc, val_acc, 'Training acc', 'Validation acc', 'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for position, (train_values, val_values, train_label, val_label, title) in enumerate(curves):
    if position > 0:
        # Every metric after the first gets a fresh figure
        plt.figure()
    plt.plot(epochs, train_values, 'bo', label=train_label)
    plt.plot(epochs, val_values, 'b', label=val_label)
    plt.title(title)
    plt.legend()

plt.show()

In [None]:
# Save the trained model to an HDF5 file, then hand it to the browser for download
from google.colab import files

model_file = 'model10.h5'
model.save(model_file)
files.download(model_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Testing the model
Once you have built and trained your model, the next step is to run the test images through it and see how well your model does at making predictions for images it has never seen before. 

Since loading these images and formatting them for the model can be tricky, you may find the following code useful. This code only uses your model to predict the class label for a given image. You'll still need to compare those predictions to the "ground truth" class labels in `test_classes.csv` to evaluate how well the model does.



```
from tensorflow.keras.preprocessing import image_dataset_from_directory
test_dir = '/content/'

test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['test'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = model.predict(test_generator)
predictions = [np.argmax(probas) for probas in probabilities]
```



## Hold-out Dataset
For the previous modules, your team has turned in your predictions for us to evaluate. This time, you're given the answers to the hold-out dataset and will need to perform your own evaluation. 

Once you have predictions, you'll need to compare those predictions against the "ground truth" class labels in `test_classes.csv` to evaluate how well the model does. 

Make sure to use the insights gained from the hold out dataset in your executive summary.

In [None]:
def clr():
  """Rebuild the cyclical learning-rate schedule the model was trained with.

  Returns:
    A `tfa.optimizers.CyclicalLearningRate` configured from the notebook's
    INIT_LR, MAX_LR and BATCH_SIZE constants.
  """
  schedule = tfa.optimizers.CyclicalLearningRate(initial_learning_rate=INIT_LR,
    maximal_learning_rate=MAX_LR,
    scale_fn=lambda x: 1/(2.**(x-1)),
    # `2 * BATCH_SIZE` matches the step size of the schedule used at training time
    step_size=2 * BATCH_SIZE
  )
  return schedule


# compile=False loads the model without compiling it; only the architecture
# and weights are needed to run predictions.
loaded_model = keras.models.load_model('model10.h5', custom_objects={'clr': clr}, compile=False)


from tensorflow.keras.preprocessing import image_dataset_from_directory
test_dir = '/content/'

# Apply the same 1/255 rescaling that was used for the training images.
test_datagen = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the predictions in the generator's file order.
# NOTE(review): this presumes test_classes.csv lists labels in that same order — confirm.
test_generator = test_datagen.flow_from_directory(
        test_dir,
        classes=['test'],
        target_size=image_size,
        class_mode='sparse',
        shuffle=False)
probabilities = loaded_model.predict(test_generator)
# Most probable class per image, computed in one vectorized call and kept as a list
predictions = np.argmax(probabilities, axis=1).tolist()


Found 12630 images belonging to 1 classes.


In [None]:
from sklearn.metrics import r2_score, accuracy_score, mean_squared_error, recall_score, f1_score

# Ground-truth class ids for the hold-out images
y = pd.read_csv('test_classes.csv')['ClassId']

# Recall and F1 are averaged across classes using the 'weighted' scheme
averaging = dict(average='weighted')

holdoutResultAccuracy = accuracy_score(y, predictions)
holdoutResultF1 = f1_score(y, predictions, **averaging)
holdoutResultRecall = recall_score(y, predictions, **averaging)
holdoutResultR2 = r2_score(y, predictions)

# Collect the scores in one dict so the cell displays them together
results = dict(
    F1=holdoutResultF1,
    Recall=holdoutResultRecall,
    R2=holdoutResultR2,
    Accuracy=holdoutResultAccuracy,
)

results

{'Accuracy': 0.9617577197149644,
 'F1': 0.9620894621671228,
 'R2': 0.9597311540397062,
 'Recall': 0.9617577197149644}

In [None]:
import altair as alt

# Put each prediction next to its true label (aligned on the row index)
pred = pd.DataFrame(predictions, columns=['pred']).assign(y=y)

# Lift Altair's row limit so every prediction can be charted
alt.data_transformers.disable_max_rows()

# Nearly transparent markers: points that coincide build up into darker spots
point_style = dict(color='blue', opacity=0.02, fill='blue', cornerRadius=0)

alt.Chart(pred).mark_point(**point_style).encode(x='pred', y='y')