In [1]:
# Problem 1: Transfer Learning
# 1. Choose a pre-trained deep learning model that has been trained to classify images.
# 2. Use the German Traffic Sign Dataset below (that the pre-trained model in (1) wasn't trained on) to define a new "traffic sign" classification problem.
# 3. Use transfer learning to adapt the pre-trained model in (1) to your new image classification problem in (2).
# 4. Is your new model able to classify test images from (2) with high accuracy? 

In [16]:
import os
import numpy as np
import time
import cv2

import PIL.Image as Image
import matplotlib.pylab as plt

import tensorflow as tf
import tensorflow_hub as hub

import pandas as pd

In [3]:
# Show which devices TensorFlow can see; the recorded output below lists only a CPU.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [21]:
def load_images_from_folder(folder):
    """Load every decodable image in ``folder`` as an RGB array.

    Files are visited in sorted filename order so that the position of each
    image in the result is deterministic; ``os.listdir`` on its own returns
    entries in arbitrary order, which makes positional indexing unreliable.

    Args:
        folder: Path of the directory to scan (sub-directories are not
            descended into).

    Returns:
        A list with one RGB image per file that OpenCV could decode, in
        sorted filename order. Files that cannot be decoded are skipped.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_COLOR)
        # imread signals an undecodable file by returning None, so skip those.
        if img is not None:
            # Convert from the BGR channel order to RGB before storing.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(img)
    return images

In [4]:
# Settings shared by the cells below.
model_path = (
    "https://tfhub.dev/google/bit/m-r101x3/ilsvrc2012_classification/1"
)  # TF-Hub handle of the pre-trained network
batch_size = 16           # batch size handed to the dataset loaders
IMAGE_SHAPE = (128, 128)  # image_size handed to the dataset loaders

In [6]:
# Read the two metadata tables once, then pull out their ClassId columns.
meta_df = pd.read_csv("archive/Meta.csv")
test_df = pd.read_csv("archive/Test.csv")

classes = meta_df["ClassId"].unique()   # distinct class ids listed in Meta.csv
test_labels = test_df["ClassId"]        # ClassId column of Test.csv, in file order

In [22]:
# Despite its name, `true_labels` holds the *images* loaded from ./archive/Meta/
# (presumably one reference icon per class - TODO confirm), not label values.
true_labels = load_images_from_folder("./archive/Meta/")

In [None]:
# Keep TF-Hub downloads in a "tfhub_cache" folder inside the working directory.
working_dir = os.getcwd()
tfhub_cache_dir = os.path.join(working_dir, 'tfhub_cache')

# Create the folder if needed, then tell TF-Hub about it via its environment variable.
os.makedirs(tfhub_cache_dir, exist_ok=True)
os.environ['TFHUB_CACHE_DIR'] = tfhub_cache_dir

### Dataset Config

In [None]:
# Options that both dataset loaders have in common.
loader_options = dict(image_size=IMAGE_SHAPE, batch_size=batch_size)

# Training subset (80%) of the labelled images under ./archive/Train.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "./archive/Train",
    validation_split=0.2,
    subset="training",
    seed=123,
    **loader_options,
)

# No inferred labels and no shuffling, so that the predictions can be matched
# against Test.csv later.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "./archive/Test",
    labels=None,
    label_mode=None,
    shuffle=False,
    **loader_options,
)


Found 39209 files belonging to 43 classes.
Using 31368 files for training.
Found 12630 files belonging to 1 classes.


In [None]:
# Scale pixel values from [0, 255] down to [0, 1].
# NOTE: this cell re-binds train_ds / test_ds, so run it only once per kernel;
# running it twice would apply the scaling twice.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
# The test images need the same scaling as the training images; otherwise the
# model is asked to predict on inputs in a different value range than it was
# trained on. test_ds yields images only (label_mode=None), hence no `y`.
test_ds = test_ds.map(lambda x: normalization_layer(x))

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

### Model Building and Training

In [None]:
# Load the pre-trained network from TF-Hub as a frozen (non-trainable) Keras layer.
# NOTE(review): the handle in `model_path` is the ImageNet *classification*
# variant, so this layer still ends in its 1000-way head (the model summary
# below reports an output shape of (None, 1000)); it is not a headless
# feature extractor.
classifier = hub.KerasLayer(
    model_path, input_shape=IMAGE_SHAPE+(3,), trainable=False)

In [None]:
# Keep the pre-trained weights fixed so that only the layers added on top are trained.
# This repeats trainable=False from the hub.KerasLayer(...) call as an explicit guard.
classifier.trainable = False  # Freeze the outer model

In [None]:
# One output unit per class id found in Meta.csv.
num_classes = len(classes)

# Frozen pre-trained network followed by a new Dense layer that emits raw
# scores (no activation) for the traffic-sign classes.
model = tf.keras.Sequential()
model.add(classifier)
model.add(tf.keras.layers.Dense(num_classes))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 1000)              387934888 
                                                                 
 dense (Dense)               (None, 43)                43043     
                                                                 
Total params: 387,977,931
Trainable params: 43,043
Non-trainable params: 387,934,888
_________________________________________________________________


In [None]:
# The Dense head outputs raw scores, hence from_logits=True.
# The metric is registered as 'accuracy' (not 'acc') so that Keras logs it as
# 'accuracy' / 'val_accuracy' - the exact name the EarlyStopping callback
# below monitors. With 'acc' the logged name would be 'val_acc' and the
# callback would never find its metric.
model.compile(
  optimizer=tf.keras.optimizers.Adam(),
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])

# Stop when validation accuracy has not improved by more than `min_delta`
# for `patience` consecutive epochs. This only takes effect when validation
# data is supplied to model.fit().
# NOTE(review): min_delta=0.05 demands a 5-point accuracy gain per epoch to
# count as an improvement, which is strict - confirm this is intended.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.05,
    patience=3)

In [None]:
# Build the validation subset: the 20% of ./archive/Train that the training
# loader held out. validation_split and seed must be identical to the values
# used for the training subset, otherwise the two subsets would overlap.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  "./archive/Train",
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=IMAGE_SHAPE,
  batch_size=batch_size)
# Same preprocessing as the training data.
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Without validation_data no val_* metrics are produced, so the early-stopping
# callback has nothing to monitor and no validation curves can be plotted.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[early_stopping])

Epoch 1/10


In [None]:
# model.fit() returns a History object; the per-epoch values live in its
# `.history` dict (the object itself is not subscriptable).
metrics_by_name = history.history
# Keras logs the metric under the string given to compile(): 'accuracy' or 'acc'.
acc_key = 'accuracy' if 'accuracy' in metrics_by_name else 'acc'
accuracy = metrics_by_name[acc_key]
# None when fit() was run without validation data.
val_accuracy = metrics_by_name.get('val_' + acc_key)
# x-axis: one point per completed epoch (a bare int cannot be plotted against a list).
epochs = range(1, len(accuracy) + 1)

plt.plot(epochs, accuracy, 'y', label='Training Accuracy')
if val_accuracy is not None:
    plt.plot(epochs, val_accuracy, 'r', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend();

In [None]:
loss = history.history['loss']
# None when fit() was run without validation data.
val_loss = history.history.get('val_loss')
# Computed here as well so this cell does not depend on the accuracy cell's x-axis.
epochs = range(1, len(loss) + 1)

plt.plot(epochs, loss, 'y', label='Training Loss')
if val_loss is not None:
    plt.plot(epochs, val_loss, 'r', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend();

### Check Predictions

In [None]:
# One row of class scores per test image, in the (unshuffled) order of test_ds.
predicted_batch = model.predict(test_ds)
# Index of the highest-scoring output unit for every image.
predicted_id = np.argmax(predicted_batch, axis=-1)
# `true_labels` is a plain Python list, which cannot be indexed with an array.
# NOTE(review): this pairs index i with the i-th image loaded from
# ./archive/Meta/ - verify that this order matches the model's class order.
predicted_label_batch = [true_labels[i] for i in predicted_id]

# Keras assigns label indices to the Train sub-folders in sorted name order,
# so rebuild that list to translate an output index back into a ClassId.
# Assumes the sub-folders are named after their integer ClassId - TODO confirm.
train_dir = "./archive/Train"
class_names = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry)))
predicted_class_id = np.array([int(class_names[i]) for i in predicted_id])

# Compare against Test.csv; this relies on test_ds (sorted by file name,
# shuffle=False) being in the same order as the rows of Test.csv.
expected_class_id = test_labels.to_numpy()
if len(expected_class_id) == len(predicted_class_id):
    test_accuracy = float(np.mean(predicted_class_id == expected_class_id))
    print(f"Test accuracy: {test_accuracy:.4f}")
else:
    print(f"Cannot score: {len(predicted_class_id)} predictions "
          f"vs {len(expected_class_id)} rows in Test.csv")

In [None]:
plt.figure(figsize=(10,9))
plt.subplots_adjust(hspace=0.5)
# A tf.data.Dataset cannot be indexed; un-batch it and take the first 15
# images. test_ds is not shuffled, so image n lines up with predicted_id[n].
for n, image in enumerate(test_ds.unbatch().take(15)):
  pixels = image.numpy()
  # imshow expects floats in [0, 1]; rescale if the images are still in [0, 255].
  if pixels.max() > 1.0:
    pixels = pixels / 255.0
  plt.subplot(3, 5, n+1)
  plt.imshow(pixels)
  # The title shows the model's output index (argmax), not a ClassId.
  plt.title(f"class index {predicted_id[n]}")
  plt.axis('off')
_ = plt.suptitle("Model predictions")

### Save Model

In [None]:
# Write the trained model to <working dir>/saved_models/model.
saved_models_dir = os.path.join(os.getcwd(), 'saved_models')
export_path = os.path.join(saved_models_dir, 'model')
model.save(export_path)