In [1]:
import os
import pandas as pd
import numpy as np
import sklearn
import cv2
import tensorflow as tf

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, balanced_accuracy_score, f1_score
from sklearn.model_selection import train_test_split

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras import layers, models

from keras.models import load_model
from PIL import Image
from matplotlib import pyplot as plt


In [2]:
im_width  = 50
im_height = 50

In [3]:
print(os.listdir('GTSRB Data'))

df_meta  = pd.read_csv('GTSRB Data/Meta.csv', delimiter=',')
df_test  = pd.read_csv('GTSRB Data/Test.csv', delimiter=',')
df_train = pd.read_csv('GTSRB Data/Train.csv', delimiter=',')

['Meta', 'Meta.csv', 'Test', 'Test.csv', 'Train', 'Train.csv']


In [4]:
print('Number of Classes: ',df_meta.shape[0])

Number of Classes:  43


# TODO:
* Pre-process images to VGG input size and feature range
* Set up Training and validation sets (split training) (Keras Data generators)
    
    

In [5]:
# Lower quality of images by 256 factor & set validation set to 20%
ImDG_train = ImageDataGenerator(rescale=1./255, validation_split=0.2) 
data_train = ImDG_train.flow_from_directory('GTSRB Data/Train/',
                                            target_size  = (im_width, im_height),
                                            batch_size   = 150,
                                            class_mode   ='categorical',
                                            shuffle      = True,
                                            color_mode   ='rgb',
                                            interpolation='hamming',
                                            subset       ='training'
                                           )

ImDG_valid = ImageDataGenerator(rescale=1./255, validation_split=0.2) 
data_valid = ImDG_train.flow_from_directory('GTSRB Data/Train/',
                                            target_size  = (im_width, im_height),
                                            batch_size   = 150,
                                            class_mode   ='categorical',
                                            shuffle      = True,
                                            color_mode   ='rgb',
                                            interpolation='hamming',
                                            subset       ='validation'
                                           )


Found 31368 images belonging to 43 classes.
Found 7841 images belonging to 43 classes.


In [8]:
test_labels = df_test["ClassId"].values
test_imgs   = df_test["Path"].values

data =[]
for img in test_imgs:
    try:
        test_im         = cv2.imread('GTSRB Data/'+test_imgs[0])
        image_fromarray = Image.fromarray(test_im, mode='RGB')
        resize_image    = image_fromarray.resize((im_height, im_width))
        data.append(np.array(resize_image))
    except:
        print("Error in " + img)

data_test = np.array(data)
data_test = data_test/255

In [9]:
# Import model without a classification layer (for transfer learning)
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(im_width, im_height, 3))
# Freeze existing model
base_model.trainable = False

In [10]:
base_model.summary()


Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 50, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 50, 50, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 50, 50, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 25, 25, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 25, 25, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 25, 25, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 12, 12, 128)       0     

In [11]:
flatten_layer = layers.Flatten()
dense_layer = layers.Dense(512, activation='sigmoid')
prediction_layer = layers.Dense(43, activation='softmax')


model = models.Sequential([
    base_model,
    flatten_layer,
    dense_layer,
    prediction_layer
])

In [12]:
from tensorflow.keras.callbacks import EarlyStopping

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


es = EarlyStopping(monitor='val_accuracy', mode='max', patience=5,  restore_best_weights=True)

model.fit(data_train, epochs=10, validation_data=data_valid, batch_size=32, callbacks=[es])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1776634ec70>

In [14]:
model.save("VGG10-32.h5")

In [13]:
test_labels = df_test["ClassId"].values

pred = model.predict(data_test)
pred = [int(list(data_train.class_indices.keys())[i]) for i in pred.argmax(axis=-1)]
print('Test Data accuracy: ', accuracy_score(test_labels, pred)*100)

Test Data accuracy:  1.187648456057007
