# ⛔️ Preprocessing Traffic Signs for Classification with CNN

## 📰 Related Paper
Sichkar V. N. Effect of various dimension convolutional layer filters on traffic sign classification accuracy. *Scientific and Technical Journal of Information Technologies, Mechanics and Optics*, 2019, vol. 19, no. 3, pp. 546–552. DOI: 10.17586/2226-1494-2019-19-3-546-552 (Full-text available on ResearchGate here: [Effect of various dimension convolutional layer filters on traffic sign classification accuracy](https://www.researchgate.net/publication/334074308_Effect_of_various_dimension_convolutional_layer_filters_on_traffic_sign_classification_accuracy))

* Test online with a custom traffic sign here: https://valentynsichkar.name/traffic_signs.html

# 🎓 Related course for classification tasks

**Design**, **Train** & **Test** deep CNN for Image Classification.

**Join** the course & enjoy new opportunities to get deep learning skills:


[https://www.udemy.com/course/convolutional-neural-networks-for-image-classification/](https://www.udemy.com/course/convolutional-neural-networks-for-image-classification/?referralCode=12EE0D74A405BF4DDE9B)


![](https://github.com/sichkar-valentyn/1-million-images-for-Traffic-Signs-Classification-tasks/blob/main/images/slideshow_classification.gif?raw=true)

# 📥 Importing needed libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pickle
import matplotlib.pyplot as plt
from timeit import default_timer as timer

from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler

# Input data files are expected under the "../input/" directory.
# Running this cell (by clicking run or pressing Shift+Enter) prints the full path of every file found there
import os

for root, _, files in os.walk('../input'):
    for file_name in files:
        print(os.path.join(root, file_name))

# Showing top-level entries of the input directory
print(os.listdir('../input'))

# Showing entries of '../usr/lib' (added utility scripts), separated by an empty line
print()
print(os.listdir('../usr/lib'))

# Any results you write to the current directory are saved as output.


In [None]:
import utility_scripts_for_traffic_signs

# Showing description of the functions inside utility
# 'help' writes the documentation to the output by itself and returns None,
# so it is called directly; wrapping it in 'print' would add a stray 'None' line
help(utility_scripts_for_traffic_signs)

# Showing module's attributes
print(dir(utility_scripts_for_traffic_signs))


# 📂 Loading datasets: data2.pickle, data3.pickle, data7.pickle, data8.pickle

In [None]:
# Defining list with numbers of the datasets' files to load from
n = [2, 3, 7, 8]

# Defining list with preprocessing methods, in the same order as the numbers in 'n'
m = ['RGB + /255 + Mean', 'RGB + /255 + Mean + STD', 'LHE + /255 + Mean', 'LHE + /255 + Mean + STD']

# Defining dictionary that maps number of the file to its loaded dataset
data = {}

# Going through all of the four datasets' files
for i in n:
    # Opening file for reading in binary mode
    with open('../input/traffic-signs-preprocessed/data' + str(i) + '.pickle', 'rb') as f:
        data[i] = pickle.load(f, encoding='latin1')  # dictionary type

    # Converting labels of training and validation parts to one-hot form for using in Keras
    # Labels of the test part are left as they are
    for key in ('y_train', 'y_validation'):
        data[i][key] = to_categorical(data[i][key], num_classes=43)

    # Moving axis 1 (channels) to the end for all three parts
    for key in ('x_train', 'x_validation', 'x_test'):
        data[i][key] = data[i][key].transpose(0, 2, 3, 1)

# Showing loaded datasets from the files
# Names and sizes of the entries are expected to be the same for all files
for i, method in zip(n, m):
    print('data' + str(i) + '.pickle ->', method)
    for k, j in data[i].items():
        # Entry 'labels' is shown by its length, every other entry by its shape
        if k == 'labels':
            print(k + ':', len(j))
        else:
            print(k + ':', j.shape)
    print()


# 💫 Showing some training examples

In [None]:
%matplotlib inline

# Visualizing some examples of training data
# Taking first 49 images from the training part of dataset 2
some_examples = data[2]['x_train'][:49, :, :, :]
print(some_examples.shape)  # (49, 32, 32, 3)

# Plotting
fig = plt.figure()
# Arranging the examples into one image with the help of the utility function
grid = utility_scripts_for_traffic_signs.convert_to_grid(some_examples)
plt.imshow(grid.astype('uint8'))
plt.axis('off')
plt.gcf().set_size_inches(15, 15)
plt.title('Some training examples', fontsize=18)

# Saving plot
# It is done while the figure is still open, instead of after it was already closed
fig.savefig('some_training_examples.png')

# Showing the plot and releasing the figure only once
plt.show()
plt.close()


# 🚳 Showing one Traffic Sign from different datasets

In [None]:
%matplotlib inline

# Loading original RGB Traffic Signs without any processing
with open('../input/traffic-signs-preprocessed/data0.pickle', 'rb') as f:
    data0 = pickle.load(f, encoding='latin1')  # dictionary type

# Making channels come at the end
data0['x_train'] = data0['x_train'].transpose(0, 2, 3, 1)

# Getting training example with index 2
example0 = data0['x_train'][2]
print(example0.shape)  # (32, 32, 3)



# Defining dictionary with the same training example taken from the four datasets
example = {
    # Examples with 3-channeled images
    2: data[2]['x_train'][2],
    3: data[3]['x_train'][2],
    # Examples with 1-channeled images, the only channel is selected
    7: data[7]['x_train'][2, :, :, 0],
    8: data[8]['x_train'][2, :, :, 0],
}
print(example[2].shape, example[3].shape)  # (32, 32, 3) (32, 32, 3)
print(example[7].shape, example[8].shape)  # (32, 32) (32, 32)



# Getting labels' names from the 'csv' file
r = pd.read_csv('../input/traffic-signs-preprocessed/label_names.csv')

# Building list of labels from column 'SignName', rows are kept in the order of the file
# (expected order of the classes is from 0 to 42)
labels = list(r['SignName'])



# Plotting examples of one traffic sign preprocessed in four different ways
plt.rcParams['figure.figsize'] = (12.0, 12.0)  # Setting default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['font.family'] = 'Times New Roman'

fig = plt.figure()

# Getting name of the class for the chosen training example
# It is taken from dataset 2 and used as the label of the y axis for every subplot
sign_name = labels[np.argmax(data[2]['y_train'][2])]

# Plotting original example in the first row
plt.subplot(2, 4, 1)  # rows, columns, current index of the plot
plt.imshow(example0)
plt.xlabel('RGB', fontsize=15)
plt.ylabel(sign_name, fontsize=15)
plt.xticks([])
plt.yticks([])

# Going through all of the four examples
# They fill the second row, that is why index of the plots starts from 5
for position, (i, method) in enumerate(zip(n, m), start=5):
    plt.subplot(2, 4, position)  # rows, columns, current index of the plot
    # 1-channeled examples are shown in gray scale, 3-channeled ones with default settings
    plt.imshow(example[i], cmap='gray' if i in (7, 8) else None)
    plt.xlabel(method, fontsize=15)
    plt.ylabel(sign_name, fontsize=15)
    plt.xticks([])
    plt.yticks([])

# Adjusting height between subplots
plt.subplots_adjust(hspace=0)
plt.tight_layout()

# Showing the plot
plt.show()

# Saving the plot
fig.savefig('one_ts_from_different_datasets.png')
plt.close()


# 🏗️ Building model of CNN with Keras for RGB dataset[2]

In [None]:
# Building the network as one list of layers:
# three 'convolution + max pooling' stages followed by two fully connected layers
model = Sequential([
    # Input is an image of size 32x32 with 3 channels
    Conv2D(32, kernel_size=9, padding='same', activation='relu', input_shape=(32, 32, 3)),
    MaxPool2D(pool_size=2),

    Conv2D(64, kernel_size=7, padding='same', activation='relu'),
    MaxPool2D(pool_size=2),

    Conv2D(128, kernel_size=3, padding='same', activation='relu'),
    MaxPool2D(pool_size=2),

    Flatten(),
    Dense(500, activation='relu'),
    # Output layer with one unit per class
    Dense(43, activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


## 🤏 Overfitting small data for RGB dataset[2]

In [None]:
# Defining number of epochs
# It is set before the scheduler, because the schedule below reads this name when it is called
epochs = 20

# Defining schedule for the learning rate: 1e-3 * 0.95 ** (x + epochs),
# where 'x' is the value passed by Keras to the schedule for the current epoch
# NOTE: 'epochs' is looked up at call time, reassigning it later changes the schedule
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** (x + epochs))

# Training on the first 100 training examples only
# Validation is done on the whole validation part of dataset 2
h = model.fit(data[2]['x_train'][:100], data[2]['y_train'][:100],
              batch_size=5, epochs=epochs,
              validation_data=(data[2]['x_validation'], data[2]['y_validation']),
              callbacks=[annealer], verbose=1)

# Showing the best accuracies reached during training
# The message is one literal, so that no indentation of the source code gets into the output
print()
print('Epochs={0:d}, Train accuracy={1:.5f}, Validation accuracy={2:.5f}'
      .format(epochs, max(h.history['accuracy']), max(h.history['val_accuracy'])))


## 📈 Plotting history results for overfitting small data for RGB dataset[2]

In [None]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 5.0)  # Setting default size of plots
plt.rcParams['image.interpolation'] = 'nearest'

fig = plt.figure()

# Plotting history of training accuracy and then history of validation accuracy
# The order matches the order of the names in the legend
for key in ('accuracy', 'val_accuracy'):
    plt.plot(h.history[key], '-o', linewidth=3.0)

plt.title('Overfitting small data for RGB dataset[2]', fontsize=22)
plt.legend(['train', 'validation'], loc='upper left', fontsize='xx-large', borderpad=2)
plt.xlabel('Epoch', fontsize=22)
plt.ylabel('Accuracy', fontsize=22)
plt.tick_params(labelsize=18)

# Showing the plot
plt.show()

# Saving the plot
fig.savefig('Overfitting_dataset_2.png')
plt.close()


# 🏗️ Building set of models of CNN with Keras

In [None]:
# Defining dictionary that maps number of the dataset to its model
model = {}

# Building four models with the same architecture
# They differ only in the number of channels of the input image
for i in n:
    # Datasets 7 and 8 have 1-channeled images, the other ones have 3-channeled images
    channels = 1 if i in (7, 8) else 3

    net = Sequential()

    # First 'convolution + max pooling' stage defines shape of the input
    net.add(Conv2D(32, kernel_size=9, padding='same', activation='relu', input_shape=(32, 32, channels)))
    net.add(MaxPool2D(pool_size=2))

    # Two more 'convolution + max pooling' stages: (number of filters, size of the kernel)
    for filters, kernel in ((64, 7), (128, 3)):
        net.add(Conv2D(filters, kernel_size=kernel, padding='same', activation='relu'))
        net.add(MaxPool2D(pool_size=2))

    # Fully connected part with one output unit per class
    net.add(Flatten())
    net.add(Dense(500, activation='relu'))
    net.add(Dense(43, activation='softmax'))

    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model[i] = net


# 💡 Training with different datasets

In [None]:
# Defining number of epochs and schedule for the learning rate
# The schedule reads 'epochs' at the moment when it is called during training
epochs = 5
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.95 ** (x + epochs))

# Defining dictionary for saving history results
h = {}

# Training every model on its own dataset
for i in n:
    dataset = data[i]
    h[i] = model[i].fit(dataset['x_train'], dataset['y_train'],
                        batch_size=5, epochs=epochs,
                        validation_data=(dataset['x_validation'], dataset['y_validation']),
                        callbacks=[annealer], verbose=0)

    # Showing the best training and validation accuracies among all epochs
    best_training = max(h[i].history['accuracy'])
    best_validation = max(h[i].history['val_accuracy'])
    print('Model trained on dataset{0}.pickle, epochs={1:d}, training accuracy={2:.5f}, validation accuracy={3:.5f}'.format(i, epochs, best_training, best_validation))


# 📈 Plotting comparison results for accuracy

In [None]:
%matplotlib inline
plt.rcParams['figure.figsize'] = (15.0, 15.0)  # Setting default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams["font.family"] = 'Times New Roman'

fig = plt.figure()

# Defining one panel per metric: key in the history, label of the y axis, limits of the y axis
panels = (('accuracy', 'Training Accuracy', (0.9, 1.0)),
          ('val_accuracy', 'Validation Accuracy', (0.75, 1.0)))

# Defining line styles, one per dataset, in the same order as the numbers in 'n'
# Every dataset has its own marker, so the curves can be told apart without colours
line_styles = ('-o', '-s', '-D', '-^')

# Plotting history of training accuracy (upper panel) and validation accuracy (lower panel)
for row, (key, y_label, y_limits) in enumerate(panels, start=1):
    plt.subplot(2, 1, row)

    n_epochs = 0  # the longest history among the plotted curves
    for i, line_style in zip(n, line_styles):
        values = h[i].history[key]
        n_epochs = max(n_epochs, len(values))
        # Numbering epochs from 1, so that the first epoch is not cut off by the x limits below
        plt.plot(range(1, len(values) + 1), values, line_style, linewidth=3.0)

    plt.legend(['dataset' + str(i) for i in n], loc='lower right', fontsize='xx-large', borderpad=2)
    plt.xlabel('Epoch', fontsize=22, fontname='Times New Roman')
    plt.ylabel(y_label, fontsize=22, fontname='Times New Roman')
    plt.yscale('linear')  # {"linear", "log", "symlog", "logit", ...}
    plt.ylim(*y_limits)
    # Leaving small margins around the first and the last epoch; gives (0.5, 5.3) for 5 epochs
    plt.xlim(0.5, n_epochs + 0.3)
    plt.tick_params(labelsize=18)

# Showing the plot
plt.show()

# Saving the plot
fig.savefig('models_accuracy.png')
plt.close()


# Showing the best value of training accuracy among all epochs for every dataset
training_message = 'dataset{0}.pickle training accuracy = {1:.5f}'
for i in n:
    print(training_message.format(i, np.max(h[i].history['accuracy'])))

# Showing the best value of validation accuracy for every dataset, after an empty line
validation_message = 'dataset{0}.pickle validation accuracy = {1:.5f}'
print()
for i in n:
    print(validation_message.format(i, np.max(h[i].history['val_accuracy'])))


# 🧮 Calculating accuracy with testing datasets

In [None]:
# Going through all of the four models
for i in n:
    # Getting scores of all classes for every image from the test part
    class_scores = model[i].predict(data[i]['x_test'])

    # Getting index of the class with maximum score for every image
    predicted_classes = np.argmax(class_scores, axis=1)

    # Comparing predicted classes with correct classes gives numpy array with True and False values
    # 'np.mean' returns average of this array, which is the fraction of matches
    # The average is taken over the flattened array by default
    testing_accuracy = np.mean(predicted_classes == data[i]['y_test'])

    print('dataset{0}.pickle testing accuracy = {1:.5f}'.format(i, testing_accuracy))


# ⌛ Calculating time for classification

In [None]:
# Measuring execution time of the forward pass for one input image
# The scores themselves are not used here, only the time of the call is shown

# Going through all of the four models
for i in n:
    # Keeping the first test image together with its batch dimension
    one_image = data[i]['x_test'][:1, :, :, :]

    start = timer()
    model[i].predict(one_image)
    elapsed = timer() - start

    print('dataset{0}.pickle classification time = {1:.5f}'.format(i, elapsed))


# 🖼️ Predicting with one image from test dataset

In [None]:
%matplotlib inline

plt.rcParams['figure.figsize'] = (15.0, 15.0)  # Setting default size of the plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams["font.family"] = 'Times New Roman'

# Going through all of the four models together with names of their preprocessing methods
# Index of the plots starts from 1
for position, (i, method) in enumerate(zip(n, m), start=1):
    # Preparing image with index 100 from test dataset, batch dimension is kept
    x_input = data[i]['x_test'][100:101, :, :, :]  # (1, 32, 32, 3) or (1, 32, 32, 1)

    # Getting correct class of this image
    y_input = data[i]['y_test'][100:101]

    # Plotting input image
    plt.subplot(1, 4, position)  # rows, columns, current index of the plot
    if i in (7, 8):
        # 1-channeled image is shown in gray scale
        plt.imshow(x_input[0, :, :, 0], cmap='gray')
    else:
        plt.imshow(x_input[0])
    plt.xlabel(method, fontsize=14)
    plt.ylabel(labels[y_input[0]], fontsize=14)
    plt.xticks([])
    plt.yticks([])

    # Getting scores from forward pass of input image
    scores = model[i].predict(x_input)

    # Getting only one class with maximum score
    prediction = np.argmax(scores)
    print('Predicted classId for model trained on dataset{0}.pickle: {1}'.format(i, prediction))

    # Printing label for classified Traffic Sign
    print('Predicted label:', labels[prediction])

    print()


# Showing the plot
plt.show()


# 💾 Saving models

In [None]:
# Saving every model into its own file, number of the dataset is a part of the file's name
for i, trained_model in ((number, model[number]) for number in n):
    trained_model.save('model-dataset' + str(i) + '.h5')

# # Saving model locally without committing
# from IPython.display import FileLink

# FileLink('model-dataset2.h5')
