In [None]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
from sklearn.impute import SimpleImputer
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.image as mpimg
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.vis_utils import plot_model
from tensorflow.keras.optimizers import RMSprop
import matplotlib.pyplot as plt
from sklearn.utils.class_weight import compute_class_weight

In [None]:
path ="./trafficsigns_dataset/"


def collect_sign_images(dataset_dir):
    """Index every PNG stored as ``<dataset_dir>/<shape>/<sign>/<file>.png``.

    The shape and sign labels are the first two directory levels below
    ``dataset_dir``. They are derived with ``os.path.relpath`` and ``os.sep``
    so the walk works on every platform, not only where ``os.walk`` joins
    sub-directories with a backslash.

    Parameters
    ----------
    dataset_dir : str
        Root directory of the dataset.

    Returns
    -------
    list of [shape, sign, imageName, filePath]
        One record per ``.png`` file, in ``os.walk`` order. ``filePath`` is
        joined with forward slashes. PNG files that sit fewer than two
        directory levels below the root have no shape/sign label and are
        skipped instead of raising ``IndexError``.
    """
    base = dataset_dir.rstrip("/\\")
    records = []
    for root, _dirs, files in os.walk(dataset_dir):
        parts = os.path.relpath(root, dataset_dir).split(os.sep)
        if len(parts) < 2:
            # The root itself or a bare shape directory: no sign label here.
            continue
        shape, sign = parts[0], parts[1]
        for file in files:
            if file.endswith(".png"):
                records.append([shape, sign, file, "/".join([base, *parts, file])])
    return records


lst = collect_sign_images(path)
dfdata = pd.DataFrame(lst, columns=['shape', 'sign', 'imageName','filePath'])
if lst:
    # The image preview cell reads `filepath`; keep it pointing at the last
    # indexed image, exactly as the original loop left it.
    filepath = lst[-1][3]

In [None]:
# Preview the last indexed image as a sanity check on the dataset.
# The path is read from `dfdata` (its last row is the last file appended by
# the indexing loop) rather than from the `filepath` variable that loop left
# behind, so this cell does not depend on leaked loop state.
img = mpimg.imread(dfdata['filePath'].iloc[-1])
imgplot = plt.imshow(img, cmap='gray')
plt.show()

In [None]:
# Show the array dimensions of the image read with mpimg.imread above.
img.shape

In [None]:
# One histogram per label column, placed in consecutive slots of a 10x4 grid.
plt.figure(figsize=(30, 50))
label_columns = ['shape', 'sign']
for slot, column in enumerate(label_columns, start=1):
    plt.subplot(10, 4, slot)
    plt.hist(dfdata[column], bins=35, color='blue', alpha=0.3)
    plt.title(column)
    plt.xticks(rotation='vertical')

In [None]:
# Number of indexed images per shape label.
dfdata['shape'].value_counts()

In [None]:
# Number of indexed images per sign label.
dfdata['sign'].value_counts()

In [None]:
# Inputs for the shape classifier: each image path with its shape label.
model_1_df = dfdata[['filePath', 'shape']].rename(
    columns={'filePath': 'Path', 'shape': 'Shape'}
)

In [None]:
# Display the path/shape frame built in the previous cell.
model_1_df

In [None]:
# Hold out 20% of the images for validation. The split is stratified on the
# shape label so training and validation keep the same class proportions;
# this notebook later treats the classes as imbalanced, and an unstratified
# split can under-represent rare shapes in the validation set.
trainX, valX, trainY, valY = train_test_split(
    model_1_df["Path"],
    model_1_df["Shape"],
    test_size=0.2,
    shuffle=True,
    random_state=10,
    stratify=model_1_df["Shape"],
)

In [None]:
# Class distribution of the training labels. The bare `trainY` expression
# that preceded this line was removed: only a cell's last expression is
# displayed, so it had no effect.
trainY.value_counts()

In [None]:
# Class distribution of the validation labels, for comparison with training.
valY.value_counts()

In [None]:
# Pair each split's paths with its labels in the two-column layout that
# flow_from_dataframe reads (x_col='Paths', y_col='Shapes').
train_df = pd.DataFrame({"Paths": trainX, "Shapes": trainY})
val_df = pd.DataFrame({"Paths": valX, "Shapes": valY})

In [None]:
# (rows, columns) of the validation frame.
val_df.shape

In [None]:
# Image data generators

In [None]:
# Both generators only multiply pixel values by 1/255; no augmentation.
rescale_only = dict(rescale=1/255.0, data_format='channels_last')
idg_train = ImageDataGenerator(**rescale_only)
idg_val = ImageDataGenerator(**rescale_only)

In [None]:
# Training and validation iterators share every option except the frame they
# read from, so the common settings are declared once.
flow_options = dict(
    x_col='Paths',
    y_col='Shapes',
    class_mode='categorical',
    color_mode='grayscale',
    target_size=(28, 28),
    batch_size=32,
    seed=10,
)
train_gen = idg_train.flow_from_dataframe(dataframe=train_df, **flow_options)
val_gen = idg_val.flow_from_dataframe(dataframe=val_df, **flow_options)

In [None]:
# Display the training iterator object returned by flow_from_dataframe.
train_gen

In [None]:
# Baseline model

In [None]:
# Baseline: flatten the 28x28x1 input and pass it through one 32-unit hidden
# layer into a 5-way softmax.
# NOTE(review): 5 outputs presumably matches the number of shape classes;
# confirm against train_gen.class_indices.
baseline_model = tf.keras.models.Sequential()
baseline_model.add(tf.keras.layers.Flatten(input_shape=(28, 28, 1)))
baseline_model.add(tf.keras.layers.Dense(32, activation='relu'))
baseline_model.add(tf.keras.layers.Dense(5, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the baseline model.
baseline_model.summary()

In [None]:
# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical' in the generators.
baseline_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_accuracy'],
)

In [None]:
# Train the baseline for 20 epochs. `batch_size` is deliberately not passed:
# Keras documents that it must not be specified for generator/Sequence input,
# and train_gen/val_gen already yield batches of 32 (set in
# flow_from_dataframe).
fitted_baseline = baseline_model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    shuffle=False,
)

In [None]:
# Training vs. validation accuracy per epoch for the baseline model.
for metric_key, curve_label in (('categorical_accuracy', 'tr_acc'),
                                ('val_categorical_accuracy', 'val_acc')):
    plt.plot(fitted_baseline.history[metric_key], label=curve_label)
plt.legend(loc='best')

In [None]:
# Training vs. validation loss per epoch for the baseline model.
baseline_history = fitted_baseline.history
plt.plot(baseline_history['loss'], label='tr_loss')
plt.plot(baseline_history['val_loss'], label='val_loss')
plt.legend(loc='best')

In [None]:
# Handling class imbalance

In [None]:
# "balanced" weights are inversely proportional to class frequency in the
# training iterator. `classes` and `y` are passed by keyword: current
# scikit-learn releases make them keyword-only, so the positional form raises
# TypeError there, while the keyword form works on older releases too.
weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_gen.classes),
    y=train_gen.classes,
)

In [None]:
# Map each class index to its weight, in the {class_index: weight} form that
# Keras' `class_weight` argument takes. np.unique is evaluated once instead of
# on every loop iteration, and keys/values are cast to plain int/float.
class_ids = np.unique(train_gen.classes)
weights_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(class_ids, weights)
}

In [None]:
# Display the class-index -> weight mapping built in the previous cell.
weights_dict

In [None]:
# Training the class-weighted model

In [None]:
# Same dense architecture as the baseline but with a wider (128-unit) hidden
# layer, trained with per-class weights to counter the class imbalance.
baseline_model_weighted = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(5,activation='softmax')
])
baseline_model_weighted.compile(optimizer='adam',loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
# `batch_size` is not passed: Keras documents that it must not be specified
# for generator/Sequence input, and the generators already batch by 32.
fitted_baseline_weighted = baseline_model_weighted.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    class_weight=weights_dict,
    shuffle=False,
)

In [None]:
# Training vs. validation accuracy per epoch for the class-weighted model.
weighted_curves = {
    'tr_acc': 'categorical_accuracy',
    'val_acc': 'val_categorical_accuracy',
}
for curve_label, metric_key in weighted_curves.items():
    plt.plot(fitted_baseline_weighted.history[metric_key], label=curve_label)
plt.legend(loc='best')

In [None]:
# Incremental improvements

In [None]:
# Class-weighted dense model with two hidden layers (128 then 256 units).
baseline_model_weighted_2_hidden = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(5,activation='softmax')
])
baseline_model_weighted_2_hidden.compile(optimizer='adam',loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
# `batch_size` is not passed: Keras documents that it must not be specified
# for generator/Sequence input, and the generators already batch by 32.
fitted_baseline_weighted_2_hidden = baseline_model_weighted_2_hidden.fit(
    train_gen,
    validation_data=val_gen,
    epochs=20,
    class_weight=weights_dict,
    shuffle=False,
)

In [None]:
# Training vs. validation accuracy per epoch for the two-hidden-layer model.
two_hidden_history = fitted_baseline_weighted_2_hidden.history
plt.plot(two_hidden_history['categorical_accuracy'], label='tr_acc')
plt.plot(two_hidden_history['val_categorical_accuracy'], label='val_acc')
plt.legend(loc='best')

In [None]:
# Creating image data generators with augmentation

In [None]:
# Training generator: rescale plus random rotation, zoom and flips.
new_idg_train=ImageDataGenerator(rescale=1/255.0,
                                 data_format='channels_last',
                                 rotation_range=95,
                                 #width_shift_range=0.2,
                                 #height_shift_range=0.2,
                                 #shear_range=0.2,
                                 zoom_range=0.2,
                                 fill_mode='nearest',
                                 horizontal_flip=True,
                                 vertical_flip=True)
# Validation generator: rescale only. Random augmentation belongs on the
# training data; applying it to validation images makes the validation
# metrics noisy and not comparable between epochs or with the earlier,
# non-augmented models.
new_idg_val=ImageDataGenerator(rescale=1/255.0,
                               data_format='channels_last')
# Iterators over the same train/validation frames, now fed through the
# `new_idg_*` generators. Shared options are declared once.
new_flow_options = dict(
    x_col='Paths',
    y_col='Shapes',
    class_mode='categorical',
    color_mode='grayscale',
    target_size=(28, 28),
    batch_size=32,
    seed=10,
)
new_train_gen = new_idg_train.flow_from_dataframe(dataframe=train_df, **new_flow_options)
new_val_gen = new_idg_val.flow_from_dataframe(dataframe=val_df, **new_flow_options)

In [None]:
# Two-hidden-layer dense model trained on the augmented iterator for 100
# epochs, with class weights.
baseline_model_weighted_2_hidden_aug = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28,1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(5,activation='softmax')
])
baseline_model_weighted_2_hidden_aug.compile(optimizer='adam',loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
# `batch_size` is not passed: Keras documents that it must not be specified
# for generator/Sequence input, and the generators already batch by 32.
fitted_baseline_model_weighted_2_hidden_aug = baseline_model_weighted_2_hidden_aug.fit(
    new_train_gen,
    validation_data=new_val_gen,
    epochs=100,
    class_weight=weights_dict,
    shuffle=False,
)

In [None]:
# Training vs. validation accuracy per epoch for the augmented dense model.
for metric_key, curve_label in (('categorical_accuracy', 'tr_acc'),
                                ('val_categorical_accuracy', 'val_acc')):
    plt.plot(fitted_baseline_model_weighted_2_hidden_aug.history[metric_key],
             label=curve_label)
plt.legend(loc='best')

In [None]:
# Adding pooling and Conv2D layers

In [None]:
# max_conv_model= tf.keras.models.Sequential([
#     tf.keras.layers.Conv2D(5,(3,3),activation='relu',input_shape=(28,28,1)),
#     tf.keras.layers.MaxPooling2D(2,2),
#     tf.keras.layers.Flatten(input_shape=(28,28,1)),
#     tf.keras.layers.Dense(32, activation='relu'),
#     tf.keras.layers.Dense(5,activation='softmax')
# ])
# max_conv_model.compile(optimizer='adam',loss='categorical_crossentropy',
#               metrics=['categorical_accuracy'])
# fitted_max_conv_model=max_conv_model.fit(new_train_gen,validation_data=new_val_gen,batch_size=32,
#                                          epochs=100,class_weight=weights_dict,shuffle=False)

In [None]:
# CNN: a 5-filter conv + max-pool stage, then two 64-filter 'same'-padded
# convs + max-pool, followed by the same 128/256 dense head as the earlier
# models and a 5-way softmax.
max_conv_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(5, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax'),
])
max_conv_model.compile(optimizer='sgd',loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])
# `batch_size` is not passed: Keras documents that it must not be specified
# for generator/Sequence input, and the generators already batch by 32.
fitted_max_conv_model = max_conv_model.fit(
    new_train_gen,
    validation_data=new_val_gen,
    epochs=100,
    class_weight=weights_dict,
    shuffle=False,
)

In [None]:
# Print the layer-by-layer summary of the convolutional model.
max_conv_model.summary()

In [None]:
# Training vs. validation accuracy per epoch for the convolutional model.
conv_history = fitted_max_conv_model.history
plt.plot(conv_history['categorical_accuracy'], label='tr_acc')
plt.plot(conv_history['val_categorical_accuracy'], label='val_acc')
plt.legend(loc='best')