In [28]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
#one-hot encoding
from tensorflow.keras.utils import to_categorical
#use image data generator for image augmentations
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

In [29]:
# Reproducibility: every random number generator shares one seed

seed = 42

# Ask TensorFlow for deterministic op implementations
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Seed Python's `random`, NumPy and TensorFlow with the same value
for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
    apply_seed(seed)

# Data import

In [30]:
# load_data() yields a (train, test) pair, each an (images, labels) tuple
train_split, test_split = cifar10.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Exploring the data


In [None]:
%matplotlib qt
print('training data shape:',train_images.shape, train_labels.shape)
print('testing data shape:',test_images.shape, test_labels.shape)

#find the unqiue number of train labels
classes = np.unique(train_labels)
classes_num = len(classes)
print('total number of outputs:',classes_num)
print('output classes:',classes)

plt.figure(figsize = [4,2])

#display the first image in the training data
plt.subplot(121)
plt.imshow(train_images[0,:,:],cmap='gray')
plt.title('first train image : {}'.format(train_labels[0]))

#display the first image in the test data
plt.subplot(122)
plt.imshow(test_images[0,:,:],cmap='gray')
plt.title('first test image : {}'.format(test_labels[0]))

# Preprocess the data

In [None]:
# Per-image dimensions; the same tuple is later used as the model's input shape
input_shape = tuple(train_images.shape[1:])
Rows, Cols, Dims = input_shape

# Convert the pixels to float32 and divide by 255 so they lie between 0 and 1
train_data = train_images.astype('float32') / 255
test_data = test_images.astype('float32') / 255

# Turn the integer class labels into one-hot (categorical) vectors
train_labels_one_hot, test_labels_one_hot = (
    to_categorical(labels) for labels in (train_labels, test_labels)
)

In [None]:
# Show the first training label before and after one-hot encoding
label_views = (
    ('original label 0', train_labels[0]),
    ('after one hot encoding', train_labels_one_hot[0]),
)
for caption, value in label_views:
    print(caption, value)

# Define the model

In [None]:
#conv layer, max pooling layer, dropout layer, Dense layer
def createModel():
    """Build the (uncompiled) convolutional classifier used in this notebook.

    The network stacks three blocks of
    Conv2D(padding='same') -> Conv2D -> MaxPooling2D(2x2) -> Dropout(0.25),
    using 32, 64 and 64 filters respectively, followed by
    Flatten -> Dense(512, relu) -> Dropout(0.5) -> Dense(classes_num, softmax).

    Reads the module-level names ``input_shape`` (shape of one image) and
    ``classes_num`` (number of output classes).

    Returns:
        A new ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = Sequential()

    for block_number, filters in enumerate((32, 64, 64)):
        # Only the very first layer of the network declares the input shape
        entry_kwargs = {'input_shape': input_shape} if block_number == 0 else {}
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                         **entry_kwargs))
        model.add(Conv2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

    # Classification head: one hidden dense layer, then a softmax over the classes
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(classes_num, activation='softmax'))

    return model
#We have used 6 conv layers, followed by one hidden fully-connected layer and a softmax output layer

# Train the model

In [None]:
# Build a fresh network and configure optimizer, loss function and metric
model1 = createModel()
model1.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture
model1.summary()

# Training process parameters
batch_size = 256
epochs = 50

# NOTE(review): the test split doubles as the validation data here, so the
# "validation" curves plotted later are really test-set curves.
history = model1.fit(
    train_data,
    train_labels_one_hot,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(test_data, test_labels_one_hot),
)

# Final loss and accuracy on the test set
model1.evaluate(test_data, test_labels_one_hot)

# Plotting the training and test results

In [None]:
# Loss per epoch: training (red) against validation (blue)
loss_fig, loss_ax = plt.subplots(figsize=[8, 6])
loss_ax.plot(history.history['loss'], 'r', linewidth=3.0)
loss_ax.plot(history.history['val_loss'], 'b', linewidth=3.0)
loss_ax.legend(['Training loss', 'Validation loss'], fontsize=18)
loss_ax.set_xlabel('epochs', fontsize=15)
loss_ax.set_ylabel('loss', fontsize=15)
loss_ax.set_title('loss curve', fontsize=17)

# Accuracy per epoch: training (red) against validation (blue)
acc_fig, acc_ax = plt.subplots(figsize=[8, 6])
acc_ax.plot(history.history['accuracy'], 'r', linewidth=3.0)
acc_ax.plot(history.history['val_accuracy'], 'b', linewidth=3.0)
acc_ax.legend(['Training accuracy', 'Validation accuracy'], fontsize=18)
acc_ax.set_xlabel('epochs', fontsize=15)
acc_ax.set_ylabel('accuracy', fontsize=15)
acc_ax.set_title('accuracy curve', fontsize=17)

In [None]:
"""
We can see that both curves still show overfitting,
even after adding the dropout layers.
"""

# Data augmentation

One of the major reasons for overfitting is that we don’t have enough data to train our network. Apart from regularization, another very effective way to counter overfitting is data augmentation: the process of artificially creating more images from the images we already have, for example by changing their size or orientation. Doing this by hand can be tedious, but fortunately Keras provides it through the `ImageDataGenerator` class.

In [None]:
# Build a second, freshly initialised network with the same training configuration
model2 = createModel()

model2.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics
              =['accuracy'])

# Training process parameters
batch_size = 256
epochs = 50

# Random transformations applied to the training images only
datagen = ImageDataGenerator(width_shift_range=0.1,
                             # randomly shifts images horizontally (fraction of total width)
                             height_shift_range=0.1,
                             # randomly shifts images vertically (fraction of total height)
                             horizontal_flip=True,vertical_flip=False)

# Pass the notebook-wide seed so the batch iterator is seeded explicitly
# instead of depending on whatever the global RNG state happens to be.
train_batches = datagen.flow(train_data, train_labels_one_hot,
                             batch_size=batch_size, seed=seed)

# Fit the model on the batches produced by datagen.flow().
# `workers=4` was dropped: Keras 3's Model.fit() no longer accepts it, and
# where it is accepted the worker threads share one RNG, which makes the
# augmentation order non-reproducible.
history2 = model2.fit(train_batches,
                      steps_per_epoch=int(np.ceil(train_data.shape[0] / float(batch_size))),
                      epochs=epochs,
                      validation_data=(test_data,test_labels_one_hot))

# Final loss and accuracy on the (un-augmented) test set
model2.evaluate(test_data,test_labels_one_hot)


# Plotting the results after data augmentation

In [None]:
# Loss per epoch for the augmented run: training (red) against test (blue)
aug_loss_fig, aug_loss_ax = plt.subplots()
aug_loss_ax.plot(history2.history['loss'], 'r', linewidth=3.0)
aug_loss_ax.plot(history2.history['val_loss'], 'b', linewidth=3.0)
aug_loss_ax.legend(['training loss', 'testing loss'], fontsize=15)
aug_loss_ax.set_xlabel('epochs', fontsize=15)
aug_loss_ax.set_ylabel('loss', fontsize=15)
aug_loss_ax.set_title('loss curve', fontsize=18)

# Accuracy per epoch for the augmented run: training (red) against test (blue)
aug_acc_fig, aug_acc_ax = plt.subplots()
aug_acc_ax.plot(history2.history['accuracy'], 'r', linewidth=3.0)
aug_acc_ax.plot(history2.history['val_accuracy'], 'b', linewidth=3.0)
aug_acc_ax.legend(['training accuracy', 'testing accuracy'], fontsize=15)
aug_acc_ax.set_xlabel('epochs', fontsize=15)
aug_acc_ax.set_ylabel('accuracy', fontsize=15)
aug_acc_ax.set_title('accuracy curve', fontsize=18)

# The figures above clearly show that there is no overfitting; the test accuracy is even higher than the training accuracy, which is plausible because dropout and augmentation are only active during training