#### Problem Statement: Train a CNN model to classify images from the CIFAR-10 dataset.

In [None]:
# findspark.init() makes the local Spark installation importable from this
# kernel; it is run here, before the cell that imports pyspark.
import findspark
findspark.init()

In [None]:
import pyspark
from pyspark import SparkConf
from pyspark.sql import SparkSession

# NOTE: `conf` is created but never handed to the builder below; the session
# is configured entirely through the builder's own calls.
conf = SparkConf()

# Start (or reuse) a local Spark session: 5 worker threads, 16g driver memory.
spark = (
    SparkSession.builder
    .master('local[5]')
    .config('spark.driver.memory', '16g')
    .appName('CIFAR10 image classification')
    .getOrCreate()
)

In [None]:
# Keep a handle on the SparkContext behind the session; the bare `sc` on the
# last line makes the notebook render it as the cell output.
sc = spark.sparkContext
sc

In [None]:
import matplotlib.pyplot as plt

CIFAR-10 is an image dataset. 
It contains 60000 tiny color images, each 32 by 32 pixels.
The images are divided into 10 classes (i.e. airplane, automobile, bird, cat, deer, dog, frog, horse, ship and truck), with 6000 images per class.

In [None]:
# CIFAR-10 loader from Keras
from keras.datasets import cifar10

# load_data() returns two (images, labels) pairs: the training split first,
# then the test split.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

The dataset download is around 160 MB.<br>
After the code finishes running, the data is stored in the x_train, y_train, x_test and y_test variables, where the training and test sets consist of 50000 and 10000 samples respectively.

Now if we print the shape of the training data (x_train.shape), we get the following output.<br>
(50000, 32, 32, 3)<br>
(number of samples, height, width, color channels)

In [None]:
# Print the dimensions of the training image array
print('x_train shape:', x_train.shape)

In [None]:
# Print the dimensions of the training label array
print('y_train shape:', y_train.shape)

If we print the value of y_train, we see that the labels are already encoded as numbers.

In [None]:
# Display the raw (integer-encoded) training labels as the cell output
y_train

It's difficult to interpret those encoded labels, so let's create a list of the actual label names.

In [None]:
# Human-readable class names. A name's position in the list is the integer
# label used to look it up (e.g. class_names[y_train[k][0]]).
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

The code below displays the first 21 images in the dataset, arranged in 7 columns and 3 rows. <br> The figsize argument defines the size of the figure.<br> The set_title() method sets the title of each image, and the imshow() method displays the image.

In [None]:
# Preview grid: the first 21 training images in 3 rows x 7 columns, each
# titled with its class name.
fig, axes = plt.subplots(ncols=7, nrows=3, figsize=(17, 8))

# axes.flat walks the grid row by row, so `index` matches the image order.
for index, ax in enumerate(axes.flat):
    ax.set_title(class_names[y_train[index][0]])
    ax.imshow(x_train[index])
    # Hide ticks and tick labels; they carry no information for an image.
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

plt.show()

The y_train and y_test labels are still single numbers ranging from 0 to 9, stored in an array.<br>In fact, such labels are not what the neural network expects.<br>Instead, all those labels should be in one-hot representation.

In [None]:
# One-hot encode the integer class labels (10 classes).
import keras
try:
    # Public location of the helper in current Keras releases.
    from keras.utils import to_categorical
except ImportError:
    # Fallback for older Keras releases that only expose it via np_utils.
    from keras.utils.np_utils import to_categorical

y_train_one_hot = to_categorical(y_train, 10)
y_test_one_hot = to_categorical(y_test, 10)

# Show one encoded label (the second training sample) as a sanity check.
print('The one hot label is:', y_train_one_hot[1])

In [None]:
# Convert the images to float32 and divide by 255 (this maps pixel values to
# [0, 1] assuming 8-bit input -- TODO confirm dtype of the loaded arrays).
# NOTE: this cell overwrites x_train / x_test, so running it a second time
# divides by 255 again; re-run the data-loading cell first.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Display the first converted image as an array.
x_train[0]

In [None]:
# Build the convolutional neural network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

model = Sequential([
    # Convolutional stage 1: two 3x3 conv layers with 32 filters, then
    # 2x2 max-pooling and dropout.
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32,32,3)),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Convolutional stage 2: same layout with 64 filters.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head: flatten, one hidden dense layer, then a 10-way
    # softmax (one output per class).
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model built above
model.summary()

In [None]:
# Configure training: categorical cross-entropy loss (labels are one-hot),
# Adam optimizer, and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 2 epochs in mini-batches of 32, holding out 20% of the training
# data for validation. The returned history object is kept for plotting.
hist = model.fit(
    x_train,
    y_train_one_hot,
    batch_size=32,
    epochs=2,
    validation_split=0.2,
)

In [None]:
# Training vs. validation loss, one point per epoch
loss_fig, loss_ax = plt.subplots()
for series in ('loss', 'val_loss'):
    loss_ax.plot(hist.history[series])
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
# Legend entries follow the order in which the curves were plotted.
loss_ax.legend(['Train', 'Val'], loc='upper right')
plt.show()

In [None]:
# Training vs. validation accuracy, one point per epoch
acc_fig, acc_ax = plt.subplots()
for series in ('accuracy', 'val_accuracy'):
    acc_ax.plot(hist.history[series])
acc_ax.set_title('Model accuracy')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_xlabel('Epoch')
# Legend entries follow the order in which the curves were plotted.
acc_ax.legend(['Train', 'Val'], loc='lower right')
plt.show()

In [None]:
# Evaluate on the held-out test set. Element 0 of the result is the loss;
# element 1 is the metric the model was compiled with (accuracy).
scores = model.evaluate(x_test, y_test_one_hot)
scores[1]

In [None]:
import numpy as np

# Which test image to inspect
image_number = 0

# Show the chosen image
sample = x_test[image_number]
plt.imshow(sample)

# Copy the image and add a leading batch axis of size 1 before passing it
# to predict().
batch = np.array(sample).reshape(1, 32, 32, 3)

# The highest-scoring output index selects the predicted class name.
class_scores = model.predict(batch)
predicted_label = class_names[class_scores.argmax()]

# Ground-truth class name for the same image
original_label = class_names[y_test[image_number][0]]

# Report both labels side by side
message = "Original label is {} and predicted label is {}"
print(message.format(original_label, predicted_label))

In [None]:
# Show the first 21 test images (3 rows x 7 columns), each titled with its
# actual class and the class predicted by the model.
n_rows, n_cols = 3, 7
n_images = n_rows * n_cols

# Run the model once on the whole batch rather than once per image inside the
# loop; argmax over the class axis yields one predicted class index per image.
predicted_classes = model.predict(x_test[:n_images]).argmax(axis=1)

fig, axes = plt.subplots(ncols=n_cols, nrows=n_rows, sharex=False,
    sharey=True, figsize=(17, 8))

# axes.flat walks the grid row by row, so `index` matches the image order.
for index, ax in enumerate(axes.flat):
    ax.set_title('actual:' + class_names[y_test[index][0]] + '\n'
                 + 'predicted:' + class_names[predicted_classes[index]])
    # No cmap argument: matplotlib ignores cmap for RGB image data.
    ax.imshow(x_test[index])
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)

plt.show()

In [None]:
# Stop the SparkContext. The parentheses are required: a bare `sc.stop` only
# references the method without calling it.
sc.stop()

In [None]:
# Shut down the Spark session now that the notebook is finished
spark.stop()