In [1]:
import tensorflow as tf
print(tf.__version__)

import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, GlobalMaxPooling2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model

2.3.0


In [2]:
# Load the CIFAR-10 dataset bundled with Keras
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale the image arrays by 1/255
x_train = x_train / 255.0
x_test = x_test / 255.0

# Collapse the label arrays to 1-D vectors
y_train = y_train.flatten()
y_test = y_test.flatten()

print("x_train.shape:", x_train.shape)
print("y_train.shape", y_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
x_train.shape: (50000, 32, 32, 3)
y_train.shape (50000,)


In [3]:
# number of classes = how many distinct label values occur in the training set
distinct_labels = set(y_train)
K = len(distinct_labels)
print("number of classes:", K)

number of classes: 10


In [4]:
# Build the model using the functional API.
# Architecture: three blocks of (Conv2D -> BatchNormalization) x 2 followed by
# MaxPooling2D, then a dense classifier head.
#
# Notes carried over from earlier versions of this CIFAR model:
#  - The old version used three strided convolutions (32/64/128 filters,
#    strides=2). Removing the striding and using MaxPooling2D works better for
#    smaller images. This differs from VGG, which uses 5 groups of conv layers
#    with multiple convs inside each.
#  - padding='same' is used because without it the image would shrink after
#    each conv, making it too small to work with this many conv layers.
#  - Dropout(0.2) after each pooling layer was tried, but it could break the
#    pattern for the image recognition, so it is not used there.
#  - GlobalMaxPooling2D was the alternative to Flatten in the old version.
i = Input(shape=x_train[0].shape)

x = i
for n_filters in (32, 64, 128):
    for conv_index in range(2):
        x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
        # Batch normalization after every conv, added to help with overfitting
        x = BatchNormalization()(x)
    x = MaxPooling2D((2, 2))(x)

# Classifier head: flatten, then dropout around a 1024-unit hidden layer,
# ending in a softmax over the K classes
x = Flatten()(x)
x = Dropout(0.2)(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(K, activation='softmax')(x)

model = Model(i, x)

In [5]:
# Compile
# Make sure you are using the GPU runtime, as training will take a long time
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [6]:
# Fit
#r = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=50)

In [None]:
# Fit with data augmentation
# To see results starting from untrained weights, reset the runtime and skip
# the plain fit above. Otherwise training continues from the weights the model
# already has, which can be used for fine tuning: run this after the plain fit
# to see the difference.

batch_size = 32

# Randomly shift and horizontally flip the training images so the model
# trains on varied versions of them
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
train_generator = data_generator.flow(x_train, y_train, batch_size=batch_size)

steps_per_epoch = x_train.shape[0] // batch_size

r = model.fit(
    train_generator,
    validation_data=(x_test, y_test),
    steps_per_epoch=steps_per_epoch,
    epochs=50,
)

# A higher validation accuracy suggests the model is less likely to be overfitting

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50

In [None]:
# Plot training and validation loss per epoch from the fit history
import matplotlib.pyplot as plt
for series_name in ('loss', 'val_loss'):
    plt.plot(r.history[series_name], label=series_name)
plt.legend()

In [None]:
# Plot training and validation accuracy per epoch from the fit history
for history_key, legend_label in (('accuracy', 'acc'), ('val_accuracy', 'val_acc')):
    plt.plot(r.history[history_key], label=legend_label)
plt.legend()

In [None]:
# Plot confusion matrix
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
  """
  Print a confusion matrix and draw it as an annotated heat map.

  Args:
    cm: 2-D NumPy array of counts. Rows are drawn along the 'True label'
      axis and columns along the 'Predicted label' axis.
    classes: sequence of tick labels, one per class.
    normalize: if True, every row is divided by its row sum before the
      matrix is printed and plotted. Rows whose sum is zero are left as
      zeros instead of turning into NaN.
    title: title shown above the plot.
    cmap: colormap passed to `plt.imshow`.
  """
  if normalize:
      row_totals = cm.sum(axis=1)[:, np.newaxis]
      # Only divide rows that have at least one sample; the remaining rows
      # keep the zeros pre-filled in `out`, avoiding a 0/0 division.
      cm = np.divide(cm, row_totals,
                     out=np.zeros(cm.shape, dtype=float),
                     where=row_totals != 0)
      print("Normalized confusion matrix")
  else:
      print('Confusion matrix, without normalization')

  print(cm)

  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()
  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  # Cells above half of the maximum value get white text so the number stays
  # readable on the darker background; the rest get black text.
  fmt = '.2f' if normalize else 'd'
  thresh = cm.max() / 2.
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
      plt.text(j, i, format(cm[i, j], fmt),
               horizontalalignment="center",
               color="white" if cm[i, j] > thresh else "black")

  plt.ylabel('True label')
  plt.xlabel('Predicted label')
  # tight_layout runs after the axis labels are set so they are part of the layout
  plt.tight_layout()
  plt.show()


# Predicted class for each test image = index of the largest model output
test_scores = model.predict(x_test)
p_test = test_scores.argmax(axis=1)

cm = confusion_matrix(y_test, p_test)
plot_confusion_matrix(cm, classes=list(range(10)))

In [None]:
# label mapping
# CIFAR-10 class names in label-index order, so labels[k] is the name of class k.
# The list must have one entry per class because it is indexed with
# y_test / p_test values (0-9).
labels = '''airplane
automobile
bird
cat
deer
dog
frog
horse
ship
truck'''.split()

In [None]:
# Show one randomly chosen test image that the model got wrong
wrong_mask = p_test != y_test
misclassified_idx = np.where(wrong_mask)[0]
i = np.random.choice(misclassified_idx)
plt.imshow(x_test[i], cmap='gray')
true_name = labels[y_test[i]]
predicted_name = labels[p_test[i]]
plt.title("True label: %s Predicted: %s" % (true_name, predicted_name));

In [None]:
# Show one randomly chosen test image that the model classified correctly
correct_mask = p_test == y_test
classified = np.where(correct_mask)[0]
i = np.random.choice(classified)
plt.imshow(x_test[i], cmap='gray')
true_name = labels[y_test[i]]
predicted_name = labels[p_test[i]]
plt.title("True label: %s Predicted: %s" % (true_name, predicted_name));

In [None]:
# The model that was built is quite large, so print a summary of it
# to review its structure
model.summary()

# **EPOCH**

---


**Original CIFAR EPOCH**

![Normal](https://drive.google.com/uc?id=1yLGON7U7jT_2ByI9i7hajq-k5dWogS8H)

(loss 0.8424 - Acc 0.7012 - val_loss 0.8657 - val_acc 0.7003) 


---


**Without Batch normalisation but different model (Data augmentation)**

![alt text](https://drive.google.com/uc?id=1RMSnQpBGccHP0GIATQdk6qbkduCWg0mo)

(loss 0.0427 - Acc 0.9867 - val_loss 0.8767 - val_acc 0.8394) 

---

**With only batch normalisation (Data Augmentation)**

![alt text](https://drive.google.com/uc?id=1paJ2_IY4TgyBYPI5ZP-pSqBvJTnbYqmt)

(loss 0.1837 - Acc 0.9376 - val_loss 0.4224 - val_acc 0.8755) 

---


**Fine tuning (First model run and the batch normalisation)**

![alt text](https://drive.google.com/uc?id=1wn6ArKLGkH7BqbCTL_g6QlYdpAkllo9j)

(loss 0.1608 - Acc 0.9461 - val_loss 0.4111 - val_acc 0.8845) 

---

**Conclusion** 
While the new model without batch normalisation reaches a high training accuracy, it is still not as good as the model using only batch normalisation, which has slightly lower training accuracy but much better validation loss and validation accuracy. This can be improved further by fine tuning, i.e. running both `model.fit` calls one after the other.






# **LOSS**

**OLD CIFAR**

![alt text](https://drive.google.com/uc?id=1z2Z51-MVcljClnyMbafSIy6VBXAU_ETx)


---

**NEW MODEL WITH ONLY BATCH NORM**

![alt text](https://drive.google.com/uc?id=1x5hSEz7Uqd5dnuBa5heTWEnUOMil_a_z)

---

**NEW MODEL WITH FINE TUNING**


![alt text](https://drive.google.com/uc?id=1JLSQeTQPfMkiF735uO9G_FrLKpZZf1UK)


---




# ACCURACY

**OLD CIFAR**

![alt text](https://drive.google.com/uc?id=1ozzwbVT8a2Q9TwZX5Jkwdf6Sv3q_zY2l)

---

**NEW MODEL WITH BATCH NORM**


![alt text](https://drive.google.com/uc?id=1UYkVvIqIVFPJxi_23G7lV0TmGCLXQd3W)



---

**FINE TUNING**

![alt text](https://drive.google.com/uc?id=13_rFmDyWy7ii0U1auvWmzAPls4daoOnt)

# CONFUSION MATRIX


**OLD CIFAR MODEL**

![alt text](https://drive.google.com/uc?id=1Jp41TFsVkcceeCVcvt14gKQ-RXxpst48)

---

**NEW MODEL WITH ONLY BATCH NORM**

![alt text](https://drive.google.com/uc?id=1-CVht8Na8w4gssWerLHK_asa2y1vsAII)

---

**FINE TUNING**

![alt text](https://drive.google.com/uc?id=12N2S6yalNJcqgQdVVHDFYLcysw9m4dzb)


In [None]:
# The confusion matrix shows that the numbers are much lower than in the original model, meaning there will be fewer errors and better results.