<a href="https://colab.research.google.com/github/SamanZargarzadeh/Deep-Learning/blob/main/16_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1) Import

In [None]:
import tensorflow as tf

# Seed every random number generator used during training so that
# "Restart kernel -> Run all" gives repeatable weights, dropout masks and
# batch order. set_random_seed seeds Python's `random`, NumPy and TensorFlow
# in one call (available in TensorFlow >= 2.7).
# Side effect: NumPy's global generator is seeded too, so any later
# np.random draw in this notebook becomes repeatable across fresh runs.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# check the version
tf.__version__

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, Conv2D, MaxPooling2D 
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.preprocessing import image

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# 2) Load Data

In [None]:
# load_data() hands back a (train, test) pair; each element is itself an
# (images, labels) tuple, unpacked here in two steps.
fashion_mnist_data = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist_data.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
# Report the shape of every array.
# X arrays are stacks of 2-D images: (num_samples, height, width) - there is
# no channel axis yet. y arrays are 1-D: (num_samples,).
for split_name, array in (
    ("train X", X_train),
    ("train y", y_train),
    ("test X", X_test),
    ("test y", y_test),
):
    print(f"{split_name} shape: ", array.shape)

In [None]:
# Sanity-check the range of the integer class ids.
# Use the array's own vectorised min()/max() rather than the Python builtins,
# which walk the NumPy array one element at a time.
print("minimum train y value: ", y_train.min())
print("maximum train y value: ", y_train.max())

In [None]:
# Human-readable class names; the position in the list is the integer class id
# (index 0 -> 'T-shirt/top', ..., index 9 -> 'Ankle boot').

labels = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',   # ids 0-4
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',       # ids 5-9
]

In [None]:
# Rescale the image values so that they lie in between 0 and 1.
#
# The guard makes this cell idempotent: once an array has been rescaled its
# maximum is at most 1, so running the cell a second time does not divide by
# 255 again and silently shrink the pixel values.

if X_train.max() > 1.0:
    X_train = X_train / 255.
if X_test.max() > 1.0:
    X_test = X_test / 255.

In [None]:
# Display one of the images together with its class name

i = 0                      # index of the training sample to show
img = X_train[i]           # 2-D slice: one image
fig, ax = plt.subplots()
ax.imshow(img)
plt.show()
print(f"label: {labels[y_train[i]]}")

# 3) Build a Convolutional Neural Network (CNN) Model

In [None]:
# Build the Sequential convolutional neural network model
#
# Architecture (the input is a single-channel 28x28 image, i.e. not RGB):
#   1. conv1   - Conv2D, 16 filters of size 3x3, stride 1, no padding, ReLU
#   2. pool1   - MaxPooling2D, 3x3 window, stride 1, no padding
#   3. flatten - unroll the feature maps into one long vector
#   4. dense   - fully connected, 64 units, ReLU
#   5. output  - fully connected, 10 units (one per class), softmax
#
# kernel_size and pool_size may also be written as a single integer:
# kernel_size=3 is shorthand for (3,3) and pool_size=3 for (3,3).

cnn_layers = [
    Conv2D(
        filters=16,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='valid',
        activation='relu',
        input_shape=(28, 28, 1),  # the trailing 1 (channel axis) is required
    ),
    MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='valid'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
]
model = Sequential(cnn_layers)

In [None]:
# Print the per-layer table of output shapes and parameter counts.
model.summary()

The first dimension of every layer's output shape is `None` because that dimension is the batch size. The batch size is not fixed when the model is built, so we can ignore it for now.

In [None]:
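# number of parameters:
# conv1:   (3x3x1) x 16 + 16 = 160       (weights + one bias per filter)
# flatten: 24 x 24 x 16 = 9216 values    (length of the unrolled vector; no parameters)
# dense:   9216 x 64 + 64 = 589,888
# output:  64 x 10 + 10 = 650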

# 4) Start Training (Compile)

Compile the model. Use Adam optimizer and change the learning rate to 0.005. For loss, use 'sparse_categorical_crossentropy' and for metrics use 'SparseCategoricalAccuracy'.

In [None]:
# Adam with a learning rate of 0.005; the "sparse" loss and metric take the
# integer class ids in y directly (no one-hot encoding).
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=[accuracy_metric],
)

# 5) Model Fit

Calling `model.fit` returns a Keras `History` object. Its `history` attribute holds a record of the network's progress during training: the loss and the metrics that we defined when we compiled the model.

In [None]:
# fit the model

# history = model.fit(X_train, y_train, epochs = 8, batch_size = 256, verbose = 2)

# This will raise an error. Can you see why?

In [None]:
# Shape of the training images as loaded: note there is no channel axis.
X_train.shape

In [None]:
# Indexing with np.newaxis appends a trailing axis of length 1 (the channel axis).
X_train[..., np.newaxis].shape

The error is caused by the shape of the training images: the Sequential model was defined with a channel dimension (`input_shape=(28,28,1)`), but the images have none. To fix it, add a dummy channel dimension to the training images with `X_train[..., np.newaxis]`. After that, you are set to run the `fit` method.

In [None]:
# Append the trailing channel axis the model expects, then train.
X_train_4d = np.expand_dims(X_train, axis=-1)
history = model.fit(
    X_train_4d,
    y_train,
    epochs=8,
    batch_size=256,
    verbose=2,
)

In [None]:
# Load the history into a pandas DataFrame:
# history.history becomes the columns, one row per recorded value.

training_log = history.history
df = pd.DataFrame(training_log)

In [None]:
# Plot the training loss against the DataFrame index (the epoch number)

loss_plot = df.plot(
    y="loss",
    title="Loss vs. Epochs",
    legend=False,
)
axis_labels = {"xlabel": "Epochs", "ylabel": "Loss"}
loss_plot.set(**axis_labels)

In [None]:
# Make a plot for the accuracy
# The column name is the name of the SparseCategoricalAccuracy metric passed
# to compile(). A title is set so the figure stands on its own, matching the
# loss plot.

accuracy_plot = df.plot(y="sparse_categorical_accuracy", title="Accuracy vs. Epochs", legend=False)
accuracy_plot.set(xlabel="Epochs", ylabel="sparse_categorical_accuracy")

# 6) Model Fit with Validation Sets

In [None]:
# Fit the model again, this time holding back part of the training data.
# validation_split=0.2 means that 20 percent of the training data is held
# back for validation instead of being trained on.
# NOTE(review): this is the same `model` object that was fitted in the
# previous section and it is not rebuilt here, so training presumably
# continues from those weights rather than starting fresh - confirm this is intended.

X_train_4d = np.expand_dims(X_train, axis=-1)
history = model.fit(
    X_train_4d,
    y_train,
    epochs=8,
    batch_size=256,
    verbose=2,
    validation_split=0.2,
)

In [None]:
# Plot the training and validation loss on one set of axes

fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training')
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Loss vs. epochs')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
plt.show()

# 7) Reduce Overfitting

In [None]:
# Same convolution/pooling front end as the first model, but each Dense layer
# is now preceded by BatchNormalization and Dropout(0.5) to reduce overfitting.
regularized_layers = [
    Conv2D(
        filters=16,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='valid',
        activation='relu',
        input_shape=(28, 28, 1),  # the trailing 1 (channel axis) is required
    ),
    MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='valid'),
    Flatten(),
    # regularisation ahead of the hidden dense layer
    BatchNormalization(),
    Dropout(0.5),
    Dense(64, activation='relu'),
    # regularisation ahead of the output layer
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
]
model = Sequential(regularized_layers)

In [None]:
# Print the per-layer summary of the regularised model.
model.summary()

In [None]:
# Compile the new model with the same settings as before:
# Adam (learning rate 0.005), sparse categorical cross-entropy loss, and
# sparse categorical accuracy as the reported metric.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=0.005)
accuracy_metric = tf.keras.metrics.SparseCategoricalAccuracy()

model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=[accuracy_metric],
)

In [None]:
# Fit the regularised model, holding back 20 percent of the training data
# for validation.

X_train_4d = np.expand_dims(X_train, axis=-1)  # add the channel axis
history = model.fit(
    X_train_4d,
    y_train,
    epochs=8,
    batch_size=256,
    verbose=2,
    validation_split=0.2,
)

In [None]:
# Plot the training and validation loss of the regularised model

fig, ax = plt.subplots()
for history_key, curve_name in (('loss', 'Training'), ('val_loss', 'Validation')):
    ax.plot(history.history[history_key], label=curve_name)
ax.set_title('Loss vs. epochs')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper right')
plt.show()

# 8) Model Evaluate on Test

In [None]:
# Evaluate on the held-out test set. The test images need the same trailing
# channel axis as the training images. The result unpacks into the loss
# followed by the single metric given to compile().
X_test_4d = np.expand_dims(X_test, axis=-1)
test_loss, test_accuracy = model.evaluate(X_test_4d, y_test)

# 9) Model Predict

In [None]:
# Choose a random test image, show it, and print its true class name

num_test_images = X_test.shape[0]
random_inx = np.random.choice(num_test_images)
X_sample = X_test[random_inx]
fig, ax = plt.subplots()
ax.imshow(X_sample)
plt.show()
print(f"Label: {labels[y_test[random_inx]]}")

In [None]:
# Get the model predictions
# The single image is wrapped with a leading batch axis and a trailing
# channel axis before it is passed to predict().

sample_batch = X_sample[np.newaxis, ..., np.newaxis]
model.predict(sample_batch)

In [None]:
# Get the model prediction label
# argmax over the prediction array gives the index of the highest score,
# which is then looked up in `labels`.

sample_batch = X_sample[np.newaxis, ..., np.newaxis]
predictions = model.predict(sample_batch)
predicted_class = np.argmax(predictions)
print(predicted_class)
print(f"Model prediction:{labels[predicted_class]}")