In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams["savefig.dpi"] = 300
plt.rcParams["savefig.bbox"] = "tight"

# Make numpy values easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models, datasets
from tensorflow.keras.layers.experimental import preprocessing


# visualization tools
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

## Classification Example - MNIST

In [None]:
# mnist = tf.keras.datasets.mnist

In [None]:
# Fetch the MNIST train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Scale the pixel intensities from [0, 255] down to the [0.0, 1.0] range.
x_train = x_train / 255.0
x_test = x_test / 255.0

print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

In [None]:
# Show the first 10 training digits, each captioned with its class label.
plt.figure(figsize=(10,10))
for i in range(10):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(x_train[i])
    # y_train still holds the raw labels returned by load_data() at this point
    # (the one-hot encoding happens in a later cell), so it can serve as caption.
    plt.xlabel(str(y_train[i]))
plt.show()

#### Convolutional Neural Networks

<img src="img/lab_10_cnn.png"/>

A **Convolutional Neural Network** (ConvNet/CNN) is a Deep Learning algorithm which can take in an input image, assign importance (learnable weights and biases) to various aspects/objects in the image and be able to differentiate one from the other.


The architecture of a ConvNet is analogous to that of the connectivity pattern of Neurons in the Human Brain and was inspired by the organization of the Visual Cortex. Individual neurons respond to stimuli only in a restricted region of the visual field known as the Receptive Field. A collection of such fields overlap to cover the entire visual area.

## Image as data
So far we've seen that every time we work with ML models, we need to have the input data as vectors. There’s almost no ML model where vectors aren’t used at some point in the project lifecycle. Machines can’t read text or look at images like you and me. They need input to be transformed or encoded into numbers. 

Please be aware that a strictly mathematical definition of vectors can fail to convey all the information you need to work with and understand vectors in an ML context like this:

<img src="img/lab_10_img_as_data_.png"/>

So for MNIST data, we turned a (28, 28) image into a vector of size (784,).

An image is nothing but a matrix of pixel values. In last session, we just flattened them in order to train a sequential NN on them.

In [None]:
# Shape of a single image after flattening it into a 1-D vector.
x_train[0].flatten().shape

In [None]:
# Compare the image's original shape with the shape of its flattened version.
print(x_train[0].shape, x_train[0].flatten().shape)

# Give the flat vector a leading axis (a trailing one would work as well) so
# that imshow can draw it as a single-row image.
a = x_train[0].flatten()[np.newaxis, :]
plt.imshow(a)
plt.show()

## Why CNN as opposed to good old sequential models?
You can always feed the flattened images to a sequential model; however, a ConvNet is able to **successfully capture the Spatial and Temporal dependencies** in an image through the application of relevant filters. 

In the figure, we have an RGB image which has been separated by its three color planes — Red, Green, and Blue.

<img src="img/lab_10_rgb.png"/>

You can imagine how computationally intensive things would get once the images reach dimensions, say 8K (7680×4320). The role of the ConvNet is to reduce the images into a form which is easier to process, without losing features which are critical for getting a good prediction. This is important when we are to design an architecture which is not only good at learning features but also is scalable to massive datasets.

The architecture performs a better fitting to the image dataset due to the reduction in the number of parameters involved and reusability of weights. In other words, the network can be trained to understand the sophistication of the image better.


# Download and prepare the MNIST data

In [2]:
# Reload the MNIST train and test splits as (images, labels) pairs.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Scale the pixel intensities from [0, 255] down to the [0.0, 1.0] range.
x_train = x_train / 255.0
x_test = x_test / 255.0

In [3]:
# Shape of one sample as loaded, i.e. before the channel axis is added.
x_train[0].shape

(28, 28)

In [5]:
# Append a trailing channel axis of size 1 so every image is (28, 28, 1).
x_train = x_train.reshape(len(x_train), 28, 28, 1)
x_test = x_test.reshape(len(x_test), 28, 28, 1)

In [11]:
# Confirm that the test images now carry the trailing channel axis.
x_test.shape

(10000, 28, 28, 1)

In [17]:
# one hot encode target values
# Only encode while the labels are still a 1-D vector of class indices:
# re-running this cell on already one-hot (N, 10) labels would otherwise
# encode them a second time and produce an (N, 10, 10) array.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, 10)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, 10)

# Create the convolutional base

The lines of code below define the convolutional base using a common pattern: a stack of Conv2D and MaxPooling2D layers.

As input, a CNN takes tensors of shape (image_height, image_width, color_channels), ignoring the batch size. If you are new to these dimensions, color_channels refers to (R,G,B) for color images; grayscale images have a single channel. In this example, you will configure your CNN to process inputs of shape (28, 28, 1), which is the format of the MNIST images after the reshape above. You can do this by passing the argument input_shape to your first layer.

In [None]:
# Build the convolutional base one layer at a time: two Conv2D -> MaxPooling2D
# stages, the first of which declares the (28, 28, 1) input shape.
model = models.Sequential()
model.add(
    layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation="relu",
        input_shape=(28, 28, 1),
    )
)
model.add(layers.MaxPooling2D(pool_size=(2, 2)))
model.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

In [6]:
# Same convolutional base, declared in one go by handing Sequential the list
# of layers: two Conv2D -> MaxPooling2D stages on (28, 28, 1) inputs.
model = models.Sequential(
    [
        layers.Conv2D(
            filters=32,
            kernel_size=(3, 3),
            activation="relu",
            input_shape=(28, 28, 1),
        ),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(filters=32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
    ]
)

2021-10-28 16:21:34.172658: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Print the layers, output shapes and parameter counts of the convolutional base.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
Total params: 9,568
Trainable params: 9,568
Non-trainable params: 0
_________________________________________________________________


As the summary above shows, the output of every Conv2D and MaxPooling2D layer is a 3D tensor of shape (height, width, channels). The width and height dimensions tend to shrink as you go deeper in the network. The number of output channels for each Conv2D layer is controlled by the first argument (e.g., 32 or 64). Typically, as the width and height shrink, you can afford (computationally) to add more output channels in each Conv2D layer.

----

To complete the model, you will feed the last output tensor from the convolutional base (of shape (5, 5, 32)) into one or more Dense layers to perform classification. Dense layers take vectors as input (which are 1D), while the current output is a 3D tensor. First, you will flatten (or unroll) the 3D output to 1D, then add one or more Dense layers on top. MNIST has 10 output classes (the digits 0–9), so you use a final Dense layer with 10 outputs.

In [8]:
# Classifier head: unroll the feature maps into a vector, pass it through a
# 128-unit hidden layer, and finish with a 10-way softmax output.
model.add(layers.Flatten())
model.add(layers.Dense(units=128, activation="relu"))
model.add(layers.Dense(units=10, activation="softmax"))

In [9]:
# Print the summary again, now including the Flatten and Dense layers.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 800)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               102528    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1

# Compile and train the model

In [18]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded) and accuracy as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for 10 epochs. Note that the test split is what gets passed as the
# validation data here.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=10,
    validation_data=(x_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Evaluate the model

In [None]:
# Plot the per-epoch training and validation accuracy recorded by fit().
for curve in ("accuracy", "val_accuracy"):
    plt.plot(history.history[curve], label=curve)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.ylim([0.5, 1])
plt.legend(loc="lower right")

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test, verbose=2)

### visualize layers

In [None]:
# First layer of the trained CNN (the first Conv2D). The network built and
# trained in this notebook is bound to `model`; no `model_cnn` is defined.
layer1 = model.layers[0]

In [None]:
# Pull the learned kernel weights and biases out of the first layer.
weights, biases = layer1.get_weights()

In [None]:
# Inspect the shape of the first layer's kernel tensor.
weights.shape

Using a filter smaller than the input is intentional as it allows the same filter (set of weights) to be multiplied by the input array multiple times at different points on the input. Specifically, the filter is applied systematically to each overlapping part or filter-sized patch of the input data, left to right, top to bottom.

In [None]:
# Draw every kernel of the first convolutional layer as a small image.
# A Keras Conv2D kernel is laid out as
# (kernel_height, kernel_width, in_channels, filters), so filter k of the
# single input channel is weights[:, :, 0, k]. Iterating over `weights.T`
# instead would reverse *all* axes and show each kernel transposed.
fig, axes = plt.subplots(6, 6)
for k, ax in enumerate(axes.ravel()):
    if k < weights.shape[-1]:
        ax.imshow(weights[:, :, 0, k])
    else:
        # The 6 x 6 grid has more cells than there are filters; hide the rest.
        ax.set_visible(False)

In [None]:
from tensorflow.keras import backend as K

# with a Sequential model
# Build two backend functions that map the network input to the outputs of
# layers[0] and layers[3] respectively. The trained network is `model` and the
# channel-reshaped images are `x_train`; the notebook defines neither
# `model_cnn` nor `x_train_reshaped`.
get_1rd_layer_output = K.function([model.layers[0].input],
                                  [model.layers[0].output])
get_3rd_layer_output = K.function([model.layers[0].input],
                                  [model.layers[3].output])

# Evaluate both functions on the first five training images.
layer1_output = get_1rd_layer_output([x_train[:5]])[0]
layer3_output = get_3rd_layer_output([x_train[:5]])[0]

In [None]:
# Shape of the layers[0] activations computed for the five sample images.
layer1_output.shape

In [None]:
# The trained network is `model` and the channel-reshaped images are `x_train`;
# the notebook defines neither `model_cnn` nor `x_train_reshaped`.
weights, biases = model.layers[0].get_weights()
n_images = layer1_output.shape[0]
n_filters = layer1_output.shape[3]
# n_filters = 5


# Two rows of axes per input image: the upper row shows the layer1 feature
# maps, the lower row the layer3 feature maps. Column 0 holds the input image.
fig, axes = plt.subplots(n_images * 2, n_filters + 1, figsize=(20, 16), subplot_kw={'xticks': (), 'yticks': ()})
for i in range(n_images):
    # for each input image (= 2 rows)
    axes[2 * i, 0].imshow(x_train[i, :, :, 0], cmap="gray_r")
    # The input image is drawn once, so blank the cell underneath it.
    axes[2 * i + 1, 0].set_visible(False)
    axes[2 * i, 1].set_ylabel("layer1")
    axes[2 * i + 1, 1].set_ylabel("layer3")
    for j in range(n_filters):
        # for each feature map (same number in layer 1 and 3)
        axes[2 * i, j + 1].imshow(layer1_output[i, :, :, j], cmap='gray_r')
        axes[2 * i + 1, j + 1].imshow(layer3_output[i, :, :, j], cmap='gray_r')

# Batch Normalization

In [None]:
# Create the convolutional base

# Same architecture as the plain CNN above, with a BatchNormalization layer
# inserted after each convolution.
from tensorflow.keras.layers import  BatchNormalization


# Conv2D, MaxPooling2D, Flatten and Dense are reached through the `layers`
# module imported at the top of the notebook; only BatchNormalization is
# imported by name in this cell.
model_cnn_bn = tf.keras.models.Sequential([
    layers.Conv2D(32, (3, 3), 
                  activation='relu', 
                  input_shape=(28, 28, 1)),
    BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')    
])

model_cnn_bn.summary()

In [None]:
# Compile and train the model
model_cnn_bn.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


history_cnn_bn = model_cnn_bn.fit(x=x_train_reshaped, 
                             y=y_train_encoded, 
                             epochs=5,
                             validation_split=0.2)

In [None]:
# Score the batch-normalized model on the test split. The reshaped images and
# one-hot labels are bound to `x_test` / `y_test`; the notebook defines no
# `x_test_reshaped` or `y_test_encoded`.
model_cnn_bn.evaluate(x_test, y_test)

In [None]:
# Imported here because no earlier cell brings classification_report into scope.
from sklearn.metrics import classification_report

# Predict class probabilities for the test images (bound to `x_test`; the
# notebook defines no `x_test_reshaped`) and take the most likely class.
y_pred_cnn_bn = model_cnn_bn.predict(x_test, batch_size=64, 
                               verbose=1)
y_pred_bool_cnn_bn = np.argmax(y_pred_cnn_bn, axis=1)

# classification_report needs class indices on both sides. y_test is one-hot
# encoded earlier in the notebook, so collapse it back with argmax when it is
# 2-D; plain integer labels are passed through unchanged.
y_true_cnn_bn = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else y_test

print(classification_report(y_true_cnn_bn, y_pred_bool_cnn_bn))

In [None]:
# Tabulate the training history and chart the training and validation
# accuracy per epoch, zoomed in on the 0.9-1.0 band.
df_result = pd.DataFrame(history_cnn_bn.history)
accuracy_axes = df_result.loc[:, ['accuracy', 'val_accuracy']].plot()
accuracy_axes.set_ylabel("accuracy")
accuracy_axes.set_ylim(.9, 1)

In [None]:
# Transfer learning with CNN