In [1]:
# Import necessary libraries
import numpy as np
import tensorflow as tf
from numpy.random import seed
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from helper import get_data, plot_activation
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense,Conv2D,Dropout,Flatten,MaxPooling2D,AveragePooling2D

# Set random seed
seed(1)
tf.random.set_seed(1)

%matplotlib inline

In [2]:
# Use the helper function get_data to get the train and 
# test MNIST dataset
x_train, y_train, x_test, y_test = get_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [15]:
# Setting the random seed
seed(1)
tf.random.set_seed(1)

# Function to define the CNN model for MNIST classification
def cnn_model(pool_type="no_pooling"):

  # Intialize a sequential model
  model = Sequential(name=pool_type) 

  # Define the input shape 
  input_shape = (28, 28, 1)

  # Add a convolutional layer with 28 filters, kernel size of 3,
  # input_shape as input_shape defined above and tanh activation
  model.add(Conv2D(28,3,1,padding='valid',input_shape=input_shape,activation='tanh'))

  # Define size of the pooling operation
  pool_size=(3,3)

  # Add an average pooling layer with pool size value as defined 
  # above by pool_size
  if pool_type=="avg_pooling":
    model.add(AveragePooling2D(pool_size,padding='valid'))

  # Add a max pooling layer based with pool size value as defined 
  # above by pool_size
  if pool_type=="max_pooling":
    model.add(MaxPooling2D(pool_size,padding='valid'))

  # Add a flatten layer
  model.add(Flatten())

  # Add a dense layer with ReLU activation with 16 nodes
  model.add(Dense(16,activation='ReLU'))

  # Add a dropout layer with 0.3 as the dropout percentage
  model.add(Dropout(0.3))

  # Add an output layer with 10 nodes and softmax activation
  model.add(Dense(10,activation='softmax'))

  # Compile the model with adam optimizer, 
  # sparse_categorical_crossentropy as the loss 
  # and accuracy as the metric
  opt = tf.keras.optimizers.Adam(learning_rate=0.001)
  model.compile(optimizer=opt,loss='sparse_categorical_crossentropy',metrics=['accuracy'])
  
  # Fit the model on the train data with 8 epochs
  model.fit(x_train , y_train , epochs= 8, verbose=0, 
            shuffle=False, workers=0, use_multiprocessing=False)

  return model



In [16]:
### edTest(test_no_pool) ###
# Call the cnn_model function with pool_type as no_pooling 
# to get the trained model without pooling
model = cnn_model(pool_type="no_pooling")

# Evaluate on the test data
no_pool_acc = model.evaluate(x_test, y_test)
print("The accuracy of the model with no pooling is", no_pool_acc[1])

# Get the number of parameters of the network
no_pool_params = model.count_params()


The accuracy of the model with no pooling is 0.8772000074386597


In [17]:
### edTest(test_avg_pool) ###
# Call the cnn_model function with pool_type as avg_pooling 
# to get the trained model with avg pooling
model = cnn_model(pool_type="avg_pooling")

# Evaluate on the test data
avg_pool_acc = model.evaluate(x_test, y_test)
print("The accuracy of the model with average pooling is", avg_pool_acc[1])

# Get the number of parameters of the network
avg_pool_params = model.count_params()


The accuracy of the model with average pooling is 0.9028000235557556


In [18]:
### edTest(test_max_pool) ###
# Call the cnn_model function with pool_type as max_pooling 
# to get the trained model with max pooling
model = cnn_model(pool_type="max_pooling")

# Evaluate on the test data
max_pool_acc = model.evaluate(x_test, y_test)
print("The accuracy of the model with max pooling is", max_pool_acc[1])

# Get the number of parameters of the network
max_pool_params = model.count_params()


The accuracy of the model with max pooling is 0.9531999826431274


### ⏸ Based on the results seen here, which of the following is the most true?

#### A. The average pooling provides no advantage over no pooling models.
#### B. The no pooling model is more robust and reliable for all datasets.
#### C. The max pooling and average pooling though have lower number of parameters takes longer time to train than the no pooling model.
#### D. The max pooling model performs better as MNIST is made up of mostly edges and high contrasts which provide for max pooling to easily identify the sharp edges.

In [19]:
### edTest(test_chow1) ###
# Submit an answer choice as a string below (eg. if you choose option C, put 'C')
answer1 = 'D'

In [20]:
### edTest(test_accuracy) ###
# Display the models with their accuracy score and parameters 
table = PrettyTable()

table.field_names = ["Model Type", "Test Accuracy", "Test Loss", "Number of Parameters"]
table.add_row(["Without pooling", round(no_pool_acc[1],4), round(no_pool_acc[0],4), no_pool_params])
table.add_row(["With avg pooling", round(avg_pool_acc[1],4), round(avg_pool_acc[0],4), avg_pool_params])
table.add_row(["With max pooling", round(max_pool_acc[1],4), round(max_pool_acc[0],4), max_pool_params])
print(table)

+------------------+---------------+-----------+----------------------+
|    Model Type    | Test Accuracy | Test Loss | Number of Parameters |
+------------------+---------------+-----------+----------------------+
| Without pooling  |     0.8772    |   0.4364  |        303314        |
| With avg pooling |     0.9028    |   0.3424  |        29138         |
| With max pooling |     0.9532    |   0.1654  |        29138         |
+------------------+---------------+-----------+----------------------+


### ⏸ How does the accuracy and loss of the model vary by increasing the pool_size to (5x5)? Why does this happen?

In [14]:
### edTest(test_chow2) ###

# Type your answer within in the quotes given
answer2 = 'The accuracy decreasing because we are eliminating too many features. There are only 28*28\
            and we are reducing it by nearly 1/5 everytime. '