## ![](https://ga-dash.s3.amazonaws.com/production/assets/logo-9f88ae6c9c3871690e33280fcf557f33.png) Convolutional Neural Networks I

### LEARNING OBJECTIVES
_By the end of this lesson, students should be able to:_
- Build convolutional neural networks in Keras.

We'll build a neural network very similar to the example provided at the end of the notes.

In [1]:
# 1. Import libraries and modules
import numpy as np
# Seed NumPy's global RNG before the Keras imports below.
# NOTE(review): this seeds NumPy only; the backend (TensorFlow, per the
# "Using TensorFlow backend." message) presumably keeps its own RNG state,
# so runs may still differ -- confirm if exact reproducibility matters.
np.random.seed(123)  # for reproducibility

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
 
# 2. Load pre-shuffled MNIST data into train and test sets
# X_* hold the images, y_* hold the integer digit labels.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


Couldn't import dot_parser, loading of dot files will not be possible.


In [3]:
# Peek at the first five training images (raw pixel arrays).
X_train[:5]

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
   

In [4]:
# Row 5 of the first training image: one pixel intensity per column.
X_train[0, 5]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
        18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
         0,   0], dtype=uint8)

In [8]:
# (number of images, height, width) -- note there is no channel axis yet.
X_train.shape

(60000, 28, 28)

In [9]:
# 3. Preprocess our input data.

# Guard on dtype so this cell is safe to re-run: the freshly loaded MNIST
# pixels are uint8, and this cell leaves float32 arrays behind. Without the
# guard, a second run would divide by 255 again and silently shrink every
# pixel value.
if X_train.dtype == np.uint8:
    # Reshape our NumPy array to be "friendlier" (last dim = 1 b/c grayscale),
    # convert dtype from int to float, then change range from 0-255 to 0-1.
    X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255

if X_test.dtype == np.uint8:
    # Same three steps for the test images.
    X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255

In [10]:
# 4. Preprocess our Y data.

# The raw labels are plain integer digits; look at the first ten.
y_train[:10]

array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)

In [11]:
# One-hot encode the digit labels into 10 columns (one per class),
# applying the same conversion to the train and test labels.
Y_train, Y_test = (
    np_utils.to_categorical(digits, num_classes=10)
    for digits in (y_train, y_test)
)

In [12]:
# First ten one-hot rows; compare with the integer labels shown earlier.
Y_train[:10]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)

In [13]:
# 5. Define model architecture.

# Hand the whole layer stack to Sequential at once; data flows through
# the layers from top to bottom.
model = Sequential([
    # Conv block 1: six 3x3 filters with ReLU (Rectified Linear Unit)
    # activation over the 28x28x1 grayscale input.
    Convolution2D(filters=6,
                  kernel_size=3,
                  activation='relu',
                  input_shape=(28, 28, 1)),
    # 2x2 max pooling. By default the stride matches the pool size,
    # so the pooled regions do not overlap.
    MaxPooling2D(pool_size=2),

    # Conv block 2: sixteen 3x3 filters with ReLU, then another 2x2 pool.
    Convolution2D(filters=16,
                  kernel_size=3,
                  activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Dropout(0.25),

    # Unroll the pooled feature maps into a single vector of 400 outputs.
    Flatten(),

    # Densely connect the 400 nodes to 128 nodes:
    #   400 nodes * 128 nodes       = 51,200 weights
    #   128 nodes * 1 bias per node =    128 biases
    #   Grand total: 51,200 + 128   = 51,328 parameters we need to learn.
    Dense(128, activation='relu'),

    Dropout(0.5),

    # Output layer: one softmax probability for each of the 10 digit classes.
    Dense(10, activation='softmax'),
])

In [14]:
# 6. Compile the model.

# loss:      categorical cross-entropy is common for unordered discrete
#            predictions.
# optimizer: Adam = Adaptive Moment Estimation, a "sophisticated gradient
#            descent".
# metrics:   also report accuracy alongside the loss.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [15]:
# 7. Fit the model on the training data.

# Mini-batches of 32 images, 10 passes (epochs) over the training set,
# with progress output switched on (verbose=1).
model.fit(x=X_train, y=Y_train, batch_size=32, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x107cddeb8>

In [16]:
# 8. Evaluate model on test data.

# `score` holds the test loss followed by the compiled metric(s);
# `labels` holds the matching names in the same order.
score = model.evaluate(x=X_test, y=Y_test, verbose=1)
labels = model.metrics_names



In [17]:
# Print "<metric name>: <value>" for the loss and the accuracy.
for metric_name, metric_value in zip(labels[:2], score[:2]):
    print(metric_name, metric_value, sep=": ")

loss: 0.033042028610457054
acc: 0.9897


In [19]:
# Test error rate: the complement of the test accuracy.
1 - score[1]

0.010299999999999976

In [20]:
# Test accuracy raised to the 5th power.
# NOTE(review): presumably the chance of classifying five digits in a row
# correctly, assuming independent errors -- confirm the intended reading.
(score[1]) ** 5

0.9495500288895132

**First convolutional layer (`conv2d_1`, 60 parameters):**

- 3x3 filters, 1 input channel --> 9 parameters per filter
- 6 filters --> 6 x 9 = 54 parameters across all filters
- 6 filters x 1 bias parameter per filter --> 6 bias parameters
    - Total number of parameters: 54 + 6 = 60


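**Second convolutional layer (`conv2d_2`, 880 parameters):**

- 3x3 filters --> 9 parameters per filter, per input channel
- 16 filters --> 16 x 9 = 144 parameters per input channel
- 144 parameters x 6 channels from the previous layer --> 864 params
- 16 filters x 1 bias parameter per filter --> 16 params
    - Total number of parameters: 864 + 16 = 880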

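input: 26 x 26 x 1 = 676

676 densely connected to 128...

676 * 128 = 86528

Note: these figures describe a single 26 x 26 x 1 feature map connected directly to 128 nodes. In the model defined above, the `Flatten` layer outputs 5 x 5 x 16 = 400 values, so the dense layer actually has 400 * 128 + 128 = 51,328 parameters (see `model.summary()`).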

In [22]:
# Scratch arithmetic: number of values in one 26 x 26 feature map.
26 * 26 

676

In [23]:
# Scratch arithmetic: weights needed to densely connect 676 values to 128 nodes.
676 * 128

86528

In [21]:
# Print each layer's output shape and parameter count, to check the
# hand-computed parameter totals.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 6)         60        
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 16)        880       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 16)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 5, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 400)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               51328     
__________

## Conclusion

<details><summary>Why are neural networks better equipped to handle image data than non-neural networks?
</summary>
```
Neural networks are naturally set up to consider interactions among features.
```
</details>

<details><summary>Why are **convolutional neural networks** better equipped to handle image data than non-CNNs?
</summary>
```
CNNs are naturally set up to consider interactions among "close pixels" only, and they drastically cut down the number of parameters that need to be learned through parameter sharing.
```
</details>

