# CNN From Scratch

In [1]:
import numpy as np
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt 

In [2]:
# Prepare the MNIST dataset: load the train split and the test split separately.

# `root='Desktop'` is a relative directory; `download=True` asks torchvision to
# fetch the files into it (network access may be required on a fresh machine).
trainset = MNIST(root='Desktop', train=True, download=True)
testset  = MNIST(root='Desktop', train=False, download=True)

# Pull images and labels out as NumPy arrays via `.numpy()`; the layers defined
# in the following cells operate on NumPy arrays only.
trainset_array = trainset.data.numpy()
testset_array  = testset.data.numpy()
y_train_array = trainset.targets.numpy()
y_test_array  = testset.targets.numpy()

# Sanity check: the recorded output shows (60000, 28, 28) and (10000, 28, 28).
print('Shape of the train set : ', trainset_array.shape)
print('Shape of the test set : ', testset_array.shape)

Shape of the train set :  (60000, 28, 28)
Shape of the test set :  (10000, 28, 28)


In [3]:
class Conv_layer:
    '''
    2D convolution layer for a single-channel image (valid padding, stride 1),
    trained with plain gradient descent.

    Attributes
    - num_filters : number of filters.
    - filter_size : side length of each square filter.
    - filters     : numpy array of shape (num_filters, filter_size, filter_size).
    - image       : input of the most recent forward pass, None before the first one.
    '''

    def __init__(self ,filter_size ,num_filters):
        '''
        - filter_size : side length of each square filter.
        - num_filters : number of filters.
        '''

        self.num_filters = num_filters
        self.filter_size = filter_size
        # Standard-normal draws scaled down by the number of weights per filter.
        self.filters = np.random.randn(num_filters, self.filter_size, self.filter_size) / self.filter_size**2
        # Filled in by forward(); backprop() needs it to rebuild the patches.
        self.image = None

    def image_region(self, image):
        '''
        Generates every filter_size x filter_size patch of the image
        (valid padding, stride 1) together with its top-left position.

        This generator has no side effects; the input is cached by forward().

        - image : 2d numpy array

        Yields (image_patch, i, j).
        '''
        h, w = image.shape
        for i in range(h - self.filter_size + 1):
            for j in range(w - self.filter_size + 1):
                image_patch = image[i : (i + self.filter_size), j : (j + self.filter_size)]
                yield image_patch, i, j

    def forward(self, image):
        '''
        Forward pass through the conv layer.

        - image : 2d numpy array of shape (h, w)

        Returns a numpy array of shape
        (h - filter_size + 1, w - filter_size + 1, num_filters).
        Raises ValueError if the input is not 2d or is smaller than the filter.
        '''
        image = np.asarray(image)
        if image.ndim != 2:
            raise ValueError('Conv_layer.forward expects a 2d image, got shape %s' % (image.shape,))
        h, w = image.shape
        if h < self.filter_size or w < self.filter_size:
            raise ValueError('image of shape %s is smaller than the %dx%d filter'
                             % (image.shape, self.filter_size, self.filter_size))

        # Cache the input explicitly; backprop() iterates over the same patches.
        self.image = image

        conv_output = np.zeros((h - self.filter_size + 1, w - self.filter_size + 1, self.num_filters))
        for image_patch, i, j in self.image_region(image):
            # (f, f) * (num_filters, f, f) broadcasts; summing the two spatial
            # axes leaves one response per filter.
            conv_output[i, j] = np.sum(image_patch*self.filters , axis=(1,2))
        return conv_output

    def backprop(self, dL_dout, lr):
        '''
        Performs a backward pass of the conv layer and updates the filters in place.

        - dL_dout : loss gradient with respect to this layer's output, same
                    shape as the array returned by forward().
        - lr      : learning rate.

        Returns the loss gradient with respect to the filters (computed before
        the update). Raises RuntimeError if forward() has not been called yet.
        '''
        if self.image is None:
            raise RuntimeError('Conv_layer.backprop called before forward')

        dL_dout = np.asarray(dL_dout)
        dL_dFilter = np.zeros(self.filters.shape)
        for image_patch, i, j in self.image_region(self.image):
            # (num_filters, 1, 1) * (f, f) -> (num_filters, f, f): every filter
            # accumulates the patch weighted by its own upstream gradient.
            dL_dFilter += dL_dout[i, j][:, np.newaxis, np.newaxis] * image_patch
        self.filters -= lr*dL_dFilter
        return dL_dFilter
        

In [4]:
class Maxpool:
    '''
    Max-pooling layer over non-overlapping square windows.
    Rows/columns that do not fill a complete window are ignored.
    '''

    def __init__(self, filter_size):
        '''
        - filter_size : side length of the pooling window (also the stride).
        '''
        self.filter_size = filter_size

    def image_region(self, image):
        '''
        Yields the non-overlapping windows of the image with their grid position.
        Also remembers the image on the instance once iteration starts.

        - image : numpy array whose first two axes are the spatial ones.

        Yields (window, i, j).
        '''
        self.image = image
        size = self.filter_size
        n_rows = image.shape[0] // size
        n_cols = image.shape[1] // size

        for i in range(n_rows):
            top = i * size
            for j in range(n_cols):
                left = j * size
                yield image[top : top + size, left : left + size], i, j

    def forward(self, image):
        '''
        Performs the forward pass: the maximum of every window, per channel.

        - image : numpy array of shape (h, w, num_filters).

        Returns a numpy array of shape
        (h // filter_size, w // filter_size, num_filters).
        '''
        height, width, channels = image.shape
        pooled = np.zeros((height // self.filter_size, width // self.filter_size, channels))

        for window, i, j in self.image_region(image):
            pooled[i, j] = np.max(window, axis=(0, 1))
        return pooled

    def backprop(self, dL_dout):
        '''
        Performs a backward pass of the maxpool layer.
        Returns the loss gradient for this layer's inputs.

        Every position that holds its window's maximum for a channel (ties
        included) receives that window's upstream gradient; all other
        positions stay at zero.

        - dL_dout : loss gradient with respect to this layer's output.
        '''
        size = self.filter_size
        grad_input = np.zeros(self.image.shape)

        for window, i, j in self.image_region(self.image):
            # Boolean mask of the arg-max positions, one maximum per channel.
            is_max = window == np.max(window, axis=(0, 1))
            top, left = i * size, j * size
            # `target` is a view, so masked assignment writes into grad_input.
            target = grad_input[top : top + size, left : left + size]
            target[is_max] = np.broadcast_to(dL_dout[i, j], window.shape)[is_max]

        return grad_input
        

In [5]:
class Softmax:
    '''
    Fully connected layer with Softmax activation.

    Attributes
    - weights        : numpy array of shape (input_node, softmax_node).
    - biaises        : numpy array of shape (softmax_node,).
    - orig_img_shape : shape of the last input given to forward().
    - flat_input     : flattened copy of the last input given to forward().
    - out            : pre-activation values (logits) of the last forward pass.
    '''

    def __init__(self, input_node, softmax_node):
        '''
        - input_node   : number of input values after flattening.
        - softmax_node : number of output classes.
        '''

        self.weights = np.random.randn(input_node, softmax_node)/input_node
        self.biaises = np.random.randn(softmax_node)

    @staticmethod
    def _stable_softmax(logits):
        '''
        Softmax of a 1d array of logits.

        The maximum is subtracted first: this leaves the result mathematically
        unchanged but keeps np.exp from overflowing to inf (and the ratio from
        turning into nan) when a logit is large.
        '''
        exp_shifted = np.exp(logits - np.max(logits))
        return exp_shifted / np.sum(exp_shifted)

    def forward(self, image):
        '''
        Forward pass of the image through the Softmax layer.
        Returns a 1d numpy array containing the respective probability values.

        - image : numpy array (any shape; it is flattened).
        '''

        self.orig_img_shape = image.shape
        image_flat          = image.flatten()
        self.flat_input     = image_flat
        output_val          = np.dot(image_flat,self.weights) + self.biaises
        self.out            = output_val
        return self._stable_softmax(output_val)

    def backprop(self, dL_dout, lr):
        '''
        Performs a backward pass of the softmax layer and updates the weights
        and biases in place.
        Returns the loss gradient for this layer's inputs, reshaped to the
        shape of the array that was given to forward().

        Every entry of dL_dout contributes to the result; an all-zero gradient
        yields an all-zero array of the input's shape.

        - dL_dout : loss gradient with respect to the output probabilities,
                    1d array with one entry per class.
        - lr      : learning-rate
        '''

        dL_dout = np.asarray(dL_dout, dtype=float)
        probs   = self._stable_softmax(self.out)

        # Softmax Jacobian is dp_i/dz_j = p_i * (delta_ij - p_j), hence
        # dL/dz_j = p_j * (dL/dp_j - sum_i dL/dp_i * p_i).
        dL_dz   = probs * (dL_dout - np.dot(dL_dout, probs))

        dL_dw   = np.outer(self.flat_input, dL_dz)
        dL_db   = dL_dz
        # Must use the weights from before the update below.
        dL_dinp = self.weights @ dL_dz

        self.weights -= lr * dL_dw
        self.biaises -= lr * dL_db

        return dL_dinp.reshape(self.orig_img_shape)
        
            
 

In [258]:
%%time

# Seed NumPy's global RNG so the random weight initialisation below (and the
# shuffling in the training loop, which draws from the same generator) is
# repeatable after Restart & Run All.
np.random.seed(42)

# Architecture constants. IMAGE_SIDE matches the (N, 28, 28) shapes printed above.
IMAGE_SIDE  = 28
FILTER_SIZE = 8
NUM_FILTERS = 3
POOL_SIZE   = 2
NUM_CLASSES = 10

# Valid convolution leaves 28 - 8 + 1 = 21 values per side; pooling with a
# window of 2 floors that to 10, so the softmax layer sees 10 * 10 * 3 inputs.
POOLED_SIDE = (IMAGE_SIDE - FILTER_SIZE + 1) // POOL_SIZE

# Keyword arguments on purpose: Conv_layer takes (filter_size, num_filters),
# so the positional form `Conv_layer(8, 3)` means three 8x8 filters.
conv = Conv_layer(filter_size=FILTER_SIZE, num_filters=NUM_FILTERS)
pool = Maxpool(filter_size=POOL_SIZE)
softmax = Softmax(POOLED_SIDE * POOLED_SIDE * NUM_FILTERS, NUM_CLASSES)


def cnn_forward(images, labels):
    '''
    Runs one image through conv -> pool -> softmax and scores the prediction.

    - images : 2d numpy array (divided by 255 and shifted by -0.5 before use)
    - labels : digit, used as an index into the network output

    Returns a tuple (output, cross_ent, accuracy):
    the softmax output, the cross-entropy loss for `labels`, and 1 if the
    arg-max of the output equals `labels`, otherwise 0.
    '''
    normalised = (images / 255) - 0.5
    probabilities = softmax.forward(pool.forward(conv.forward(normalised)))

    predicted = np.argmax(probabilities)
    hit = 1 if predicted == labels else 0

    return probabilities, - np.log(probabilities[labels]), hit

def train_cnn(images, labels, lr):
    '''
    Completes a full training step (forward pass, then backward pass through
    softmax, pool and conv) on the given image and label.
    Returns the cross-entropy loss and accuracy measured in the forward pass.

    - images : 2d numpy array
    - labels : digit
    - lr     : learning rate
    '''
    probabilities, loss, acc = cnn_forward(images, labels)

    # Gradient of -log(p[labels]) with respect to the output probabilities:
    # zero everywhere except at the true class.
    gradient = np.zeros(10)
    gradient[labels] = -1 / probabilities[labels]

    # Walk the layers in reverse order, handing each one the gradient
    # produced by the layer after it.
    from_softmax = softmax.backprop(gradient, lr)
    from_pool = pool.backprop(from_softmax)
    conv.backprop(from_pool, lr)

    return loss, acc

print('MNIST CNN initialized!')

# Training hyper-parameters, kept in one place so they are easy to tune.
NUM_EPOCHS    = 3
LEARNING_RATE = 0.005
REPORT_EVERY  = 10000   # number of training steps per progress report

# Train the CNN for NUM_EPOCHS epochs
for epoch in range(NUM_EPOCHS):
    print('--- Epoch %d ---' % (epoch + 1))

    # Shuffle the training data (images and labels with the same permutation)
    permutation = np.random.permutation(len(trainset_array))
    train_images = trainset_array[permutation]
    train_labels = y_train_array[permutation]

    # Train!
    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(train_images, train_labels)):
        sample_loss, acc = train_cnn(im, label, LEARNING_RATE)
        loss += sample_loss
        num_correct += acc

        # Report AFTER the step so every window averages exactly REPORT_EVERY
        # samples, including the very first window and the last sample of the
        # epoch.
        if (i + 1) % REPORT_EVERY == 0:
            print(
                '[Step %d] Past %d steps: Average Loss %.3f | Accuracy: %d%%' %
                (i + 1, REPORT_EVERY, loss / REPORT_EVERY, 100 * num_correct / REPORT_EVERY)
            )
            loss = 0
            num_correct = 0

# Test the CNN (forward passes only, no parameter updates)
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0

for im, label in zip(testset_array, y_test_array):
    _probs, sample_loss, acc = cnn_forward(im, label)
    loss += sample_loss
    num_correct += acc

num_tests = len(y_test_array)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)


MNIST CNN initialized!
--- Epoch 1 ---
[Step 10000] Past 10000 steps: Average Loss 0.508 | Accuracy: 84%
[Step 20000] Past 10000 steps: Average Loss 0.281 | Accuracy: 91%
[Step 30000] Past 10000 steps: Average Loss 0.235 | Accuracy: 93%
[Step 40000] Past 10000 steps: Average Loss 0.199 | Accuracy: 94%
[Step 50000] Past 10000 steps: Average Loss 0.192 | Accuracy: 94%
[Step 60000] Past 10000 steps: Average Loss 0.194 | Accuracy: 94%
--- Epoch 2 ---
[Step 10000] Past 10000 steps: Average Loss 0.165 | Accuracy: 95%
[Step 20000] Past 10000 steps: Average Loss 0.181 | Accuracy: 94%
[Step 30000] Past 10000 steps: Average Loss 0.177 | Accuracy: 94%
[Step 40000] Past 10000 steps: Average Loss 0.200 | Accuracy: 94%
[Step 50000] Past 10000 steps: Average Loss 0.171 | Accuracy: 95%
[Step 60000] Past 10000 steps: Average Loss 0.196 | Accuracy: 94%
--- Epoch 3 ---
[Step 10000] Past 10000 steps: Average Loss 0.159 | Accuracy: 95%
[Step 20000] Past 10000 steps: Average Loss 0.180 | Accuracy: 95%
[Step