In [3]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tqdm import tqdm

In [4]:
# Read the labelled training set and randomise its row order in a single step.
df = shuffle(pd.read_csv("/content/train.csv"))

# Preview the first rows of the shuffled frame.
df.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
24487,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21493,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
28597,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
21750,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1123,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Targets come from the 'label' column; every column after the first is
# folded row-by-row into a 28x28 image.
labels = df['label'].to_numpy()
features = df.iloc[:, 1:].to_numpy().reshape(len(df), 28, 28)

# Keep 33% of the samples aside for evaluation; the fixed seed makes the
# split itself repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.33,
    random_state=42,
)

convolution 3x3

In [6]:
class Conv3x3:
    '''
        Convolution layer built from a bank of 3x3 filters
        (valid padding, stride 1, single-channel input).
    '''

    def __init__(self, num_filters):
        '''
        - num_filters is the number of 3x3 kernels in the layer.
        '''
        self.num_filters = num_filters

        # One kernel per filter, stored as (num_filters, 3, 3).
        # Standard-normal samples are divided by 9 to keep the initial values small.
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def iterate_regions(self, image):
        '''
        Yields (region, row, col) for every 3x3 window of the image,
        scanning row by row with valid padding.
        - image is a 2d numpy array.
        '''
        rows, cols = image.shape

        for top in range(rows - 2):
            for left in range(cols - 2):
                yield image[top:top + 3, left:left + 3], top, left

    def forward(self, input):
        '''
        Runs the forward pass and caches the input for backprop.
        Returns a 3d numpy array with dimensions (h - 2, w - 2, num_filters).
        - input is a 2d numpy array with dimensions (h, w)
        '''
        self.last_input = input

        rows, cols = input.shape
        output = np.zeros((rows - 2, cols - 2, self.num_filters))

        for patch, r, c in self.iterate_regions(input):
            # Broadcasting pairs the (3, 3) patch with each filter; summing the
            # last two axes leaves one response per filter.
            output[r, c] = (patch * self.filters).sum(axis=(1, 2))

        return output

    def backprop(self, d_L_d_out, learn_rate):
        '''
        Runs the backward pass and updates the filters in place.
        - d_L_d_out is the loss gradient for this layer's outputs,
          with dimensions (h - 2, w - 2, num_filters).
        - learn_rate is a float.
        '''
        grad_filters = np.zeros(self.filters.shape)

        for patch, r, c in self.iterate_regions(self.last_input):
            # Each filter's gradient grows by (its output gradient at this
            # position) * (the patch that produced that output).
            grad_filters += d_L_d_out[r, c, :, np.newaxis, np.newaxis] * patch

        # Gradient-descent step on the filters.
        self.filters -= learn_rate * grad_filters

        # Nothing is returned: this layer is used as the first layer of the
        # network, so no gradient with respect to its input is needed.
        return None

maxpool2x2

In [7]:
class MaxPool2:
    '''
      Max pooling layer with a fixed 2x2 window and a stride of 2.
    '''

    def iterate_regions(self, image):
        '''
        Yields (region, row, col) for each non-overlapping 2x2 window.
        A trailing odd row or column is not visited.
        - image is a 3d numpy array with dimensions (h, w, num_filters)
        '''
        rows, cols, _ = image.shape

        for r in range(rows // 2):
            top = r * 2
            for c in range(cols // 2):
                left = c * 2
                yield image[top:top + 2, left:left + 2], r, c

    def forward(self, input):
        '''
        Runs the forward pass and caches the input for backprop.
        Returns a 3d numpy array with dimensions (h // 2, w // 2, num_filters).
        - input is a 3d numpy array with dimensions (h, w, num_filters)
        '''
        self.last_input = input

        rows, cols, depth = input.shape
        output = np.zeros((rows // 2, cols // 2, depth))

        for window, r, c in self.iterate_regions(input):
            # Largest value of the window, taken separately for each filter.
            output[r, c] = window.max(axis=(0, 1))

        return output

    def backprop(self, d_L_d_out):
        '''
        Runs the backward pass.
        Returns the loss gradient for this layer's inputs, same shape as the
        cached input.
        - d_L_d_out is the loss gradient for this layer's outputs.
        '''
        d_L_d_input = np.zeros(self.last_input.shape)

        for window, r, c in self.iterate_regions(self.last_input):
            peak = window.max(axis=(0, 1))

            # Only positions that held the window maximum receive the output
            # gradient (every tied position receives it); the rest stay 0.
            is_peak = window == peak
            d_L_d_input[r * 2:r * 2 + 2, c * 2:c * 2 + 2] = np.where(
                is_peak, d_L_d_out[r, c], 0
            )

        return d_L_d_input

softmax

In [9]:
class Softmax:
    '''
        A standard fully-connected layer with softmax activation.
    '''

    def __init__(self, input_len, nodes):
        '''
        - input_len is the number of values in the flattened input.
        - nodes is the number of outputs (one probability per node).
        '''
        # We divide by input_len to reduce the variance of our initial values
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        '''
        Performs a forward pass of the softmax layer using the given input.
        Returns a 1d numpy array containing the respective probability values.
        - input can be any numpy array whose flattened length is input_len.
        '''
        # Cached so backprop can hand back a gradient in the caller's shape.
        self.last_input_shape = input.shape

        input = input.flatten()
        self.last_input = input

        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals

        # Shifting by the maximum leaves the softmax value unchanged but keeps
        # np.exp from overflowing to inf (which would yield nan probabilities).
        exp = np.exp(totals - np.max(totals))
        return exp / np.sum(exp, axis=0)

    def backprop(self, d_L_d_out, learn_rate):
        '''
        Performs a backward pass of the softmax layer and updates the
        weights and biases in place.
        Returns the loss gradient for this layer's inputs, reshaped to the
        shape of the input given to the last forward() call.
        - d_L_d_out is the loss gradient for this layer's outputs (1d, one
          entry per node). Any number of entries may be nonzero; an all-zero
          gradient yields an all-zero result and leaves the parameters as is.
        - learn_rate is a float.
        '''
        d_L_d_out = np.asarray(d_L_d_out, dtype=float)

        # Recompute the softmax probabilities with the same overflow-safe
        # shift that forward() uses.
        t_exp = np.exp(self.last_totals - np.max(self.last_totals))
        probs = t_exp / np.sum(t_exp)

        # Gradient of the loss against the totals. With p = softmax(t):
        #   d out[i] / d t[k] = p[i] * ((i == k) - p[k])
        # so summing over i gives
        #   dL/dt[k] = p[k] * (g[k] - sum_i g[i] * p[i]).
        d_L_d_t = probs * (d_L_d_out - np.dot(d_L_d_out, probs))

        # Gradients of the loss against weights / biases / input.
        # totals = input @ weights + biases, hence:
        d_L_d_w = np.outer(self.last_input, d_L_d_t)
        d_L_d_b = d_L_d_t
        # Uses the weights from the forward pass, so it must be computed
        # before the update below.
        d_L_d_inputs = self.weights @ d_L_d_t

        # Update weights / biases
        self.weights -= learn_rate * d_L_d_w
        self.biases -= learn_rate * d_L_d_b

        return d_L_d_inputs.reshape(self.last_input_shape)

training

In [8]:
def forward(image, label):
    '''
    Runs one image through conv -> pool -> softmax and scores the result.
    Returns (out, loss, acc): the softmax output, the cross-entropy loss for
    the given label, and 1 if the most probable class equals label, else 0.
    - image is a 2d numpy array
    - label is a digit
    '''
    # Pixel values are assumed to lie in [0, 255]; rescale them to
    # [-0.5, 0.5] before they reach the first layer.
    normalized = (image / 255) - 0.5
    out = softmax.forward(pool.forward(conv.forward(normalized)))

    # Cross-entropy is the negative natural log of the probability assigned
    # to the true class.
    loss = -np.log(out[label])
    acc = int(np.argmax(out) == label)

    return out, loss, acc

def train(im, label, lr=.005):
    '''
    Completes a full training step on the given image and label.
    Returns the cross-entropy loss and accuracy.
    - im is a 2d numpy array
    - label is a digit
    - lr is the learning rate
    '''
    # Forward
    out, loss, acc = forward(im, label)

    # Calculate initial gradient: for loss = -log(out[label]) only the entry
    # of the true class is nonzero. The gradient takes its length from the
    # network output rather than assuming a fixed number of classes.
    gradient = np.zeros_like(out)
    gradient[label] = -1 / out[label]

    # Backprop through the layers in reverse order; the conv layer is first
    # in the network and returns no input gradient.
    gradient = softmax.backprop(gradient, lr)
    gradient = pool.backprop(gradient)
    conv.backprop(gradient, lr)

    return loss, acc

In [10]:
# Architecture constants. The softmax input length is derived from them so
# that changing the image size or the filter count cannot leave the layers
# with mismatched shapes.
IMAGE_SIZE = 28                     # images are IMAGE_SIZE x IMAGE_SIZE
NUM_FILTERS = 8                     # 3x3 filters in the conv layer
NUM_CLASSES = 10                    # one output node per digit
CONV_OUT_SIZE = IMAGE_SIZE - 2      # valid 3x3 convolution trims 2 pixels
POOL_OUT_SIZE = CONV_OUT_SIZE // 2  # 2x2 pooling halves each side

# 28x28x1 -> 26x26x8
conv = Conv3x3(NUM_FILTERS)

# 26x26x8 -> 13x13x8
pool = MaxPool2()

# 13x13x8 -> 10
softmax = Softmax(POOL_OUT_SIZE * POOL_OUT_SIZE * NUM_FILTERS, NUM_CLASSES)

In [11]:
# Number of training steps summarised by each progress line.
REPORT_EVERY = 2000

for epoch in range(1):
    print('--- Epoch %d ---' % (epoch + 1))

    # Shuffle images and labels together so each epoch visits the samples in
    # a different order.
    X_train, y_train = shuffle(X_train, y_train)

    loss = 0
    num_correct = 0
    for i, (im, label) in enumerate(zip(X_train, y_train)):
        step_loss, acc = train(im, label)
        loss += step_loss
        num_correct += acc

        # Report only after the current step has been counted, so every
        # window averages exactly REPORT_EVERY samples and "[Step N]" covers
        # steps N - REPORT_EVERY + 1 .. N.
        if (i + 1) % REPORT_EVERY == 0:
            print('[Step {:5d}] Past {:d} steps: Average Loss {:.3f} | Accuracy: {:2.2f}%'
                  .format(i + 1, REPORT_EVERY, loss / REPORT_EVERY,
                          100 * num_correct / REPORT_EVERY))

            loss = 0
            num_correct = 0

--- Epoch 1 ---
[Step  2000] Past 2000 steps: Average Loss 0.853 | Accuracy: 73.50%
[Step  4000] Past 2000 steps: Average Loss 0.456 | Accuracy: 86.45%
[Step  6000] Past 2000 steps: Average Loss 0.381 | Accuracy: 88.70%
[Step  8000] Past 2000 steps: Average Loss 0.346 | Accuracy: 89.60%
[Step 10000] Past 2000 steps: Average Loss 0.333 | Accuracy: 89.55%
[Step 12000] Past 2000 steps: Average Loss 0.313 | Accuracy: 90.75%
[Step 14000] Past 2000 steps: Average Loss 0.309 | Accuracy: 91.00%
[Step 16000] Past 2000 steps: Average Loss 0.273 | Accuracy: 91.70%
[Step 18000] Past 2000 steps: Average Loss 0.309 | Accuracy: 90.15%
[Step 20000] Past 2000 steps: Average Loss 0.276 | Accuracy: 92.15%
[Step 22000] Past 2000 steps: Average Loss 0.283 | Accuracy: 91.30%
[Step 24000] Past 2000 steps: Average Loss 0.245 | Accuracy: 92.65%
[Step 26000] Past 2000 steps: Average Loss 0.220 | Accuracy: 93.55%
[Step 28000] Past 2000 steps: Average Loss 0.237 | Accuracy: 93.45%


In [12]:
print('--- Testing the CNN ---')

# Running totals over the held-out split.
num_tests = len(X_test)
loss, num_correct = 0, 0

for im, label in zip(X_test, y_test):
    # Forward pass only: backprop is never called during evaluation.
    _, l, acc = forward(im, label)
    loss, num_correct = loss + l, num_correct + acc

# Mean cross-entropy and fraction of correct predictions.
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)

--- Testing the CNN ---
Test Loss: 0.22083455934882432
Test Accuracy: 0.9351370851370852


In [20]:
def predict(x):
    '''
        Predicts the class probabilities for a single image.
        - x is a 2d numpy array holding one image
        Returns the softmax output of the network for x.
    '''
    # Run the network on the argument itself, not on whatever `im` / `label`
    # happen to be left over in the notebook's global namespace.
    # forward() also needs a label to compute loss and accuracy; both values
    # are discarded here, so a placeholder label of 0 is passed.
    out, _, _ = forward(x, 0)
    return out

In [14]:
# Read the submission images; every column of each row is folded into a
# 28x28 array (no column is dropped here).
df_test = pd.read_csv("/content/test.csv")
subm_arr = df_test.to_numpy().reshape(len(df_test), 28, 28)

In [15]:
def subm_predict(subm_arr):
    '''
        Predicts a label for every image in the submission set.
        Returns a list of [image_id, predicted_label] pairs, where image ids
        start at 1 and the label is the index of the largest softmax output.
    '''
    return [
        [idx + 1, np.argmax(predict(subm_arr[idx]))]
        for idx in tqdm(range(len(subm_arr)))
    ]

# predict submission data
result = subm_predict(subm_arr)

100%|██████████| 28000/28000 [05:29<00:00, 84.88it/s]


In [16]:
# Two-column table (image id, predicted digit) written without the index.
df_final = pd.DataFrame(result, columns=["ImageId", "Label"])
df_final.to_csv('submission.csv', index=False)