# Convolutional Neural Network in Numpy

## Imports

### Libraries

In [None]:
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt

### Data and preprocessing



In [None]:
# Seed NumPy's global RNG so the random filter/weight initialisation further down is reproducible.
np.random.seed(42)

#Importing the dataset
# Reads 'train.csv' from the working directory; the file must contain a 'label' column.
dataset = pd.read_csv('train.csv')

#Shuffling the dataset
# frac=1 returns every row in a random order; random_state pins that order.
dataset = dataset.sample(frac=1, random_state=42)

#Making sure the dataset is balanced
# For each label value 0..9, the first 500 shuffled rows go to the training set
# and rows 500..599 go to the test set, so every class has the same number of samples.
train_data = pd.DataFrame()
test_data = pd.DataFrame()
for label in range (10):
    train_label_data = dataset[dataset['label']==label].head(500)
    test_label_data = dataset[dataset['label']==label].iloc[500:600,:]
    train_data = pd.concat([train_data, train_label_data])
    test_data = pd.concat([test_data, test_label_data])

# Integer class labels taken from the first column
# (presumably the 'label' column — verify against the CSV layout).
y_train_labels = train_data.iloc[:, 0].values
y_test_labels = test_data.iloc[:, 0].values
#One hot encoding the labels
def one_hot_encoding(df, column_name):
    """Replace one column of a DataFrame by its one-hot indicator columns.

    Args:
        df: source DataFrame; it is not modified.
        column_name: name of the categorical column to expand.

    Returns:
        A new DataFrame holding every column of ``df`` followed by one
        indicator column per distinct value of ``df[column_name]``, with
        ``column_name`` itself removed.
    """
    indicator_columns = pd.get_dummies(df[column_name])
    # Append the indicators first, then drop the original categorical column.
    widened = pd.concat([df, indicator_columns], axis=1)
    return widened.drop(column_name, axis=1)

# Swap the 'label' column of both splits for one indicator column per class;
# the indicator columns are appended after the remaining columns.
train_data = one_hot_encoding(train_data, 'label')
test_data = one_hot_encoding(test_data, 'label')

In [None]:
# Replace the shuffled row index of the training frame with a fresh 0..n-1 index (old index discarded).
train_data.reset_index(drop=True, inplace=True)

In [None]:
#From dataframes to numpy arrays
# Columns 0..783 are used as the input features and columns 784..793 as the
# one-hot targets (assumes 784 feature columns precede the 10 indicator
# columns appended by one_hot_encoding — TODO confirm against the CSV layout).
X_train = train_data.iloc[:, 0:784].values
X_test = test_data.iloc[:, 0:784].values
y_train = train_data.iloc[:, 784:794].values
y_test = test_data.iloc[:, 784:794].values

In [None]:
#Reshaping the data
# Turn every sample into a single-channel 28x28 float image, giving arrays of
# shape (n_samples, 1, 28, 28). A C-order reshape lays the 784 values out row
# by row, exactly as copying each sample into its own 28x28 slot would, so no
# per-image Python loop is needed. astype() returns a fresh float64 copy.
X_train_r = X_train.reshape(X_train.shape[0], 1, 28, 28).astype(np.float64)
X_test_r = X_test.reshape(X_test.shape[0], 1, 28, 28).astype(np.float64)
X_train = X_train_r
X_test = X_test_r

In [None]:
# Display the shape of the reshaped training array as the cell output.
X_train.shape

### Inspecting the data

In [None]:
# IPython magic: render matplotlib figures inside the notebook.
%matplotlib inline
# Show channel 0 of training sample 800 without pixel interpolation.
plt.imshow(X_train[800][0], interpolation='nearest')
plt.show()

## Convolution and generating the Feature Map

### The filters

In [None]:
# Filter bank of shape (2, 1, 5, 5) drawn from a standard normal distribution.
conv0 = np.random.randn(2,1,5,5)
# Scale the raw filters down by sqrt(1/5) to obtain the filters actually used by the network.
conv1 = conv0 * np.sqrt(1. / 5)
# Display the size of the last filter axis (5) as the cell output.
conv1.shape[3]

### Generating the feature map

In [None]:
#The stride is 1
def conv_layers(data, filters):
    """Cross-correlate a batch of images with a bank of filters (stride 1, no padding).

    Every output value is the sum of the element-wise product between a filter
    and the image window it covers, accumulated over all input channels.

    Args:
        data: array of shape (n_images, n_channels, height, width).
        filters: array of shape (n_filters, n_channels, filter_height,
            filter_width).

    Returns:
        Array of shape (n_images, n_filters, height - filter_height + 1,
        width - filter_width + 1).

    Raises:
        ValueError: if the filters are larger than the images, or if the
            channel counts of ``data`` and ``filters`` differ.
    """
    n_images, _, height, width = data.shape
    n_filters, _, filter_height, filter_width = filters.shape
    out_height = height - filter_height + 1
    out_width = width - filter_width + 1
    feature_map = np.zeros((n_images, n_filters, out_height, out_width))
    # Loop only over output positions; every image and filter is handled at
    # once by contracting the (channel, row, col) axes of window and filters.
    for row in range(out_height):
        for col in range(out_width):
            window = data[:, :, row:row + filter_height, col:col + filter_width]
            feature_map[:, :, row, col] = np.tensordot(
                window, filters, axes=([1, 2, 3], [1, 2, 3])
            )
    return feature_map

### Inspecting the feature map

In [None]:
# Convolve the whole training set with the initial filters and show the
# response of filter 0 on the first training image.
feature_map = conv_layers(X_train, conv1)
plt.imshow(feature_map[0][0], interpolation='nearest')
plt.show()

In [None]:
# Display the feature-map shape as the cell output.
feature_map.shape

## MaxPool layer

### MaxPool operation

In [None]:
def MaxPool(feature_map, stride=2):
    """Apply 2x2 max pooling to every channel of every sample.

    Args:
        feature_map: array of shape (n_samples, n_channels, height, width).
        stride: step between consecutive pooling windows, in both directions.

    Returns:
        Array of shape (n_samples, n_channels, out_height, out_width) where
        ``out = (size - 2) // stride + 1`` per spatial axis. Windows that
        would run past the border are skipped.
    """
    filter_dim = 2
    height, width = feature_map.shape[2], feature_map.shape[3]
    out_height = max(0, (height - filter_dim) // stride + 1)
    out_width = max(0, (width - filter_dim) // stride + 1)
    pooled_features = np.zeros(
        (feature_map.shape[0], feature_map.shape[1], out_height, out_width)
    )
    # Each output cell is computed for all samples and channels at once, so no
    # state is carried over from one sample to the next.
    for out_row in range(out_height):
        top = out_row * stride
        for out_col in range(out_width):
            left = out_col * stride
            window = feature_map[:, :, top:top + filter_dim, left:left + filter_dim]
            pooled_features[:, :, out_row, out_col] = window.max(axis=(2, 3))
    return pooled_features

### Keeping the pooled pixels' indices in a vector

In [None]:
def maxpool_indices(input_image,stride=2,filter_height=2, filter_width=2):
    """Record where each pooling window's maximum sits in the input.

    Args:
        input_image: 4-D array indexed as (sample, channel, row, column).
        stride: step between consecutive windows along both spatial axes.
        filter_height: number of rows per pooling window.
        filter_width: number of columns per pooling window.

    Returns:
        A list with one 8-element entry per window:
        ``[0, channel, max_row, max_col, 0, channel, pooled_row, pooled_col]``.
        The first four values locate the maximum in the input, the last four
        locate the matching cell in the pooled output. The sample index is
        always written as 0.
    """
    records = []
    n_rows = input_image.shape[2]
    n_cols = input_image.shape[3]

    for channel in range(input_image.shape[1]):
        plane = input_image[:, channel, :, :]
        pooled_row = -1
        for top in range(0, n_rows, stride):
            # Skip a trailing band that does not fit a full stride.
            if top + stride > n_rows:
                continue
            band = plane[:, top:top + filter_height, :]
            pooled_row += 1
            pooled_col = -1
            for left in range(0, n_cols, stride):
                if left + stride > n_cols:
                    continue
                pooled_col += 1
                window = band[:, :, left:left + filter_width]
                # argmax is taken over the flattened window; the leading
                # (sample) coordinate is not used.
                _, row_offset, col_offset = np.unravel_index(window.argmax(), window.shape)
                records.append([
                    0, channel, int(row_offset) + top, int(col_offset) + left,
                    0, channel, pooled_row, pooled_col,
                ])
    return records

def maxpool_indices_multiple(input_image,stride=2,filter_height=2, filter_width=2):
    """Collect the max-pooling argmax positions for every sample of a batch.

    Args:
        input_image: 4-D array indexed as (sample, channel, row, column).
        stride: step between pooling windows, forwarded to ``maxpool_indices``.
        filter_height: pooling window height, forwarded to ``maxpool_indices``.
        filter_width: pooling window width, forwarded to ``maxpool_indices``.

    Returns:
        A list with one element per sample; each element is the list that
        ``maxpool_indices`` returns for that sample alone.
    """
    # Slice with i:i+1 so each sample keeps its leading batch axis.
    return [
        maxpool_indices(
            input_image[i:i + 1, :, :, :],
            stride=stride,
            filter_height=filter_height,
            filter_width=filter_width,
        )
        for i in range(input_image.shape[0])
    ]

### Inspecting the result of the pooled features

In [None]:
# Pool the feature maps computed above and display the resulting shape as the cell output.
pooled_features1 = MaxPool(feature_map)
pooled_features1.shape

In [None]:
# Show the pooled response of filter 0 on the first training image.
plt.imshow(pooled_features1[0][0], interpolation='nearest')
plt.show()

## Useful functions:

### ReLu and ReLu Derivative

In [None]:
def ReLu(x):
    """Rectified linear unit: keep positive entries of ``x`` and zero out the rest."""
    positive_mask = x > 0
    return positive_mask * x

def reluDerivative(x):
    """Return the ReLU derivative of ``x``: 1 where ``x > 0`` and 0 where ``x <= 0``.

    The input is left untouched; the result is a copy with the same shape and dtype.
    """
    grad = np.copy(x)
    # Both masks are taken before writing, so the two assignments cannot interfere.
    non_positive = grad <= 0
    positive = grad > 0
    grad[non_positive] = 0
    grad[positive] = 1
    return grad

In [None]:
# Apply the ReLU activation to the feature maps computed earlier, overwriting the notebook-level variable.
feature_map = ReLu(feature_map)

### Im2col function: Transforming the image patches into columns

In [None]:
def im2col(X,conv1, stride, pad):
    """Unroll every filter-sized patch of a batch of images into a column.

    Args:
        X: array of shape (n_images, n_channels, height, width).
        conv1: filter bank; only its last two axes (filter height and width)
            are read.
        stride: step between consecutive patches along both spatial axes.
        pad: number of zero rows/columns added on every side of each image.

    Returns:
        2-D array of shape (n_channels * filter_height * filter_width,
        n_images * out_height * out_width). Columns are ordered image by
        image, then by patch row, then by patch column; within a column the
        values are ordered channel, row, column.
    """
    # Padding
    X_padded = np.pad(X, ((0,0), (0,0), (pad, pad), (pad, pad)), mode='constant')
    n_images, n_channels, padded_height, padded_width = X_padded.shape
    filter_height, filter_width = conv1.shape[2], conv1.shape[3]
    # The padded array already contains the border, so 2*pad must not be added again.
    new_height = max(0, (padded_height - filter_height) // stride + 1)
    new_width = max(0, (padded_width - filter_width) // stride + 1)
    im2col_vector = np.zeros(
        (n_channels * filter_height * filter_width, new_height * new_width * n_images)
    )
    column = 0
    for position in range(n_images):
        image = X_padded[position]
        for out_row in range(new_height):
            top = out_row * stride
            for out_col in range(new_width):
                left = out_col * stride
                patch = image[:, top:top + filter_height, left:left + filter_width]
                im2col_vector[:, column] = patch.ravel()
                column += 1
    return im2col_vector

### Softmax:

In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array of logits.

    Args:
        x: array of shape (n_samples, n_classes).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged but keeps
    # np.exp from overflowing on large logits.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)
def Softmax(x):
    """Numerically stable row-wise softmax of a 2-D array of logits.

    Args:
        x: array of shape (n_samples, n_classes).

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Shift every row by its own maximum: a single global shift lets rows far
    # below the global maximum underflow to all zeros and divide 0 by 0.
    f = np.exp(x - np.max(x, axis=1, keepdims=True))  # shift values
    return f / f.sum(axis=1, keepdims = True)

### Reshaping the error layer for the conv:

In [None]:
def error_layer_reshape(error_layer):
    """Flatten a 4-D error tensor into one row per channel.

    Args:
        error_layer: array of shape (n_samples, n_channels, height, width).

    Returns:
        New float64 array of shape (n_channels, n_samples * height * width);
        row ``c`` holds channel ``c`` of every sample, ordered sample by
        sample and row-major within each sample.
    """
    n_samples, n_channels, height, width = error_layer.shape
    # Bring the channel axis to the front, then flatten everything else.
    channel_major = np.transpose(error_layer, (1, 0, 2, 3))
    flattened = channel_major.reshape(n_channels, n_samples * height * width)
    return np.array(flattened, dtype=np.float64)

## Initialising the network:

### Network Parameters

In [None]:
# Size of one flattened pooled sample: channels * height * width of the MaxPool output.
input_dim = pooled_features1.shape[1]*pooled_features1.shape[2]*pooled_features1.shape[3]
# Widths of the two hidden fully connected layers.
hidden_dim1 = 128
hidden_dim2 = 128
# Number of network outputs (one per one-hot label column).
output_dim = 10
# Regularisation strength read by cross_entropy.
lambda_reg = 0.01
# Step size applied to every parameter update in backpropagation.
learning_rate = 0.001

### Init function

In [None]:
def network_innit(input_dim, hidden_dim1, hidden_dim2, output_dim):
    """Create the parameters of the three fully connected layers.

    Weights are drawn from a standard normal distribution and scaled by
    sqrt(1/5); biases start at zero.

    Args:
        input_dim: number of inputs to the first layer.
        hidden_dim1: width of the first hidden layer.
        hidden_dim2: width of the second hidden layer.
        output_dim: number of network outputs.

    Returns:
        Dict with keys 'W1', 'W2', 'W3' (shape (fan_in, fan_out)) and
        'b1', 'b2', 'b3' (shape (1, fan_out)), inserted in that order.
    """
    scale = np.sqrt(1. / 5)
    layer_shapes = (
        (input_dim, hidden_dim1),
        (hidden_dim1, hidden_dim2),
        (hidden_dim2, output_dim),
    )
    params = {}
    # Draw all weight matrices first (W1, W2, W3) so the random stream is
    # consumed in a fixed order, then add the zero biases.
    for layer, (fan_in, fan_out) in enumerate(layer_shapes, start=1):
        params[f'W{layer}'] = np.random.randn(fan_in, fan_out) * scale
    for layer, (_, fan_out) in enumerate(layer_shapes, start=1):
        params[f'b{layer}'] = np.zeros((1, fan_out))
    return params

## Forward Propagation:

### Model Dict

In [None]:
# Build a freshly initialised parameter dictionary from the notebook-level layer sizes.
model_dict = network_innit(input_dim, hidden_dim1, hidden_dim2, output_dim)
def get_param (model_dict):
    """Unpack the fully connected parameters from a model dictionary.

    Args:
        model_dict: mapping containing the keys 'W1', 'W2', 'W3', 'b1',
            'b2' and 'b3'.

    Returns:
        Tuple ``(W1, W2, W3, b1, b2, b3)``.

    Raises:
        KeyError: if any of the six keys is missing.
    """
    ordered_keys = ('W1', 'W2', 'W3', 'b1', 'b2', 'b3')
    return tuple(model_dict[key] for key in ordered_keys)

### Forward Function

In [None]:
def forward(input_data, model_dict, filters):
    """Run one forward pass: convolution, ReLU, max pooling, then three dense layers.

    Args:
        input_data: batch of input images passed to ``conv_layers``.
        model_dict: parameter dictionary read through ``get_param``.
        filters: convolution filter bank passed to ``conv_layers``.

    Returns:
        Tuple ``(predict, z1, a1, z2, a2, z3, X, pooled_features,
        feature_map, max_indices)``: the softmax output, the pre-activation
        and activation of both hidden layers, the output logits, the
        flattened pooled input of the dense part, the pooled features, the
        pre-activation convolution output, and the pooling argmax positions.
    """
    W1, W2, W3, b1, b2, b3 = get_param(model_dict)

    # Convolutional part.
    conv_out = conv_layers(input_data, filters)
    conv_act = ReLu(conv_out)
    pooled = MaxPool(conv_act)
    argmax_positions = maxpool_indices_multiple(
        conv_act, stride=2, filter_height=2, filter_width=2
    )

    # Dense part operates on one flat vector per sample.
    flat = pooled.reshape(pooled.shape[0], -1)
    hidden1_pre = flat.dot(W1) + b1
    hidden1 = ReLu(hidden1_pre)
    hidden2_pre = hidden1.dot(W2) + b2
    hidden2 = ReLu(hidden2_pre)
    logits = hidden2.dot(W3) + b3
    probabilities = Softmax(logits)

    return (
        probabilities, hidden1_pre, hidden1, hidden2_pre, hidden2, logits,
        flat, pooled, conv_out, argmax_positions,
    )

### Getting the final predictions

In [None]:
def get_predictions(predict):
    """Return, for every row of ``predict``, the column index of its largest value."""
    scores = np.asarray(predict)
    return scores.argmax(axis=1)

## Loss: Cross Entropy

In [None]:
def cross_entropy (predictions, labels, model_dict, reg_strength=None):
    """Cross-entropy loss with an L2 weight penalty.

    Args:
        predictions: array of shape (n_samples, n_classes) with the predicted
            class probabilities.
        labels: integer array of shape (n_samples,) with the true class index
            of every sample.
        model_dict: parameter dictionary; the weights under 'W1', 'W2' and
            'W3' enter the penalty term.
        reg_strength: L2 coefficient. When omitted, the module-level
            ``lambda_reg`` is used.

    Returns:
        Tuple ``(loss, log_prob)``: ``loss`` is the summed negative
        log-likelihood plus ``reg_strength / 2 * sum(W ** 2)``, divided by
        the number of samples, as a Python float; ``log_prob`` holds the
        per-sample negative log-likelihoods.
    """
    if reg_strength is None:
        reg_strength = lambda_reg
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    n_samples = labels.shape[0]

    # Probability the model assigned to the correct class of each sample.
    true_class_prob = predictions[np.arange(n_samples), labels]
    # The small constant keeps log() finite when a probability is exactly 0.
    log_prob = - np.log(true_class_prob+0.0000000001)

    # L2 penalty: squared weights, so the term is non-negative and penalises
    # large magnitudes of either sign.
    squared_weight_sum = sum(
        np.sum(np.square(model_dict[key])) for key in ('W1', 'W2', 'W3')
    )
    reg_loss = (reg_strength / 2) * squared_weight_sum

    loss = np.sum(log_prob) + reg_loss
    return float(loss/n_samples), log_prob

## Back Propagation

In [None]:
def backpropagation(input_data, model_dict, filters, labels, labels_encoded, epochs):
    """Train the network with full-batch gradient descent.

    Each epoch runs a forward pass over ``input_data``, back-propagates the
    softmax/cross-entropy error through the dense layers, the max-pooling
    layer, the ReLU and the convolution, and updates all parameters with the
    module-level ``learning_rate``. Gradients are summed over the batch.

    Args:
        input_data: batch of input images passed to ``forward``.
        model_dict: parameter dictionary with 'W1'..'W3' and 'b1'..'b3';
            it is updated in place.
        filters: initial convolution filter bank; the caller's array is not
            modified.
        labels: integer class index per sample, used for loss and accuracy.
        labels_encoded: one-hot encoded labels, shape (n_samples, n_classes).
        epochs: number of passes over the data.

    Returns:
        ``model_dict`` holding the trained dense parameters, plus the trained
        convolution filters under the key 'filters'.
    """
    for epoch in range (epochs):
        W1, W2, W3, b1, b2, b3 = get_param(model_dict)
        (predictions, z1, a1, z2, a2, z3, X_f, X_maxpool,
         feature_map, max_indices) = forward(input_data, model_dict, filters)
        predictions_f = get_predictions(predictions)
        # cross_entropy returns (scalar loss, per-sample terms); only the scalar is reported.
        loss, _ = cross_entropy (predictions, labels , model_dict)

        # Output layer: gradient of softmax + cross-entropy w.r.t. the logits.
        delta3 = predictions - labels_encoded
        dW3 = a2.T@delta3
        # Bias gradients sum over the sample axis so they keep the (1, width)
        # shape of the biases.
        db3 = np.sum(delta3, axis=0, keepdims=True)

        # Hidden layers: reuse the pre-activations from the forward pass.
        delta2 = np.multiply(delta3@W3.T, reluDerivative(z2))
        dW2 = a1.T@delta2
        db2 = np.sum(delta2, axis=0, keepdims=True)

        delta1 = np.multiply(delta2@W2.T, reluDerivative(z1))
        dW1 = X_f.T@delta1
        db1 = np.sum(delta1, axis=0, keepdims=True)

        # Error w.r.t. the flattened pooled features, reshaped back to the pooled layout.
        delta0 = delta1@W1.T
        delta_maxpool = delta0.reshape(X_maxpool.shape)

        # Un-pool: route each pooled error to the position that produced the
        # maximum; all other positions receive zero.
        delta_conv = np.zeros(feature_map.shape)
        for image, indices in enumerate(max_indices):
            for p in indices:
                delta_conv[image, p[1], p[2], p[3]] = delta_maxpool[image, p[5], p[6], p[7]]

        # Back through the ReLU that followed the convolution.
        delta_activation = np.multiply(delta_conv, reluDerivative(feature_map))

        # Filter gradient: correlate the input patches with the error maps.
        Im2Col = im2col(input_data, filters, stride=1, pad =0)
        delta_activation_reshape = error_layer_reshape(delta_activation)
        conv1_delta = (delta_activation_reshape@Im2Col.T).reshape(filters.shape)

        ## Update Weights
        filters = filters - learning_rate * conv1_delta
        model_dict['W1'] = W1 - learning_rate * dW1
        model_dict['W2'] = W2 - learning_rate * dW2
        model_dict['W3'] = W3 - learning_rate * dW3
        model_dict['b1'] = b1 - learning_rate * db1
        model_dict['b2'] = b2 - learning_rate * db2
        model_dict['b3'] = b3 - learning_rate * db3
        # Keep the trained filters with the model; otherwise they would be
        # lost when the function returns.
        model_dict['filters'] = filters

        if epoch%2==0:
            # Metrics describe the parameters used in this epoch's forward
            # pass, i.e. before the update above.
            print (f'---------------- Epoch: {epoch} ----------------')
            print (f'Train Loss: {loss}')
            accuracy = np.mean(predictions_f == labels)
            print(f'Train Accuracy: {round(accuracy*100,2)}%')

    return model_dict    

## Training the model

In [None]:
# Display the integer training labels as the cell output.
y_train_labels

In [None]:
# Start from freshly initialised dense parameters and train for 40 epochs on the
# training images, passing both the integer labels and their one-hot encoding.
model_dict = network_innit(input_dim, hidden_dim1, hidden_dim2, output_dim)
model = backpropagation(X_train, model_dict, conv1, y_train_labels, y_train, 40)