In [146]:
import numpy as np
import math
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm


### Linear Layer

In [294]:
class Linear:
    """Fully connected layer computing ``input @ weights + bias``.

    Weights are drawn from a normal distribution (mean 0, std 0.1); the bias
    starts at zero with one entry per output unit.  Every ``backward*`` method
    performs one in-place gradient-descent step and returns the gradient with
    respect to the layer input.
    """

    def __init__(self, input_size, output_size):
        self.weights = np.random.normal(0, 0.1, (input_size, output_size))
        # One bias per output unit; a single shared scalar cannot learn
        # per-unit offsets.
        self.bias = np.zeros(output_size)
        self.in_size = input_size
        self.out_size = output_size

    def forward(self, input_x):
        """Return ``input_x @ weights + bias`` and cache the input for backward.

        ``input_x`` must be shaped (batch, input_size).
        """
        self.input = input_x
        return np.dot(input_x, self.weights) + self.bias

    def _sgd_step(self, grad_loss, lr, weight_penalty):
        """Apply one update; ``weight_penalty`` is added to the batch-averaged weight gradient."""
        batch_size = self.input.shape[0]
        # The input gradient has to use the weights that produced the forward
        # output, so it is computed before the parameters are modified.
        grad_input = np.dot(grad_loss, self.weights.T)
        grad_w = np.dot(self.input.T, grad_loss) / batch_size + weight_penalty
        grad_b = np.sum(grad_loss, axis=0) / batch_size
        self.weights -= lr * grad_w
        self.bias = self.bias - lr * grad_b
        return grad_input

    def backward(self, grad_loss, lr):
        """Plain gradient-descent step.

        grad_loss: gradient of the loss w.r.t. this layer's output, (batch, output_size).
        lr: learning rate.
        Returns the gradient w.r.t. the layer input, (batch, input_size).
        """
        return self._sgd_step(grad_loss, lr, 0.0)

    def backwardL2reg(self, grad_loss, lr, reg=1e-3):
        """Gradient-descent step with an L2 penalty ``(reg / 2) * sum(weights ** 2)``.

        The penalty contributes ``reg * weights`` to the weight gradient; the
        bias is not regularised.
        """
        return self._sgd_step(grad_loss, lr, reg * self.weights)

    def backwardL1reg(self, grad_loss, lr, reg=1e-3):
        """Gradient-descent step with an L1 penalty ``reg * sum(abs(weights))``.

        The penalty contributes ``reg * sign(weights)`` to the weight gradient;
        the bias is not regularised.
        """
        return self._sgd_step(grad_loss, lr, reg * np.sign(self.weights))

### Convolutional Layer

In [295]:
class Conv2D:
    """2-D convolution (cross-correlation) layer over (batch, channels, height, width) inputs.

    Parameters
    ----------
    in_channels : number of input channels.
    filter_size : side length of the square kernel (odd or even).
    filter_num  : number of filters, i.e. output channels.
    padding     : zero padding added to each spatial border.
    stride      : step between neighbouring windows.
    bias        : when False no bias is added in forward or updated in backward.
    conv_layer  : when equal to 1 the input gradient is not computed and
                  ``backward`` returns zeros (presumably marks the first layer
                  of a network, whose input gradient is unused — confirm).
    """

    def __init__(self, in_channels, filter_size, filter_num, padding=0, stride=1, bias=True, conv_layer=1):
        sigma = 0.1
        self.filter_size = filter_size
        self.filter = filter_num
        self.padding = padding
        self.stride = stride
        self.in_channels = in_channels
        self.weights = np.random.normal(0, sigma, size=(filter_num, in_channels, filter_size, filter_size))
        self.bias = np.zeros(self.filter)
        self.use_bias = bias
        self.conv_layer = conv_layer

    def forward(self, input):
        """Convolve ``input`` (batch, in_channels, height, width) with every filter.

        Returns an array shaped (batch, filter_num, out_height, out_width) where
        ``out = (size - filter_size + 2 * padding) // stride + 1``.
        """
        pad = self.padding
        # Pad in floating point: an integer buffer would truncate inputs that
        # were scaled into [0, 1] down to zero.
        padded_input = np.pad(
            np.asarray(input, dtype="float64"),
            ((0, 0), (0, 0), (pad, pad), (pad, pad)),
            mode="constant",
        )
        # Saved for the backward pass.
        self.input = padded_input

        batch_size, _, padded_height, padded_width = padded_input.shape
        k, stride = self.filter_size, self.stride
        self.out_height = (padded_height - k) // stride + 1
        self.out_width = (padded_width - k) // stride + 1
        self.out_channel = self.filter

        output = np.zeros((batch_size, self.out_channel, self.out_height, self.out_width), dtype="float64")
        for i in range(self.out_height):
            top = i * stride
            for j in range(self.out_width):
                left = j * stride
                window = padded_input[:, :, top:top + k, left:left + k]
                # (batch, C, k, k) x (F, C, k, k) -> (batch, F)
                output[:, :, i, j] = np.tensordot(window, self.weights, axes=([1, 2, 3], [1, 2, 3]))
        if self.use_bias:
            output += self.bias[None, :, None, None]
        return output

    def _strip_padding(self, padded):
        """Drop the zero border added in forward so the result matches the original input size."""
        pad = self.padding
        if pad == 0:
            return padded
        return padded[:, :, pad:-pad, pad:-pad]

    def backward(self, grad_out, learning_rate):
        """Update weights/bias from ``grad_out`` and return the gradient w.r.t. the input.

        grad_out: (batch, filter_num, out_height, out_width), as produced by forward.
        Parameter gradients are averaged over the batch before the update.
        """
        batch_size = self.input.shape[0]
        k, stride = self.filter_size, self.stride
        need_input_grad = self.conv_layer != 1

        grad_weights = np.zeros(self.weights.shape)
        grad_bias = np.sum(grad_out, axis=(0, 2, 3))
        grad_padded = np.zeros(self.input.shape)

        for i in range(self.out_height):
            top = i * stride
            for j in range(self.out_width):
                left = j * stride
                window = self.input[:, :, top:top + k, left:left + k]
                grad_here = grad_out[:, :, i, j]  # (batch, F)
                # Sum over the batch: (batch, F) x (batch, C, k, k) -> (F, C, k, k)
                grad_weights += np.tensordot(grad_here, window, axes=([0], [0]))
                if need_input_grad:
                    # Scatter each output gradient back through the (not yet
                    # updated) kernels: (batch, F) x (F, C, k, k) -> (batch, C, k, k)
                    grad_padded[:, :, top:top + k, left:left + k] += np.tensordot(
                        grad_here, self.weights, axes=([1], [0])
                    )

        # Average over the real batch size (never zero for a non-empty batch).
        self.weights -= learning_rate * grad_weights / batch_size
        if self.use_bias:
            self.bias -= learning_rate * grad_bias / batch_size

        return self._strip_padding(grad_padded)
        


### MaxPooling Layer

In [234]:
class MaxPool2D:
    """Max pooling over (batch, channels, height, width) inputs.

    Parameters
    ----------
    pool_size : side length of the square pooling window.
    stride    : step between neighbouring windows.
    padding   : border width; padded cells are filled with ``-inf`` so they
                can never be selected as a maximum.
    dilation  : accepted for signature compatibility; not used.
    """

    def __init__(self, pool_size, stride, padding=0, dilation=1):
        self.pool_size = pool_size
        self.stride = stride
        self.padding = padding

    def forward(self, input_data):
        """Return the per-window maxima, shaped (batch, channels, out_height, out_width).

        ``out = (size + 2 * padding - pool_size) // stride + 1``.
        """
        pad = self.padding
        # Keep everything in floating point; an integer buffer truncates
        # activations such as sigmoid outputs to zero.
        padded_input = np.pad(
            np.asarray(input_data, dtype="float64"),
            ((0, 0), (0, 0), (pad, pad), (pad, pad)),
            mode="constant",
            constant_values=-np.inf,
        )
        self.input = padded_input

        batch_size, in_channels, padded_height, padded_width = padded_input.shape
        k, stride = self.pool_size, self.stride
        out_height = (padded_height - k) // stride + 1
        out_width = (padded_width - k) // stride + 1

        output = np.zeros((batch_size, in_channels, out_height, out_width), dtype="float64")
        for i in range(out_height):
            top = i * stride
            for j in range(out_width):
                left = j * stride
                output[:, :, i, j] = np.max(padded_input[:, :, top:top + k, left:left + k], axis=(2, 3))

        self.max_output = output
        return output

    def backward(self, grad_out, lr):
        """Route each output gradient to the position(s) holding the window maximum.

        Tied maxima inside a window all receive the full gradient.  ``lr`` is
        unused because the layer has no parameters.  Returns an array with the
        shape of the original (unpadded) forward input.
        """
        k, stride, pad = self.pool_size, self.stride, self.padding
        out_height, out_width = grad_out.shape[2], grad_out.shape[3]
        grad_padded = np.zeros(self.input.shape, dtype="float64")

        for i in range(out_height):
            top = i * stride
            for j in range(out_width):
                left = j * stride
                window = self.input[:, :, top:top + k, left:left + k]
                is_max = window == self.max_output[:, :, i:i + 1, j:j + 1]
                # += so overlapping windows (stride < pool_size) accumulate.
                grad_padded[:, :, top:top + k, left:left + k] += is_max * grad_out[:, :, i:i + 1, j:j + 1]

        if pad == 0:
            return grad_padded
        return grad_padded[:, :, pad:-pad, pad:-pad]
        

### Average Pooling Layer

In [268]:
class AvgPool2D:
    """Average pooling over (batch, channels, height, width) inputs.

    Parameters
    ----------
    pool_size : side length of the square pooling window.
    stride    : step between neighbouring windows.
    padding   : width of the zero border; padded zeros count towards the mean.
    dilation  : accepted for signature compatibility; not used.
    """

    def __init__(self, pool_size, stride, padding=0, dilation=1):
        self.pool_size = pool_size
        self.stride = stride
        self.padding = padding

    def forward(self, input_data):
        """Return the per-window means, shaped (batch, channels, out_height, out_width).

        ``out = (size + 2 * padding - pool_size) // stride + 1``.  The mean is
        taken separately for every sample and channel.
        """
        pad = self.padding
        padded_input = np.pad(
            np.asarray(input_data, dtype="float64"),
            ((0, 0), (0, 0), (pad, pad), (pad, pad)),
            mode="constant",
        )
        self.input = padded_input

        batch_size, in_channels, padded_height, padded_width = padded_input.shape
        k, stride = self.pool_size, self.stride
        out_height = (padded_height - k) // stride + 1
        out_width = (padded_width - k) // stride + 1

        output = np.zeros((batch_size, in_channels, out_height, out_width), dtype="float64")
        for i in range(out_height):
            top = i * stride
            for j in range(out_width):
                left = j * stride
                output[:, :, i, j] = np.mean(padded_input[:, :, top:top + k, left:left + k], axis=(2, 3))
        return output

    def backward(self, grad_loss, lr):
        """Spread each output gradient evenly over the window that produced it.

        ``lr`` is unused because the layer has no parameters.  Returns an array
        with the shape of the original (unpadded) forward input.
        """
        k, stride, pad = self.pool_size, self.stride, self.padding
        out_height, out_width = grad_loss.shape[2], grad_loss.shape[3]
        grad_padded = np.zeros(self.input.shape, dtype="float64")
        share = 1.0 / (k * k)

        for i in range(out_height):
            top = i * stride
            for j in range(out_width):
                left = j * stride
                # += so overlapping windows (stride < pool_size) accumulate.
                grad_padded[:, :, top:top + k, left:left + k] += grad_loss[:, :, i:i + 1, j:j + 1] * share

        if pad == 0:
            return grad_padded
        return grad_padded[:, :, pad:-pad, pad:-pad]

### Flatten Layer

In [235]:
class Flatten:
    """Collapse every axis after the first into a single feature axis."""

    def __init__(self):
        """The layer has no parameters and no state until ``forward`` is called."""

    def forward(self, input_data):
        """Remember the incoming array and return it reshaped to (batch, -1)."""
        self.input = input_data
        return np.reshape(input_data, (input_data.shape[0], -1))

    def backward(self, grad_out, lr):
        """Reshape ``grad_out`` back to the shape seen in ``forward``; ``lr`` is unused."""
        original_shape = self.input.shape
        return np.reshape(grad_out, original_shape)

### Activation Layer

In [236]:
class activation:
    """Element-wise / row-wise activation layer.

    ``func`` selects the non-linearity: ``'sigmoid'``, ``'relu'`` or
    ``'softmax'``.  Softmax normalises along the last axis, so for a
    (batch, classes) input every row sums to one.
    """

    _SUPPORTED = ('sigmoid', 'relu', 'softmax')

    def __init__(self, func):
        if func not in self._SUPPORTED:
            raise ValueError(
                "unsupported activation {!r}; expected one of {}".format(func, self._SUPPORTED)
            )
        self.func = func

    def forward(self, x):
        """Apply the activation to ``x`` and cache input and output for backward."""
        self.x = x
        if self.func == 'sigmoid':
            out = 1 / (1 + np.exp(-x))
        elif self.func == 'relu':
            out = np.maximum(0, x)
        else:  # softmax
            # Subtracting the row maximum leaves the result unchanged but
            # keeps exp() from overflowing on large logits.
            shifted = x - np.max(x, axis=-1, keepdims=True)
            exp = np.exp(shifted)
            out = exp / np.sum(exp, axis=-1, keepdims=True)
        self.out = out
        return out

    def backward(self, grad_loss, lr):
        """Return the gradient w.r.t. the input given ``grad_loss`` w.r.t. the output.

        ``lr`` is unused because the layer has no parameters.
        """
        if self.func == 'sigmoid':
            # d sigma / dx = sigma * (1 - sigma)
            return grad_loss * self.out * (1 - self.out)
        if self.func == 'relu':
            return grad_loss * (self.x > 0)
        # Softmax: full Jacobian-vector product, s * (g - sum(g * s)), per row.
        weighted = np.sum(grad_loss * self.out, axis=-1, keepdims=True)
        return self.out * (grad_loss - weighted)

### Loss Functions

In [237]:
def oneHotVector(input_y, num_classes):
    """Turn a vector of class labels into a (len(input_y), num_classes) one-hot matrix.

    Each label is converted with ``int()`` and used as the column index that
    receives a 1; every other entry stays 0.
    """
    encoded = np.zeros((input_y.shape[0], num_classes))
    for row, label in enumerate(input_y):
        encoded[row, int(label)] = 1
    return encoded


In [288]:
def crossEntropy(predicted,input_y):
    """Categorical cross-entropy between predicted probabilities and one-hot targets.

    predicted: predicted probabilities, same shape as ``input_y``.
    input_y:   one-hot (or soft) target distribution.

    Returns ``(loss, grad)`` where ``loss`` is the cross-entropy summed over
    all entries (callers divide by the sample count themselves) and ``grad``
    is d loss / d predicted, i.e. ``-input_y / predicted``.
    """
    epsilon = 1e-12
    # Clip away exact 0 and 1 so log() and the division stay finite.
    predicted = np.clip(predicted, epsilon, 1. - epsilon)
    loss = -np.sum(input_y * np.log(predicted))
    # The derivative of -y * log(p) is -y / p; the minus sign is what makes a
    # gradient-descent step increase the probability of the true class.
    grad = -input_y / predicted
    return loss, grad

### Creating the Convolutional Neural Network Model

In [239]:
# compiling all the above layers in one CNN Model
class Model:
    """Sequential container that chains layers exposing ``forward`` and ``backward``.

    verbose: when True, print the shape of every intermediate output and
             gradient (useful while wiring a network; off by default so
             training loops are not flooded with output).
    """

    def __init__(self, verbose=False):
        self.layers = []
        self.verbose = verbose

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def forward(self, input_data):
        """Feed ``input_data`` through every layer in insertion order and return the result."""
        output = input_data
        for layer in self.layers:
            output = layer.forward(output)
            if self.verbose:
                print(getattr(output, "shape", None))
        return output

    def backward(self, grad_out, lr):
        """Back-propagate ``grad_out`` through the layers in reverse order.

        Every layer receives the learning rate ``lr`` and updates itself.
        Returns the gradient with respect to the model input.
        """
        for layer in reversed(self.layers):
            grad_out = layer.backward(grad_out, lr)
            if self.verbose:
                print(" gradout", getattr(grad_out, "shape", None))
        return grad_out
        

### Load KMNIST Data

In [156]:
def _load_npz_array(path, key="arr_0"):
    """Read one array out of an ``.npz`` archive and close the archive afterwards.

    ``np.load`` on an ``.npz`` file returns a lazy archive object that keeps
    the file open until it is closed, so it is used as a context manager here.
    """
    with np.load(path) as archive:
        return archive[key]


# KMNIST images and labels; each archive stores its single array under "arr_0".
train_data = _load_npz_array("kmnist-train-imgs.npz")
test_data = _load_npz_array("kmnist-test-imgs.npz")
train_labels = _load_npz_array("kmnist-train-labels.npz")
test_labels = _load_npz_array("kmnist-test-labels.npz")

In [126]:
# Echo the test labels as a quick sanity check (the saved output shows a uint8 array of label values).
test_labels

array([2, 9, 3, ..., 9, 4, 2], dtype=uint8)

In [157]:
# Insert a singleton channel axis: (N, H, W) -> (N, 1, H, W), the layout the
# convolution layer unpacks as (batch, channels, height, width).
train_data = train_data.reshape(train_data.shape[0], 1, *train_data.shape[1:3])
test_data = test_data.reshape((test_data.shape[0], 1, *test_data.shape[1:3]))

In [None]:
# Scale the training images by 1/255 (presumably from 8-bit pixel values into [0, 1] — confirm dtype).
# NOTE(review): this rebinding is not idempotent — running the cell twice divides by 255 twice.
# NOTE(review): test_data is not scaled in this cell; confirm it is scaled the same way before evaluation.
train_data=train_data/255


In [130]:
# Training / validation split: the first 1000 samples are used for training,
# every remaining sample is held out for validation.
train_X, train_y = train_data[:1000], train_labels[:1000]
validation_X, validation_y = train_data[1000:], train_labels[1000:]

In [132]:
# Dummy all-ones tensor shaped (100, 64, 13, 13).
# NOTE(review): looks like a scratch value; `output` is reassigned by the training loop — confirm it is still needed.
output = np.ones(shape=(100, 64, 13, 13), dtype="float64")

In [134]:
# One-hot encode the training labels into a (len(train_y), 10) matrix and echo it.
binary_train_y = oneHotVector(input_y=train_y, num_classes=10)
binary_train_y

array([[0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

### CNN Model Training

#### Model 1

In [218]:
# Model 1: Conv -> MaxPool -> Sigmoid -> Flatten -> Linear -> Linear -> Softmax.
# Shape comments assume 28x28 single-channel inputs — TODO confirm against the loaded data.
# NOTE(review): there is no non-linearity between the two Linear layers, so together they act as one linear map.
cnn_model1=Model()
cnn_model1.add(Conv2D(1, 5, 64))  # 5x5 kernel, 64 filters, no padding: (batch, 64, 24, 24)
cnn_model1.add(MaxPool2D(2, 2))  # 2x2 windows, stride 2: (batch, 64, 12, 12)
cnn_model1.add(activation('sigmoid'))
cnn_model1.add(Flatten())  # (batch, 64*12*12) = (batch, 9216)
cnn_model1.add(Linear((64*12*12), 1000))
cnn_model1.add(Linear(1000, 10))
cnn_model1.add(activation('softmax'))


In [221]:
# Training hyperparameters: number of full passes and the gradient-descent step size.
# NOTE(review): a step size of 100000 is extremely large for plain gradient descent — confirm it is intentional.
epochs, lr = 10, 100_000


In [None]:
# Full-batch training: every epoch runs one forward/backward pass over all of
# train_X, then records the summed loss and the training accuracy.
train_loss = []
accuracy = []
for epoch in tqdm(range(epochs), total=epochs):
    # Forward pass and loss against the one-hot targets.
    output = cnn_model1.forward(train_X)
    y = oneHotVector(train_y, 10)
    loss, grad_out = crossEntropy(output, y)

    # Backward pass; each layer updates its own parameters with step size lr.
    cnn_model1.backward(grad_out, lr)

    # Accuracy of the predictions made before this epoch's update.
    predicted = output.argmax(axis=1)
    total_samples = train_y.shape[0]
    total_correct = (predicted == train_y).sum().item()
    acc = total_correct / total_samples

    # train_loss keeps the raw summed loss; the printout shows it per sample.
    train_loss.append(loss)
    accuracy.append(acc)
    print("Epoch {}: loss= {}, accuracy= {}".format(epoch, loss/total_samples, acc*100))
    