In [None]:
import os
import torch
import  torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import STL10
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader,Dataset
from torch.utils.data import random_split
from torchvision import transforms

In [None]:
class Conv_operation:
    def __init__(self,num_filters,filter_size,stride_size,padding_size):
        self.num_filters=num_filters
        self.filter_size=filter_size
        self.padding_size=padding_size
        self.stride_size=stride_size
        self.conv_filter=torch.rand(num_filters,filter_size,filter_size)/(filter_size*filter_size)
    #image patching 
    def image_region(self,image):
        height,width=image.shape
        self.image=image
        for j in range((height-self.filter_size)+1):
            for k in range((width-self.filter_size)+1):
                image_patch=image[j:(j+self.filter_size),k:(k+self.filter_size)]
                yield image_patch,j,k
    def forward_prop(self,image):
#         assert single_sample.dim()==3, f'Input not 2D, given {single_sample.dim()}D'
        # image=torch.squeeze(image)
        height,width=image.shape
        padding_size=(self.filter_size-1)//2
        conv_out=torch.zeros(((height-self.filter_size+2*padding_size)//self.stride_size)+1,((width-self.filter_size+2*padding_size)//self.stride_size)+1,self.num_filters)
        for image_path,i,j in self.image_region(image):
            conv_out[i,j]= torch.sum(image_path*self.conv_filter)
        # conv_out = 1. / (1. + torch.exp(-conv_out))
        return conv_out
    def padding_size(self):
        return(self.filter_size-1)//2

        
    def relu(self,xa,derive=False):
      if derive:
        return torch.ceil(torch.clamp(xa,min=0,max=1)).detach
      return torch.clamp(xa,min=0).detach()


    def backward_prop(self,d_L_dout,learning_rate):
        dL_dF_params=torch.zeros(self.conv_filter.shape)
        for image_patch,i,j in self.image_region(self.image):
            for k in range(self.num_filters):
                dL_dF_params[k]+=image_patch*d_L_dout[i,j,k]
#         learning_rate=torch.tensor(learning_rate)
#         dL_dF_params=torch.tensor(dL_dF_params)
        self.conv_filter-=learning_rate*dL_dF_params
        return dL_dF_params

In [None]:
class Maxpooling:
    def __init__(self,filter_size):
        self.filter_size=filter_size
    def image_region(self,image):
        new_height=image.shape[0]//self.filter_size
        new_width=image.shape[1]//self.filter_size
        self.image=image
        for i in range(new_height):


          for j in range(new_width):


            image_patch=image[(i*self.filter_size):(i*self.filter_size+self.filter_size),
                                 (j*self.filter_size):(j*self.filter_size+self.filter_size)]
            yield image_patch,i,j
    def forward_prop(self,image):

      height,width,num_filters=image.shape
      output=torch.zeros(height//self.filter_size,width//self.filter_size,num_filters)
      
      for image_patch, i, j in self.image_region(image):
        # image_patch=torch.flatten(image_patch,start_dim=0,end_dim=1)
        output[i,j]=torch.amax(image_patch)
      return output
    

    def backward_prop(self,d_L_dout): #dL_dout is the input from softmax layer
      dl_dinput=torch.zeros(self.image.shape)
      for image_patch,i,j in self.image_region(self.image):
        height,width,num_filters=image_patch.shape
        # max_val=torch.max(image_patch,dim=0)

  
            
        for i1 in range(height):

          for j1 in range(width):

                for k1 in range(num_filters):
                  x_pool=dl_dinput[i*self.filter_size+i1,j*self.filter_size+j1,k1]
                  mask=(x_pool==torch.amax(x_pool))
                  dl_dinput[i*self.filter_size+i1,j*self.filter_size+j1,k1]=mask*d_L_dout[i,j,k1]
                                  
                           
        return dl_dinput

In [None]:
class Softmax:
    def __init__(self,input_node,softmax_node):
        self.weight=torch.randn(input_node,softmax_node)/input_node
        self.bias=torch.zeros(softmax_node)
    def forward_prop(self,image):
        self.last_input_shape=image.shape
        new_image=image.flatten()
        self.modified_input=new_image #to be  used in backpropagation
#         input_node,softmax_node=self.weight.shape
#         new_image=torch.squeeze(new_image,dim=1)
        new_image=new_image.cpu().detach().numpy()
        self.weight=self.weight.cpu().detach().numpy()
        self.bias=self.bias.cpu().detach().numpy()
        output_val=np.dot(new_image,self.weight)+self.bias
        self.weight=torch.from_numpy(self.weight)
        self.bias=torch.from_numpy(self.bias)
      
        output_val=torch.from_numpy(output_val)
        self.out=output_val
        exp_out=torch.exp(output_val)
        return exp_out/torch.sum(exp_out)
    
    def backward_prop(self,d_L_dout,learning_rate):
        for i, gradient in enumerate(d_L_dout):
            if gradient==0:
                continue
            
    #out(c)=e^tc/summation(e^ti)
    #   where S=summation(e^ti)
        t_exp=torch.exp(self.out)
        #SUM OF ALL e^totals
        S=torch.sum(t_exp)
        #gradients of output[i] against totals
        dout_dt=-t_exp[i]*t_exp/(S**2)
        dout_dt[i]=t_exp[i]*(S - t_exp[i]) / (S ** 2)
        
        #gradients of totals against weights,biases, input
        dt_dw=self.modified_input
        dt_db=1
        dt_dinput=self.weight
        
        #gradients of loss against totals
        dL_dt=gradient*dout_dt
        
        #gradients of loss against weights, biases and input
#         dt_dw=torch.unsqueeze(dt_dw,dim=0)
#         dl_dt=torch.unsqueeze(dl_dt,dim=0)
        dl_dw=torch.matmul(dt_dw.unsqueeze(0).t(),dL_dt.unsqueeze(0))
        dl_db=torch.mul(dL_dt,dt_db)
        dl_dinput=torch.matmul(dt_dinput,dL_dt)
        
        #update weights biases
        
        self.weight-=torch.mul(learning_rate,dl_dw)
        self.bias-=torch.mul(learning_rate,dl_db)
        
        # return dl_dinput.reshape(self.last_input_shape)
        return torch.reshape(dl_dinput,self.last_input_shape)

In [None]:
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
train_images = X_train[:1500]
train_labels = y_train[:1500]
test_images = X_test[:1500]
test_labels = y_test[:1500]
test_images=torch.tensor(test_images)
test_lables=torch.tensor(test_labels)

conv=Conv_operation(8,3,1,1)
pool=Maxpooling(2)
softmax=Softmax(14*14*8,10)

def cnn_forward_prop(image,label):
    out= conv.forward_prop((image/255) -0.5)
    out=torch.tensor(out)
    out=pool.forward_prop(out)
    out=softmax.forward_prop(out)
    
    #calculate cross entropy loss and accuracy
    loss=-torch.log(out[label])
    acc=torch.where(torch.argmax(out)==label,1,0)
    
#     out_p=torch.argmax(out_p)
#     label=label.float()
#     out_p=out_p.float()*(label>0).float()
#     accuracy_eval=out_p*(out_p==label).float()
    
#     accuracy_eval=1 if torch.argmax(out_p)==label else 0
#     correct+=(out_p==label).sum().item()
#     accuracy_eval=100*correct/total
    
    return out,loss,acc
def train_cnn(image,label,learning_rate=0.000000005):
    #forward
    out,loss,acc=cnn_forward_prop(image,label)
    #calculate initial gradient
    gradient=torch.zeros(10)
    gradient[label]=-1/out[label]
    
    #backward
    grad_back=softmax.backward_prop(gradient,learning_rate)
    grad_back=pool.backward_prop(grad_back)
    grad_back=conv.backward_prop(grad_back,learning_rate)
    return loss,acc

In [None]:
for epoch1 in range(2):
  print('Epoch %d ->'% (epoch1 +1))
  

  # shuffle the training data
  shuffle_data = torch.randperm(len(train_images))
  train_images = train_images[shuffle_data]
  train_labels = train_labels[shuffle_data]
  train_images=torch.tensor(train_images)
  test_images=torch.tensor(test_images)

  

  #training the CNN
  loss = 0.0
  num_correct = 0

  for i, (im, label) in enumerate(zip(train_images, train_labels)):
    if i % 100 == 0:
      print('%d steps out of 100 steps: Average Loss %.3f and Accuracy: %d%%' %(i+1, loss/100, num_correct))
      loss = 0
      num_correct = 0

    l1, acc = train_cnn(im, label)
    loss += l1

    num_correct +=acc

Epoch 1 ->
1 steps out of 100 steps: Average Loss 0.000 and Accuracy: 0%


  # Remove the CWD from sys.path while we load stuff.
  app.launch_new_instance()


101 steps out of 100 steps: Average Loss 2.300 and Accuracy: 14%
201 steps out of 100 steps: Average Loss 2.306 and Accuracy: 6%
301 steps out of 100 steps: Average Loss 2.307 and Accuracy: 8%
401 steps out of 100 steps: Average Loss 2.300 and Accuracy: 11%
501 steps out of 100 steps: Average Loss 2.297 and Accuracy: 12%
601 steps out of 100 steps: Average Loss 2.294 and Accuracy: 12%
701 steps out of 100 steps: Average Loss 2.297 and Accuracy: 12%
801 steps out of 100 steps: Average Loss 2.299 and Accuracy: 11%
901 steps out of 100 steps: Average Loss 2.304 and Accuracy: 9%
1001 steps out of 100 steps: Average Loss 2.291 and Accuracy: 10%
1101 steps out of 100 steps: Average Loss 2.297 and Accuracy: 9%
1201 steps out of 100 steps: Average Loss 2.302 and Accuracy: 9%
1301 steps out of 100 steps: Average Loss 2.304 and Accuracy: 9%
1401 steps out of 100 steps: Average Loss 2.300 and Accuracy: 7%
Epoch 2 ->
1 steps out of 100 steps: Average Loss 0.000 and Accuracy: 0%


  if __name__ == '__main__':


101 steps out of 100 steps: Average Loss 2.303 and Accuracy: 10%
201 steps out of 100 steps: Average Loss 2.303 and Accuracy: 12%
301 steps out of 100 steps: Average Loss 2.296 and Accuracy: 13%
401 steps out of 100 steps: Average Loss 2.295 and Accuracy: 12%
501 steps out of 100 steps: Average Loss 2.297 and Accuracy: 11%
601 steps out of 100 steps: Average Loss 2.311 and Accuracy: 7%
701 steps out of 100 steps: Average Loss 2.303 and Accuracy: 10%
801 steps out of 100 steps: Average Loss 2.292 and Accuracy: 11%
901 steps out of 100 steps: Average Loss 2.300 and Accuracy: 8%
1001 steps out of 100 steps: Average Loss 2.307 and Accuracy: 8%
1101 steps out of 100 steps: Average Loss 2.299 and Accuracy: 14%
1201 steps out of 100 steps: Average Loss 2.294 and Accuracy: 11%
1301 steps out of 100 steps: Average Loss 2.300 and Accuracy: 9%
1401 steps out of 100 steps: Average Loss 2.300 and Accuracy: 4%


In [None]:
print('\n--- Testing the CNN ---')
loss = 0
num_correct = 0
for im, label in zip(test_images, test_labels):
  _, l, acc = cnn_forward_prop(im, label)
  loss += l
  num_correct += acc

num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)


--- Testing the CNN ---


  app.launch_new_instance()


Test Loss: tensor(17.8416)
Test Accuracy: tensor(0.0960)
