In [1]:
import numpy as np

In [28]:
class ConvolutionLayers:
  def __init__(self,kernel_num,kernel_size,stride = 1,bias = 1):

    self.no_of_kernels = kernel_num
    self.kernel_size = kernel_size
    self.stride = stride
    self.bias = bias
    self.kernels = np.random.randn(kernel_num,kernel_size,kernel_size) / kernel_size**2
  def partition_generator(self,input_img):
    img_h,img_w = input_img.shape
    self.img = input_img
    for h in range(0,img_h-self.kernel_size+1,self.stride):
      for w in range(0,img_w - self.kernel_size+1,self.stride):
        slice = input_img[h:(h+self.kernel_size),w:(w+self.kernel_size)]
        yield slice,h,w
  def forward_prop(self,img):
    img_h,img_w = img.shape
    conv_output = np.zeros((img_h-self.kernel_size+1,img_w-self.kernel_size+1,self.no_of_kernels))
    for sec,h,w in self.partition_generator(img):
      conv_output[h,w] = np.sum(sec*self.kernels,axis = (1,2))
    return conv_output
  def backward_prop(self,dL_dZ,learning_rate):
    dL_dk = np.zeros(self.kernels.shape)
    for sec,h,w in self.partition_generator(self.img):
      for f in range(self.no_of_kernels):
        dL_dk += sec*dL_dZ[h,w,f]
    self.kernels -= learning_rate*dL_dk
    return dL_dk

In [29]:
class MaxPool:
  def __init__(self,pool_size,stride=2):
    self.pool_size = pool_size
    self.stride = stride

  def partition_generator(self,img):
    img_h,img_w = img.shape[0]//self.pool_size,img.shape[1]//self.pool_size
    self.img = img
    for h in range(0,img_h,self.stride):
      for w in range(0,img_w,self.stride):
        slice = img[(h*self.pool_size):(h*self.pool_size+self.pool_size),(w*self.pool_size):(w*self.pool_size+self.pool_size)]
        yield slice,h,w
  def forward_prop(self,img):
    h,w,num_kernels = img.shape
    max_pooled_out = np.zeros((h//self.pool_size,w//self.pool_size,num_kernels))
    for slice,h,w in self.partition_generator(img):
      max_pooled_out[h,w] = np.amax(slice,axis = (0,1))
    return max_pooled_out

  def backward_prop(self,dL_dZ):
    dL_dk = np.zeros(self.img.shape)
    for patch,h,w in self.partition_generator(self.img):
      h0, w0, num_kernels = patch.shape
      max_val = np.amax(patch, axis=(0,1))
      for idx_h in range(h0):
        for idx_w in range(w0):
          for idx_k in range(num_kernels):
            if patch[idx_h,idx_w,idx_k] == max_val[idx_k]:
              dL_dk[h*self.pool_size+idx_h, w*self.pool_size+idx_w, idx_k] = dL_dZ[h,w,idx_k]
    return dL_dk

In [30]:
class FullyCon_SoftmaxLayer:
    def __init__(self, input_units, output_units):
        self.weight = np.random.randn(input_units, output_units)/input_units
        self.bias = np.zeros(output_units)
        self.output = None

    def _dense_layer(self,image):
        self.original_shape = image.shape
        image_flattened = image.flatten()
        self.flattened_input = image_flattened
        dense_output = np.dot(image_flattened, self.weight) + self.bias
        self.output = dense_output
        return dense_output

    def _softmax_out(self,dense_out):
        softmax_output = np.exp(dense_out) / np.sum(np.exp(dense_out), axis=0)
        return softmax_output

    def forward_prop(self, image):
        dense_out = self._dense_layer(image)
        softmax_output = self._softmax_out(dense_out)
        return softmax_output

    def backward_prop(self, dL_dz, lr):
        for i, gradient in enumerate(dL_dz):
            if gradient == 0:
                continue
            transformation_eq = np.exp(self.output)
            S_total = np.sum(transformation_eq)

            dY_dZ = -transformation_eq[i]*transformation_eq / (S_total**2)
            dY_dZ[i] = transformation_eq[i]*(S_total - transformation_eq[i]) / (S_total**2)

            dZ_dw = self.flattened_input
            dZ_db = 1
            dZ_dX = self.weight

            dE_dZ = gradient * dY_dZ
            dE_dw = dZ_dw[np.newaxis].T @ dE_dZ[np.newaxis]
            dE_db = dE_dZ * dZ_db
            dE_dX = dZ_dX @ dE_dZ

            self.weight -= lr*dE_dw
            self.bias -= lr*dE_db

            return dE_dX.reshape(self.original_shape)

In [45]:
class Train_Pred_Model:
    def __init__(self,epochs = 10,img = None,labels = None,layers = None,isMnist = True,train = True):
      self.isMnist = isMnist
      self.img = img
      self.labels = labels
      self.layers = layers
      self.epochs = epochs
      
      if self.isMnist and train:
        self._Mnist_Exec()
      elif train:
        self._Normal_Exec()

    def _Mnist_Exec(self):
      (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
      X_train = X_train[:5000]
      y_train = y_train[:5000]

      layers = [
          ConvolutionLayers(16,3),
          MaxPool(2),
          FullyCon_SoftmaxLayer(13*13*16, 10)
          ]

      for epoch in range(self.epochs):
          print('Epoch {} ->'.format(epoch+1))

          permutation = np.random.permutation(len(X_train))
          X_train = X_train[permutation]
          y_train = y_train[permutation]

          loss = 0
          accuracy = 0
          for i, (image, label) in enumerate(zip(X_train, y_train)):
            if i % 100 == 0:
              print("Step {}. For the last 100 steps: average loss {}, accuracy {}".format(i+1, loss/100, accuracy))
              loss = 0
              accuracy = 0
            loss_1, accuracy_1 = self.train(image, label, layers)
            loss += loss_1
            accuracy += accuracy_1
      return layers
    
    def _Normal_Exec(self): 
      for i in range(self.epochs):
        loss = 0
        accuracy = 0
        for i, (image, label) in enumerate(zip(self.imgs, self.labels)):
            if i % 100 == 0:
                print("Step {}. For the last 100 steps: average loss {}, accuracy {}".format(i+1, loss/100, accuracy))
                loss = 0
                accuracy = 0
            loss_1, accuracy_1 = self.train(image, label, self.layers)
            loss += loss_1
            accuracy += accuracy_1
        print(f"Epoch {i}--> loss: {loss}  accuracy: {accuracy}") 
      return self.layers

    def _forward(self,img,label,layers):
        output = img/255.
        for layer in layers:
          output = layer.forward_prop(output)
        loss = -np.log(output[label])
        acc = 1 if np.argmax(output) == label else 0
        return output,loss,acc
    
    def _backprop(self,gradients,layers,lr = 0.01):
        grad = gradients
        for layer in layers[::-1]:
          if type(layer) in [ConvolutionLayers,FullyCon_SoftmaxLayer]:
            grad = layer.backward_prop(grad,lr)
          else:
            grad = layer.backward_prop(grad)
        return grad
    def train(self,img,label,layers,lr=0.05):
        output,loss,acc = self._forward(img,label,layers)
        gradient = np.zeros(10)
        gradient[label] = -1/output[label]
        grad = self._backprop(gradient,layers,lr)
        return loss,acc

    def predict(self,img,layers):
        output = img/255.
        for layer in layers:
          output = layer.forward_prop(output)
        return output

In [49]:
# import tensorflow as tf
# Model = Train_Pred_Model(30,train = True)



Epoch 1 ->
Step 1. For the last 100 steps: average loss 0.0, accuracy 0
Step 101. For the last 100 steps: average loss 2.6287586402564234, accuracy 22
Step 201. For the last 100 steps: average loss 1.9588065311567378, accuracy 42
Step 301. For the last 100 steps: average loss 2.412216035322477, accuracy 37
Step 401. For the last 100 steps: average loss 4.229739562509493, accuracy 33
Step 501. For the last 100 steps: average loss 22.616304622931967, accuracy 22


  dY_dZ = -transformation_eq[i]*transformation_eq / (S_total**2)
  dY_dZ[i] = transformation_eq[i]*(S_total - transformation_eq[i]) / (S_total**2)
  dY_dZ = -transformation_eq[i]*transformation_eq / (S_total**2)
  dY_dZ = -transformation_eq[i]*transformation_eq / (S_total**2)
  dY_dZ[i] = transformation_eq[i]*(S_total - transformation_eq[i]) / (S_total**2)
  dY_dZ[i] = transformation_eq[i]*(S_total - transformation_eq[i]) / (S_total**2)


Step 601. For the last 100 steps: average loss nan, accuracy 13
Step 701. For the last 100 steps: average loss nan, accuracy 5
Step 801. For the last 100 steps: average loss nan, accuracy 9
Step 901. For the last 100 steps: average loss nan, accuracy 14
Step 1001. For the last 100 steps: average loss nan, accuracy 13
Step 1101. For the last 100 steps: average loss nan, accuracy 10
Step 1201. For the last 100 steps: average loss nan, accuracy 11
Step 1301. For the last 100 steps: average loss nan, accuracy 7
Step 1401. For the last 100 steps: average loss nan, accuracy 14
Step 1501. For the last 100 steps: average loss nan, accuracy 6
Step 1601. For the last 100 steps: average loss nan, accuracy 10
Step 1701. For the last 100 steps: average loss nan, accuracy 7
Step 1801. For the last 100 steps: average loss nan, accuracy 10
Step 1901. For the last 100 steps: average loss nan, accuracy 11
Step 2001. For the last 100 steps: average loss nan, accuracy 4
Step 2101. For the last 100 steps: 

In [83]:
print(y_test[9])
img = X_test[9]
layers = [
    ConvolutionLayers(16,3),
    MaxPool(2),
    FullyCon_SoftmaxLayer(13*13*16, 10)
    ]
pre = Train_Pred_Model(train = False)
pred = pre.predict(img,layers)
print(pred)
print(np.argmax(pred))

9
[0.09992258 0.09981661 0.09997726 0.09999994 0.09994578 0.09989672
 0.10009112 0.09989858 0.10018644 0.10026496]
9


In [79]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [69]:
y_test[0]

7