**CREATE THE CONVOLUTION FUNCTION**

In [None]:
import numpy as np

In [None]:
class CNN ():
  """Naive NumPy 2-D convolution layer without padding, one sample at a time.

  Inputs are laid out as (channels, height, width). Only the last entry of
  the input shape is used to derive the output size, so the input is
  treated as square.

  Attributes:
    s_o: side length of each (square) output feature map.
    stride: step between two consecutive windows.
    s_k: side length of the square kernels.
    n_k: number of kernels (output channels).
    kernels: float32 array of shape (n_k, in_channels, s_k, s_k).
    bias: float32 array of shape (n_k,).
  """
  def __init__(self,s_i,n_s,s_k,stride=1):
    """Create the layer with randomly initialised kernels and biases.

    Args:
      s_i: input shape; s_i[0] is the channel count, s_i[-1] the side length.
      n_s: number of kernels.
      s_k: kernel side length.
      stride: step between windows (defaults to 1).
    """
    super().__init__()
    self.stride = stride
    self.s_k = s_k
    self.n_k = n_s
    # Number of windows that fit along one axis; equals s_i[-1] - s_k + 1
    # when stride is 1.
    self.s_o = (s_i[-1] - s_k) // stride + 1
    self.kernels = np.random.randn(n_s,s_i[0],s_k,s_k).astype(np.float32)/ n_s
    self.bias = np.random.randn(n_s).astype(np.float32) / n_s

  def image_segmentation(self,x):
    """Yield every (window, i, j), where (i, j) is the window's top-left
    corner in input coordinates."""
    limit = self.s_o * self.stride
    for i in range(0, limit, self.stride):
      for j in range(0, limit, self.stride):
        yield x[:,i:i+self.s_k,j:j+self.s_k],i,j

  def forward(self,x):
    """Convolve x with every kernel.

    Args:
      x: array of shape (in_channels, height, width).

    Returns:
      Array of shape (n_k, s_o, s_o).
    """
    conv = np.zeros(shape=(self.n_k,self.s_o,self.s_o))
    for k,kernel in enumerate(self.kernels):
      for segment,i,j in self.image_segmentation(x):
        # Input coordinates -> output coordinates.
        conv[k,i//self.stride,j//self.stride] = self.bias[k] + np.sum(segment*kernel)
    return conv

  def backward(self,x,dL_dc,lr):
    """Back-propagate through the layer and apply one gradient-descent step.

    Args:
      x: the input that was given to forward.
      dL_dc: gradient of the loss w.r.t. the layer output, (n_k, s_o, s_o).
      lr: learning rate.

    Returns:
      Gradient of the loss w.r.t. x, with the same shape as x.
    """
    dL_Conv = np.zeros_like(self.kernels)
    dL_in_c = np.zeros_like(x)
    dL_bias = np.zeros_like(self.bias)
    for segment,i,j in self.image_segmentation(x):
      oi, oj = i // self.stride, j // self.stride
      # Iterate over the kernels (n_k), not over the kernel size (s_k):
      # otherwise only the first s_k kernels would ever be trained.
      for k in range(self.n_k):
        grad = dL_dc[k,oi,oj]
        dL_Conv[k] += segment * grad
        dL_in_c[:,i:i+ self.s_k,j:j+self.s_k] += self.kernels[k] * grad
        dL_bias[k] += grad
    # Parameters are updated only after the input gradient has been computed
    # with the old kernels.
    self.bias -= lr* dL_bias
    self.kernels -= lr * dL_Conv
    return dL_in_c

In [None]:
# Small convolution layer for a quick shape check: 3-channel 5x5 input,
# 6 kernels of size 2x2.
CNNp = CNN((3,5,5), 6, 2)

In [None]:
# Smoke test: the gradient returned by backward should have the input's shape.
CNNp.backward(np.random.randn(3,5,5),np.random.randn(6,4,4), 0.5).shape

(3, 5, 5)

# Definition of the Max-Pooling Layer

In [None]:
class MaxPooling():
  """Non-overlapping max pooling over square windows, one sample at a time.

  Attributes:
    k_s: side length of the pooling window (also used as the step).
    s_i: side length of the (square) input feature maps.
    s_o: side length of the pooled output, s_i // k_s.
  """
  def __init__(self,s_i,k_s):
    """
    Args:
      s_i: side length of the input feature maps.
      k_s: side length of the pooling window.
    """
    self.k_s = k_s
    self.s_i = s_i
    # Derived from the window size instead of a hard-coded 2.
    self.s_o = s_i // k_s

  def segment(self,x):
    """Yield every (window, i, j), where (i, j) is the window's top-left
    corner. Trailing rows/columns that do not fill a window are skipped."""
    for i in range(0,self.s_i-self.k_s+1, self.k_s):
      for j in range(0,self.s_i-self.k_s+1, self.k_s):
        yield x[:,i:i+self.k_s,j:j+self.k_s], i, j

  def forward(self,x):
    """Return the per-channel maximum of every window.

    Args:
      x: array of shape (channels, s_i, s_i).

    Returns:
      Array of shape (channels, s_o, s_o).
    """
    pooled = np.zeros(shape =(x.shape[0],self.s_o,self.s_o))
    for ims,i,j in self.segment(x):
      pooled[:,i//self.k_s,j//self.k_s] = np.max(ims,axis=(1,2))
    return pooled

  def backward(self,x,x_c,dL_dm):
    """Route each pooled gradient back to the position(s) of the window max.

    Args:
      x: unused; kept so existing callers keep working.
      x_c: the input that was given to forward.
      dL_dm: gradient w.r.t. the pooled output, (channels, s_o, s_o).

    Returns:
      Array shaped like x_c; zero everywhere except at window maxima. When
      several entries tie for the maximum, each of them receives the gradient.
    """
    dM = np.zeros_like(x_c)
    k = self.k_s
    for ims,i,j in self.segment(x_c):
      oi, oj = i // k, j // k
      for s in range(0,x_c.shape[0]):
        is_max = ims[s] == np.max(ims[s])
        # The slice is a view of dM, so the masked assignment writes into dM.
        dM[s,i:i+k,j:j+k][is_max] = dL_dm[s,oi,oj]
    return dM

**Fully connected (linear) layer**

In [None]:
class Linear():
    """Fully connected layer y = x @ W + b, with sigmoid helpers.

    Attributes:
        bias: float32 array of shape (1, output_n).
        weighs: float32 weight matrix of shape (input_n, output_n).
    """
    def __init__(self,input_n,output_n):
        """Randomly initialise bias and weights, scaled by 1 / input_n."""
        self.bias = np.random.randn(1,output_n).astype(np.float32) / input_n
        self.weighs = np.random.randn(input_n,output_n).astype(np.float32) / input_n

    def Sigmoid(self,x):
        """Element-wise logistic function 1 / (1 + exp(-x))."""
        return 1 / ( 1 + np.exp(-x))

    def DSigmoid(self,x):
        """Derivative of the sigmoid, evaluated at the pre-activation x."""
        s = self.Sigmoid(x)
        return s *( 1 - s)

    def forward(self, x ):
        """Return x @ weighs + bias.

        The bias has shape (1, output_n), so a 1-D input of length input_n
        produces a (1, output_n) result through broadcasting.
        """
        return np.dot(x,self.weighs) + self.bias

    def backward(self,x, dL, lr):
        """Back-propagate through the layer and apply one gradient-descent step.

        Args:
            x: the layer input, shape (batch, input_n).
            dL: gradient of the loss w.r.t. the output, shape (batch, output_n).
            lr: learning rate.

        Returns:
            Gradient of the loss w.r.t. x, with the same shape as x.
        """
        d_w = np.dot(x.T,dL)
        # Sum over the batch axis so the result matches the (1, output_n)
        # bias; for a single row this is exactly dL.
        d_b = np.sum(dL,axis=0,keepdims=True)
        # Uses the weights from before the update below.
        d_x = np.dot(dL,self.weighs.T)
        self.weighs -= lr * d_w
        self.bias -= lr * d_b

        return d_x.reshape(x.shape)

**Classification Function**

In [None]:
class Softmax():
    """Row-wise softmax with its backward pass.

    Attributes:
        b_layer: the logits fed to the last forward pass; it must be set by
            the caller before backward is used.
    """
    def __init__(self):
        self.b_layer = None

    def forward(self,x):
        """Return softmax(x) along axis 1.

        The row maximum is subtracted before exponentiating so that large
        logits do not overflow; the result is mathematically unchanged.

        Args:
            x: 2-D array of logits, shape (batch, n_classes).
        """
        exp = np.exp(x - np.max(x,axis = 1,keepdims = True))
        return exp / np.sum(exp,axis = 1,keepdims = True)

    def backward(self,DL):
        """Gradient of the loss w.r.t. the logits stored in b_layer.

        Applies the softmax Jacobian to DL:
            dL/dx_j = s_j * (DL_j - sum_i DL_i * s_i)
        For a one-hot DL this is the single Jacobian row scaled by that entry;
        it is also correct when several entries of DL are non-zero, and it
        yields zeros (rather than None) when DL is all zeros.

        Args:
            DL: gradient of the loss w.r.t. the softmax output, shape
                (n_classes,) or (batch, n_classes).

        Returns:
            Array with the same shape as b_layer.
        """
        probs = self.forward(self.b_layer)
        DL = np.asarray(DL)
        weighted = np.sum(DL * probs,axis = 1,keepdims = True)
        return probs * (DL - weighted)


**Architecture definition**

In [None]:
class Net():
    """Small CNN classifier for single 1x28x28 images.

    Pipeline (shapes follow from the layer constructors below):
        Conv1 (1,28,28)->(3,27,27) -> MaxPooling1 ->(3,13,13)
        -> Conv2 ->(6,12,12) -> MaxPooling2 ->(6,6,6) -> flatten (216)
        -> Linear1 (216->5) -> sigmoid -> Linear2 (5->n_classes) -> softmax.
    """
    def __init__(self,n_classes,lr):
        """
        Args:
            n_classes: number of output classes.
            lr: learning rate passed to every layer's backward step.
        """
        self.n_classes = n_classes
        self.lr = lr
        self.Conv1 = CNN((1,28,28),3,2)
        self.MaxPooling1 = MaxPooling(27,2)

        self.Conv2 = CNN((3,13,13),6,2)
        self.MaxPooling2 = MaxPooling(12,2)

        self.before_flatting_shape = None
        self.Linear1 = Linear(6*6**2,5)
        self.Linear2 = Linear(5,n_classes)
        self.Softmax = Softmax()

    def forward(self,x):
        """Run one image through the network and cache the activations.

        The caches filled here are what backward reads:
            conv_p   = [input, Conv1 output, Conv2 output]
            max_p    = [MaxPooling1 output, MaxPooling2 output]
            linear_p = [flattened features (1, 216), Linear1 output, sigmoid output]

        Args:
            x: image of shape (1, 28, 28).

        Returns:
            Class probabilities of shape (1, n_classes).
        """
        self.conv_p = [x]
        self.max_p = []
        x = self.Conv1.forward(x)
        self.conv_p.append(x)
        x = self.MaxPooling1.forward(x)
        self.max_p.append(x)
        x = self.Conv2.forward(x)
        self.conv_p.append(x)
        x = self.MaxPooling2.forward(x)
        self.max_p.append(x)

        self.before_flatting_shape = x.shape
        x = x.flatten()
        self.linear_p = [x[np.newaxis,:]]
        x = self.Linear1.forward(x)
        self.linear_p.append(x)
        x = self.Linear1.Sigmoid(x)
        self.linear_p.append(x)
        x = self.Linear2.forward(x)
        self.Softmax.b_layer = x
        x = self.Softmax.forward(x)
        return x

    def backward(self,x,y):
        """Do one gradient-descent step on a single (image, label) pair.

        Runs forward, then back-propagates the cross-entropy loss through
        every layer; each layer updates its own parameters with self.lr.

        Args:
            x: image of shape (1, 28, 28).
            y: integer class label.

        Returns:
            The cross-entropy loss of this sample before the update.
        """
        y_pred = self.forward(x)

        # Clamp the true-class probability so that neither log() nor the
        # division below blows up when it underflows to 0.
        p_true = max(float(np.take(y_pred,y)), 1e-12)
        LOSS = -np.log(p_true)

        # d(-log p_y)/dp is non-zero only at the true class.
        dL = np.zeros(self.n_classes)
        dL[y] =  -1/p_true
        dL_dout = self.Softmax.backward(dL)
        # linear_p[-1] is the sigmoid output, i.e. the input of Linear2.
        dL_linear2 = self.Linear2.backward(self.linear_p[-1], dL_dout ,self.lr)
        # linear_p[-2] is the Linear1 output (pre-activation of the sigmoid).
        dL_sigmoid = self.Linear1.DSigmoid(self.linear_p[-2])* dL_linear2
        dL_linear1 = self.Linear1.backward(self.linear_p[-3],dL_sigmoid,self.lr)
        dL_linear1 = dL_linear1.reshape(self.before_flatting_shape)

        dL_Maxpool2 = self.MaxPooling2.backward(self.max_p[-1],self.conv_p[-1],dL_linear1)
        # max_p[-2] is the MaxPooling1 output, i.e. the input of Conv2.
        dL_Conv2 = self.Conv2.backward(self.max_p[-2], dL_Maxpool2 , self.lr)

        dL_Maxpool1 = self.MaxPooling1.backward(self.max_p[-2],self.conv_p[-2],dL_Conv2)
        self.Conv1.backward(self.conv_p[0], dL_Maxpool1 , self.lr)

        return LOSS



**LOAD DATASET**

In [None]:
# Install the Kaggle command-line client (-q keeps pip's output quiet).
! pip install -q kaggle

In [None]:
# The target directory must exist before copying, otherwise cp fails with
# "Not a directory". The credentials file is also made owner-readable only.
! mkdir -p ~/.kaggle && cp kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

cp: cannot create regular file '/root/.kaggle/': Not a directory


In [None]:
# Download the dataset archive hojjatk/mnist-dataset from Kaggle.
!kaggle datasets download -d hojjatk/mnist-dataset

Dataset URL: https://www.kaggle.com/datasets/hojjatk/mnist-dataset
License(s): copyright-authors
Downloading mnist-dataset.zip to /content
 91% 20.0M/22.0M [00:00<00:00, 106MB/s] 
100% 22.0M/22.0M [00:00<00:00, 106MB/s]


In [None]:
# Extract the downloaded archive into the current working directory.
!unzip mnist-dataset.zip

Archive:  mnist-dataset.zip
  inflating: t10k-images-idx3-ubyte/t10k-images-idx3-ubyte  
  inflating: t10k-images.idx3-ubyte  
  inflating: t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte  
  inflating: t10k-labels.idx1-ubyte  
  inflating: train-images-idx3-ubyte/train-images-idx3-ubyte  
  inflating: train-images.idx3-ubyte  
  inflating: train-labels-idx1-ubyte/train-labels-idx1-ubyte  
  inflating: train-labels.idx1-ubyte  


In [None]:
import os

In [None]:
from mlxtend.data import loadlocal_mnist

In [None]:
# Locations of the extracted MNIST IDX files (images and labels) for the
# training and test splits.
training_images_filepath, training_labels_filepath = (
    '/content/train-images.idx3-ubyte',
    '/content/train-labels.idx1-ubyte',
)
test_images_filepath, test_labels_filepath = (
    '/content/t10k-images.idx3-ubyte',
    '/content/t10k-labels.idx1-ubyte',
)

In [None]:
# Read the image and label files of each split into (X, y) pairs.
# NOTE(review): presumably X comes back with one flattened image per row;
# the reshape in the next cell relies on that, so confirm against mlxtend.
X_train, y_train = loadlocal_mnist( training_images_filepath, training_labels_filepath)
X_test, y_test = loadlocal_mnist(test_images_filepath, test_labels_filepath)

In [None]:
# Give every image the (channels, height, width) = (1, 28, 28) layout the
# network expects. -1 lets NumPy infer the number of images instead of
# hard-coding the split sizes.
X_train = X_train.reshape(-1, 1, 28, 28).astype(np.float32)
X_test = X_test.reshape(-1, 1, 28, 28).astype(np.float32)

# Model Training

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
# Inspect the largest pixel value of the first training image to see the
# raw intensity scale before normalising.
np.max(X_train[0])

255.0

In [None]:
# Network with 10 output classes and a learning rate of 0.0005.
modelo1 = Net(10,0.0005)

In [None]:
# Train one sample at a time; every 1000 samples, report how many of 100
# randomly chosen test images are classified correctly.
epochs = 10
idn = 0
for i in range(epochs):
  print(f'===== EPOCH ===== {i}')
  for x,y in zip(X_train,y_train):
    idn += 1

    # Scale pixels from [0, 255] to [-0.5, 0.5] before the update step.
    modelo1.backward((x/255)-0.5,y)

    if idn%1000 == 0:
      # 100 distinct test indices, drawn without replacement.
      sample = np.random.choice(len(X_test), size=100, replace=False)
      c = 0
      for T in sample:
        probs = modelo1.forward((X_test[T]/255) -0.5)
        # The predicted class is the index of the largest probability.
        if y_test[T] == np.argmax(probs[0]):
          c += 1
      print(f'Acertados de 100: {c}')
  # Restart the sample counter for the next epoch.
  idn = 0

===== EPOCH ===== 0
Acertados de 100: 5
Acertados de 100: 9
Acertados de 100: 8
Acertados de 100: 7
Acertados de 100: 9
Acertados de 100: 12
Acertados de 100: 10
Acertados de 100: 8
Acertados de 100: 13
Acertados de 100: 11
Acertados de 100: 11
Acertados de 100: 12
Acertados de 100: 12
Acertados de 100: 11
Acertados de 100: 13
Acertados de 100: 17
Acertados de 100: 23
Acertados de 100: 25
Acertados de 100: 11
Acertados de 100: 14
Acertados de 100: 13
Acertados de 100: 16
Acertados de 100: 15
Acertados de 100: 11
Acertados de 100: 17
Acertados de 100: 9
Acertados de 100: 17
Acertados de 100: 16
Acertados de 100: 14
Acertados de 100: 14
Acertados de 100: 20
Acertados de 100: 19
Acertados de 100: 12
Acertados de 100: 21
Acertados de 100: 20
Acertados de 100: 24
Acertados de 100: 27
Acertados de 100: 23
Acertados de 100: 29
Acertados de 100: 37
Acertados de 100: 35
Acertados de 100: 43
Acertados de 100: 36
Acertados de 100: 36
Acertados de 100: 36
Acertados de 100: 42
Acertados de 100: 37


**Test each example by hand**




In [None]:
# Pick a random training image, show it, and print the model's prediction.
T = np.random.randint(0,len(X_train))
cv2_imshow(X_train[T][0])
# Apply the same [-0.5, 0.5] scaling used during training; feeding raw
# 0..255 pixels gives the network inputs it was never trained on.
x = modelo1.forward((X_train[T]/255) - 0.5)
print(np.max(x[0]))
print(x.shape)
prob = list(x[0])
print(y_train[T],prob)