In [1]:
# CNN from scratch

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.datasets import mnist

In [3]:
(X_train,y_train),(X_test,y_test) = mnist.load_data()

In [4]:
X_train.shape,y_train.shape

((60000, 28, 28), (60000,))

In [5]:
X_train = X_train[:200]

In [6]:
y_train = y_train[:200]

In [7]:
# Move the sample axis to the end: (N, 28, 28) -> (28, 28, N).
# A bare `.T` reverses *all* axes, which would additionally transpose every
# image (rows <-> columns); moveaxis keeps each image's orientation intact.
X_train = np.moveaxis(X_train, 0, -1)
# Labels as a row vector of shape (1, N); N is inferred from the data rather
# than hard-coded to 200.
y_train = y_train.reshape(1, -1)

In [8]:
y_train.shape

(1, 200)

In [9]:
# Checking all unique values
pd.Series(y_train[0,:]).value_counts()

1    26
9    23
7    21
4    21
3    21
0    21
2    20
6    19
8    15
5    13
dtype: int64

In [10]:
# Turn the task into binary classification: digits 5-9 -> 1, digits 0-4 -> 0.
# The comparison is evaluated for the whole row and written back in place.
y_train[0, :] = y_train[0, :] > 4

pd.Series(y_train[0]).value_counts()

0    109
1     91
dtype: int64

In [11]:
# Initializing filters: 3 random 5x5 kernels drawn from U[0, 1).
# Seed NumPy's global RNG first so that "Restart & Run All" reproduces the
# same filters (and every later np.random draw) on each run.
# NOTE: the name `filter` shadows Python's built-in; it is kept because later
# cells refer to it.
np.random.seed(42)
filter = np.random.uniform(size=(3,5,5))


In [12]:
filter = filter.T

In [13]:
X_train.shape,y_train.shape,filter.shape

((28, 28, 200), (1, 200), (5, 5, 3))

In [14]:

def convolution(filter,X):
  """Slide one 2-D kernel over every image of a stack (stride 1, no zero-padding).

  The kernel is multiplied element-wise with each patch as-is (no kernel flip)
  and the products are summed.

  Parameters
  ----------
  filter : 2-D array of shape (fh, fw)
      Kernel applied to each image patch.
  X : 3-D array of shape (H, W, N)
      Stack of N images; the last axis indexes the image.

  Returns
  -------
  numpy.ndarray of shape (N, H - fh + 1, W - fw + 1)
      Entry [k, i, j] equals np.sum(X[i:i+fh, j:j+fw, k] * filter).
  """
  kernel_rows, kernel_cols = filter.shape
  out_rows = X.shape[0] - kernel_rows + 1
  out_cols = X.shape[1] - kernel_cols + 1

  feature_maps = [
    [
      [
        np.sum(X[row:row + kernel_rows, col:col + kernel_cols, img] * filter)
        for col in range(out_cols)
      ]
      for row in range(out_rows)
    ]
    for img in range(X.shape[2])
  ]
  return np.array(feature_maps)

In [15]:
f = np.array([[1,0,1],[0,1,0],[1,0,1]])

In [16]:
res=np.array(convolution(f,X_train))

In [17]:
res = res.T

In [18]:
res.shape

(26, 26, 200)

In [19]:
def convolution3D(filter,X_train):
  """Apply every 2-D kernel of a filter bank to every image.

  Parameters
  ----------
  filter : 3-D array of shape (fh, fw, F)
      Bank of F kernels stacked along the last axis.
  X_train : 3-D array of shape (H, W, N)
      Stack of N images, passed unchanged to `convolution`.

  Returns
  -------
  numpy.ndarray of shape (H - fh + 1, W - fw + 1, F, N)
      The response of kernel f on image n is found at [:, :, f, n].
  """
  convRes = []
  for i in range(filter.shape[2]):
    # Slice with the loop index (not a fixed 0) so that each output channel
    # comes from its own kernel instead of repeating the first one.
    convRes.append(convolution(filter[:,:,i],X_train))
  # The stacked result is (F, N, H', W'); rotating the two leading axes to the
  # back yields (H', W', F, N).
  convRes=np.array(convRes)
  convRes = np.moveaxis(np.moveaxis(convRes,0,-1),0,-1)
  return convRes

In [20]:
convRes = convolution3D(filter,X_train)

In [21]:
convRes.shape

(24, 24, 3, 200)

In [22]:
def sigmoid(x): #activation function
  """Logistic function 1 / (1 + e^(-x)), applied element-wise to scalars or arrays."""
  decay = np.exp(-x)
  return 1/(1+decay)

def sigmoidDerivative(x):
  """Derivative of `sigmoid` at x, i.e. s * (1 - s) with s = sigmoid(x)."""
  s = sigmoid(x)
  return s*(1-s)

In [23]:
convRes = convolution3D(filter,X_train) 

In [24]:
convRes.shape

(24, 24, 3, 200)

In [25]:
# Flatten every (H', W') feature map into a vector, grouped per image:
# finalConvRes[n][f] is the raveled response of filter f on image n.
n_filters, n_images = convRes.shape[2], convRes.shape[3]
finalConvRes = [
  [convRes[:, :, f_idx, img_idx].ravel() for f_idx in range(n_filters)]
  for img_idx in range(n_images)
]
  

In [26]:
finalConvRes = np.array(finalConvRes)

In [27]:
sigmoidConvRes = sigmoid(finalConvRes)

In [28]:
sigmoidConvRes.shape

(200, 3, 576)

In [29]:
finalSigmoidConvRes = sigmoidConvRes.reshape((sigmoidConvRes.shape[0],
                                              sigmoidConvRes.shape[1]*sigmoidConvRes.shape[2])).T

In [39]:
A = finalSigmoidConvRes

In [30]:
finalSigmoidConvRes.shape

(1728, 200)

In [31]:
# Feed finalSigmoidConvRes into a fully connected network.
# The network has a single layer (inputs -> one sigmoid output unit), i.e. it is
# structured like a logistic-regression model.

# Add a bias input: prepend a row of ones (one entry per sample/column), then
# transpose so that rows are samples and column 0 is the constant bias feature.
# NOTE: this cell overwrites finalSigmoidConvRes with a different layout, so it
# is not safe to re-run on its own; re-run from the cell that creates it.
bias = np.ones((1,finalSigmoidConvRes.shape[1]))
finalSigmoidConvRes = np.append(bias,finalSigmoidConvRes,axis=0).T

In [32]:
finalSigmoidConvRes.shape

(200, 1729)

In [33]:
# Initializing weight matrix

inputLayer,outputLayer = finalSigmoidConvRes.shape[1],1

w = np.random.uniform(size=(inputLayer,outputLayer))

In [34]:
w.shape

(1729, 1)

In [36]:
#output layer for all datasets
Z2 = np.dot(finalSigmoidConvRes,w)
output = sigmoid(Z2)

In [37]:
def forwardProgation():
  """Forward pass of the dense layer: sigmoid of (inputs x weights).

  Reads the notebook globals `finalSigmoidConvRes` (input matrix) and `w`
  (weight column) at call time and returns the resulting activations.
  """
  pre_activation = np.dot(finalSigmoidConvRes,w)
  return sigmoid(pre_activation)

In [42]:
# Summary of the forward-pass quantities used by backpropagation

# Z1: raw convolution responses, one flattened map per (image, filter).
Z1 = finalConvRes
# A1: inputs of the dense layer. NOTE: at this point finalSigmoidConvRes is the
# bias-augmented, transposed matrix, so A1 has one more column than Z1 has
# values per image and does not line up element-wise with Z1.
A1 = finalSigmoidConvRes
# Z2: pre-activation of the output unit; output: its sigmoid activation.
Z2 = np.dot(finalSigmoidConvRes,w)
output = sigmoid(Z2)

FORWARD PROPAGATION IS DONE, NOW WE MOVE TO BACKPROPAGATION

PERFORMING BACKPROPAGATION TO ADJUST FILTER WEIGHTS

In [45]:
y_train.shape,output.shape

((1, 200), (200, 1))

In [46]:
# Error
error = np.square(y_train.T-output)/2



In [47]:
error.shape

(200, 1)

In [50]:
# Error gradient w.r.t output
error_wrt_output = -(y_train.T-output)


In [51]:
error_wrt_output.shape

(200, 1)

In [52]:
Z2.shape

(200, 1)

In [53]:
# output gradient w.r.t Z2
output_wrt_z2 = output*(1-output)


In [56]:
output_wrt_z2.shape

(200, 1)

In [58]:

# Z2 gradient w.r.t A1
z2_wrt_a1 = w


In [59]:
z2_wrt_a1.shape

(1729, 1)

In [61]:
Z1.shape,A1.shape

((200, 3, 576), (200, 1729))

In [62]:
a1_wrt_z1 = A1*(1-A1)

In [63]:
a1_wrt_z1.shape

(200, 1729)

In [64]:
# NOTE(review): X_train is the whole image stack, not yet dZ1/df. In
# `convolution` each output value is the sum of one image patch times the
# filter, so the derivative w.r.t. a filter entry is built from the matching
# patch pixels rather than from the full image -- TODO derive the per-patch form.
z1_wrt_f = X_train

In [65]:
z1_wrt_f.shape

(28, 28, 200)

In [66]:
f.shape

(3, 3)

In [69]:
error_wrt_output.shape,output_wrt_z2.shape,z2_wrt_a1.shape,a1_wrt_z1.shape,z1_wrt_f.shape

((200, 1), (200, 1), (1729, 1), (200, 1729), (28, 28, 200))

There is some confusion in the backpropagation step, so it is not continued until that is cleared up.

In [None]:
# Gradient descent Algorithm
# f = f - (learning_rate)*(dE/df)