In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import requests
import json

In [3]:

try:
    from tqdm import tqdm
except ImportError:
    tqdm = lambda x, total, unit: x  # If tqdm doesn't exist, replace it with a function that does nothing
    print('**** Could not import tqdm. Please install tqdm for download progressbars! (pip install tqdm) ****')

download_dict = {
    '1) Kuzushiji-MNIST (10 classes, 28x28, 70k examples)': {
        '1) MNIST data format (ubyte.gz)':
            ['http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-images-idx3-ubyte.gz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/train-labels-idx1-ubyte.gz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-images-idx3-ubyte.gz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/t10k-labels-idx1-ubyte.gz'],
        '2) NumPy data format (.npz)':
            ['http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-labels.npz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-imgs.npz',
             'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-labels.npz'],
    }
}

# Download a list of files
def download_list(url_list):
    for url in url_list:
        path = url.split('/')[-1]
        r = requests.get(url, stream=True)
        with open(path, 'wb') as f:
            total_length = int(r.headers.get('content-length'))
            print('Downloading {} - {:.1f} MB'.format(path, (total_length / 1024000)))

            for chunk in tqdm(r.iter_content(chunk_size=1024), total=int(total_length / 1024) + 1, unit="KB"):
                if chunk:
                    f.write(chunk)
    print('All dataset files downloaded!')

def traverse_dict(d):
    if isinstance(d, list):  # If we've hit a list of downloads, download that list
        download_list(d)
    else:
        selected = list(d.keys())[0]  # Select the first option by default
        traverse_dict(d[selected])     # Repeat with the next level

traverse_dict(download_dict['1) Kuzushiji-MNIST (10 classes, 28x28, 70k examples)']['2) NumPy data format (.npz)'])


Downloading kmnist-train-imgs.npz - 18.0 MB


100%|██████████| 17954/17954 [00:15<00:00, 1175.53KB/s]


Downloading kmnist-train-labels.npz - 0.0 MB


100%|██████████| 30/30 [00:00<00:00, 207.55KB/s]


Downloading kmnist-test-imgs.npz - 3.0 MB


100%|██████████| 3008/3008 [00:03<00:00, 925.68KB/s] 


Downloading kmnist-test-labels.npz - 0.0 MB


100%|██████████| 6/6 [00:00<00:00, 3811.27KB/s]

All dataset files downloaded!





In [4]:
X_train = np.load('kmnist-train-imgs.npz')['arr_0']
y_train = np.load('kmnist-train-labels.npz')['arr_0']

X_test = np.load('kmnist-test-imgs.npz')['arr_0']
y_test = np.load('kmnist-test-labels.npz')['arr_0']

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [5]:
X_train = X_train / 255
X_test = X_test / 255

permutation = np.random.permutation(len(X_train))
X_train = X_train[permutation]
y_train = y_train[permutation]

permutation = np.random.permutation(len(X_test))
X_test = X_test[permutation]
y_test = y_test[permutation]

In [6]:
# Range of pixel value --> [-0.5 , 0.5]
X_train -= 0.5
X_test -= 0.5

#Architecture:

1. **Input Layer**:
   - The input layer is responsible for passing the input data to the subsequent layers.


2. **Convolutional Layer**:
   - This layer performs convolution operations on the input data using learnable filters (kernels).

3. **ReLU Layer (Rectified Linear Unit)**:
   - The ReLU layer introduces non-linearity into the network by applying the ReLU activation function to the feature maps.

4. **Pooling Layer**:
   - The pooling layer reduces the spatial dimensions of the feature maps generated by the convolutional layer.

5. **Reshaping Layer**:
   - The reshaping layer reshapes the output of the preceding layers into a format suitable for feeding into fully connected layers.

6. **Fully Connected (Linear) Layers**:
   - These layers consist of neurons that are fully connected to all neurons in the previous layer.
   
7. **Softmax Layer**:
   - It computes the probabilities of each class given the input and ensures that the sum of these probabilities is 1.

8. **Loss Function (Cross Entropy)**:
   - The cross-entropy loss function is used to measure the difference between the predicted probability distribution and the actual distribution (one-hot encoded labels).

9. **Accuracy Calculation**:
    - The accuracy module calculates the accuracy of the model predictions by comparing the predicted class labels with the true class labels.

10. **Training Loop**:
    - The training loop runs for multiple epochs, where each epoch consists of iterations over batches of training data. In each iteration, forward pass, backward pass (backpropagation), and optimization (applying SGD) are performed to update the parameters of the network.
    - Learning rate adjustments based on performance thresholds are also implemented to improve convergence and accuracy.


In [7]:
class Neural_Network:

    def __init__(self, Network):
        self.Network = Network

    # forward pass
    def forward_pass(self, X):
        n = X
        for i in self.Network:
            n = i.forward_pass(n,saved_weights = None)
        return n

    #backward pass
    def backprop(self, Y):
        m = Y
        for i in (reversed(self.Network)):
            m = i.backprop(m)

    # applying sgd
    def applying_sgd(self):
        for i in self.Network:
            i.applying_sgd()

    # applying adam
    def applying_adam(self):
        for i in self.Network:
            i.applying_adam()

    # changing alpha
    def change_alpha(self):
        for i in self.Network:
            i.change_alpha()

    # saving weights
    def saving_params(self):
        for i,layer in enumerate(self.Network):
            layer.saving_params()

    # predicting after loading weights
    def predict(self,X):
        n = X
        for i in self.Network:
            n = i.forward_pass(n,saved_weights = 1)
        return n


# ADAM Optimizer

In [8]:
class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.m = None
        self.v = None
        self.t = 0

    def update(self, grads):
        if self.m is None:
            self.m = np.zeros_like(grads)
            self.v = np.zeros_like(grads)

        self.t += 1
        self.m = self.beta1 * self.m + (1 - self.beta1) * grads
        self.v = self.beta2 * self.v + (1 - self.beta2) * (grads ** 2)
        m_hat = self.m / (1 - self.beta1 ** self.t)
        v_hat = self.v / (1 - self.beta2 ** self.t)
        return self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon)

#ACCURACY

In [9]:
class accuracy:
    def __init__(self):
        pass

    def value(self, out, Y):
        self.out = np.argmax(out, axis=1)
        return np.mean(self.out == Y)


#FLATTEN



*   Forward prop --> ( n , a , b , c ) --> ( n , a * b * c )



*   Backward Prop --> ( n , a * b * c ) --> ( n , a , b , c )



In [10]:
class reshaping:

    def __init__(self):
        pass

    def forward_pass(self, data, saved_weights = None):
        self.data_shape = data.shape

        self.flatten = data.reshape(self.data_shape[0], self.data_shape[1]*self.data_shape[2]*self.data_shape[3])
        print(self.flatten.shape)
        return self.flatten

    def backprop(self, prev_data):
        return prev_data.reshape(self.data_shape[0], self.data_shape[1], self.data_shape[2], self.data_shape[3])

    def applying_sgd(self):
        pass

    def change_alpha(self):
        pass

    def applying_adam(self):
        pass

    def saving_params(self):
        pass



#CROSS ENTROPY

L = - sum ( y_i * log(p_i) )

In [11]:
class cross_entropy:

    def _init_(self):
        pass

    def loss(self, A, Y):

        epsilon = 1e-15  # Small value to prevent division by zero

        # Compute cross-entropy loss
        m = Y.shape[0]  # Number of examples
        ce_loss = -np.sum(np.log(A[np.arange(m), Y] + epsilon)) / m

        return ce_loss

# SoftMax

1. forward_pass(x) :  Using formula. returns softmax_probs
2. backward_prop(actual_y) : returns gradient = softmax_probs - one_hot_encoder(actual)
3. expansion(actual_y) : returns one hot vector of actual_y

[Softmax Backpropogation](https://towardsdatascience.com/derivative-of-the-softmax-function-and-the-categorical-cross-entropy-loss-ffceefc081d1)

[Stable Softmax](https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/)

In [12]:
class softmax:

    def __init__(self):
        pass

    def expansion(self, actual_pred):
        d = actual_pred.shape[0]
        one_hot_pred = np.zeros((d,self.n_classes))
        for i in range(0,d):
            one_hot_pred[i,actual_pred[i]] = 1
        return one_hot_pred

    def forward_pass(self, z, saved_weights = None):

        self.n_classes = z.shape[1]
        # vectorised form below
        shiftx = z - np.max(z, axis=1, keepdims=True)

        # Exponentiate the shifted values
        exps = np.exp(shiftx)

        # Calculate softmax probabilities
        self.softmax_probs = exps / np.sum(exps, axis=1, keepdims=True)

        return self.softmax_probs

        # self.a = np.zeros_like(z)
        # """Compute the softmax of vector x in a numerically stable way."""
        # for i,x in enumerate(z):
        #   shiftx = x - np.max(x)
        #   exps = np.exp(shiftx)
        #   self.a[i] = exps / np.sum(exps)
        # return self.a

    def backprop(self, Y):
        y = self.expansion(Y)
        self.grad = (self.softmax_probs - y)
        return self.grad


    def applying_sgd(self):
        pass

    def change_alpha(self):
        pass

    def applying_adam(self):
        pass

    def saving_params(self):
        pass





#RELU



*   forward --> If element of input_data <= 0 then 0 else same. Returns output
*   backward --> Take input of forward pass. Replace positive values with 1 and rest 0. Then do elementwise multiplication Gradient of successive layer.



In [13]:

class relu:
    def __init__(self):
        pass

    def forward_pass(self, z, saved_weights = None):
        self.z = z
        return np.maximum(0, z)

    def derivative(self, a):
        return np.where(a > 0, 1, 0)

    def backprop(self, grad_previous):
        return grad_previous * self.derivative(self.z)

    def applying_sgd(self):
        pass

    def change_alpha(self):
        pass

    def applying_adam(self):
        pass

    def saving_params(self):
        pass

#POOLING


*   forward pass --> ( n , a , b , c ) --> ( n , a , b/2 , c/2 )
Taking only max value from each pooling window.
*   Backward pass --> ( n , a , b/2 , c/2 ) --> ( n , a , b , c )
Passing gradient to only those elements where max was found on that pooling window.



In [14]:

class pooling:
    def __init__(self, pool_size=(2, 2), strides=None):
        self.pool_height, self.pool_width = pool_size
        if strides is None:
            self.strides = pool_size
        else:
            self.strides = strides

    def forward_pass(self, input_data, saved_weights = None):
        self.input_data = input_data
        self.input_data_shape = input_data.shape
        batch_size, input_channels, input_height, input_width = input_data.shape
        output_height = (input_height - self.pool_height) // self.strides[0] + 1
        output_width = (input_width - self.pool_width) // self.strides[1] + 1
        self.output = np.zeros((batch_size, input_channels, output_height, output_width))

        for i in range(output_height):
            for j in range(output_width):
                self.output[:,:, i, j] = np.max(input_data[:, :, i*self.strides[0]:i*self.strides[0]+self.pool_height,
                                            j*self.strides[1]:j*self.strides[1]+self.pool_width],axis = (2,3))

        return self.output

    def backprop(self, grad_previous):
        batch_size, input_channels, output_height, output_width = grad_previous.shape
        grad_back = np.zeros(self.input_data_shape)

        for i in range(self.output.shape[2]):
          for j in range(self.output.shape[3]):

            x_start = i*self.strides[0]
            x_end = x_start + self.pool_height
            y_start = j * self.strides[1]
            y_end = y_start + self.pool_width
            grad_back[:, :, x_start:x_end,y_start:y_end] = np.where(self.input_data[:, :, x_start:x_end,y_start:y_end] >= (self.output[:,:,i,j])[:,:,np.newaxis, np.newaxis],(grad_previous[:,:, i, j])[:,:,np.newaxis, np.newaxis],0)

        return grad_back

    def applying_sgd(self):
        pass

    def change_alpha(self):
        pass

    def applying_adam(self):
        pass

    def saving_params(self):
        pass





#Linear Layer
1. Does Input( n , a ) * Theta( a , 10 ) + Bias( 10 )
2. forward prop(x) --> (thetha)x + b
3. back_prop(grad_forward) --> grdient wrt x,theta,b. Returns grad_back



In [15]:
class Linear_Layer:

    def __init__(self, in_dim, out_dim, alpha = 0.01,index = 0,reg = None, reg_penelty = 0):
        self.Theta = np.random.randn(in_dim, out_dim)/(in_dim * out_dim)
        self.bias = np.zeros((out_dim,))
        self.optimizer_theta = Adam(lr = alpha)
        self.optimizer_bias = Adam(lr = alpha)
        self.alpha = alpha
        self.index = index
        self.reg = reg
        self.reg_penelty = reg_penelty


    def forward_pass(self, X, saved_weights = None):
        if saved_weights != None:
          saved_data = np.load(f'/content/drive/MyDrive/Colab Notebooks/Saved_Models/Linear_layer{self.index}.npz')
          self.Theta =  saved_data['arr1']
          self.bias = saved_data['arr2']

        self.X = X
        self.z = np.dot(X,self.Theta) + self.bias
        return self.z


    def backprop(self, grad_previous):
        t= self.X.shape[0]
        self.grad_theta = np.matmul((self.X.transpose()), grad_previous)
        self.grad_bias = (grad_previous.sum(axis=0))/t
        self.grad_back = np.matmul(grad_previous, self.Theta.transpose())

        # Add L1 and L2 and elastic regularization terms
        if self.reg == 'l1':
          self.grad_theta += self.reg_penelty * np.sign(self.Theta)
          self.grad_bias += self.reg_penelty * np.sign(self.bias)

        elif self.reg == 'l2':
          self.grad_theta += 2 * self.reg_penelty * self.Theta
          self.grad_bias += 2 * self.reg_penelty * self.bias

        elif self.reg == 'elastic':
          self.grad_theta += self.reg_penelty * (0.5 * np.sign(self.Theta) + 0.5 * self.Theta)
          self.grad_bias += self.reg_penelty * (0.5 * np.sign(self.bias) + 0.5 * self.bias)

        return self.grad_back

    def applying_sgd(self):
        self.Theta = self.Theta - (self.alpha*self.grad_theta)
        self.bias = self.bias - (self.alpha*self.grad_bias)

    def applying_adam(self):
        self.Theta -= self.optimizer_theta.update(self.grad_theta)
        self.bias -= self.optimizer_bias.update(self.grad_bias)

    def change_alpha(self):
        self.alpha = self.alpha/5

    def saving_params(self):
        np.savez(f'/content/drive/MyDrive/Colab Notebooks/Saved_Models/Linear_layer{self.index}.npz',arr1 = self.Theta, arr2 = self.bias)

#CONV LAYER

Convolution with kernels donw on each image.

( n , 28 , 28 ) --> ( n , n_filters , new_height , new_width )

In [16]:
class Convolutional_Layer:
    def __init__(self, filter_dim = 3, stride = 1, pad = 0, alpha=0.01, num_of_filters = 1,reg = None,reg_penelty = 0):
        self.filter_dim = filter_dim
        self.n_filters = num_of_filters
        self.stride = stride
        self.bias = np.random.randn(self.n_filters, 1) / self.n_filters
        self.filter = np.random.randn(self.n_filters, self.filter_dim, self.filter_dim)/(self.filter_dim ** 2)
        self.pad = pad
        self.alpha = alpha
        self.optimizer_bias = Adam(lr = alpha)
        self.optimizer_filter = Adam(lr = alpha)
        self.reg_penelty = reg_penelty
        self.reg = reg

    def convolving(self, dimen_x, dimen_y):
        z = np.zeros((self.X.shape[0],self.n_filters, dimen_x, dimen_y))
        for i in range(dimen_x):
            for ii in range(dimen_y):
                  temp = np.multiply(self.X[:, np.newaxis,i : i+self.filter_dim, ii : ii+self.filter_dim], self.filter[ np.newaxis,:, :, :])
                  z[:,:,i,ii] = np.sum(temp,axis=(2,3)) + self.bias[:,0]
        return z


    def forward_pass(self, X, saved_weights = None):
        if saved_weights != None:
          saved_data = np.load('/content/drive/MyDrive/Colab Notebooks/Saved_Models/conv2d.npz')
          self.filter = saved_data['arr1']
          self.bias = saved_data['arr2']

        self.X = np.pad(X , ((0, 0), (self.pad, self.pad), (self.pad, self.pad)),'constant', constant_values=0)
        (d, p, t) = self.X.shape
        dimen_x = int(((p - self.filter_dim)/self.stride) + 1)
        dimen_y = int(((t - self.filter_dim)/self.stride) + 1)
        self.z = np.zeros((d, self.n_filters, dimen_x, dimen_y))
        self.z = self.convolving(dimen_x, dimen_y)

        return self.z

    def backprop(self, grad_z):
        (d, f, p, t) = grad_z.shape

        self.grads = np.zeros((d, p, t))
        # for i in range(d):
        #   for k in range(self.n_filters):
        #     filter_1 = np.flip((np.flip(self.filter[k], axis = 0)), axis = 1)
        #     self.grads[i] += self.convolving(np.pad(grad_z[i,k], ((1,1), (1,1)), 'constant', constant_values = 0), filter_1, p, t)

        # self.grads /= self.n_filters
        # self.grads = np.pad(self.grads, ((0,0),(1,1),(1,1)), 'constant', constant_values = 0)

        self.grad_filter = np.zeros((self.n_filters, self.filter_dim, self.filter_dim))
        grad_z = np.pad(grad_z, ((0,0),(0,0),(self.pad,self.pad),(self.pad,self.pad)), 'constant', constant_values = 0)

        for i in range(self.filter_dim):
              for ii in range(self.filter_dim):
                  self.grad_filter[:, i, ii] = np.sum(np.multiply(grad_z[:,:,:,:], self.X[:,  np.newaxis, i:p+i, ii:t+ii]),axis =(0,2,3) )
        self.grad_filter = self.grad_filter /(grad_z.shape[0] * grad_z.shape[2]*grad_z.shape[3])

        self.grad_bias = np.zeros_like(self.bias)

        for k in range(self.n_filters):
          self.grad_bias[k] = (grad_z[:,k].sum()) /(grad_z.shape[0] * grad_z.shape[2]*grad_z.shape[3])

        # Add L1 and L2 and elastic regularization terms
        if self.reg == 'l1':
          self.grad_filter += self.reg_penelty * np.sign(self.filter)
          self.grad_bias += self.reg_penelty * np.sign(self.bias)

        elif self.reg == 'l2':
          self.grad_filter += 2 * self.reg_penelty * self.filter
          self.grad_bias += 2 * self.reg_penelty * self.bias

        elif self.reg == 'elastic':
          self.grad_filter += self.reg_penelty * (0.5 * np.sign(self.filter) + 0.5 * self.filter)
          self.grad_bias += self.reg_penelty * (0.5 * np.sign(self.bias) + 0.5 * self.bias)

        return self.grads

    def applying_sgd(self):
        self.filter = self.filter - (self.alpha*self.grad_filter)
        self.bias = self.bias - (self.alpha*self.grad_bias)

    def applying_adam(self):
        self.filter -= self.optimizer_bias.update(self.grad_filter)
        self.bias -= self.optimizer_filter.update(self.grad_bias)

    def change_alpha(self):
        self.alpha = self.alpha/5

    def saving_params(self):
        np.savez('/content/drive/MyDrive/Colab Notebooks/Saved_Models/conv2d.npz',arr1 = self.filter,arr2 = self.bias )

#TRAINING Using Regularisation (Not Tuned)

---



In [17]:
X_testing = X_train
Y_testing = y_train

al = 0.005

complete_NN = Neural_Network([
                                Convolutional_Layer(alpha = al,num_of_filters = 16,pad = 0, reg = 'l1',reg_penelty = 0.5),
                                relu(),
                                pooling(),
                                reshaping(),
                                Linear_Layer(2704, 10, alpha = al,index = 0, reg = 'l2',reg_penelty = 0.5),
                                softmax()
                                ])
CE = cross_entropy()

acc = accuracy()
epochs = 3
batch_size = 6000
done = 0
for i in range(epochs):
    for batch in range(0, X_testing.shape[0], batch_size):
        out = complete_NN.forward_pass(X_testing[batch:batch + batch_size,:,:])
        print("epoch:{} \t batch: {} \t ".format(i+1, 1 + (batch//batch_size)), end="\t")  #, CE.loss(out, Y_testing[batch:batch + batch_size])
        accuracy_val = acc.value(out, Y_testing[batch:batch + batch_size])*100
        print("accuracy: {}".format(accuracy_val))

        # if ((accuracy_val>=80) and (done==0)):
        #     complete_NN.change_alpha()
        #     done += 1
        # if ((accuracy_val>=85) and (done==1)):
        #     complete_NN.change_alpha()
        #     done += 1

        # if ((accuracy_val>=90) and (done==2)):
        #     complete_NN.change_alpha()
        #     done += 1

        # if (accuracy_val>=95):
        #     complete_NN.change_alpha()
        #     done += 1

        complete_NN.backprop(Y_testing[batch:batch + batch_size])
        # complete_NN.applying_sgd()
        complete_NN.applying_adam()



(6000, 2704)
epoch:1 	 batch: 1 	 	accuracy: 11.766666666666667
(6000, 2704)
epoch:1 	 batch: 2 	 	accuracy: 39.983333333333334
(6000, 2704)
epoch:1 	 batch: 3 	 	accuracy: 69.33333333333334
(6000, 2704)
epoch:1 	 batch: 4 	 	accuracy: 64.98333333333333
(6000, 2704)
epoch:1 	 batch: 5 	 	accuracy: 66.45
(6000, 2704)
epoch:1 	 batch: 6 	 	accuracy: 68.7
(6000, 2704)
epoch:1 	 batch: 7 	 	accuracy: 69.76666666666667
(6000, 2704)
epoch:1 	 batch: 8 	 	accuracy: 72.31666666666666
(6000, 2704)
epoch:1 	 batch: 9 	 	accuracy: 72.56666666666666
(6000, 2704)
epoch:1 	 batch: 10 	 	accuracy: 71.93333333333334
(6000, 2704)
epoch:2 	 batch: 1 	 	accuracy: 72.76666666666667
(6000, 2704)
epoch:2 	 batch: 2 	 	accuracy: 72.06666666666666
(6000, 2704)
epoch:2 	 batch: 3 	 	accuracy: 72.25
(6000, 2704)
epoch:2 	 batch: 4 	 	accuracy: 71.06666666666666
(6000, 2704)
epoch:2 	 batch: 5 	 	accuracy: 71.36666666666667
(6000, 2704)
epoch:2 	 batch: 6 	 	accuracy: 71.5
(6000, 2704)
epoch:2 	 batch: 7 	 	accu

#Testing Regularisation

In [18]:
out_5 = complete_NN.forward_pass(X_test)
print("The accuracy on test set is {}".format(acc.value(out_5, y_test)*100))

(10000, 2704)
The accuracy on test set is 53.900000000000006


#Training Without Regularisation

In [19]:
X_testing = X_train
Y_testing = y_train

al = 0.005

complete_NN = Neural_Network([
                                Convolutional_Layer(alpha = al,num_of_filters = 16,pad = 0),
                                relu(),
                                pooling(),
                                reshaping(),
                                Linear_Layer(2704, 10, alpha = al,index = 0),
                                softmax()
                                ])
CE = cross_entropy()

acc = accuracy()
epochs = 5
batch_size = 6000
done = 0
for i in range(epochs):
    for batch in range(0, X_testing.shape[0], batch_size):
        out = complete_NN.forward_pass(X_testing[batch:batch + batch_size,:,:])
        print("epoch:{} \t batch: {} \t ".format(i+1, 1 + (batch//batch_size)), end="\t")  #, CE.loss(out, Y_testing[batch:batch + batch_size])
        accuracy_val = acc.value(out, Y_testing[batch:batch + batch_size])*100
        print("accuracy: {}".format(accuracy_val))

        complete_NN.backprop(Y_testing[batch:batch + batch_size])
        # complete_NN.applying_sgd()
        complete_NN.applying_adam()



(6000, 2704)
epoch:1 	 batch: 1 	 	accuracy: 10.533333333333333
(6000, 2704)
epoch:1 	 batch: 2 	 	accuracy: 31.966666666666665
(6000, 2704)
epoch:1 	 batch: 3 	 	accuracy: 56.65
(6000, 2704)
epoch:1 	 batch: 4 	 	accuracy: 57.4
(6000, 2704)
epoch:1 	 batch: 5 	 	accuracy: 67.46666666666667
(6000, 2704)
epoch:1 	 batch: 6 	 	accuracy: 69.58333333333333
(6000, 2704)
epoch:1 	 batch: 7 	 	accuracy: 70.85000000000001
(6000, 2704)
epoch:1 	 batch: 8 	 	accuracy: 71.76666666666667
(6000, 2704)
epoch:1 	 batch: 9 	 	accuracy: 71.83333333333334
(6000, 2704)
epoch:1 	 batch: 10 	 	accuracy: 71.1
(6000, 2704)
epoch:2 	 batch: 1 	 	accuracy: 73.18333333333334
(6000, 2704)
epoch:2 	 batch: 2 	 	accuracy: 73.98333333333333
(6000, 2704)
epoch:2 	 batch: 3 	 	accuracy: 75.23333333333333
(6000, 2704)
epoch:2 	 batch: 4 	 	accuracy: 75.1
(6000, 2704)
epoch:2 	 batch: 5 	 	accuracy: 75.6
(6000, 2704)
epoch:2 	 batch: 6 	 	accuracy: 76.6
(6000, 2704)
epoch:2 	 batch: 7 	 	accuracy: 77.71666666666667
(60

#Testing Without Regularisation

In [20]:
out_3 = complete_NN.forward_pass(X_test)
print("The accuracy on test set is {}".format(acc.value(out_3, y_test)*100))

(10000, 2704)
The accuracy on test set is 75.66000000000001


#SAVING WEIGHTS

In [None]:
# complete_NN.saving_params()



---

