<a href="https://colab.research.google.com/github/ShivamCholin/CS6910_Assignment_1/blob/main/MBGD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random          
import numpy as np      
from time import time    

In [None]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied element-wise.

    Parameters
    ----------
    z : scalar or numpy array of pre-activations.

    Returns
    -------
    Values in ``[0, 1]`` with the same shape as ``z``.
    """
    # logaddexp(0, -z) == log(1 + exp(-z)) is evaluated without ever forming
    # exp(-z) directly, so very negative z no longer triggers an overflow
    # RuntimeWarning; the result simply saturates at 0.
    return np.exp(-np.logaddexp(0.0, -z))
def dsigmoid(z):
    """Derivative of the logistic sigmoid: ``sigmoid(z) * (1 - sigmoid(z))``.

    Parameters
    ----------
    z : scalar or numpy array of pre-activations.

    Returns
    -------
    The element-wise derivative, same shape as ``z``.
    """
    # Evaluate the sigmoid once and reuse it; this runs for every layer of
    # every sample during backpropagation.
    s = sigmoid(z)
    return s * (1 - s)

In [None]:
def relu(z):
    """Rectified linear unit: element-wise maximum of ``z`` and zero."""
    floor = 0
    return np.maximum(z, floor)
def drelu(z):
    """Derivative of ReLU: 0 where ``z < 0`` and 1 where ``z >= 0``."""
    # The second argument of np.heaviside is the value returned at z == 0.
    value_at_zero = 1
    return np.heaviside(z, value_at_zero)

In [None]:
def tanh(z):
    """Hyperbolic tangent, applied element-wise.

    Delegates to ``np.tanh`` so the result saturates at -1 / +1 for large
    ``|z|``. Building it from ``np.exp(z)`` and ``np.exp(-z)`` overflows to
    ``inf / inf`` (NaN) once ``|z|`` exceeds roughly 710.

    Parameters
    ----------
    z : scalar or numpy array of pre-activations.

    Returns
    -------
    Values in ``[-1, 1]`` with the same shape as ``z``.
    """
    return np.tanh(z)
def dtanh(z):
    """Derivative of the hyperbolic tangent: ``1 - tanh(z) ** 2``.

    Uses ``np.tanh``, which saturates cleanly at -1 / +1, so the derivative is
    exactly 0 for large ``|z|``; an exp-based tanh would yield NaN there.

    Parameters
    ----------
    z : scalar or numpy array of pre-activations.

    Returns
    -------
    The element-wise derivative, same shape as ``z``.
    """
    t = np.tanh(z)
    return 1 - t ** 2

In [None]:
def mbgdlearn(nn, tr_data, epochs, batch_size, learning_rate):
    """Train ``nn`` with mini-batch gradient descent, printing progress per epoch.

    Each epoch shuffles the samples, feeds them to ``mbgd`` in consecutive
    slices of ``batch_size`` (the last slice may be shorter), then prints the
    elapsed time and the training accuracy computed by ``evaluate2``.

    Parameters
    ----------
    nn : network object passed through to ``mbgd`` and ``evaluate2``.
    tr_data : sequence of ``(x, y)`` training pairs. It is copied before
        shuffling, so the caller's ordering is left untouched.
    epochs : number of passes over the training data.
    batch_size : number of samples per mini-batch; must be positive.
    learning_rate : step size forwarded to ``mbgd``.

    Raises
    ------
    ValueError
        If ``tr_data`` is empty (the accuracy would divide by zero) or if
        ``batch_size`` is not positive (no batch could be formed).
    """
    n = len(tr_data)
    if n == 0:
        raise ValueError("tr_data must contain at least one sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Shuffle a private copy so training has no side effect on the caller's list.
    samples = list(tr_data)
    for j in range(epochs):
        time_start = time()
        random.shuffle(samples)
        for k in range(0, n, batch_size):
            mbgd(nn, samples[k: k + batch_size], learning_rate)
        time_end = time()
        # Accuracy is measured on the training set itself, after the epoch's updates.
        accuracy = 100.0 * evaluate2(nn, samples) / n
        print('Epoch {0}:time taken {1} seconds, accuracy {2}%'.format(f'{j + 1:2}', time_end - time_start, accuracy))


In [None]:
def mbgd(nn, batch, eta):
    """Apply one momentum update to ``nn`` using the samples in ``batch``.

    The per-sample gradients returned by ``backward`` are summed over the
    batch (they are not averaged). The velocities are then refreshed as
    ``v = v * nn.gamma + eta * gradient_sum`` and subtracted from the
    parameters. ``nn.vw``, ``nn.vb``, ``nn.weights`` and ``nn.biases`` are
    each rebound to a freshly built list.

    Parameters
    ----------
    nn : network exposing ``weights``, ``biases``, ``vw``, ``vb`` and ``gamma``.
    batch : iterable of ``(x, y)`` training pairs.
    eta : learning rate.
    """
    grad_b_sum = [np.zeros(b.shape) for b in nn.biases]
    grad_w_sum = [np.zeros(w.shape) for w in nn.weights]

    # Accumulate the gradient of every sample in the batch.
    for x, y in batch:
        grad_b, grad_w = backward(nn, x, y)
        grad_b_sum = [total + g for total, g in zip(grad_b_sum, grad_b)]
        grad_w_sum = [total + g for total, g in zip(grad_w_sum, grad_w)]

    # Momentum step: decay the previous velocity, add the scaled gradient sum.
    momentum = nn.gamma
    nn.vw = [velocity * momentum + eta * g for velocity, g in zip(nn.vw, grad_w_sum)]
    nn.vb = [velocity * momentum + eta * g for velocity, g in zip(nn.vb, grad_b_sum)]

    # Move the parameters against the velocity.
    nn.weights = [w - velocity for w, velocity in zip(nn.weights, nn.vw)]
    nn.biases = [b - velocity for b, velocity in zip(nn.biases, nn.vb)]

In [None]:
def softmax(x, axis=None):
    """Numerically stable softmax.

    The maximum is subtracted before exponentiating so large inputs do not
    overflow; this does not change the result.

    Parameters
    ----------
    x : numpy array (or scalar) of scores.
    axis : axis along which to normalise. With the default ``None`` the
        exponentials are normalised over *all* elements of ``x``, which is
        what a single column vector of scores needs. Pass an axis to
        normalise each slice of a batch independently.

    Returns
    -------
    Array of the same shape as ``x`` whose entries sum to 1 over the
    normalised axis (or over the whole array when ``axis`` is ``None``).
    """
    if axis is None:
        e_x = np.exp(x - np.max(x))
        return e_x / np.sum(e_x)

    x = np.asarray(x)
    # keepdims lets the per-slice max and sum broadcast back against x.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

In [None]:
def dcost(act, y):
    """Return ``softmax(act) - y``, the error signal used at the output layer.

    Parameters
    ----------
    act : output-layer activations; they are passed through ``softmax`` here.
    y : target array, subtracted element-wise from the softmax output.
    """
    probabilities = softmax(act)
    return probabilities - y

In [None]:
class Network:
    """Container for a feed-forward network's parameters and optimiser state."""

    # Class-level fallbacks; every instance overrides them in __init__.
    num_layers=0
    biases=[]
    weights=[]

    def __init__(self,nl,x,y,act1,act2):
        """Store the parameters and zero-initialise the optimiser buffers.

        Parameters
        ----------
        nl : number of layers (stored as ``num_layers``).
        x : list of bias arrays (stored as ``biases``).
        y : list of weight arrays (stored as ``weights``).
        act1 : activation function (stored as ``act``).
        act2 : derivative of the activation function (stored as ``dact``).
        """
        self.num_layers, self.biases, self.weights = nl, x, y
        self.act, self.dact = act1, act2

        def zeros_matching(arrays):
            # One all-zero array per parameter array, with the same shape.
            return [np.zeros(a.shape) for a in arrays]

        # vw / vb are the velocity buffers that mbgd scales by gamma.
        self.vw, self.vb = zeros_matching(self.weights), zeros_matching(self.biases)
        # mw / mb and the beta / epsilon settings below are not read by the
        # code in this part of the notebook; presumably they serve an
        # Adam-style optimiser defined elsewhere -- TODO confirm.
        self.mw, self.mb = zeros_matching(self.weights), zeros_matching(self.biases)

        self.gamma = 0.9
        self.epsilon = 1e-8

        self.beta1w = self.beta1b = 0.9
        self.beta2w = self.beta2b = 0.999
        self.beta1_expw = self.beta1_expb = 1.0
        self.beta2_expw = self.beta2_expb = 1.0

def init_network(layers,actfunc):
    """Build a ``Network`` with standard-normal random weights and biases.

    Parameters
    ----------
    layers : sequence of layer sizes, input layer first.
    actfunc : ``"tanh"`` or ``"sigmoid"`` select those activations; any other
        value falls back to ReLU.

    Returns
    -------
    A ``Network`` whose ``num_layers`` is ``len(layers)``.
    """
    if actfunc == "tanh":
        activation, derivative = tanh, dtanh
    elif actfunc == "sigmoid":
        activation, derivative = sigmoid, dsigmoid
    else:
        activation, derivative = relu, drelu

    fan_in, fan_out = layers[:-1], layers[1:]
    # Biases are drawn before weights, matching the order of the random stream.
    biases = [np.random.randn(rows, 1) for rows in fan_out]
    weights = [np.random.randn(rows, cols) for cols, rows in zip(fan_in, fan_out)]
    return Network(len(layers), biases, weights, activation, derivative)

In [None]:
def forward(nn, a):
    """Propagate input ``a`` through every layer of ``nn`` and return the output.

    Each layer computes ``nn.act(w . signal + b)``; the same activation is
    applied at every layer, including the last one.
    """
    signal = a
    for layer_w, layer_b in zip(nn.weights, nn.biases):
        pre_activation = np.dot(layer_w, signal) + layer_b
        signal = nn.act(pre_activation)
    return signal

In [None]:
def evaluate(nn, te_data):
    """Count the samples in ``te_data`` that ``nn`` classifies correctly.

    Each element of ``te_data`` is an ``(x, y)`` pair where ``y`` is compared
    directly with the index of the largest network output.
    """
    hits = 0
    for sample, label in te_data:
        predicted = np.argmax(forward(nn, sample))
        hits += int(predicted == label)
    return hits

In [None]:
def evaluate2(nn, te_data):
    """Count correct predictions when the targets are score/one-hot vectors.

    Each element of ``te_data`` is an ``(x, y)`` pair; a prediction counts as
    correct when the index of the largest network output equals
    ``np.argmax(y)``.
    """
    hits = 0
    for sample, target in te_data:
        predicted = np.argmax(forward(nn, sample))
        hits += int(predicted == np.argmax(target))
    return hits

In [None]:
def backward(nn, x, y):
    """Backpropagate one sample and return its gradients.

    Parameters
    ----------
    nn : network exposing ``biases``, ``weights``, ``act``, ``dact`` and
        ``num_layers``.
    x : input for the sample (assumed to be a 2-D column vector, since the
        outer products below rely on ``.transpose()`` -- TODO confirm).
    y : target passed to ``dcost``.

    Returns
    -------
    ``(grad_b, grad_w)``: two lists with one entry per layer, ordered like
    ``nn.biases`` and ``nn.weights``.
    """
    grad_b = [np.zeros(b.shape) for b in nn.biases]
    grad_w = [np.zeros(w.shape) for w in nn.weights]

    # Forward pass, remembering every pre-activation and every layer output.
    layer_outputs = [x]
    pre_activations = []
    for b, w in zip(nn.biases, nn.weights):
        pre_activations.append(np.dot(w, layer_outputs[-1]) + b)
        layer_outputs.append(nn.act(pre_activations[-1]))

    # Output layer: cost derivative times the activation slope.
    delta = dcost(layer_outputs[-1], y) * nn.dact(pre_activations[-1])
    grad_b[-1] = delta
    grad_w[-1] = np.dot(delta, layer_outputs[-2].transpose())

    # Hidden layers, walking backwards; depth counts from the end, so
    # depth == 2 is the second-to-last layer.
    for depth in range(2, nn.num_layers):
        slope = nn.dact(pre_activations[-depth])
        delta = np.dot(nn.weights[-depth + 1].transpose(), delta) * slope
        grad_b[-depth] = delta
        grad_w[-depth] = np.dot(delta, layer_outputs[-depth - 1].transpose())
    return (grad_b, grad_w)

In [None]:
import keras
def change_y(j, num_classes=10):
    """One-hot encode class index ``j`` as a ``(num_classes, 1)`` column vector.

    Parameters
    ----------
    j : integer class index.
    num_classes : length of the encoded vector; defaults to 10.

    Returns
    -------
    A float array of zeros with a single 1.0 at row ``j``.

    Raises
    ------
    IndexError
        If ``j`` is outside the valid index range for ``num_classes`` rows.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
# Fetch Fashion-MNIST through Keras; load_data() returns a (train, test) pair,
# each an (images, labels) tuple.
fashion_mnist = keras.datasets.fashion_mnist
tr_data, te_data = fashion_mnist.load_data()

# Flatten every image into a (784, 1) column and divide the pixels by 255.
training_x = [np.reshape(image, (784, 1))/255 for image in tr_data[0]]
test_inputs = [np.reshape(image, (784, 1))/255 for image in te_data[0]]

# Training targets are one-hot columns; test labels stay as raw class indices
# (this is why evaluate2 is used on training data and evaluate on test data).
training_y = [change_y(label) for label in tr_data[1]]

# Materialise the (input, target) pairs as lists so they can be shuffled,
# sliced and measured with len().
tr_data = list(zip(training_x, training_y))
te_data = list(zip(test_inputs, te_data[1]))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
# Seed both random generators so a fresh-kernel run is repeatable:
# init_network draws the initial weights from np.random, and mbgdlearn
# shuffles the training samples with the stdlib random module.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# 784 inputs -> 30 hidden units -> 10 output classes, sigmoid activations.
nn = init_network([784, 30, 10],"sigmoid")
epochs = 20
batch_size =10
learning_rate = 0.001
print('start')
mbgdlearn(nn, tr_data, epochs, batch_size, learning_rate)
# Final accuracy on the held-out test set (labels are raw class indices).
print('accuracy {0}%'.format(100.0 * evaluate(nn, te_data) / len(te_data)))

start
Epoch  1:time taken 11.872398138046265 seconds, accuracy 51.945%
Epoch  2:time taken 11.52560305595398 seconds, accuracy 57.64333333333333%
Epoch  3:time taken 11.651083946228027 seconds, accuracy 60.255%
Epoch  4:time taken 11.572169542312622 seconds, accuracy 62.22%
Epoch  5:time taken 11.552661895751953 seconds, accuracy 72.02166666666666%
Epoch  6:time taken 11.688402652740479 seconds, accuracy 73.32833333333333%
Epoch  7:time taken 11.683450937271118 seconds, accuracy 76.64833333333333%
Epoch  8:time taken 11.970276832580566 seconds, accuracy 78.83166666666666%
Epoch  9:time taken 12.018939971923828 seconds, accuracy 79.59666666666666%
Epoch 10:time taken 11.713474988937378 seconds, accuracy 80.23333333333333%
Epoch 11:time taken 12.02733564376831 seconds, accuracy 80.83333333333333%
Epoch 12:time taken 11.821383237838745 seconds, accuracy 81.17833333333333%
Epoch 13:time taken 11.700340509414673 seconds, accuracy 81.50333333333333%
Epoch 14:time taken 11.63221526145935 seco