<a href="https://colab.research.google.com/github/ShivamCholin/CS6910_Assignment_1/blob/main/Rmsprop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random          
import numpy as np      
from time import time    

In [2]:
def sigmoid(z):
    """Logistic sigmoid, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        ``1 / (1 + exp(-z))`` with the same shape as ``z``.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator
def dsigmoid(z):
    """Derivative of the logistic sigmoid, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        ``sigmoid(z) * (1 - sigmoid(z))`` with the same shape as ``z``.
    """
    # Evaluate the sigmoid once and reuse it; this function runs for every
    # layer of every training sample, so the duplicate call was pure overhead.
    s = sigmoid(z)
    return s * (1 - s)

In [3]:
def relu(z):
    """Rectified linear unit, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        ``z`` with every negative entry replaced by zero.
    """
    floor = 0
    return np.maximum(z, floor)
def drelu(z):
    """Derivative of the ReLU activation, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        0 where ``z`` is negative and 1 where ``z`` is positive; the value
        at exactly ``z == 0`` is taken to be 1.
    """
    value_at_zero = 1
    return np.heaviside(z, value_at_zero)

In [4]:
def tanh(z):
    """Hyperbolic tangent activation, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        ``tanh(z)`` with the same shape as ``z``, in the range [-1, 1].
    """
    # Delegate to NumPy: the explicit (e^z - e^-z) / (e^z + e^-z) quotient
    # overflows to inf / inf = nan once |z| exceeds roughly 710, whereas
    # np.tanh saturates cleanly at +/-1.
    return np.tanh(z)
def dtanh(z):
    """Derivative of the hyperbolic tangent, applied element-wise.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        ``1 - tanh(z)**2`` with the same shape as ``z``.
    """
    # np.tanh saturates at +/-1 for large |z|, so the derivative goes to 0
    # there instead of becoming nan through an overflowing exp() quotient.
    t = np.tanh(z)
    return 1 - t**2

In [5]:
def mbgdlearn(nn, tr_data, epochs, batch_size, learning_rate):
    """Train ``nn`` with mini-batch updates for a fixed number of epochs.

    Every epoch shuffles the samples, slices them into consecutive batches of
    ``batch_size`` items (the last batch may be shorter), applies ``mbgd`` to
    each batch, and prints the elapsed time together with the accuracy that
    ``evaluate2`` reports on the training samples.

    Args:
        nn: The network object updated in place by ``mbgd``.
        tr_data: Iterable of ``(x, y)`` training pairs. It is copied once, so
            the caller's container is never reordered.
        epochs: Number of passes over the training samples.
        batch_size: Number of samples per mini-batch; must be positive.
        learning_rate: Step size forwarded to ``mbgd``.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got {0!r}'.format(batch_size))

    # Shuffle a private copy: reordering the caller's list in place would leak
    # hidden state into every later notebook cell that reads the same data.
    samples = list(tr_data)
    n = len(samples)
    for j in range(epochs):
        time_start = time()
        random.shuffle(samples)
        batches = [samples[k: k + batch_size] for k in range(0, n, batch_size)]
        for batch in batches:
            mbgd(nn, batch, learning_rate)
        time_end = time()
        # Guard the division so an empty training set reports 0% instead of crashing.
        accuracy = 100.0 * evaluate2(nn, samples) / n if n else 0.0
        print('Epoch {0}:time taken {1} seconds, accuracy {2}%'.format(f'{j + 1:2}', time_end - time_start, accuracy))


In [23]:
def mbgd(nn, batch, eta):
    """Apply one RMSprop update to ``nn`` from the gradients of ``batch``.

    The per-sample gradients returned by ``backward`` are summed (not
    averaged) over the batch. The running squared-gradient caches ``nn.gew``
    and ``nn.geb`` are decayed by ``nn.gamma`` and refreshed with the squared
    summed gradient, after which weights and biases move against the gradient
    scaled by ``eta / sqrt(cache + nn.epsilon)``.

    Args:
        nn: Network whose ``weights``, ``biases``, ``gew`` and ``geb`` lists
            are replaced with updated arrays.
        batch: Iterable of ``(x, y)`` training pairs.
        eta: Learning rate.
    """
    # Running per-layer sums of the sample gradients.
    sum_b = [np.zeros(b.shape) for b in nn.biases]
    sum_w = [np.zeros(w.shape) for w in nn.weights]
    for x, y in batch:
        grad_b, grad_w = backward(nn, x, y)
        sum_b = [total + g for total, g in zip(sum_b, grad_b)]
        sum_w = [total + g for total, g in zip(sum_w, grad_w)]

    decay = nn.gamma
    fresh = 1.0 - nn.gamma
    # Exponential moving average of the squared gradients.
    nn.gew = [cache*decay + fresh*np.square(g) for cache, g in zip(nn.gew, sum_w)]
    nn.geb = [cache*decay + fresh*np.square(g) for cache, g in zip(nn.geb, sum_b)]

    # Parameter step, normalised element-wise by the refreshed caches.
    nn.weights = [
        w - eta * g / np.sqrt(cache + nn.epsilon)
        for w, cache, g in zip(nn.weights, nn.gew, sum_w)
    ]
    nn.biases = [
        b - eta * g / np.sqrt(cache + nn.epsilon)
        for b, cache, g in zip(nn.biases, nn.geb, sum_b)
    ]

In [7]:
def dcost(act, y):
    """Derivative of the cost with respect to the output activation.

    Args:
        act: Output activation(s) produced by the network.
        y: Target value(s), broadcast-compatible with ``act``.

    Returns:
        The element-wise difference ``act - y``.
    """
    residual = act - y
    return residual

In [24]:
class Network:
    """Parameter and optimiser state for a fully connected network.

    Attributes:
        num_layers: Number of layers, counting the input layer.
        biases: List of bias arrays, one per non-input layer.
        weights: List of weight arrays, one per non-input layer.
        act: Activation function applied to every layer's pre-activation.
        dact: Derivative of ``act``.
        gew: Running squared-gradient cache for each entry of ``weights``.
        geb: Running squared-gradient cache for each entry of ``biases``.
        gamma: Decay factor of the squared-gradient caches.
        epsilon: Small constant added to the caches before the square root.
    """
    # Class-level placeholders kept for backward compatibility; __init__
    # rebinds all three on every instance, so the shared lists stay untouched.
    num_layers=0
    biases=[]
    weights=[]
    def __init__(self,nl,x,y,act1,act2,vb,vw,gamma=0.9,epsilon=1e-8):
        """Store parameters, activation functions and optimiser state.

        Args:
            nl: Number of layers, counting the input layer.
            x: List of bias arrays.
            y: List of weight arrays.
            act1: Activation function.
            act2: Derivative of the activation function.
            vb: Initial squared-gradient caches for the biases.
            vw: Initial squared-gradient caches for the weights.
            gamma: Cache decay factor (defaults to the previous fixed 0.9).
            epsilon: Stabilising constant (defaults to the previous fixed 1e-8).
        """
        self.num_layers=nl
        self.biases=x
        self.weights=y
        self.act=act1
        self.dact=act2
        self.gew=vw
        self.geb=vb
        self.gamma=gamma
        self.epsilon=epsilon

def init_network(layers,actfunc):
    """Build a ``Network`` with random parameters and zeroed optimiser caches.

    Args:
        layers: Sequence of layer sizes, input layer first.
        actfunc: ``"tanh"`` or ``"sigmoid"`` selects that activation; any
            other value falls back to ReLU.

    Returns:
        A ``Network`` whose biases and weights are drawn from a standard
        normal distribution and whose squared-gradient caches are all zero.
    """
    activations = {
        "tanh": (tanh, dtanh),
        "sigmoid": (sigmoid, dsigmoid),
    }
    # Unknown names deliberately keep the original fallback to ReLU.
    act1, act2 = activations.get(actfunc, (relu, drelu))

    fan_pairs = list(zip(layers[:-1], layers[1:]))
    # Biases are drawn before weights, matching the original draw order so a
    # seeded generator still yields the same initial parameters.
    biases = [np.random.randn(y, 1) for y in layers[1:]]
    weights = [np.random.randn(y, x) for x, y in fan_pairs]
    # Allocate the caches as zeros directly: multiplying random draws by 0
    # consumed RNG state for nothing and could leave negative zeros behind.
    cache_b = [np.zeros((y, 1)) for y in layers[1:]]
    cache_w = [np.zeros((y, x)) for x, y in fan_pairs]
    return Network(len(layers), biases, weights, act1, act2, cache_b, cache_w)

In [9]:
def forward(nn, a):
    """Propagate an input through every layer of ``nn``.

    Args:
        nn: Network providing ``biases``, ``weights`` and the activation ``act``.
        a: Input activation fed to the first layer.

    Returns:
        The activation of the last layer, or ``a`` unchanged when the network
        has no layers.
    """
    signal = a
    for bias, weight in zip(nn.biases, nn.weights):
        pre_activation = np.dot(weight, signal) + bias
        signal = nn.act(pre_activation)
    return signal

In [10]:
def evaluate(nn, te_data):
    """Count the samples that ``nn`` classifies correctly.

    Args:
        nn: Network passed to ``forward``.
        te_data: Iterable of ``(x, y)`` pairs where ``y`` is the class index.

    Returns:
        The number of samples whose arg-max output equals ``y``.
    """
    hits = 0
    for sample, label in te_data:
        predicted = np.argmax(forward(nn, sample))
        hits += int(predicted == label)
    return hits

In [11]:
def evaluate2(nn, te_data):
    """Count correct classifications when targets are score vectors.

    Args:
        nn: Network passed to ``forward``.
        te_data: Iterable of ``(x, y)`` pairs where the arg-max of ``y``
            marks the true class.

    Returns:
        The number of samples whose arg-max output equals the arg-max of ``y``.
    """
    hits = 0
    for sample, target in te_data:
        predicted = np.argmax(forward(nn, sample))
        hits += int(predicted == np.argmax(target))
    return hits

In [12]:
def backward(nn, x, y):
    """Back-propagate one sample and return its per-layer gradients.

    Args:
        nn: Network providing ``biases``, ``weights``, ``num_layers``, the
            activation ``act`` and its derivative ``dact``.
        x: Input activation for the first layer.
        y: Target for the output layer.

    Returns:
        A tuple ``(grad_b, grad_w)`` of lists holding one gradient array per
        entry of ``nn.biases`` and ``nn.weights`` respectively.
    """
    grad_b = [np.zeros(b.shape) for b in nn.biases]
    grad_w = [np.zeros(w.shape) for w in nn.weights]

    # Forward pass, remembering every pre-activation and activation.
    activations = [x]
    pre_activations = []
    for bias, weight in zip(nn.biases, nn.weights):
        weighted_input = np.dot(weight, activations[-1]) + bias
        pre_activations.append(weighted_input)
        activations.append(nn.act(weighted_input))

    # Error at the output layer.
    error = dcost(activations[-1], y) * nn.dact(pre_activations[-1])
    grad_b[-1] = error
    grad_w[-1] = np.dot(error, activations[-2].transpose())

    # Walk back through the earlier layers; ``depth`` counts from the end.
    for depth in range(2, nn.num_layers):
        slope = nn.dact(pre_activations[-depth])
        error = np.dot(nn.weights[-depth + 1].transpose(), error) * slope
        grad_b[-depth] = error
        grad_w[-depth] = np.dot(error, activations[-depth - 1].transpose())
    return (grad_b, grad_w)

In [13]:
import keras
def change_y(j, num_classes=10):
    """One-hot encode a class index as a column vector.

    Args:
        j: Index of the class to mark.
        num_classes: Length of the vector; defaults to the 10 classes this
            function previously assumed.

    Returns:
        A ``(num_classes, 1)`` array of zeros with a 1.0 at row ``j``.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e
# Load Fashion-MNIST through Keras as (images, labels) pairs for train and test.
fashion_mnist = keras.datasets.fashion_mnist
raw_train, raw_test = fashion_mnist.load_data()

# Flatten every image to a (784, 1) column and divide the pixel values by 255.
training_x = [np.reshape(image, (784, 1))/255 for image in raw_train[0]]
test_inputs = [np.reshape(image, (784, 1))/255 for image in raw_test[0]]

# Training targets become one-hot columns; test labels are kept as loaded.
training_y = [change_y(label) for label in raw_train[1]]

# Materialise the (input, target) pairs as lists so they can be sliced,
# shuffled and iterated more than once.
tr_data = list(zip(training_x, training_y))
te_data = list(zip(test_inputs, raw_test[1]))

# Drop the temporary handles so the raw arrays are not kept alive.
del raw_train, raw_test

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [25]:
# Hyper-parameters for this training run.
epochs = 20
batch_size =10
learning_rate = 0.001

# Layer sizes: 784 inputs, one hidden layer of 30 units, 10 outputs.
# NOTE(review): no random seed is set in the visible cells, so repeated runs
# will not reproduce the same numbers.
nn = init_network([784, 30, 10],"sigmoid")

print('start')
mbgdlearn(nn, tr_data, epochs, batch_size, learning_rate)

# Final accuracy on the held-out test pairs.
test_correct = evaluate(nn, te_data)
print('accuracy {0}%'.format(100.0 * test_correct / len(te_data)))

start
Epoch  1:time taken 12.622778177261353 seconds, accuracy 78.74%
Epoch  2:time taken 12.399789333343506 seconds, accuracy 81.685%
Epoch  3:time taken 12.46320104598999 seconds, accuracy 83.00333333333333%
Epoch  4:time taken 12.371015787124634 seconds, accuracy 83.71%
Epoch  5:time taken 12.481857538223267 seconds, accuracy 84.56333333333333%
Epoch  6:time taken 12.380115985870361 seconds, accuracy 84.83833333333334%
Epoch  7:time taken 12.382048845291138 seconds, accuracy 85.36833333333334%
Epoch  8:time taken 12.407553911209106 seconds, accuracy 85.695%
Epoch  9:time taken 12.376633644104004 seconds, accuracy 85.97%
Epoch 10:time taken 12.517082214355469 seconds, accuracy 86.03666666666666%
Epoch 11:time taken 12.575782060623169 seconds, accuracy 86.36%
Epoch 12:time taken 12.556490659713745 seconds, accuracy 86.71%
Epoch 13:time taken 12.440420866012573 seconds, accuracy 86.84%
Epoch 14:time taken 12.511504411697388 seconds, accuracy 86.85666666666667%
Epoch 15:time taken 12.55