<a href="https://colab.research.google.com/github/returaj/cs6910/blob/assginment1_akash/adagrad.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
class adagrad_gd(object):
  def __init__(self, model, alpha):
    self.model = model
    self.alpha = alpha
  def optimize(self, X, y):
    """
    X: (batch_size(B), data_size(N))
    y: (batch_size(B))
    """
    gamma=0.9
    epsilon=0.000001
    beta=0.9
    model = self.model
    v_w=[]
    v_b=[]
    num_layers = len(model.weight)
    layers=num_layers
    for i in range(num_layers):
      m, n = model.weight[i].shape
      v_w.append(np.zeros((m,n)))
      v_b.append(np.zeros(n))
    layer_output = model.forward(X)
    dw,db = model.backward(X, y, layer_output)
    for l in range(num_layers):
      v_w.append( v_w[l]+np.power(dw[l],2))
      v_b.append(v_b[l]+np.power(db[l],2))
      model.weight[l]-=(self.alpha/np.sqrt(v_w[l]+epsilon))*dw[l]
      model.bias[l]-=(self.alpha/np.sqrt(v_b[l]+epsilon))*db[l]
      
  def error(self, X, y):
    """
    X: (batch_size(B), data_size(N))
    y: (batch_size(B))
    """
    batch_size = X.shape[0]
    prob = self.model.forward(X)[-1]
    err = - np.sum(np.log(prob[np.arange(batch_size), y])) / batch_size
    return err

In [3]:
import numpy as np
from keras.datasets import fashion_mnist
##from optimizer import SGD


class FNN(object):
  def __init__(self, input_size, output_size, hidden_layers_size):
    self.input_size = input_size
    self.output_size = output_size
    self.weight, self.bias = None, None
    self.initialize(input_size, hidden_layers_size, output_size)

  def initialize(self, input_size, hidden_layers_size, output_size):
    self.weight, self.bias = [], []
    prev_layer_size = input_size
    hidden_layers_size.append(output_size)
    for curr_layer_size in hidden_layers_size:
      self.weight.append(np.random.normal(0, 1, size=(prev_layer_size, curr_layer_size)))
      self.bias.append(np.zeros(curr_layer_size))
      prev_layer_size = curr_layer_size

  def reset(self):
    num_layers = len(self.weight)
    for l in range(num_layers):
      m, n = self.weight[l].shape
      self.weight[l] = np.random.normal(0, 1, size=(m, n))
      self.bias[l] = np.zeros(n)

  @staticmethod
  def sigmoid(x):
    return 1./(1+np.exp(-x))

  @staticmethod
  def softmax(x):
    """
    x: (batch_size(B), data_size(N))
    """
    exp_prob = np.exp(x)
    prob = exp_prob / np.sum(exp_prob, axis=1, keepdims=True)
    return prob

  def forward(self, X):
    """
    X: (batch_size(B), data_size(N))
    """
    layer_output = []
    prev_layer = X
    num_hidden_layers = last_layer = len(self.weight) - 1
    for t in range(num_hidden_layers):
      w, b = self.weight[t], self.bias[t]
      next_layer = self.sigmoid(np.dot(prev_layer, w) + b)
      layer_output.append(next_layer)
      prev_layer = next_layer
    w, b = self.weight[last_layer], self.bias[last_layer]
    prob = self.softmax(np.dot(prev_layer, w) + b)
    layer_output.append(prob)
    return layer_output

  def backward(self, X, y, layer_output):
    """
    X: (batch_size(B), data_size(N))
    y: (batch_size(B))
    """
    batch_size, _ = X.shape
    num_hidden_layers = last_layer = len(layer_output)-1
    dw, db = [None]*(num_hidden_layers+1), [None]*(num_hidden_layers+1)
    for t in range(num_hidden_layers, -1, -1):
      if t == last_layer:
        dh = layer_output[t] / batch_size
        dh[np.arange(batch_size), y] -= 1/batch_size
      else:
        dh = np.dot(dh_fwd, self.weight[t+1].T) * layer_output[t] * (1-layer_output[t])
      prev_layer_output = X if t==0 else layer_output[t-1]
      dw[t] = np.dot(prev_layer_output.T, dh)
      db[t] = np.sum(dh, axis=0)
      dh_fwd = dh
    return dw, db


if __name__ == '__main__':
  (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
  consider = 1000
  X = np.array([x_train[i].flatten() for i in range(consider)])
  Y = y_train[:consider]
  batch_size, epochs = 16, 20
  model = FNN(784, 10, [50, 20])
  ##sgd = SGD(model, 0.01)
  adagrad_gd=adagrad_gd(model,0.01)


  for ep in range(1, epochs+1):
    ids = np.arange(consider)
    np.random.shuffle(ids)
    start, end = 0, batch_size
    while end > start:
      x, y = X[ids[start:end]], Y[ids[start:end]]
      adagrad_gd.optimize(x, y)
      start, end = end, min(consider, end+batch_size)
    err = adagrad_gd.error(X, Y)
    print(f'epoch: {ep}, error: {err}')




epoch: 1, error: 8.670460900229173
epoch: 2, error: 8.079426699610947
epoch: 3, error: 3.7661568878442404
epoch: 4, error: 5.524658357246258
epoch: 5, error: 13.717951143534847
epoch: 6, error: 15.508103208646226
epoch: 7, error: 8.438013720207277
epoch: 8, error: 10.705083002075252
epoch: 9, error: 8.857462912455699
epoch: 10, error: 5.258599007746574
epoch: 11, error: 8.141543154621553
epoch: 12, error: 8.17135241781875
epoch: 13, error: 8.008683900174839
epoch: 14, error: 11.470384448360754
epoch: 15, error: 13.306398100951785
epoch: 16, error: 8.704948388552827
epoch: 17, error: 6.142630464486949
epoch: 18, error: 7.374671461593477
epoch: 19, error: 7.696708223847634
epoch: 20, error: 6.080411944905256
