In [151]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist
import copy

# load dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [152]:
classes = set(y_train)
variousSamples = list()
for i in classes:
  ind = np.where(y_train == i)[0][0]
  variousSamples.append(x_train[ind])

In [153]:
x_flatten_train = x_train.reshape(x_train.shape[0],x_train.shape[1]*x_train.shape[2],1)
y_encoded = np.zeros((y_train.shape[0], max(classes) + 1))
y_encoded[np.arange(y_train.shape[0]), y_train] = 1

In [154]:
y_encoded = y_encoded.reshape(60000,10,1)

In [155]:
def normalize_data(x):
  x_norm = x.astype('float32')
  x_norm = x_norm / 255.0
  return x_norm

x_flatten_train = normalize_data(x_flatten_train)

In [156]:
layers = 3
samples = y_train.shape[0]
lr = 0.001
epochs = 5
nodesPerLayer = list()

In [157]:
nodesPerLayer.append(784)
for i in range(0,layers):
  nodesPerLayer.append(int(1024/(2**(i+1))))
nodesPerLayer.append(10)

In [158]:
def func(activation,a_k):
  if(activation == "tanh"):
    a_k = np.tanh(a_k)
  else:
    a_k = 1/(1 + np.exp(-1*a_k))
  return a_k

In [159]:
def derivativeFun(activation,a_k):
  activationResult = func(activation,a_k)
  if(activation == "tanh"):
    activationResult = 1 - (activationResult**2)
  else:
    activationResult = activationResult - (activationResult**2)

  return activationResult

In [160]:
def decision(a_k,classificationFunction):
  if classificationFunction == "crossEntropy":
    a_k = np.exp(a_k - np.max(a_k))
    a_k = a_k / sum(a_k)
  return a_k

In [161]:
def OneHotEncode(C):
  oneHot = np.zeros(C.shape)
  oneHot[np.argmax(C)] = 1
  return oneHot

In [162]:
def forwardProp(inputX,activation,classificationFunction,weights,bias):
  h_k = inputX
  PreActivations = list()
  PostActivations = list()
  PostActivations.append(h_k)
  for k in range(0,layers):
    #print(weights[k].shape,h_k.shape)
    a_k = bias[k] + np.dot(weights[k],h_k)
    PreActivations.append(a_k)
    h_k = func(activation,a_k)
    PostActivations.append(h_k)
  a_k = bias[layers] + np.matmul(weights[layers],h_k)
  PreActivations.append(a_k)
  yPred = decision(a_k,classificationFunction)
  return PreActivations,PostActivations,yPred

In [163]:
def backProp(real, pred, h_k, weights, activation, PreActivations):
    a_l_L_theta = pred - real
    currentActivationGradient = a_l_L_theta
    WeightGradients = []
    biasGradients = []
    layers = len(weights) - 1

    for i in range(layers, -1, -1):
        W_i_L_theta = currentActivationGradient*np.transpose(h_k[i])
        WeightGradients.insert(0, W_i_L_theta)
        b_i_L_theta = np.sum(currentActivationGradient, axis=0, keepdims=True)
        biasGradients.insert(0, b_i_L_theta)

        if i > 0:
            h_i_prev_L_theta = np.matmul(weights[i].T, currentActivationGradient)
            currentActivationGradient = h_i_prev_L_theta * derivativeFun(activation, PreActivations[i - 1])

    return WeightGradients, biasGradients


In [164]:
def stochastic_gradient_descent(nodesPerLayer,x_flatten_train,y_encoded):
  weights = list()
  bias = list()
  for i in range(1,len(nodesPerLayer)):
    w = np.random.randn(nodesPerLayer[i],nodesPerLayer[i-1])*0.1
    b =  np.random.randn(nodesPerLayer[i],1)
    weights.append(w)
    bias.append(b)

  for i in range(0,epochs):
    for j in range(0,len(y_train)):
      A,B,C = forwardProp(x_flatten_train[j],"sigmoid","crossEntropy",weights,bias)
      Wdelta,Bdelta = backProp(y_encoded[j],C,B,weights,"sigmoid",A)
      if(j%1000 == 0):
        print(j/1000)
      for k in range(0,len(weights)):
        weights[k] = weights[k] - lr*Wdelta[k]
        bias[k] = bias[k] - lr*Bdelta[k]
  return weights,bias

In [169]:
def gradient_descent(nodesPerLayer,x_flatten_train,y_encoded):
  weights = list()
  bias = list()
  for i in range(1,len(nodesPerLayer)):
    w = np.random.randn(nodesPerLayer[i],nodesPerLayer[i-1])*0.1
    b =  np.random.randn(nodesPerLayer[i],1)
    weights.append(w)
    bias.append(b)
  Wdelta = list()
  Bdelta = list()
  for i in range(0,epochs):
    Wdelta.clear()
    Bdelta.clear()
    print("Epoch:" + str(i))
    for j in range(0,len(y_train)):
      A,B,C = forwardProp(x_flatten_train[j],"sigmoid","crossEntropy",weights,bias)
      CurrWdelta,CurrBdelta = backProp(y_encoded[j],C,B,weights,"sigmoid",A)
      if( len(Wdelta) == 0):
        Wdelta =  copy.deepcopy(CurrWdelta)
        Bdelta = copy.deepcopy(CurrBdelta)
      else:
        for k in range(0,len(Wdelta)):
          Wdelta[k] = Wdelta[k] + CurrWdelta[k]
          Bdelta[k] = Bdelta[k] + CurrBdelta[k]
      if(j%1000 == 0):
        print(j/1000)
    for k in range(0,len(weights)):
      weights[k] = weights[k] - lr*Wdelta[k]
      bias[k] = bias[k] - lr*Bdelta[k]
  return weights,bias

In [170]:
def trainModel(optimizer,x_train,y_train,nodesPerLayer):
  FinalWeights = list()
  FinalBias = list()
  if(optimizer == "gradient_descent"):
    FinalWeights, FinalBias = gradient_descent(nodesPerLayer,x_train,y_train)
  elif(optimizer == "SGD"):
    FinalWeights, FinalBias = stochastic_gradient_descent(nodesPerLayer,x_train,y_train)
  return FinalWeights,FinalBias

In [None]:
a,b = trainModel("gradient_descent",x_flatten_train,y_encoded,nodesPerLayer)

Epoch:0
0.0
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
25.0
26.0
27.0
28.0
29.0
30.0
31.0


In [None]:
def testModel(weights,bias,x_test,y_test,optimizer):
  count = 0
  for i in range(0,x_test.shape[0]):
    A,B,C = forwardProp(x_test[i],"logistic","crossEntropy",weights,bias)
    #print(C)
    if( y_test[i] == np.argmax(C)):
      count+=1
  print("Accuracy :" + str((count/y_test.shape[0])*100) + "%")

In [16]:
x_flatten_test = x_test.reshape(x_test.shape[0],x_test.shape[1]*x_test.shape[2],1)
x_flatten_test = normalize_data(x_flatten_test)

In [17]:
testModel(weights,bias,x_flatten_test,y_test)

Accuracy :80.82000000000001%
