In [162]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import fashion_mnist

# load dataset
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [182]:
classes = set(y_train)
variousSamples = list()
for i in classes:
  ind = np.where(y_train == i)[0][0]
  variousSamples.append(x_train[ind])

In [183]:
x_flatten_train = x_train.reshape(x_train.shape[0],x_train.shape[1]*x_train.shape[2],1)
y_encoded = np.zeros((y_train.shape[0], max(classes) + 1))
y_encoded[np.arange(y_train.shape[0]), y_train] = 1

In [184]:
y_encoded = y_encoded.reshape(60000,10,1)

In [185]:
def normalize_data(x):
  x_norm = x.astype('float32')
  x_norm = x_norm / 255.0
  return x_norm

x_flatten_train = normalize_data(x_flatten_train)

In [186]:
layers = 3
samples = y_train.shape[0]
lr = 0.001
epochs = 5
nodesPerLayer = list()

In [187]:
nodesPerLayer.append(784)
for i in range(0,layers):
  nodesPerLayer.append(int(1024/(2**(i+1))))
nodesPerLayer.append(10)

In [188]:
def func(activation,a_k):
  if(activation == "tanh"):
    a_k = np.tanh(a_k)
  else:
    a_k = 1/(1 + np.exp(-1*a_k))
  return a_k

In [189]:
def derivativeFun(activation,a_k):
  activationResult = func(activation,a_k)
  if(activation == "tanh"):
    activationResult = 1 - (activationResult**2)
  else:
    activationResult = activationResult - (activationResult**2)

  return activationResult

In [190]:
def decision(a_k,classificationFunction):
  if classificationFunction == "crossEntropy":
    a_k = np.exp(a_k - np.max(a_k))
    a_k = a_k / sum(a_k)
  return a_k

In [191]:
def OneHotEncode(C):
  oneHot = np.zeros(C.shape)
  oneHot[np.argmax(C)] = 1
  return oneHot

In [192]:
def forwardProp(inputX,activation,classificationFunction,weights,bias):
  h_k = inputX
  PreActivations = list()
  PostActivations = list()
  PostActivations.append(h_k)
  for k in range(0,layers):
    a_k = bias[k] + np.matmul(weights[k],h_k)
    PreActivations.append(a_k)
    h_k = func(activation,a_k)
    PostActivations.append(h_k)
  a_k = bias[layers] + np.matmul(weights[layers],h_k)
  PreActivations.append(a_k)
  yPred = decision(a_k,classificationFunction)
  return PreActivations,PostActivations,yPred

In [178]:
def backProp(real, pred, h_k, weights, activation, PreActivations):
    a_l_L_theta = pred - real
    currentActivationGradient = a_l_L_theta
    WeightGradients = []
    biasGradients = []
    layers = len(weights) - 1

    for i in range(layers, -1, -1):
        W_i_L_theta = currentActivationGradient*np.transpose(h_k[i])
        WeightGradients.insert(0, W_i_L_theta)
        b_i_L_theta = np.sum(currentActivationGradient, axis=0, keepdims=True)
        biasGradients.insert(0, b_i_L_theta)

        if i > 0:
            h_i_prev_L_theta = np.matmul(weights[i].T, currentActivationGradient)
            currentActivationGradient = h_i_prev_L_theta * derivativeFun(activation, PreActivations[i - 1])

    return WeightGradients, biasGradients


In [None]:
weights = list()
bias = list()
for i in range(1,len(nodesPerLayer)):
  #*(np.sqrt(0.001/(nodesPerLayer[i]+nodesPerLayer[i-1])))
  w = np.random.randn(nodesPerLayer[i],nodesPerLayer[i-1])*0.1
  b =  np.random.randn(nodesPerLayer[i],1)
  #np.random.rand(nodesPerLayer[i],1)
  weights.append(w)
  bias.append(b)

for i in range(0,epochs):
  for j in range(0,samples):
    A,B,C = forwardProp(x_flatten_train[j],"sigmoid","crossEntropy",weights,bias)
    #print(x_flatten_train)
    newC = OneHotEncode(C)
    #print(A[0][0])
    Wdelta,Bdelta = backProp(y_encoded[j],C,B,weights,"sigmoid",A)
    #print(Wdelta[1])
    #print("#########")
    for k in range(0,len(weights)):
      weights[k] = weights[k] - lr*Wdelta[k]
      bias[k] = bias[k] - lr*Bdelta[k]
    if j%600 == 0 :
      print(j/600)

In [194]:
def testModel(weights,bias,x_test,y_test):
  count = 0
  for i in range(0,x_test.shape[0]):
    A,B,C = forwardProp(x_test[i],"logistic","crossEntropy",weights,bias)
    #print(C)
    if( y_test[i] == np.argmax(C)):
      count+=1
  print("Accuracy :" + str((count/y_test.shape[0])*100) + "%")

In [195]:
x_flatten_test = x_test.reshape(x_test.shape[0],x_test.shape[1]*x_test.shape[2],1)
x_flatten_test = normalize_data(x_flatten_test)

In [196]:
testModel(weights,bias,x_flatten_test,y_test)

Accuracy :64.8%
