In [1]:
from keras.datasets import fashion_mnist
import numpy as np
from  matplotlib import pyplot as plt
import time
import math
from sklearn.model_selection import train_test_split

### Loading Dataset

In [3]:
# Load Fashion-MNIST through Keras: a (train, test) pair of (images, labels) tuples.
dataset = fashion_mnist.load_data()
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = dataset

# Hold out 10% of the official training split for validation; the fixed
# random_state makes the split identical on every run.
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train_and_validation,
    y_train_and_validation,
    test_size=0.1,
    random_state=42,
)

# For every split: divide by 255, store as float32, and flatten each image
# into a (784, 1) column vector so the result has shape (num_samples, 784, 1).
X_train = np.array((X_train/255.0).astype(np.float32).reshape(X_train.shape[0], 784, 1))
X_validation = np.array((X_validation/255.0).astype(np.float32).reshape(X_validation.shape[0], 784, 1))
X_test = np.array((X_test/255.0).astype(np.float32).reshape(X_test.shape[0], 784, 1))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [4]:
#Activation function
def activation(activation_function):
  """Look up a hidden-layer activation function by name.

  Args:
    activation_function: one of 'sigmoid', 'tanh' or 'ReLU' (case-sensitive).

  Returns:
    The matching callable defined in this notebook (sigmoid, tanh or relu).
    Each accepts (x, derivative=False).

  Raises:
    ValueError: if the name is not one of the supported activations.
  """
  if activation_function == 'sigmoid':
    return sigmoid
  if activation_function == 'tanh':
    return tanh
  if activation_function == 'ReLU':
    return relu
  # Fail here rather than returning None, which would only surface later as
  # an obscure "'NoneType' object is not callable" inside the forward pass.
  raise ValueError(
      "Unknown activation function %r; expected 'sigmoid', 'tanh' or 'ReLU'"
      % (activation_function,))

def sigmoid(x, derivative = False):
  """Logistic sigmoid 1/(1 + exp(-x)), applied element-wise.

  Args:
    x: scalar or NumPy array of pre-activations.
    derivative: if True, return d(sigmoid)/dx evaluated at x instead.

  Returns:
    sigmoid(x), or sigmoid(x)*(1 - sigmoid(x)) when derivative is True.
  """
  if derivative:
    s = sigmoid(x)          # evaluate once and reuse
    return s*(1 - s)
  # 1/(1 + exp(-x)) == exp(-log(1 + exp(-x))) == exp(-logaddexp(0, -x)).
  # logaddexp never forms exp(-x) explicitly, so large negative inputs no
  # longer overflow; the outer exp only ever sees arguments <= 0.
  return np.exp(-np.logaddexp(0, -x))

def tanh(x, derivative = False):
  """Hyperbolic tangent, applied element-wise.

  Args:
    x: scalar or NumPy array of pre-activations.
    derivative: if True, return d(tanh)/dx evaluated at x instead.

  Returns:
    tanh(x), or 1 - tanh(x)**2 when derivative is True.
  """
  # Use NumPy's implementation: the explicit (e^x - e^-x)/(e^x + e^-x) formula
  # evaluates to inf/inf = NaN once |x| is large enough for exp to overflow.
  if derivative:
    return 1 - np.tanh(x)**2
  return np.tanh(x)

def relu(x, derivative = False):
  """Rectified linear unit, applied element-wise.

  Args:
    x: scalar or NumPy array of pre-activations.
    derivative: if True, return the 0/1 slope indicator instead of the value.

  Returns:
    x where x > 0 and zero elsewhere; or, when derivative is True,
    1 where x > 0 and 0 elsewhere (as integers).
  """
  positive = x > 0
  if not derivative:
    return x*positive
  return positive*1

def softmax(x,derivative = False):
  """Softmax over axis 0, so every column of x becomes a probability vector.

  Args:
    x: NumPy array of logits; classes run along axis 0.
    derivative: if True, return softmax(x)*(1 - softmax(x)) element-wise,
      i.e. only the diagonal terms of the softmax Jacobian.

  Returns:
    Array with the same shape as x.
  """
  if derivative:
    s = softmax(x)          # evaluate once and reuse
    return s*(1 - s)
  # Softmax is invariant to subtracting a constant per column; removing the
  # column maximum keeps every exponent <= 0, so exp cannot overflow and
  # produce inf/inf = NaN for large logits.
  shifted = x - np.max(x, axis = 0, keepdims = True)
  exps = np.exp(shifted)
  return exps/np.sum(exps, axis = 0, keepdims = True)

def one_hot(y, num_output_nodes):
  """One-hot encode integer class labels, one column per sample.

  Args:
    y: sequence of integer class labels.
    num_output_nodes: number of classes (rows of the result).

  Returns:
    Float array of shape (num_output_nodes, len(y)) whose column k holds a
    single 1 in row y[k] and zeros elsewhere.
  """
  shape = (num_output_nodes, len(y))
  encoded = np.zeros(shape)
  for column, label in enumerate(y):
    encoded[label, column] = 1
  return encoded

In [5]:
### Initialization of Weights
def param_inint(num_inputs_nodes, hidden_layers, num_output_nodes, init_type):
  """Create initial weights and biases for a fully connected network.

  Args:
    num_inputs_nodes: size of the input layer.
    hidden_layers: sequence of hidden-layer sizes (may be empty, in which
      case the input is connected directly to the output layer).
    num_output_nodes: size of the output layer.
    init_type: "random" (standard normal scaled by 0.1) or "xavier"
      (standard normal scaled by sqrt(2 / (fan_in + fan_out))).

  Returns:
    (W, B): two lists with one entry per layer. W[k] has shape
    (fan_out, fan_in) and B[k] has shape (fan_out, 1). Biases are always
    standard normal scaled by 0.1, for both init types.

  Raises:
    ValueError: if init_type is not "random" or "xavier".
  """
  if init_type not in ("random", "xavier"):
    # Previously this fell through and returned None, which only failed at
    # the caller's tuple unpacking.
    raise ValueError(
        "Unknown init_type %r; expected 'random' or 'xavier'" % (init_type,))

  # Consecutive pairs of this list are the (fan_in, fan_out) of each layer.
  layer_sizes = [num_inputs_nodes] + list(hidden_layers) + [num_output_nodes]

  W = []
  B = []
  for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
    if init_type == "xavier":
      weight_scale = np.sqrt(2/(fan_out + fan_in))
    else:
      weight_scale = 0.1
    # Draw W before B for each layer so the sequence of RNG calls (and hence
    # the values obtained under a fixed seed) stays layer-by-layer: W, B, W, B...
    W.append(np.random.randn(fan_out, fan_in)*weight_scale)
    B.append(np.random.randn(fan_out, 1)*0.1)
  return W, B


### Feed Forward Network

In [6]:
def feed_forward(x, W, B, activation_type):
  """Run a forward pass through the network.

  Args:
    x: network input; it is stored unchanged as the first entry of h.
    W: list of weight matrices, one per layer.
    B: list of bias vectors, one per layer.
    activation_type: name passed to activation() to select the
      non-linearity used between layers.

  Returns:
    (y_hat, h, a) where
      a[k] = W[k] . h[k] + B[k] is the pre-activation of layer k,
      h[0] = x and h[k] = sigma(a[k-1]) is the input fed to layer k,
      y_hat = softmax(a[-1]) is the network output.
  """
  sigma = activation(activation_type)

  layer_inputs = [x]                               # h0 = x
  pre_activations = [np.dot(W[0], x) + B[0]]

  for layer in range(1, len(W)):
    layer_inputs.append(sigma(pre_activations[-1]))
    pre_activations.append(np.dot(W[layer], layer_inputs[-1]) + B[layer])

  # The last pre-activation is not passed through sigma; softmax is applied instead.
  y_hat = softmax(pre_activations[-1])
  return y_hat, layer_inputs, pre_activations


In [11]:
num_inputs_nodes = 784          # input nodes: one per pixel of a 28 x 28 image
hidden_layers = [64,32,16]      # 3 hidden layers with 64, 32 and 16 nodes
num_output_nodes = 10           # 10 classes
init_type = "xavier"
activation_type = "tanh"

# Take one training example as a (784, 1) column vector; a batch could be fed instead.
x = X_train[5].reshape(784,1)

# Seed NumPy's global RNG so the freshly initialised weights, and therefore the
# prediction printed below, are the same after every Restart & Run All.
np.random.seed(42)
W_initial, B_initial = param_inint(num_inputs_nodes, hidden_layers, num_output_nodes, init_type) # weight initialization

# Forward pass with the untrained weights.
y_hat, h, a = feed_forward(x, W_initial, B_initial, activation_type)


In [12]:
print(y_hat)  # Softmax output of the untrained network for the single training example (one probability per class)

[[0.08056535]
 [0.06034125]
 [0.16556654]
 [0.13946036]
 [0.12007072]
 [0.1294739 ]
 [0.09675075]
 [0.09587671]
 [0.09062571]
 [0.0212687 ]]
