In [76]:
from tensorflow import keras
from keras.datasets import fashion_mnist
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
import math

In [78]:
# Load the Fashion-MNIST training and test splits through the Keras dataset helper.
# The four arrays are kept as module-level names and reused by later cells.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [37]:
# One-hot encode the training labels as a dense array.
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old spelling, so try the new keyword first and fall back
# to the old one on older installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)
# The encoder expects a 2-D input, hence the reshape to a single column.
y_train_new = np.array(y_train.reshape(len(y_train), 1))
yy = enc.fit_transform(y_train_new)

In [126]:
def batch_split(bs,X_train,Y_train,X_test,Y_test):
    """Shuffle, flatten, scale and batch the data for training.

    Parameters
    ----------
    bs : int
        Number of samples per training batch (must be >= 1).
    X_train, X_test : ndarray
        Input samples; everything after the first axis is flattened into one
        feature axis.
    Y_train, Y_test : ndarray
        Labels matching ``X_train`` / ``X_test`` along the first axis.

    Returns
    -------
    x_train_bs : list of ndarray
        Training inputs cut into consecutive batches of ``bs`` rows; the last
        batch holds the remainder when the sample count is not a multiple of
        ``bs``.
    y_train_bs : list of ndarray
        One-hot encoded training labels, batched the same way.
    x_test : ndarray
        Shuffled, flattened and scaled test inputs.
    y_test : ndarray
        Shuffled test labels (not one-hot encoded).

    Raises
    ------
    ValueError
        If ``bs`` is smaller than 1.
    """
    bs = int(bs)
    if bs < 1:
        raise ValueError("bs must be a positive integer")

    # Inputs and labels are shuffled in one call so rows stay aligned.
    x_train, y_train_cat = shuffle(X_train, Y_train)
    x_test, y_test = shuffle(X_test, Y_test)

    # Flatten each sample to one row (28x28 images become 784 features).
    x_train = x_train.reshape(x_train.shape[0], -1)
    x_test = x_test.reshape(x_test.shape[0], -1)

    # Scale each split by its own maximum value.
    x_train = x_train / np.max(x_train)
    x_test = x_test / np.max(x_test)

    # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed
    # in 1.4; support both spellings.
    try:
        enc = OneHotEncoder(sparse_output=False)
    except TypeError:
        enc = OneHotEncoder(sparse=False)
    y_train = enc.fit_transform(np.array(y_train_cat.reshape(len(y_train_cat), 1)))

    # Plain slicing covers both the evenly divisible case and the trailing
    # remainder batch, and also works when bs exceeds the sample count.
    n_samples = x_train.shape[0]
    starts = range(0, n_samples, bs)
    x_train_bs = [x_train[start:start + bs] for start in starts]
    y_train_bs = [y_train[start:start + bs] for start in starts]
    return x_train_bs, y_train_bs, x_test, y_test

In [97]:
def logistic_func(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [98]:
def func_softmax(x):
    """Column-wise softmax of a 2-D array of logits.

    Parameters
    ----------
    x : ndarray
        2-D array; each column is normalised independently.

    Returns
    -------
    list of ndarray
        One 1-D array per column of ``x``, holding that column's softmax
        probabilities.
    """
    # Subtracting each column's maximum leaves the softmax unchanged
    # mathematically but keeps np.exp from overflowing to inf (which would
    # turn the whole column into nan).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_shifted = np.exp(shifted)
    probs = exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)
    # Callers index the result per column, so hand back a list of rows of
    # the transposed matrix.
    return list(probs.T.copy())

In [99]:
def compute_loss(y_pred,y):
    """Per-sample cross-entropy between predicted probabilities and targets.

    Parameters
    ----------
    y_pred : sequence of ndarray or ndarray
        Predicted class probabilities, one row per sample.
    y : ndarray
        Target distribution (e.g. one-hot rows), one row per sample.

    Returns
    -------
    ndarray
        1-D array of length ``y.shape[0]`` with ``-sum(y * log(y_pred))`` for
        each sample.
    """
    samp = y.shape[0]
    probs = np.asarray(y_pred, dtype=float)[:samp]
    # The training gradient -(y - y_pred) is the derivative of softmax
    # cross-entropy, so the matching loss needs the logarithm of the
    # predicted probability. Clip away exact zeros so log() stays finite.
    min_prob = 1e-12
    log_probs = np.log(np.clip(probs, min_prob, None))
    return -np.sum(y * log_probs, axis=1)

In [None]:
# Network architecture and optimiser settings shared by the functions below.
learning_rate=0.15
size_hidden=[200,100,50]      # units in each hidden layer, input side first
no_hidden =len(size_hidden)   # number of hidden layers
input_size=784                # features per flattened input sample
no_output=10                  # units in the output layer

## Tryout after debugging

In [85]:
def initialize_params(input_size,no_hidden,size_hidden,no_output):
    """Create random initial weights and biases for every layer.

    Each weight matrix has shape ``(fan_out, fan_in)`` and is drawn from
    ``np.random.rand`` scaled by ``1e-2 * sqrt(1 / fan_out)``; each bias is a
    ``(fan_out, 1)`` column drawn from ``np.random.rand`` scaled by ``1e-2``.

    Returns
    -------
    (list of ndarray, list of ndarray)
        Weights and biases, ordered from the first hidden layer to the
        output layer.
    """
    scale = 1e-2

    # (fan_out, fan_in) for every layer: first hidden, remaining hidden, output.
    layer_dims = [(size_hidden[0], input_size)]
    layer_dims += [(size_hidden[j + 1], size_hidden[j]) for j in range(no_hidden - 1)]
    layer_dims.append((no_output, size_hidden[-1]))

    weights, biases = [], []
    for fan_out, fan_in in layer_dims:
        # Weight is drawn before bias for each layer so the sequence of
        # random numbers consumed matches a layer-by-layer construction.
        weights.append(scale * np.sqrt(1. / fan_out) * np.random.rand(fan_out, fan_in))
        biases.append(scale * np.random.rand(fan_out, 1))
    return weights, biases

In [134]:
def forward_prop(eg,x_train,w,b):
    """Run one forward pass for batch ``eg``.

    Parameters
    ----------
    eg : int
        Index of the batch inside ``x_train``.
    x_train : sequence of ndarray
        Batched inputs; ``x_train[eg]`` is transposed before use so that it
        can be left-multiplied by the weight matrices.
    w, b : list of ndarray
        Per-layer weights and biases; the last entry is the output layer.

    Returns
    -------
    a : list of ndarray
        Pre-activations of the hidden layers.
    h : list of ndarray
        Transposed input followed by the hidden-layer activations.
    a_f : ndarray
        Pre-activation of the output layer.
    y_pred : list of ndarray
        Softmax of ``a_f`` as returned by ``func_softmax``.
    """
    # Take the depth from the parameters themselves rather than from a
    # module-level variable, so the pass always matches the network given.
    n_hidden = len(w) - 1
    a = []
    h = [x_train[eg].T]
    for i in range(n_hidden):
        a.append(np.dot(w[i], h[i]) + b[i])
        h.append(logistic_func(a[i]))
    a_f = np.dot(w[n_hidden], h[n_hidden]) + b[n_hidden]
    y_pred = func_softmax(a_f)
    return a, h, a_f, y_pred

def backward_prop(eg,y_train,y_pred,w,b,h,a,a_f,no_hidden):
    """Back-propagate the output error of batch ``eg`` through the network.

    Parameters
    ----------
    eg : int
        Index of the batch inside ``y_train``.
    y_train : sequence of ndarray
        Batched targets; ``y_train[eg]`` has one row per sample.
    y_pred : sequence of ndarray
        Predicted probabilities, one entry per sample.
    w : list of ndarray
        Per-layer weight matrices.
    h : list of ndarray
        Layer inputs from the forward pass; ``h[k]`` for ``k >= 1`` must be
        the sigmoid activation of hidden layer ``k - 1``.
    b, a, a_f
        Accepted for interface compatibility; not read here.
    no_hidden : int
        Number of hidden layers.

    Returns
    -------
    (list of ndarray, list of ndarray)
        ``grad_b`` and ``grad_w``; entry ``k`` is the gradient for layer ``k``,
        summed over the batch and laid out as the transpose of the
        corresponding parameter.
    """
    n_layers = no_hidden + 1
    # One slot per layer, filled from the output layer backwards.
    grad_a = [None] * n_layers
    grad_w = [None] * n_layers
    grad_b = [None] * n_layers
    grad_h = [None] * no_hidden

    # Output-layer error: predicted probabilities minus targets.
    grad_a[no_hidden] = -(y_train[eg] - np.asarray(y_pred))
    for k in range(no_hidden, -1, -1):
        grad_w[k] = np.dot(h[k], grad_a[k])
        grad_b[k] = np.sum(grad_a[k], axis=0, keepdims=True)
        if k >= 1:
            grad_h[k-1] = np.dot(grad_a[k], w[k])
            # h[k] already holds sigmoid(a[k-1]) from the forward pass, so
            # the derivative s * (1 - s) needs no further exp() evaluation.
            activation = h[k].T
            grad_a[k-1] = grad_h[k-1] * (activation * (1 - activation))
    return grad_b, grad_w

def param_update(w,b,grad_w,grad_b,learning_rate):
    """Apply one gradient-descent step to every layer.

    Parameters
    ----------
    w, b : list of ndarray
        Per-layer weights and biases; the lists are updated in place.
    grad_w, grad_b : list of ndarray
        Per-layer gradients stored as the transpose of the matching
        parameter (they are transposed back before the subtraction).
    learning_rate : float
        Step size.

    Returns
    -------
    (list of ndarray, list of ndarray)
        The same ``w`` and ``b`` lists, now holding the updated arrays.
    """
    # Iterate over the layers actually present instead of relying on a
    # module-level layer count.
    for i in range(len(w)):
        w[i] = w[i] - (learning_rate * grad_w[i].T)
        b[i] = b[i] - (learning_rate * grad_b[i].T)
    return w, b

def test_model(w,b,x_test,y_test): 
    """Classify the test set with the current parameters and score it.

    Parameters
    ----------
    w, b : list of ndarray
        Per-layer weights and biases; the last entry is the output layer.
    x_test : ndarray
        Test inputs, one row per sample.
    y_test : array-like
        True labels for ``x_test``.

    Returns
    -------
    float
        Value returned by ``accuracy_score(y_test, predictions)``, where each
        prediction is the arg-max of the sample's softmax output.
    """
    # Depth comes from the parameters, not from a module-level variable.
    n_hidden = len(w) - 1
    activation = x_test.T
    for i in range(n_hidden):
        activation = logistic_func(np.dot(w[i], activation) + b[i])
    a_fn = np.dot(w[n_hidden], activation) + b[n_hidden]
    y_pred1 = func_softmax(a_fn)
    # One prediction per softmax output, so any test-set size works
    # (previously fixed at 10000 samples).
    y_final = np.array([probs.argmax() for probs in y_pred1])
    return accuracy_score(y_test, y_final)

def train_model(input_size,no_hidden,size_hidden,no_output,bs,x_train,y_train,x_test,y_test,max_iterations,learning_rate):
    """Train the network with mini-batch gradient descent, printing test accuracy.

    Parameters
    ----------
    input_size, no_hidden, size_hidden, no_output
        Architecture, forwarded to ``initialize_params``.
    bs : int
        Batch size, forwarded to ``batch_split``.
    x_train, y_train, x_test, y_test : ndarray
        Raw data, preprocessed and batched by ``batch_split``.
    max_iterations : int
        Number of passes over all training batches.
    learning_rate : float
        Step size for ``param_update``.

    After each pass the test accuracy is printed; nothing is returned.
    """
    ct = 1
    # Use the caller's batch size; the function must not depend on a
    # module-level variable for it.
    x_train, y_train, x_test, y_test = batch_split(bs, x_train, y_train, x_test, y_test)
    no_batch = len(x_train)
    w, b = initialize_params(input_size, no_hidden, size_hidden, no_output)
    while ct <= max_iterations:
        for eg in range(no_batch):
            a, h, a_f, y_pred = forward_prop(eg, x_train, w, b)
            grad_b, grad_w = backward_prop(eg, y_train, y_pred, w, b, h, a, a_f, no_hidden)
            w, b = param_update(w, b, grad_w, grad_b, learning_rate)
        acc = test_model(w, b, x_test, y_test)
        print('Epoch',ct,'Accuracy',acc)
        ct += 1


In [132]:
# Experiment configuration: architecture first, then optimisation settings.
size_hidden=[200,100,50]      # units in each hidden layer, input side first
no_hidden =len(size_hidden)   # number of hidden layers
input_size=784                # features per flattened input sample
no_output=10                  # units in the output layer
learning_rate=0.15
max_iterations=10             # number of passes over the training batches
batch_size=1
# Reload the raw Fashion-MNIST splits so the run starts from unprocessed arrays.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [135]:
# Pass the configured `batch_size`: the configuration cell assigns that name,
# whereas a bare `bs` is not assigned in any cell shown and would raise
# NameError on a fresh kernel.
train_model(input_size,no_hidden,size_hidden,no_output,batch_size,x_train,y_train,x_test,y_test,max_iterations,learning_rate)

Epoch 1 Accuracy 0.754
Epoch 2 Accuracy 0.8168
Epoch 3 Accuracy 0.8265
Epoch 4 Accuracy 0.831
Epoch 5 Accuracy 0.8374
Epoch 6 Accuracy 0.83
Epoch 7 Accuracy 0.8337
Epoch 8 Accuracy 0.8254
Epoch 9 Accuracy 0.8347
Epoch 10 Accuracy 0.8219


(10000, 784)