In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.preprocessing import image
from keras.utils.np_utils import to_categorical
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, Conv2D, MaxPool2D

In [2]:
#data = pd.read_csv("C:/Users/karpit/Downloads/ML/SRI Project/Fmnist/fashion-mnist_train.csv")
data = pd.read_csv("D:/Downloads/Fashion MNIST/fashion-mnist_train.csv")
#test = pd.read_csv("C:/Users/karpit/Downloads/ML/SRI Project/Fmnist/fashion-mnist_test.csv")
test = pd.read_csv("D:/Downloads/Fashion MNIST/fashion-mnist_test.csv")

data = np.array(data)
test = np.array(test)

print(data.shape)
print(test.shape)

(60000, 785)
(10000, 785)


In [3]:
train_X = data[:,1:]/255
test_X = test[:,1:]/255

train_X = train_X.reshape(train_X.shape[0],784)
test_X = test_X.reshape(test_X.shape[0],784)

train_y = data[:,0]
test_y = test[:,0]

train_X.shape, train_y.shape, test_X.shape, test_y.shape

((60000, 784), (60000,), (10000, 784), (10000,))

In [4]:
train_y = to_categorical(train_y,10)
test_y = to_categorical(test_y,10)

train_y.shape, test_y.shape

((60000, 10), (10000, 10))

In [5]:
X_train, X_val, y_train, y_val = train_test_split(train_X, train_y, test_size = 0.2)


print ("Training data shape : ",X_train.shape, y_train.shape)
print ("Validation data shape : ",X_val.shape, y_val.shape)

Training data shape :  (48000, 784) (48000, 10)
Validation data shape :  (12000, 784) (12000, 10)


In [8]:
from sklearn.metrics import log_loss

def init(sizes):
    num_layers = len(sizes)
    
    biases = [np.random.randn(y, 1) for y in sizes[1:]]
    weights = [np.random.randn(y, x)*np.sqrt(2/x) for x, y in zip(sizes[:-1], sizes[1:])]
        
    return biases, weights

def dx_relu(a):
    da = np.ones_like(a)
    da[a<0] = 0
    return da

"""def dx_relu(a, alpha = 0.3):
    da = np.ones_like(a)
    da[a<0] = alpha
    return da"""

def forward_pass(w, b, x):
    a = []
    h = []
    num_layer = len(w)+1
    for i in range(num_layer-1):
        t = np.matmul(w[i],x) + np.tile(b[i],x.shape[1])
        x = np.matmul(w[i],x)
        a.append(t)
        if i != num_layer-2:
            #h.append(tf.nn.relu(t, alpha = 0.3))
            h.append(tf.nn.relu(t))
            
    h.append(tf.nn.sigmoid(t))
    return a, h



def dfa_backward_pass(e, h, B, a, x):
    dW = [np.zeros(w.shape) for w in weights]
    db = [np.zeros(b.shape) for b in biases]
    
    num_layer = len(weights) + 1
    
    dW[-1] = -np.matmul(e, np.transpose(h[-2]))
    tmp = -np.sum(e, axis=1)
    tmp = tmp[:,np.newaxis]
    db[-1] = tmp
    
    for l in range(2,num_layer):
        da = np.matmul(B[-l+1], e)*dx_relu(a[-l])
        if l != num_layer-1:
            dW[-l] = -np.matmul(da, np.transpose(h[-l-1]))
        else :
            dW[-l] = -np.matmul(da, np.transpose(x))
        tmp = -np.sum(da, axis=1)
        tmp = tmp[:,np.newaxis]
        db[-l] = tmp
    
    return dW, db


def dfa_train(x, y, X_val, y_val, n_epochs=10, lr=1e-3, batch_size=200):
    x = np.transpose(x)
    y = np.transpose(y)
    
    num_layer = len(weights) + 1
    
    B=[]
    # initialization of random matrix for FA
    '''for i in range(num_layer-2):
        shape = weights[i+1].shape
        B.append(np.random.randn(shape[1], shape[0]))'''
    
    # initialization of random matrix for DFA
    for i in range(num_layer-2):
        shape = weights[i+1].shape
        B.append(np.random.randn(shape[1], 10))
        
    
    dataset_size = x.shape[1]
    n_batches = dataset_size//batch_size
    for epoch in range(n_epochs):
        perm = np.random.permutation(x.shape[1])
        x = x[:, perm]
        y = y[:, perm]
        loss = 0.
        for j in range(n_batches):
            samples = x[:, j*batch_size:(j+1)*batch_size]
            targets = y[:, j*batch_size:(j+1)*batch_size]
            a, h = forward_pass(weights, biases, samples)
            error = h[-1] - targets
            preds = np.argmax(h[-1], axis=0) 
            truth = np.argmax(targets, axis=0)
            loss_on_batch = log_loss(targets, h[-1])
            
            dW, db = dfa_backward_pass(error, h, B, a, samples)
            
            
            for l in range(len(dW)):
                weights[l] += lr*dW[l]
                biases[l] += lr*db[l]

            loss += loss_on_batch
        
        print ('Loss at epoch', epoch+1, ':', loss/x.shape[1])
        
        outputs = forward_pass(weights, biases, X_val.T)[-1]
        p = np.argmax(outputs[-1], axis=0)
        actual = np.argmax(y_val.T, axis=0)
        val_acc =  (np.sum(p == actual)/p.shape[0])*100
        
        print('Validation Accuracy is :',val_acc, '%\n')
     
    return weights, biases



In [9]:
biases, weights = init([784, 700, 600, 500, 400, 300, 200, 100, 10])

Wdfa, bdfa = dfa_train(X_train, y_train, X_val, y_val, n_epochs=500, lr=1e-6, batch_size=200)


 : 63.19166666666667 %

Loss at epoch 257 : 1.2853403512289967
Validation Accuracy is : 54.13333333333333 %

Loss at epoch 258 : 1.2842747148793419
Validation Accuracy is : 51.758333333333326 %

Loss at epoch 259 : 1.2806292076071795
Validation Accuracy is : 55.474999999999994 %

Loss at epoch 260 : 1.2907027822555517
Validation Accuracy is : 54.40833333333334 %

Loss at epoch 261 : 1.2685828637953038
Validation Accuracy is : 61.12499999999999 %

Loss at epoch 262 : 1.3079742887580463
Validation Accuracy is : 66.23333333333333 %

Loss at epoch 263 : 1.299472299397823
Validation Accuracy is : 63.56666666666667 %

Loss at epoch 264 : 1.275437398718461
Validation Accuracy is : 63.28333333333334 %

Loss at epoch 265 : 1.310365905253461
Validation Accuracy is : 69.95833333333333 %

Loss at epoch 266 : 1.2867132393923244
Validation Accuracy is : 59.983333333333334 %

Loss at epoch 267 : 1.2840664367367312
Validation Accuracy is : 61.76666666666667 %

Loss at epoch 268 : 1.2797589687894022
Va