In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

In [110]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_dv(x):
    return sigmoid(x)*(1-sigmoid(x))

def MSE_loss(Y_true, Y_pred):
    return ((Y_pred-Y_true)**2).sum(axis=1).mean()/2

def MSE_loss_dv(Y_true, Y_pred):
    return (Y_pred-Y_true).mean(axis=0)

def constant_lr(epoch, batch):
    return 0.1

def epoch_lr(epoch, batch):
    return 0.1/np.sqrt(epoch)

class NeuralNetwork:
    
    def __init__(self, M, n, layers, r, activation=sigmoid, activation_dv=sigmoid_dv, loss=MSE_loss, loss_dv=MSE_loss_dv, lr_fn=constant_lr):
        l = [n] + layers + [r]
        self.weights = [np.random.randn(l[i], l[i+1])*0.05 for i in range(len(l)-1)]
        self.biases = [np.random.randn(l[i+1])*0.05 for i in range(len(l)-1)]
        self.Z = []
        self.L = []
        self.activation = activation
        self.activation_dv = activation_dv
        self.loss = loss
        self.loss_dv = loss_dv
        self.n = len(self.weights)
        self.lr_fn = lr_fn
        self.M = M
    
    def backward(self, y):
        # returns weight matrix derivatives with the shapes (n,l_1), (l_1,l_2), \ldots, (l_L,r) and
        # bias derivatives with the shapes l_1, l_2, \ldots, l_L, r
        
        wt_dv = []
        bias_dv = []
        
        # first get del L
        dL = self.loss_dv(y,self.L[-1]) # change to using sigmoid derivative here, as output layer will always be sigmoid
        #print(dL.shape)
        for i in range(self.n-1,-1,-1):
            z_dv = dL*(self.activation_dv(self.Z[i]).mean(axis=0))
            # print(self.L[i].shape)
            # print(self.L[i].mean(axis=0).reshape(-1,1).shape)
            # print(z_dv.shape)
            wt_dv.append(np.outer(self.L[i].mean(axis=0),z_dv))
            bias_dv.append(z_dv)
            dL = z_dv@self.weights[i].T
        
        return wt_dv[::-1], bias_dv[::-1]

    def forward(self, X):
        # populates self.Z and self.L with outputs of each layer, and finally returns the output of the 
        # final layer
        
        self.L = []
        self.Z = []
        self.L.append(X)
        for i in range(self.n):
            self.Z.append(self.L[-1]@self.weights[i] + self.biases[i])
            self.L.append(self.activation(self.Z[-1]))
            
        return self.L[-1]

    def backpropagate(self, wt_dv, bias_dv, eta=0.1):
        for (i,(dw,db)) in enumerate(zip(wt_dv,bias_dv)):
            self.weights[i] -= eta*dw
            self.biases[i] -= eta*db
        
    def fit(self, X, y):
        
        # training
        n_batches = X.shape[0]//self.M
        y_dec = np.argmax(y,axis=1).flatten()
        # train for 300 epochs
        for epoch in range(300):
            for batch in range(n_batches):
                preds = self.forward(X[batch*self.M:(batch+1)*self.M])
                wt_dv, bias_dv = self.backward(y[batch*self.M:(batch+1)*self.M])
                self.backpropagate(wt_dv, bias_dv, eta=self.lr_fn(epoch, batch))
            if (epoch%10 == 9):
                print(f"Epoch {epoch+1}")
                print(f"  Training set accuracy: {accuracy_score(y_dec,self.predict(X))}")
                print(f"  Loss: {self.loss(y,self.L[-1])}")
                print("")
            
        
    def predict(self, X):
        return np.argmax(self.forward(X),axis=1).flatten()
        

# Testing

In [46]:
def load_dataset(path):
    mat = np.loadtxt(path, delimiter=",")
    X,y = mat[:,:28*28],mat[:,28*28].flatten().astype(np.int32)
    y_enc = np.eye(10)[y]
    return X,y,y_enc

In [47]:
dpath = '../data/part2_data'
X_train, y_train, y_train_onehot = load_dataset(f"{dpath}/fmnist_train.csv")
X_test, y_test, y_test_onehot = load_dataset(f"{dpath}/fmnist_test.csv")

In [111]:
net = NeuralNetwork(100, 784, [100], 10)

In [112]:
#import warnings
#warnings.filterwarnings('ignore')
net.fit(X_train, y_train_onehot)

Epoch 10
  Training set accuracy: 0.4074
  Loss: 0.43218071400111024

Epoch 20
  Training set accuracy: 0.5288833333333334
  Loss: 0.4116071181448074

Epoch 30
  Training set accuracy: 0.5470666666666667
  Loss: 0.3937834693042511

Epoch 40
  Training set accuracy: 0.5650833333333334
  Loss: 0.3776370134078666

Epoch 50
  Training set accuracy: 0.5835833333333333
  Loss: 0.3635554943197915

Epoch 60
  Training set accuracy: 0.59855
  Loss: 0.35042287286179036

Epoch 70
  Training set accuracy: 0.6063833333333334
  Loss: 0.3402978933648469



KeyboardInterrupt: 

In [96]:
from sklearn.metrics import accuracy_score

preds = net.predict(X_test)
score = accuracy_score(y_test, preds)
print(score)

0.6902


In [78]:
print(preds)

[1 0 0 ... 6 0 2]


In [67]:
net.forward(X_train)

array([[1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       ...,
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.],
       [1., 1., 1., ..., 1., 1., 1.]])

In [63]:
y_train_onehot

array([[0., 0., 0., ..., 0., 0., 1.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [64]:
net.weights[0]

array([[0.01531875, 0.2341253 , 0.88297172, ..., 0.91529547, 0.0530619 ,
        0.99801892],
       [0.77981012, 0.05530899, 0.92192286, ..., 0.28482677, 0.65887782,
        0.57785324],
       [0.72118082, 0.23557015, 0.63697934, ..., 0.14700197, 0.19349988,
        0.01488687],
       ...,
       [0.88380899, 0.61386444, 0.43785856, ..., 0.8318427 , 0.05593743,
        0.35893407],
       [0.42203514, 0.37888886, 0.25573292, ..., 0.17721268, 0.66253352,
        0.05657037],
       [0.41270869, 0.80240223, 0.87296582, ..., 0.76340554, 0.542691  ,
        0.85509081]])