In [1]:
import numpy as np
from functions import sigmoid,softmax,dsigmoid
from util import spiral_data_gen
from sklearn.metrics import classification_report
from sklearn.datasets import *

# Naive implementation of a 3-layer vanilla NN with ReLU and softmax activations.

In [2]:
# Alternative dataset: X,y=spiral_data_gen(False)
# Load the wine dataset once (the original fetched it twice, once per field).
X,y=load_wine(return_X_y=True)

# Put samples in columns: X is (D features, N samples).
X=X.T
D,N=X.shape
print(D,N)
K=len(np.unique(y))  # number of distinct class labels

# Parameters of the three affine layers; weights start as small Gaussian noise.
hidden_size=50
W0 = 0.01 * np.random.randn(hidden_size,D)
b0 = np.ones((hidden_size,1))*.001 # against dead ReLU neurons
W1 = 0.01 * np.random.randn(hidden_size,hidden_size)
b1 = np.ones((hidden_size,1))*.001
W2 = 0.01 * np.random.randn(K,hidden_size)
b2 = np.zeros((K,1))

num_epoch=100_000
mode=num_epoch//10  # print the loss every `mode` epochs (ten reports in total)
step_size=.001


def forward_pass(X, W0, b0, W1, b1, W2, b2):
    """Run the affine -> ReLU -> affine -> ReLU -> affine -> softmax forward pass.

    Returns every intermediate ``(S0, Z0, S1, Z1, S2, Z2)`` because the
    backward pass needs the pre-activations (ReLU masks) and the activations
    (weight gradients).  ``softmax`` comes from the local ``functions`` module
    and is assumed to normalise each column -- TODO confirm.
    """
    S0 = W0.dot(X) + b0
    Z0 = np.maximum(0, S0)

    S1 = W1.dot(Z0) + b1
    Z1 = np.maximum(0, S1)

    S2 = W2.dot(Z1) + b2
    Z2 = softmax(S2)
    return S0, Z0, S1, Z1, S2, Z2


for epoch in range(num_epoch):

    # forward pass
    S0, Z0, S1, Z1, S2, Z2 = forward_pass(X, W0, b0, W1, b1, W2, b2)

    # compute the loss: mean negative log-probability of the true class
    correct_logprobs = -np.log(Z2[y,range(N)])
    loss = np.sum(correct_logprobs)/N

    if epoch % mode == 0:
        print("Epoch %d: loss %f" % (epoch, loss))

    # backward pass

    # Derivative of the loss w.r.t. S2, the input of softmax: (Z2 - one_hot(y)) / N.
    # Work on a copy so the probabilities in Z2 are not overwritten.
    dS2 = Z2.copy()
    dS2[y,range(N)] -= 1
    dS2 /= N

    # ScoreGate => Multiply and Addition gate
    db2 = np.sum(dS2, axis=1, keepdims=True)
    dW2 = dS2.dot(Z1.T)
    dZ1 = W2.T.dot(dS2)

    # ReluGate: no gradient flows where the pre-activation was not positive.
    dZ1[S1 <= 0] = 0
    dS1 = dZ1

    # ScoreGate
    dW1 = dS1.dot(Z0.T)
    db1 = np.sum(dS1, axis=1, keepdims=True)
    dZ0 = W1.T.dot(dS1)

    # ReluGate
    dZ0[S0 <= 0] = 0

    # ScoreGate (the gradient w.r.t. X itself is not needed)
    dS0 = dZ0
    dW0 = dS0.dot(X.T)
    db0 = np.sum(dS0, axis=1, keepdims=True)

    # perform a parameter update (plain gradient descent, in place)
    W0 += -step_size * dW0
    b0 += -step_size * db0

    W1 += -step_size * dW1
    b1 += -step_size * db1

    W2 += -step_size * dW2
    b2 += -step_size * db2

# Evaluate on the training data with the final parameters.
S0, Z0, S1, Z1, S2, Z2 = forward_pass(X, W0, b0, W1, b1, W2, b2)
predicted_class = np.argmax(Z2, axis=0)

print(classification_report(y, predicted_class))

13 178
Epoch 0: loss 1.097287
Epoch 10000: loss 0.570037
Epoch 20000: loss 0.468397
Epoch 30000: loss 0.423234
Epoch 40000: loss 0.356129
Epoch 50000: loss 0.276574
Epoch 60000: loss 0.281639
Epoch 70000: loss 0.186600
Epoch 80000: loss 0.130987
Epoch 90000: loss 0.456909
              precision    recall  f1-score   support

           0       0.90      0.95      0.93        59
           1       0.97      0.82      0.89        71
           2       0.86      1.00      0.92        48

    accuracy                           0.91       178
   macro avg       0.91      0.92      0.91       178
weighted avg       0.92      0.91      0.91       178



# NN as a computational graph

In [3]:
class MatMulGate:
    """Affine ("score") gate: computes ``W.dot(X) + b`` and its gradients."""

    def forward(self, W, b, X):
        """Return ``W.dot(X) + b``.

        ``W`` and ``X`` are remembered (by reference, not copied) because
        ``backward`` needs them; ``b`` is not stored.
        """
        self.W, self.X = W, X
        scores = W.dot(X)
        return scores + b

    def backward(self, dL):
        """Propagate the upstream gradient ``dL`` through the affine map.

        Returns the tuple ``(dW, dX, db)``: gradients w.r.t. the weights, the
        gate input and the bias.  The bias gradient sums ``dL`` over axis 1
        and keeps that axis, so it is a column.
        """
        grad_W = dL.dot(self.X.T)
        grad_b = np.sum(dL, axis=1, keepdims=True)
        grad_X = self.W.T.dot(dL)
        return grad_W, grad_X, grad_b

class ReluGate:
    """Element-wise ReLU gate: ``max(0, X)`` forward, masked gradient backward."""

    def __init__(self):
        pass

    def forward(self,X):
        """Return ``np.maximum(0, X)`` and remember ``X`` for the backward pass."""
        self.X=X
        return np.maximum(0, self.X)

    def backward(self,dL):
        """Return ``dL`` with entries zeroed wherever the forward input was <= 0.

        A new array is returned; the caller's ``dL`` is left untouched (the
        previous implementation zeroed entries of the argument in place).
        """
        return np.where(self.X > 0, dL, 0)

class SoftmaxGate:
    """Output gate that applies ``softmax`` in the forward direction.

    ``softmax`` is imported from the local ``functions`` module; it is
    presumably a column-wise softmax -- verify against that module.
    """

    def forward(self, X):
        """Apply ``softmax`` to ``X``, cache the result in ``self.Z`` and return it."""
        probs = softmax(X)
        self.Z = probs
        return probs

    def backward(self, dL):
        """Return ``dL`` unchanged.

        The gate adds no gradient term of its own: the caller is expected to
        pass in the gradient already taken w.r.t. the softmax input.
        """
        return dL
    
class ComputationalGraph:
    """Sequential stack of (affine gate, activation gate) layers.

    Each layer owns its weight matrix and bias and is trained with plain
    gradient descent inside ``backward``.
    """

    def __init__(self):
        # One entry per layer: ((W, b), score_gate, activation_gate).
        self.gates=[]

    def add(self,shape,gate):
        """Append a layer with a fresh ``MatMulGate`` followed by ``gate``.

        ``shape`` is ``(fout, fin)``.  Weights are drawn as ``0.01 * randn``
        and every bias entry starts at ``0.001``.
        """
        fout,fin=shape
        W = 0.01 * np.random.randn(fout,fin)
        b = np.ones((fout,1))*.001
        self.gates.append(((W,b), MatMulGate(),gate))

    def forward(self,X):
        """Feed ``X`` through every layer; cache and return the final output."""
        for (W,b),score,sigma in self.gates:
            X=sigma.forward(score.forward(W,b,X))
        self.Z=X
        return self.Z

    def backward(self,y,step_size=.001):
        """Backpropagate from the last ``forward`` output and update all layers.

        ``y`` holds the true class index of every sample (one per column of
        the output).  ``step_size`` is the gradient-descent learning rate; its
        default matches the value that used to be hard-coded.

        The starting gradient is ``Z - one_hot(y)``, i.e. the derivative of
        the summed cross-entropy w.r.t. the softmax input.  It is not divided
        by the number of samples, so the update scales with the batch size.
        """
        # Copy so the predictions handed out by forward() are not overwritten.
        dL = self.Z.copy()
        # Take the sample count from the activations, not from a global `N`.
        num_samples = dL.shape[1]
        dL[y,np.arange(num_samples)] -= 1
        for (W,b),score,sigma in reversed(self.gates):
            # score.backward uses the weights as they were during forward();
            # the in-place update below happens only after dL was computed.
            dW,dL,db=score.backward(sigma.backward(dL))
            W-=step_size*dW
            b-=step_size*db

# 1. Example 

In [4]:
# Spiral data with samples in columns: X is (D, N).
X, y = spiral_data_gen(False)
X = X.T
D, N = X.shape
K = np.unique(y).size  # number of classes

hidden_size = 50

# Two ReLU hidden layers followed by a softmax output layer.
model = ComputationalGraph()
for layer_shape, activation in (
    ((hidden_size, D), ReluGate()),
    ((hidden_size, hidden_size), ReluGate()),
    ((K, hidden_size), SoftmaxGate()),
):
    model.add(layer_shape, activation)

num_epoch = 10_000
mode = num_epoch // 10  # report the loss ten times over the run

for epoch in range(num_epoch):
    preds = model.forward(X)

    if epoch % mode == 0:
        # Mean negative log-probability assigned to the true class.
        loss = np.mean(-np.log(preds[y, np.arange(N)]))
        print('{0}.th epoch Loss:{1}'.format(epoch,loss))

    # One gradient-descent step on every layer.
    model.backward(y)

# Training-set predictions: most probable class per column.
y_head = np.argmax(model.forward(X), axis=0)
print(classification_report(y, y_head))

0.th epoch Loss:1.0986035985370461
1000.th epoch Loss:0.3363575663676093
2000.th epoch Loss:0.036526840180824104
3000.th epoch Loss:0.02558364402726342
4000.th epoch Loss:0.020615359935970905
5000.th epoch Loss:0.020835372518647404
6000.th epoch Loss:0.01870629886306993
7000.th epoch Loss:0.017806767778799255
8000.th epoch Loss:0.017432439581720912
9000.th epoch Loss:0.015239174073848616
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       100
           1       0.98      1.00      0.99       100
           2       1.00      0.99      0.99       100

    accuracy                           0.99       300
   macro avg       0.99      0.99      0.99       300
weighted avg       0.99      0.99      0.99       300



# 2. Example 

In [5]:
# Spiral data with samples in columns: X is (D, N).
X, y = spiral_data_gen(False)
X = X.T
D, N = X.shape
K = np.unique(y).size  # number of classes

hidden_size = 10

# Two ReLU hidden layers followed by a softmax output layer.
model = ComputationalGraph()
for layer_shape, activation in (
    ((hidden_size, D), ReluGate()),
    ((hidden_size, hidden_size), ReluGate()),
    ((K, hidden_size), SoftmaxGate()),
):
    model.add(layer_shape, activation)

num_epoch = 10_000
mode = num_epoch // 10  # report the loss ten times over the run

for epoch in range(num_epoch):
    preds = model.forward(X)

    if epoch % mode == 0:
        # Mean negative log-probability assigned to the true class.
        loss = np.mean(-np.log(preds[y, np.arange(N)]))
        print('{0}.th epoch Loss:{1}'.format(epoch,loss))

    # One gradient-descent step on every layer.
    model.backward(y)

# Training-set predictions: most probable class per column.
y_head = np.argmax(model.forward(X), axis=0)
print(classification_report(y, y_head))

0.th epoch Loss:1.0986127970515298
1000.th epoch Loss:1.0984531965136364
2000.th epoch Loss:0.5452662860827222
3000.th epoch Loss:0.039976832852391966
4000.th epoch Loss:0.026662964448613533
5000.th epoch Loss:0.021519567642350936
6000.th epoch Loss:0.019624775449742664
7000.th epoch Loss:0.01873121818890523
8000.th epoch Loss:0.01799883200089535
9000.th epoch Loss:0.017166354828606737
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       100
           1       0.98      1.00      0.99       100
           2       1.00      0.99      0.99       100

    accuracy                           0.99       300
   macro avg       0.99      0.99      0.99       300
weighted avg       0.99      0.99      0.99       300



# 3. Example

In [6]:
# Spiral data with samples in columns: X is (D, N).
X, y = spiral_data_gen(False)
X = X.T
D, N = X.shape
K = np.unique(y).size  # number of classes

hidden_size = 2

# Two ReLU hidden layers followed by a softmax output layer.
model = ComputationalGraph()
for layer_shape, activation in (
    ((hidden_size, D), ReluGate()),
    ((hidden_size, hidden_size), ReluGate()),
    ((K, hidden_size), SoftmaxGate()),
):
    model.add(layer_shape, activation)

num_epoch = 10_000
mode = num_epoch // 10  # report the loss ten times over the run

for epoch in range(num_epoch):
    preds = model.forward(X)

    if epoch % mode == 0:
        # Mean negative log-probability assigned to the true class.
        loss = np.mean(-np.log(preds[y, np.arange(N)]))
        print('{0}.th epoch Loss:{1}'.format(epoch,loss))

    # One gradient-descent step on every layer.
    model.backward(y)

# Training-set predictions: most probable class per column.
y_head = np.argmax(model.forward(X), axis=0)
print(classification_report(y, y_head))

0.th epoch Loss:1.0986124329899025
1000.th epoch Loss:1.0986122997309662
2000.th epoch Loss:1.098612227197544
3000.th epoch Loss:1.0986120860203632
4000.th epoch Loss:1.0986113751635018
5000.th epoch Loss:1.098590780052423
6000.th epoch Loss:0.8678096677100844
7000.th epoch Loss:0.7448633032637624
8000.th epoch Loss:0.6485987119180769
9000.th epoch Loss:0.6543618666477579
              precision    recall  f1-score   support

           0       0.67      1.00      0.80       100
           1       0.91      0.52      0.66       100
           2       0.65      0.61      0.63       100

    accuracy                           0.71       300
   macro avg       0.74      0.71      0.70       300
weighted avg       0.74      0.71      0.70       300



In [7]:
X, y = spiral_data_gen(False)
# Zero-centre every feature (subtract the per-column mean, in place) before
# transposing so that samples end up in columns.
X -= X.mean(axis=0)
X = X.T
D, N = X.shape
K = np.unique(y).size  # number of classes

hidden_size = 2

# Two ReLU hidden layers followed by a softmax output layer.
model = ComputationalGraph()
for layer_shape, activation in (
    ((hidden_size, D), ReluGate()),
    ((hidden_size, hidden_size), ReluGate()),
    ((K, hidden_size), SoftmaxGate()),
):
    model.add(layer_shape, activation)

num_epoch = 10_000
mode = num_epoch // 10  # report the loss ten times over the run

for epoch in range(num_epoch):
    preds = model.forward(X)

    if epoch % mode == 0:
        # Mean negative log-probability assigned to the true class.
        loss = np.mean(-np.log(preds[y, np.arange(N)]))
        print('{0}.th epoch Loss:{1}'.format(epoch,loss))

    # One gradient-descent step on every layer.
    model.backward(y)

# Training-set predictions: most probable class per column.
y_head = np.argmax(model.forward(X), axis=0)
print(classification_report(y, y_head))

0.th epoch Loss:1.0986122578268582
1000.th epoch Loss:1.0986121457447826
2000.th epoch Loss:1.0986114667613445
3000.th epoch Loss:1.098590054680897
4000.th epoch Loss:0.6322695540594208
5000.th epoch Loss:0.5810257553547651
6000.th epoch Loss:0.5703092413348116
7000.th epoch Loss:0.5718422355032038
8000.th epoch Loss:0.5732302550770745
9000.th epoch Loss:0.6514727162328073
              precision    recall  f1-score   support

           0       0.68      0.57      0.62       100
           1       0.65      0.89      0.75       100
           2       0.95      0.75      0.84       100

    accuracy                           0.74       300
   macro avg       0.76      0.74      0.74       300
weighted avg       0.76      0.74      0.74       300



# Next, let's use Momentum, RMSprop, and Adam.