## Lecture 07 - Neural Network: Forward, Backpropagation

In [5]:
import numpy as np
import matplotlib.pyplot as plt

In [12]:
def activation(z, actype='sigmoid'):
    if actype=='sigmoid':
        return 1/(1+np.exp(-z))

In [17]:
def dactivation(z, actype='sigmoid'):
    if actype=='sigmoid':
        return np.multiply(activation(z, actype),1-activation(z, actype))

### Multiclass classification cost function
\begin{equation}
p_i=\frac{\exp(z_i)}{\sum\limits_{k=1}^{K}\exp(z_k)}
\end{equation}

The cost function for one data point
\begin{equation}
C=-\sum\limits_{j=1}^K y_j\log p_j=-\sum\limits_{j=1}^K \left(y_j z_j- y_j \log(\sum\limits_{k=1}^K \exp(z_k))\right)
\end{equation}

\begin{align}
\frac{\partial C}{\partial z_i}&=-\sum\limits_{j=1}^K \left(y_j\delta_{ij}-y_j \frac{\exp(z_i)}{\sum\limits_{k=1}^K \exp(z_k)} \right)\\
&=-\sum\limits_{j=1}^K \left(y_j\delta_{ij}-y_j p_i \right)\\
&=-(y_i-p_i)
\end{align}


In [24]:
def softmax(z):
    return np.exp(z)/np.sum(np.exp(z))

In [557]:
class neuralnet:
    def __init__(self, inputsize, hiddensize=[128, 10], outputsize=2, bias=True):
        
        if bias:
            inputsize+=1
            hiddensize=list(map(lambda x: x+1, hiddensize))
        self.weights = []
#        self.outputs=np.ones([1, outputsize])
#        self.inputs=np.ones([1, inputsize])
        self.layersize=[inputsize]+hiddensize+[outputsize]
        self.L=len(self.layersize)
        # define weight matrix
        for i, isize in enumerate(self.layersize):
            if i<(self.L-1):
                self.weights.append(np.random.randn(isize, self.layersize[i+1]))
        self.z=[None]*(self.L-1)
        self.a=[None]*(self.L-1)
        self.delta=[None]*(self.L-1)
        self.dweights=[None]*(self.L-1)

    def forward(self):
        for i, w in enumerate(self.weights):
            
            if i==0:
                self.z[i] = np.matmul(self.inputs, w) # Calculate the Z for each layer
                self.a[i] = activation(self.z[i])     #Calculate the Z for each layer
                self.a[i][0,-1] = 1
            else:
                self.z[i] = np.matmul(self.a[i-1], w)       
                self.a[i] = activation(self.z[i])       
                self.a[i][0,-1] = 1
        
    def backprop(self):
        self.dactdz=list(map(dactivation, self.z))      
        for i, value in enumerate(self.delta):
            irev=self.L-2-i
            self.dactdz[irev][-1]=1
            if i==0:
                self.delta[irev]=softmax(self.z[-1])-self.outputs
            else:
                self.delta[irev]=np.multiply(np.matmul(self.weights[irev+1], self.delta[irev+1].T).T, self.dactdz[irev])
        for i, value in enumerate(self.dweights):
            if i==0:
                self.dweights[i]=np.matmul(self.inputs.T, self.delta[i])
            else:
                self.dweights[i]=np.matmul(self.a[i-1].T, self.delta[i])

    def createbatch(self):
            for i in range(0, len(self.data), self.batchsize):
                yield self.data[i:i + self.batchsize]
    
    def fit(self, data, batchsize, learningrate, epochs):
        self.batchsize = batchsize
        self.data = data
        self.batches= list(self.createbatch())
        for i in np.arange(epochs):
            for minibatch in self.batches:
                self.totalDW=[None]*(self.L-1)
                for i, value in enumerate(self.totalDW):
                    self.totalDW[i]=np.zeros([self.layersize[i], self.layersize[i+1]]) #initialize self.totalDW
                for xentry, yentry in minibatch:
                    self.outputs=yentry
                    self.inputs=np.column_stack([xentry, [1]])
                    self.forward()
                    self.backprop()
                    self.totalDW=list(map(lambda x, y: x+y, self.totalDW, self.dweights))
                self.weights=list(map(lambda w, dw: w-dw*learningrate/batchsize,  self.weights, self.totalDW))
    def predict(self, x):
        self.predz=[None]*(self.L-1)
        self.preda=[None]*(self.L-1)
        x=np.column_stack([x, [1]])
        for i, w in enumerate(self.weights):
            if i==0:
                self.predz[i] = np.matmul(x, w)
                self.preda[i] = activation(self.predz[i])
            else:
                self.predz[i] = np.matmul(self.preda[i-1], w)       
                self.preda[i] = activation(self.predz[i])
        return(softmax(self.predz[-1]))

In [558]:
X1 = 2*np.random.randn(5000, 1)
X2 = 5*np.random.randn(5000, 1)
X3 = np.random.randn(5000, 1)
eta=0.5*X1+0.1*X2+1.56*X3-1
X=np.column_stack([X1,X2, X3])
p=1/(1+np.exp(-eta))
y=np.random.binomial(1, p).reshape(5000,1)
y=np.column_stack([y, 1-y])

In [559]:
logsticdata=[]
for predictor, target in zip(X, y):
    logsticdata.append((predictor.reshape(1, -1), target.reshape(1,-1)))

In [560]:
np.column_stack([logsticdata[0][0], [1]])

array([[-1.9787409 , -6.2467915 ,  0.00670936,  1.        ]])

In [582]:
singleneuron=neuralnet(3, [3], 2)

In [583]:
singleneuron.fit(logsticdata, 100, learningrate=0.01, epochs=50)

In [584]:
singleneuron.weights

[array([[ 3.21478102,  1.92365079,  0.05862402, -4.7342825 ],
        [ 1.36748184, -0.21825899,  0.12854786, -1.68612637],
        [ 3.64512275,  3.15857517,  2.4431216 , -6.75217596],
        [-0.2272826 ,  0.66779496, -0.78119702,  0.94754739]]),
 array([[ 0.5166483 , -0.80406116],
        [ 0.6493974 , -0.54891423],
        [ 1.09190037,  0.02686781],
        [-1.41973747,  1.10846792]])]

In [585]:
singleneuron.predict(logsticdata[4][0])

array([[0.09825398, 0.90174602]])

In [586]:
ypred=[]
for ientry in logsticdata:
    ypred.append((singleneuron.predict(ientry[0])[0][0]>0.5)*1)

In [587]:
from sklearn.metrics import confusion_matrix
confusion_matrix(list(y[:,0]), ypred)

array([[2182, 1115],
       [ 329, 1374]])

In [581]:
from sklearn.metrics import confusion_matrix
confusion_matrix(list(y[:,0]), ypred)

array([[2446,  851],
       [ 347, 1356]])

In [428]:
singleneuron.predict(ientry[0])[0][0][0]

0.395813866018872

In [408]:
logsticdata[3]

(array([[-1.82698596, -7.36548003,  0.05225958]]), array([[0, 1]]))

In [407]:
p[3]

array([0.07119237])

In [389]:
singleneuron.z

[array([[-2.22275634, -4.08002901]])]