<a href="https://colab.research.google.com/github/13194307/UTS_ML2019_ID13194307/blob/master/ML_A2/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TODO LIST:


*   Finish off backpropagation (currently only updates output layer weights)
*   Perhaps add different kinds of layer classes (one for Dense layers, one for output)
*   Double check the order of values in each derivative



In [0]:
import math
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from scipy.special import softmax
from scipy import stats

In [0]:
class NeuralNetwork:
    class Layer:
        class Neuron:
            def __init__(self, inputShape):
                self.weights, self.bias = self.initialiseWeights(inputShape)
                
            def initialiseWeights(self, inputShape):
                weights = np.array([np.random.randn() for _ in range(0, inputShape)]) / math.sqrt(inputShape)
                bias = np.random.randn()
                
                return weights, bias
            
            def getWeights(self):
                return self.weights
            
            def generateNeuronOutput(self, x):
                return np.dot(self.weights, x) + self.bias
            
            def updateWeights(self, update):
                print("Before:", self.weights)
                self.weights -= update
                print("After:", self.weights)
            
        
        def __init__(self, layerType, neuronsPerLayer, inputShape):
            self.layerType = layerType
            self.numNeurons = neuronsPerLayer
            self.inputShape = inputShape
            self.neurons = [self.Neuron(inputShape) for _ in range(neuronsPerLayer)]
            self.dWeightedSum_dWeights = []
            self.dWeightedSum_dInput = []
            
            if layerType == "Output":
                self.dSoftmax = []
            elif layerType == "Dense":
                self.dRelu = []
            
        def getInputShape(self):
            return self.inputShape
        
        def getNumNeurons(self):
            return self.numNeurons
        
        def getDSoftmax(self):
            temp = self.dSoftmax
            self.dSoftmax = []
            return temp
        
        def getDWeightedSum_dWeights(self):
            temp = self.dWeightedSum_dWeights
            self.dWeightedSum_dWeights = []
            return temp
        
        def getDWeightedSum_dInput(self):
            temp = self.dWeightedSum_dInput
            self.dWeightedSum_dInput = []
            return temp
        
        def getDRelu(self):
            temp = self.dRelu
            self.dRelu = []
            return temp
        
        def generateLayerOutput(self, x):
            layerOutput = np.array([])
            change = [self.calcDWeightedSum_dWeights(x) for _ in range(self.numNeurons)]
            self.dWeightedSum_dWeights.append(change)
            
            change = []
            
            for neuron in self.neurons:
                change.append(neuron.getWeights())
                neuronOutput = neuron.generateNeuronOutput(x)
                layerOutput = np.append(layerOutput, neuronOutput)
                
            self.dWeightedSum_dInput.append(change)
               
            print(self.layerType, ":", layerOutput)
            if self.layerType == "Dense":
                #Leaky ReLU activation
                change = self.calcDRelu(layerOutput)
                self.dRelu.append(change)
                layerOutput[layerOutput < 0] *= 0.01
            elif self.layerType == "Output":
                if len(layerOutput) == 1:
                    #Sigmoid activation
                    layerOutput = 1 / (1 + math.exp(-1*layerOutput[0]))
                    #print("Probabilities: ", layerOutput)
                else:
                    #Softmax activation
                    layerOutput = np.exp(layerOutput)/sum(np.exp(layerOutput))
                    change = self.calcDSoftmax(layerOutput)
                    self.dSoftmax.append(change)
                    #print(self.dSoftmax)
                    #print("Probabilities: ", layerOutput)
            else:
                raise NotImplementedError
                    
            print("After activation - ", layerOutput)
            return layerOutput
        
        # Derivative of softmax with respect to weighted sum/dot product
        def calcDSoftmax(self, prob):
            n = len(prob)
            return [prob[i]-(prob[i]**2) for i in range(n)]
        
        # Derivative of weighted sum with respect to the weights
        # This function is kinda pointless as it just returns the argument
        # passed to it unaltered but I added it in to remind me that this
        # is the derivative.
        def calcDWeightedSum_dWeights(self, x):
            return x
        
        # Derivative of weighted sum with respect to the input provided
        # Also a redundant function, and was also added in for the same
        # reason as above
        def calcDWeightedSum_dInput(self, weights):
            return weights
        
        # Derivative of Leaky ReLU with respect to weighted sum/dot product
        def calcDRelu(self, input_vector):
            derivative = np.array(input_vector)
            derivative[derivative > 0] = 1
            derivative[derivative <= 0] = 0.01
            return derivative
        
        def updateWeights(self, update):
            for i in range(self.numNeurons):
                self.neurons[i].updateWeights(update[i])
            
        def __str__(self):
            output = ""
            for neuron in self.neurons:
                weights, bias = neuron.getWeightsAndBias()
                output+="\t"
                
                for j in range(0, len(weights)):
                    output+=("w{}: {}, ".format(j, weights[j]))
                    
                output+=("b0: {}\n".format(bias))
            
            return output
        
        
        
        
        
    
    def __init__(self):
        self.layers = []
        self.numLayers = 0
        
    def addLayer(self, layerType, neuronsPerLayer, inputShape=None):
        if inputShape == None:
            inputShape = self.layers[-1].getNumNeurons()
            
        self.layers.append(self.Layer(layerType, neuronsPerLayer, inputShape))
        self.numLayers+=1
        
    def predict(self, x, labels, batch_size=2, step_size = 0.1):
        probabilities = []
        dError = []
        counter = 0
        
        for i in range(len(x)):
            prob = self.feedForward(x[i])
            change = self.calcDError(prob, labels[i])
            dError.append(change)
            counter+=1
            
            if counter == batch_size:
                counter = 0
                self.backPropagation(dError, step_size, batch_size)
                dError = []
                
            #print(pred)
            probabilities.append(prob)
      
        predictions = np.argmax(probabilities, axis=1)
        #loss = self.calcTotalLoss(probabilities, labels)
        #print(loss)
        return predictions
    
    def feedForward(self, x):
        lastLayerOutput = x
        
        for layer in self.layers:
            lastLayerOutput = layer.generateLayerOutput(lastLayerOutput)
            
        return lastLayerOutput
    
    def backPropagation(self, dError, step_size, batch_size=None):
        #Only updates output layer weights for now
        d1 = np.array(dError)
        d2 = np.array(self.layers[-1].getDSoftmax())
        d3 = np.array(self.layers[-1].getDWeightedSum_dWeights())
        print("d1:", d1)
        print("d2:", d2)
        print("d3:", d3[0])
        changeOutputWeights = [d1[i]*d2[i]*d3[i] for i in range(len(d3))]
        avgChange = (sum(changeOutputWeights)/batch_size) * step_size
        #print("change:", changeOutputWeights)
        #print("fin change:", avgChange)
        self.layers[-1].updateWeights(avgChange)
        
        # Derivatives for H[-1] 
        d4 = np.array(self.layers[-1].getDWeightedSum_dInput())
        d5 = np.array(self.layers[-2].getDRelu())
        d6 = np.array(self.layers[-2].getDWeightedSum_dWeights())
        print("d4:", d4)
        print("d5:", d5)
        print("d6:", d6)
        dTotalError = np.array([np.sum(d1[i]*d2[i]*d4[i], axis=1) for i in range(batch_size)])
        length = len(dTotalError[0])
        onesArray = np.ones((length, length))
        dTotalError = [np.transpose(dTotalError[i]*onesArray) for i in range(batch_size)]
        print(onesArray)
        print(dTotalError)
        changeHiddenWeights = [d5[i]*d6[i]*dTotalError[i] for i in range(batch_size)]
        avgChange = (sum(changeHiddenWeights)/batch_size) * step_size
        
        print("dTotalError sum:", dTotalError)
        print("Change:", changeHiddenWeights)
        print("Avg:", avgChange)
        
        self.layers[-2].updateWeights(avgChange)
        # End of derivatives for H[-1] 
        
        # Start of loop experiments
        numWeights = self.layers[-2].getNumNeurons()
        shape = np.shape(d2)
        cumulativeDTotalError = [[[d2[i,j] for _ in range(numWeights)] 
                                  for j in range(shape[1])] 
                                 for i in range(shape[0])]
        #print("Cumulative:",cumulativeDTotalError)
        
        for i in range(-2, -1*(self.numLayers+1), -1):
            
            if i == -2:
                d1 = [[[d1[i,j] for _ in range(numWeights)] 
                                  for j in range(shape[1])] 
                                 for i in range(shape[0])]
                print("New d4:", d4)
                combined = d4 * d1
            else:
                d4 = np.array(self.layers[i+1].getDWeightedSum_dInput())
                d2 = np.array(self.layers[i].getDRelu())
                d2 = [[[d2[i,j] for _ in range(numWeights)] 
                                  for j in range(shape[1])] 
                                 for i in range(shape[0])]
                combined = d4 * d2
            
            cumulativeDTotalError *= combined
            print("Cumulative:",cumulativeDTotalError)
        # End of loop experiments
    
    #Derivative of error (cross-entropy) with respect to softmax probabilities
    def calcDError(self, prob, actual):
        n = len(actual)
        return [-1*(actual[i]/prob[i]) + ((1 - actual[i])*(-1/(1-prob[i]))) for i in range(n)]
    
    def calcTotalLoss(self, prob, actual):
        #REDO THIS TO WORK WITH ONE HOT ENCODING
        n = len(actual)
        loss = -1*sum(np.log([prob[i][actual[i]] for i in range(n)]))/n
        return loss
        
    def __str__(self):
        output = ""
        
        for i in range(0, self.numLayers):
            output+=("Layer {}:\n".format(i+1))
            output+=str(self.layers[i])
        
        return output

In [0]:
nn = NeuralNetwork()
nn.addLayer("Dense", 3, inputShape=4)
nn.addLayer("Dense", 3)
nn.addLayer("Dense", 3)
nn.addLayer("Output", 3)
#print(nn)

In [0]:
from sklearn.datasets import load_iris

iris_X, iris_y = load_iris(True)

In [0]:
from sklearn.preprocessing import MinMaxScaler

iris_X_trimmed = iris_X

scaler = MinMaxScaler()
iris_X_scaled = scaler.fit_transform(iris_X_trimmed)
iris_X_zscore = stats.zscore(iris_X_trimmed)

In [0]:
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
labels = lb.fit_transform(iris_y)

In [231]:
pred = nn.predict(iris_X_zscore[0:2, :], labels[0:2])
accuracy_score(pred, iris_y[0:2])

Dense : [-2.23221845 -0.8751624   0.64987507]
After activation -  [-0.02232218 -0.00875162  0.64987507]
Dense : [ 0.37090724 -2.54010339 -1.45263818]
After activation -  [ 0.37090724 -0.02540103 -0.01452638]
Dense : [ 0.44076772 -0.3262773  -1.47400625]
After activation -  [ 0.44076772 -0.00326277 -0.01474006]
Output : [-1.7569247   0.0523926   2.08445091]
After activation -  [0.01862343 0.11371984 0.86765673]
Dense : [-1.96967242 -1.50415952  1.00259954]
After activation -  [-0.01969672 -0.0150416   1.00259954]
Dense : [ 0.32009756 -2.68752592 -1.65213799]
After activation -  [ 0.32009756 -0.02687526 -0.01652138]
Dense : [ 0.43102416 -0.33465124 -1.45712886]
After activation -  [ 0.43102416 -0.00334651 -0.01457129]
Output : [-1.75410058  0.04350586  2.07920362]
After activation -  [0.01877928 0.11333643 0.86788429]
d1: [[-53.69579668  -1.12831139  -7.55610777]
 [-53.25017418  -1.12782349  -7.56912256]]
d2: [[0.0182766  0.10078764 0.11482853]
 [0.01842662 0.10049128 0.11466115]]
d3: [[

0.0

In [0]:
print(iris_y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
