22/11/2019

The code here is close to Nielsen's. Each activation is treated as a column vector, even the last one, which for XOR is just a single number enclosed in a column vector of shape (1,1); i.e. if the activation value of the output neuron is a, then it is stored as np.array([[a]]).

You can easily adapt the code here for the MLP exercises and the Iris classification problem.
But you may need to use more than 2 hidden neurons and more than 1 output neuron.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
def sigm(z):
    """Logistic sigmoid 1 / (1 + e^(-z)), applied element-wise to z."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def sigm_deriv(z):
    """Derivative of the sigmoid at z, computed as sigm(z) * (1 - sigm(z))."""
    activation = sigm(z)
    complement = 1 - activation
    return activation * complement

In [None]:
class XOR_MLP:
    """Small MLP for XOR: 2 inputs, 2 sigmoid hidden neurons, 1 sigmoid output.

    Layers are numbered as in Nielsen: layer 2 is the hidden layer (w2, b2)
    and layer 3 is the output layer (w3, b3). Activations inside backprop
    are kept as column vectors.
    """

    def __init__(self):
        # The four XOR patterns, one per row, and their targets.
        self.train_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        self.train_outputs = np.array([0, 1, 1, 0])

        # Fixed seed so every instance starts from the same weights.
        np.random.seed(23)

        # Hidden layer: 2 neurons, each with 2 input weights.
        self.w2 = np.random.randn(2, 2)
        self.b2 = np.random.randn(2, 1)

        # Output layer: 1 neuron fed by the 2 hidden activations.
        self.w3 = np.random.randn(1, 2)
        self.b3 = np.random.randn(1, 1)

    def feedforward(self, xs):
        """Return the output activations for xs, where each COLUMN of xs is one input vector."""
        # Multiplying by the whole matrix applies the weights to every input at once.
        hidden = sigm(np.dot(self.w2, xs) + self.b2)
        return sigm(np.dot(self.w3, hidden) + self.b3)

    def backprop(self, xs, ys):
        """Average quadratic-cost gradients over the samples.

        Here each ROW of xs is one input vector and ys holds the matching
        targets. Returns (d_b2, d_w2, d_b3, d_w3, cost), each divided by
        len(ys).
        """
        grad_b2 = np.zeros(self.b2.shape, dtype=float)
        grad_w2 = np.zeros(self.w2.shape, dtype=float)
        grad_b3 = np.zeros(self.b3.shape, dtype=float)
        grad_w3 = np.zeros(self.w3.shape, dtype=float)
        total_cost = 0.0

        for sample, target in zip(xs, ys):
            # Forward pass with (2,1) column vectors; z3 and a3 are (1,1).
            a1 = sample.reshape(2, 1)
            z2 = np.dot(self.w2, a1) + self.b2
            a2 = sigm(z2)
            z3 = np.dot(self.w3, a2) + self.b3
            a3 = sigm(z3)

            # Backward pass: output error (1,1), then hidden error (2,1).
            error = a3 - target
            delta3 = error * sigm_deriv(z3)
            delta2 = sigm_deriv(z2) * np.dot(self.w3.T, delta3)

            grad_b3 += delta3
            grad_w3 += np.dot(delta3, a2.T)   # (1,1) x (1,2) -> (1,2)
            grad_b2 += delta2
            grad_w2 += np.dot(delta2, a1.T)   # (2,1) x (1,2) -> (2,2)

            total_cost += (error ** 2).sum()

        count = len(ys)  # number of training vectors
        return (grad_b2 / count, grad_w2 / count,
                grad_b3 / count, grad_w3 / count,
                total_cost / count)

    def train(self, epochs, eta):
        """Run `epochs` full-batch gradient-descent steps with learning rate eta.

        Plots the per-epoch cost with matplotlib and returns it as an array.
        """
        inputs = self.train_inputs
        targets = self.train_outputs
        history = np.zeros((epochs,))

        for epoch in range(epochs):
            d_b2, d_w2, d_b3, d_w3, history[epoch] = self.backprop(inputs, targets)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3

        plt.plot(history)
        return history
                
                
            
    

In [None]:
# Build the 2-hidden-neuron XOR network; feedforward expects one input per column.
xor = XOR_MLP()
xs = xor.train_inputs.T

# Outputs with the initial random weights.
print(xor.feedforward(xs))

epochs = 1000
c = xor.train(epochs, 3.0)

# Outputs after training.
print(xor.feedforward(xs))

# Cost curve in three panels: every epoch, the first 61 epochs, and epochs 901 onwards.
x_axis = np.linspace(1, epochs, epochs, dtype=int)
fig, axs = plt.subplots(3, 1, figsize=(10, 15))
for panel, window in zip(axs, (slice(None), slice(None, 61), slice(900, None))):
    panel.plot(x_axis[window], c[window])





Exercise 1: Copy and adapt the above XOR_MLP code so that it uses 3 neurons in the hidden layer. Train this MLP and see whether it learns faster than the previous one.

In [None]:
# Exercise 1
class XOR_MLPv2:
    """XOR network with a 3-neuron hidden layer: 2 inputs -> 3 sigmoid -> 1 sigmoid."""

    def __init__(self):
        self.train_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
        self.train_outputs = np.array([0, 1, 1, 0])

        # Same seed on every construction, so initial weights are reproducible.
        np.random.seed(23)

        # hidden layer: 3 neurons, 2 weights each
        self.w2 = np.random.randn(3, 2)
        self.b2 = np.random.randn(3, 1)

        # output layer: 1 neuron, 3 weights
        self.w3 = np.random.randn(1, 3)
        self.b3 = np.random.randn(1, 1)

    def feedforward(self, xs):
        """Compute outputs for a batch; xs has one input vector per column."""
        a2s = sigm(np.dot(self.w2, xs) + self.b2)
        a3s = sigm(np.dot(self.w3, a2s) + self.b3)
        return a3s

    def backprop(self, xs, ys):
        """Return the averaged gradients (d_b2, d_w2, d_b3, d_w3) and average cost.

        xs has one input vector per row; ys has the matching targets.
        """
        sum_w2 = np.zeros(self.w2.shape, dtype=float)
        sum_b2 = np.zeros(self.b2.shape, dtype=float)
        sum_w3 = np.zeros(self.w3.shape, dtype=float)
        sum_b3 = np.zeros(self.b3.shape, dtype=float)
        cost_sum = 0.0

        for row, wanted in zip(xs, ys):
            a1 = row.reshape(2, 1)                  # input as a (2,1) column
            z2 = np.dot(self.w2, a1) + self.b2      # (3,1)
            a2 = sigm(z2)                           # (3,1)

            z3 = np.dot(self.w3, a2) + self.b3      # (1,1)
            a3 = sigm(z3)                           # (1,1)

            diff = a3 - wanted
            delta3 = diff * sigm_deriv(z3)                          # (1,1)
            delta2 = sigm_deriv(z2) * np.dot(self.w3.T, delta3)     # (3,1)

            sum_b3 += delta3
            sum_w3 += np.dot(delta3, a2.T)          # (1,1) x (1,3) -> (1,3)

            sum_b2 += delta2
            sum_w2 += np.dot(delta2, a1.T)          # (3,1) x (1,2) -> (3,2)

            cost_sum += (diff ** 2).sum()

        n = len(ys)  # number of training vectors
        return sum_b2 / n, sum_w2 / n, sum_b3 / n, sum_w3 / n, cost_sum / n

    def train(self, epochs, eta):
        """Full-batch gradient descent for `epochs` steps at learning rate eta.

        Plots and returns the cost recorded at each epoch.
        """
        xs, ys = self.train_inputs, self.train_outputs
        costs = np.zeros((epochs,))

        for step in range(epochs):
            d_b2, d_w2, d_b3, d_w3, costs[step] = self.backprop(xs, ys)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3

        plt.plot(costs)
        return costs
                
                
            
    

In [None]:
# Exercise 1 check: train the 3-hidden-neuron XOR network and plot its cost.
xor2 = XOR_MLPv2()
xs = xor2.train_inputs.T  # one input vector per column for feedforward

# Outputs before training.
print(xor2.feedforward(xs))

epochs = 1000
c = xor2.train(epochs, 3.0)

# Outputs after training.
print(xor2.feedforward(xs))

# Three views of the cost: all epochs, the first 61, and epochs 901 onwards.
x_axis = np.linspace(1, epochs, epochs, dtype=int)
fig, axs = plt.subplots(3, 1, figsize=(10, 15))
axs[0].plot(x_axis, c)
axs[1].plot(x_axis[:61], c[:61])
axs[2].plot(x_axis[900:], c[900:])


In [None]:
# A more general-purpose MLP with m input neurons, n hidden neurons and o output neurons.
class MLP:
    """One-hidden-layer sigmoid MLP trained by full-batch gradient descent
    on the quadratic cost.

    Training data is attached after construction by assigning
    `train_inputs` (one sample per row, m values each) and `train_outputs`
    (one target per row, o values each) before calling `train`.
    """

    def __init__(self, m, n, o):
        """Create a network with m inputs, n hidden neurons and o outputs."""
        self.m = m
        self.n = n
        self.o = o

        # Training data is supplied by the caller; None means "not set yet"
        # so train() can fail with a clear message instead of AttributeError.
        self.train_inputs = None
        self.train_outputs = None

        # Fixed seed so every instance starts from the same weights.
        np.random.seed(23)

        # hidden layer: n neurons with m weights each
        self.w2 = np.random.randn(n, m)
        self.b2 = np.random.randn(n, 1)

        # output layer: o neurons with n weights each
        self.w3 = np.random.randn(o, n)
        self.b3 = np.random.randn(o, 1)

    def feedforward(self, xs):
        """Return the outputs for xs as formatted text.

        xs is an (m, k) matrix whose columns are input vectors. The result
        is a string with one line per input and the o output activations
        comma-separated to 4 decimals (see format_output).
        """
        # w2.dot(xs) applies the weight matrix to every input column at once
        a2s = sigm(self.w2.dot(xs) + self.b2)
        a3s = sigm(self.w3.dot(a2s) + self.b3)
        return self.format_output(a3s)

    def backprop(self, xs, ys):
        """Average the quadratic-cost gradients over all samples.

        xs holds one input vector per row and ys the matching targets.
        Returns (d_b2, d_w2, d_b3, d_w3, cost), each averaged over the
        number of samples.

        Raises:
            ValueError: if there are no samples or xs and ys differ in length.
        """
        n = len(ys)  # number of training vectors
        if n == 0:
            raise ValueError("backprop needs at least one training sample")
        if len(xs) != n:
            # zip() would silently drop the surplus and skew the average.
            raise ValueError(
                f"xs and ys must have the same length (got {len(xs)} and {n})"
            )

        del_w2 = np.zeros(self.w2.shape, dtype=float)
        del_b2 = np.zeros(self.b2.shape, dtype=float)
        del_w3 = np.zeros(self.w3.shape, dtype=float)
        del_b3 = np.zeros(self.b3.shape, dtype=float)
        cost = 0.0

        for x, y in zip(xs, ys):
            a1 = np.asarray(x).reshape(self.m, 1)   # input as an (m,1) column
            y = np.asarray(y).reshape(self.o, 1)    # target as an (o,1) column

            z2 = self.w2.dot(a1) + self.b2          # (n,1)
            a2 = sigm(z2)                           # (n,1)

            z3 = self.w3.dot(a2) + self.b3          # (o,1)
            a3 = sigm(z3)                           # (o,1)

            error = a3 - y
            delta3 = error * sigm_deriv(z3)                     # (o,1)
            delta2 = sigm_deriv(z2) * self.w3.T.dot(delta3)     # (n,o) x (o,1) -> (n,1)

            del_b3 += delta3
            del_w3 += delta3.dot(a2.T)              # (o,1) x (1,n) -> (o,n)

            del_b2 += delta2
            del_w2 += delta2.dot(a1.T)              # (n,1) x (1,m) -> (n,m)

            cost += (error ** 2).sum()

        return del_b2 / n, del_w2 / n, del_b3 / n, del_w3 / n, cost / n

    def predict(self, xs):
        """Alias for feedforward: formatted outputs for the input columns of xs."""
        return self.feedforward(xs)

    def train(self, epochs, eta):
        """Run `epochs` gradient-descent steps with learning rate eta.

        Uses self.train_inputs / self.train_outputs, plots the per-epoch
        cost with matplotlib and returns it as an array of length `epochs`.

        Raises:
            ValueError: if the training data has not been assigned.
        """
        inputs = getattr(self, "train_inputs", None)
        outputs = getattr(self, "train_outputs", None)
        if inputs is None or outputs is None:
            raise ValueError(
                "assign train_inputs and train_outputs before calling train()"
            )

        cost = np.zeros((epochs,))

        for e in range(epochs):
            d_b2, d_w2, d_b3, d_w3, cost[e] = self.backprop(inputs, outputs)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3

        plt.plot(cost)
        plt.title('Loss Per Iteration')
        plt.ylabel('Loss')
        plt.xlabel('Iteration')
        return cost

    def format_output(self, output):
        """Render an (o, k) activation matrix as text: one line per input
        column, values comma-separated with 4 decimal places."""
        formatted = [', '.join(f'{value:.4f}' for value in row) for row in output.T]
        return '\n'.join(formatted)
    

         

In [None]:
# MLP with cross-entropy cost
class MLP_Cross_Entropy:
    """One-hidden-layer sigmoid MLP trained by full-batch gradient descent
    on the cross-entropy cost.

    Training data is attached after construction by assigning
    `train_inputs` (one sample per row, m values each) and `train_outputs`
    (one target per row, o values each) before calling `train`.
    """

    def __init__(self, m, n, o):
        """Create a network with m inputs, n hidden neurons and o outputs."""
        self.m = m
        self.n = n
        self.o = o

        # Training data is supplied by the caller; None means "not set yet"
        # so train() can fail with a clear message instead of AttributeError.
        self.train_inputs = None
        self.train_outputs = None

        # Fixed seed so every instance starts from the same weights.
        np.random.seed(23)

        # hidden layer: n neurons with m weights each
        self.w2 = np.random.randn(n, m)
        self.b2 = np.random.randn(n, 1)

        # output layer: o neurons with n weights each
        self.w3 = np.random.randn(o, n)
        self.b3 = np.random.randn(o, 1)

    @staticmethod
    def _cross_entropy_cost(a, y):
        """Cross-entropy between activations a and targets y, summed over the outputs."""
        y = np.asarray(y, dtype=float)
        # Clip so a saturated sigmoid (exactly 0.0 or 1.0) cannot produce log(0).
        a = np.clip(a, 1e-12, 1.0 - 1e-12)
        return float(-np.sum(y * np.log(a) + (1.0 - y) * np.log(1.0 - a)))

    def feedforward(self, xs):
        """Return the outputs for xs as formatted text.

        xs is an (m, k) matrix whose columns are input vectors. The result
        is a string with one line per input and the o output activations
        comma-separated to 4 decimals (see format_output).
        """
        a2s = sigm(self.w2.dot(xs) + self.b2)
        a3s = sigm(self.w3.dot(a2s) + self.b3)
        return self.format_output(a3s)

    def backprop(self, xs, ys):
        """Average the cross-entropy gradients over all samples.

        xs holds one input vector per row and ys the matching targets.
        Returns (d_b2, d_w2, d_b3, d_w3, cost), each averaged over the
        number of samples; cost is the cross-entropy the gradients descend.

        Raises:
            ValueError: if there are no samples or xs and ys differ in length.
        """
        n = len(ys)  # number of training vectors
        if n == 0:
            raise ValueError("backprop needs at least one training sample")
        if len(xs) != n:
            # zip() would silently drop the surplus and skew the average.
            raise ValueError(
                f"xs and ys must have the same length (got {len(xs)} and {n})"
            )

        del_w2 = np.zeros(self.w2.shape, dtype=float)
        del_b2 = np.zeros(self.b2.shape, dtype=float)
        del_w3 = np.zeros(self.w3.shape, dtype=float)
        del_b3 = np.zeros(self.b3.shape, dtype=float)
        cost = 0.0

        for x, y in zip(xs, ys):
            a1 = np.asarray(x).reshape(self.m, 1)   # input as an (m,1) column
            y = np.asarray(y).reshape(self.o, 1)    # target as an (o,1) column

            z2 = self.w2.dot(a1) + self.b2          # (n,1)
            a2 = sigm(z2)                           # (n,1)

            z3 = self.w3.dot(a2) + self.b3          # (o,1)
            a3 = sigm(z3)                           # (o,1)

            # With a sigmoid output and cross-entropy cost the sigmoid
            # derivative cancels, leaving the plain error as the output delta.
            delta3 = a3 - y
            delta2 = sigm_deriv(z2) * self.w3.T.dot(delta3)     # (n,1)

            del_b3 += delta3
            del_w3 += delta3.dot(a2.T)              # (o,1) x (1,n) -> (o,n)

            del_b2 += delta2
            del_w2 += delta2.dot(a1.T)              # (n,1) x (1,m) -> (n,m)

            # Report the cost that is actually being minimised.
            cost += self._cross_entropy_cost(a3, y)

        return del_b2 / n, del_w2 / n, del_b3 / n, del_w3 / n, cost / n

    def predict(self, xs):
        """Alias for feedforward: formatted outputs for the input columns of xs."""
        return self.feedforward(xs)

    def train(self, epochs, eta):
        """Run `epochs` gradient-descent steps with learning rate eta.

        Uses self.train_inputs / self.train_outputs, plots the per-epoch
        cost with matplotlib and returns it as an array of length `epochs`.

        Raises:
            ValueError: if the training data has not been assigned.
        """
        inputs = getattr(self, "train_inputs", None)
        outputs = getattr(self, "train_outputs", None)
        if inputs is None or outputs is None:
            raise ValueError(
                "assign train_inputs and train_outputs before calling train()"
            )

        cost = np.zeros((epochs,))

        for e in range(epochs):
            d_b2, d_w2, d_b3, d_w3, cost[e] = self.backprop(inputs, outputs)

            self.b2 -= eta * d_b2
            self.w2 -= eta * d_w2
            self.b3 -= eta * d_b3
            self.w3 -= eta * d_w3

        plt.plot(cost)
        plt.title('Loss Per Iteration')
        plt.ylabel('Loss')
        plt.xlabel('Iteration')
        return cost

    def format_output(self, output):
        """Render an (o, k) activation matrix as text: one line per input
        column, values comma-separated with 4 decimal places."""
        formatted = [', '.join(f'{value:.4f}' for value in row) for row in output.T]
        return '\n'.join(formatted)
    

         

In [None]:
# Problem 1
# Train a 3-4-1 MLP on XOR of the first two inputs; the third input is always 1.
prob1_mlp = MLP(3, 4, 1)
prob1_mlp.train_inputs = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
prob1_mlp.train_outputs = np.array([0, 1, 1, 0])

# feedforward expects one input vector per column
xs = prob1_mlp.train_inputs.T

# The printed values are network outputs, so label them as such.
print("Outputs Before Training:\n" + prob1_mlp.feedforward(xs))


# As mentioned in the doc: start with 2000 epochs and change the learning rate as needed
epochs = 2000
learning_rate = 10.0

c = prob1_mlp.train(epochs, learning_rate)

print("\nOutputs After Training:\n" + prob1_mlp.feedforward(xs))

In [None]:
# Problem 2: neural network with 3 inputs and 2 outputs (4 hidden neurons)
prob2_mlp = MLP(3, 4, 2)

# One training sample per row.
prob2_mlp.train_inputs = np.array([
    [1, 1, 0],
    [1, -1, -1],
    [-1, 1, 1],
    [-1, -1, 1],
    [0, 1, -1],
    [0, -1, -1],
    [1, 1, 1],
])

# One two-valued target per row, in the same order as the inputs.
prob2_mlp.train_outputs = np.array([
    [1, 0],
    [0, 1],
    [1, 1],
    [1, 0],
    [1, 0],
    [1, 1],
    [1, 1],
])

# feedforward expects one input vector per column
xs = prob2_mlp.train_inputs.T

# The printed values are network outputs, so label them as such.
print("Outputs Before Training:\n" + prob2_mlp.feedforward(xs))

epochs = 2000
learning_rate = 16.0

c = prob2_mlp.train(epochs, learning_rate)
print("\nOutputs After Training:\n" + prob2_mlp.feedforward(xs))



In [None]:
# Problem 3: transport choice
# Each training input is [gender, car ownership, travel cost, income]:
#   gender         0 = male, 1 = female
#   car ownership  0, 1 or 2 (a quantitative integer)
#   travel cost    0 = cheap, 1 = standard, 2 = expensive
#   income         0 = low, 1 = medium, 2 = high
# Targets are one-hot: bus [1, 0, 0], train [0, 1, 0], car [0, 0, 1].

prob3_mlp = MLP(4, 6, 3)

prob3_mlp.train_inputs = np.array([
    [0, 0, 0, 0],
    [0, 1, 0, 1],
    [1, 1, 0, 1],
    [1, 0, 0, 0],
    [0, 1, 0, 1],
    [0, 0, 1, 1],
    [1, 1, 1, 1],
    [1, 1, 2, 2],
    [0, 2, 2, 1],
    [1, 2, 2, 2],
])
prob3_mlp.train_outputs = np.array([
    [1, 0, 0],
    [1, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 0, 0],
    [0, 1, 0],
    [0, 1, 0],
    [0, 0, 1],
    [0, 0, 1],
    [0, 0, 1],
])

# Network outputs with the initial random weights.
print("Outputs before training:")
xs = prob3_mlp.train_inputs.T  # one input vector per column
print(prob3_mlp.feedforward(xs))

epochs = 1000
learning_rate = 11.0

# Train, then report the final-epoch cost and the outputs on the training set.
print("\nOutputs after training:")
c = prob3_mlp.train(epochs, learning_rate)
print("cost = ", c[-1])
print(prob3_mlp.feedforward(xs))

x_axis = np.linspace(1, epochs, epochs, dtype=int)

# Unseen case: a woman with no car, expensive travel costs and medium income.
print("\nWomen Test Case:")
test_case = np.array([1, 0, 2, 1]).reshape(4, 1)
print(prob3_mlp.predict(test_case))


# Save the training inputs to a comma-separated file without index or header.
prob3_mlp_df = pd.DataFrame(prob3_mlp.train_inputs)
prob3_mlp_df.to_csv('transport.csv', index=False, header=False, sep=',')

In [None]:
# Problem 4: load the Iris dataset and split it into inputs and one-hot targets.
# The CSV is read without a header row.
iris_data = pd.read_csv('iris_data.csv', header=None)

# The last column is the class label: one-hot encode it and store it as
# an integer (0/1) numpy array.
iris_data_output = pd.get_dummies(iris_data.iloc[:, -1]).to_numpy().astype(int)

# Remove the label column so only the feature columns remain in the frame.
iris_data = iris_data.drop(iris_data.columns[[-1]], axis=1)

# Feature matrix as floats, one sample per row.
iris_data_inputs = iris_data.to_numpy().astype(float)

In [None]:
# Iris dataset with the quadratic-cost MLP: 4 inputs, 7 hidden neurons, 3 outputs.
iris_mlp = MLP(4, 7, 3)

# Attach the training data (one sample per row).
iris_mlp.train_inputs = iris_data_inputs
iris_mlp.train_outputs = iris_data_output

# feedforward expects one sample per column.
xs = iris_data_inputs.T

# Network outputs with the initial random weights.
print("Outputs before training:\n" + iris_mlp.feedforward(xs))

# Training settings.
epochs = 1000
learning_rate = 0.8

# Train, then report the final-epoch cost and the outputs on the training set.
print("\nOutputs after training:")
c = iris_mlp.train(epochs, learning_rate)
print("cost = ", c[-1])
print(iris_mlp.feedforward(xs))

In [None]:
# Problem 4: Iris dataset with the cross-entropy-cost MLP (4 inputs, 7 hidden, 3 outputs)
iris_mlp_cross_entropy = MLP_Cross_Entropy(4, 7, 3)

# Attach the training data (one sample per row).
iris_mlp_cross_entropy.train_inputs = iris_data_inputs
iris_mlp_cross_entropy.train_outputs = iris_data_output

# Define xs here instead of relying on whatever an earlier cell left in the
# global `xs` (other cells rebind it to different datasets).
xs = iris_data_inputs.T

# Training settings.
epochs = 1000
learning_rate = 0.17

print("\nOutputs with Cross Entropy after training:")
c = iris_mlp_cross_entropy.train(epochs, learning_rate)
print("cost = ", str(c[-1]))
print(iris_mlp_cross_entropy.feedforward(xs))

In [None]:
# Load the heart-disease dataset and split it into inputs and one-hot targets.
heart_df = pd.read_csv('heart.csv', header=None)

# The first row of the file contains the column names, so drop it.
heart_df = heart_df.drop(heart_df.index[0])

# The last column is the target: 0 means no heart disease, 1 means heart disease.
heart_data_outputs = heart_df.iloc[:, -1]

# One-hot encode the target and convert it to a numpy array. Cast to int so
# the targets are plain 0/1 values, matching the Iris pipeline (get_dummies
# returns booleans on recent pandas versions).
heart_data_outputs = pd.get_dummies(heart_data_outputs).to_numpy().astype(int)

# Drop the last four columns: the target (already extracted above) and the
# three columns before it, which are not used as inputs.
# NOTE(review): the training cells use input_size = 10, which presumes the
# file has 13 feature columns plus the target - confirm against heart.csv.
heart_df = heart_df.drop(heart_df.columns[[-1, -2, -3, -4]], axis=1)

# Remaining columns as a float matrix, one sample per row.
heart_data_inputs = heart_df.to_numpy().astype(float)


In [None]:
# Heart-disease data with the quadratic-cost MLP.

# Network dimensions.
input_size = 10   # number of features
hidden_size = 8   # number of hidden neurons
output_size = 2   # number of outputs

# Training settings.
epochs = 6000
learning_rate = 0.08

heart_mlp = MLP(input_size, hidden_size, output_size)

# Attach the training data (one sample per row).
heart_mlp.train_inputs = heart_data_inputs
heart_mlp.train_outputs = heart_data_outputs

# feedforward expects one sample per column.
xs = heart_data_inputs.T

# Train, then report the final-epoch cost and the outputs on the training set.
print("\nOutputs after training:")
c = heart_mlp.train(epochs, learning_rate)
print("cost = ", c[-1])
print(heart_mlp.feedforward(xs))

In [None]:
# Heart-disease data with the cross-entropy MLP.
# Note: this rebinds the name heart_mlp to an MLP_Cross_Entropy instance.

# Network dimensions.
input_size = 10   # number of features
hidden_size = 8   # number of hidden neurons
output_size = 2   # number of outputs

# Training settings.
epochs = 8000
learning_rate = 0.0075

heart_mlp = MLP_Cross_Entropy(input_size, hidden_size, output_size)

# Attach the training data (one sample per row).
heart_mlp.train_inputs = heart_data_inputs
heart_mlp.train_outputs = heart_data_outputs

# feedforward expects one sample per column.
xs = heart_data_inputs.T

# Train, then report the final-epoch cost and the outputs on the training set.
print("\nOutputs after training:")
c = heart_mlp.train(epochs, learning_rate)
print("cost = ", c[-1])
print(heart_mlp.feedforward(xs))