In [2]:
import numpy as np
import pandas as pd
import seaborn as sns ;sns.set()
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Read data from csv
iris = pd.read_csv("data/Iris.csv")
print(iris.head())

# Plot the various combinations of 2D graph
#g = sns.pairplot(iris.drop("Id", axis=1), hue="Species")

# Replace the species with 0, 1 or 2 as appropriate
iris['Species'].replace(['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'], [0, 1, 2], inplace=True)

# Get labels, flatten and encode to one-hot
columns = ['Species']
labels = pd.DataFrame(iris, columns=columns).to_numpy()
labels = labels.flatten()
labels = np.eye(np.max(labels) + 1)[labels]

# Get Features
columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']
features = pd.DataFrame(iris, columns=columns).to_numpy()

# Split data to training and test data, 2/3 for training and 1/3 for testing
train_x, test_x, train_y, test_y = train_test_split(features, labels, test_size=0.33)


# Learning rate
learning_rate = 0.01

# Number of training epochs
num_epochs = 200

# Network architecture parameters
num_features = len(train_x[0])
num_classes = len(train_y[0])
num_hidden_nodes = 6

class NeuralNetwork:
    def __init__(self, num_inputs, num_outputs, num_hidden_nodes):
        # Get the number of inputs, outputs and hidden nodes
        self.num_inputs = num_inputs
        self.num_ouputs = num_outputs
        self.num_hidden_nodes = num_hidden_nodes
        
        # Initialise weights in the range -0.5 to 0.5
        # Hidden layer weights with shape (number of input features x number of hidden nodes)
        self.hidden_weights = np.random.uniform(-0.5, 0.5, size=(self.num_inputs, self.num_hidden_nodes))
        self.hidden_bias = np.random.uniform(-0.5, 0.5, size=(1, self.num_hidden_nodes))
        # Output layer weights with shape (number of hidden nodes x number of output classes)
        self.output_weights = np.random.uniform(-0.5, 0.5, size=(self.num_hidden_nodes, self.num_ouputs))
        self.output_bias = np.random.uniform(-0.5, 0.5, size=(1, self.num_ouputs))

    # Sigmoid activation function and its derivative
    def sigmoid(self, x):
        return 1/(1+np.exp(-x))
    
    def sigmoid_deriv(self, x):
        return x * (1 - x)
    
    def forward_pass(self, x):
        """ Forward Pass - propagates input data through the network. """
        
        # Input layer is just the input data
        input_layer = x
        # Hidden layer sigmoid(W * X + b)
        hidden_output = self.sigmoid(np.dot(input_layer, self.hidden_weights) + self.hidden_bias)
        # Output layer sigmoid(W * X + b)
        output = self.sigmoid(np.dot(hidden_output, self.output_weights) + self.output_bias)
        
        # Return both layers output
        return hidden_output, output
    
    def backward_pass(self, x, y, output, hidden_output, learning_rate):
        """ Backpropagation - propagates the error backwards through the network. """
        
        # Calculate output layer error
        output_layer_error = y - output
    
        # Calculate output layer gradient from error and derivative of output
        # Note: just need this layers error for the bias
        output_layer_delta = output_layer_error * self.sigmoid_deriv(output)
        output_bias_delta = np.sum(output_layer_error, axis=0)
        
        # Calculate hidden layer error (from the output layers weights and gradient)
        hidden_layer_error = output_layer_delta.dot(self.output_weights.T)
        
        # Calculate hidden layer gradient
        # Note: just need this layers error for the bias
        hidden_layer_delta = hidden_layer_error * self.sigmoid_deriv(hidden_output)
        hidden_bias_delta = np.sum(hidden_layer_error, axis=0)
         
        """ Update the Weights - update the weights using the error gradients and learning rate. """
        # Change in weight = learning rate * layers input * layers gradient
        self.output_weights += learning_rate * hidden_output.T.dot(output_layer_delta)
        self.output_bias += learning_rate * output_bias_delta
        
        self.hidden_weights += learning_rate * x.T.dot(hidden_layer_delta)
        self.hidden_bias += learning_rate * hidden_bias_delta
        
    def predict(self, x):
        """ Generate predictions on input data. """
        
        # Pass the data through the network and generate outputs
        outputs, _ = self.forward_pass(x)
        
        # Prediction is the output node with the highest value
        predictions = np.argmax(outputs)
        return predictions
    
    def train(self, x, y, num_epochs, learning_rate):
        """ Train the network on the input data. """
        
        # For recording error and accuracy - for graph later
        train_errors, test_errors = [], []
        train_accuracies, test_accuracies = [], []
        
        # Train for number of epochs
        for epoch in range(num_epochs+1):
            outputs,hidden_output = self.forward_pass(x)
            self.backward_pass(x, y, outputs, hidden_output, learning_rate)
            
            # Every 10 epochs record error and accuracy on training and test set
            if (epoch % 10) == 0:
                
                # Mean squared error over all errors this epoch
                error = np.square(y - outputs).mean() 
                train_errors.append(error)
   
                # Get the prediction i.e. the output with the highest value
                predictions = self.predict(x)
                # Get the actual labels
                actual_labels = np.argmax(train_y)
                
                # If they match the prediction was correct
                correct_predictions = np.sum(predictions == actual_labels)
                accuracy = (100 / len(train_x)) * correct_predictions
                train_accuracies.append(accuracy)
               
                # Test data forward pass
                test_outputs, _ = self.forward_pass(test_y)
                # Mean squared error over all errors
                test_error = np.square(test_y - test_outputs).mean()
                test_errors.append(test_error)
        
                # Get the prediction i.e. the output with the highest value
                predictions = self.predict(test_x)
                # Get the actual labels
                actual_labels = np.argmax(test_y)
                
                # If they match the prediction was correct
                correct_predictions = np.sum(predictions == actual_labels)
                test_accuracy = (100 / len(test_x)) * correct_predictions
                test_accuracies.append(test_accuracy)
        
                print("Epoch: " + str(epoch) +
                      " Error: " + str(round(error, 5)) +
                      " Accuracy: " + str(accuracy) + "%" +
                      " Test Error: " + str(round(test_error, 5)) +
                      " Accuracy: " + str(test_accuracy) + "%")

nn = NeuralNetwork(num_features, num_classes, num_hidden_nodes)
nn.train(train_x, train_y, 200, 0.01)
# x_range = [i*10 for i in range(len(train_errors))]
# figure, ax = plt.subplots(1, 2, figsize=(16, 6))
# sns.lineplot(x=x_range, y=train_accuracies, color='b', ax=ax[0])
# sns.lineplot(x=x_range, y=test_accuracies, color='r', ax=ax[0])
# ax[0].title.set_text("Accuracy")
# sns.lineplot(x=x_range, y=train_errors, color='b', ax=ax[1])
# sns.lineplot(x=x_range, y=test_errors, color='r', ax=ax[1])
# ax[1].title.set_text("Error")
# ax[1].legend(['train', 'test'])
# plt.show()


   Id  SepalLength  SepalWidth  PetalLength  PetalWidth      Species
0   1          5.1         3.5          1.4         0.2  Iris-setosa
1   2          4.9         3.0          1.4         0.2  Iris-setosa
2   3          4.7         3.2          1.3         0.2  Iris-setosa
3   4          4.6         3.1          1.5         0.2  Iris-setosa
4   5          5.0         3.6          1.4         0.2  Iris-setosa


ValueError: operands could not be broadcast together with shapes (100,3) (100,6) 