In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

## Data Handling & Preprocessing

In [4]:
# Fixed seed so the shuffled row order is the same on every run of the notebook.
SHUFFLE_SEED = 42

iris = pd.read_csv("iris.csv")
# Shuffle the rows once up front: a shuffled dataset is easier to separate into
# train/test/validation subsets and helps avoid overfitting to the file's row order.
iris = iris.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

In [6]:
# Show the column labels; the feature/label selection further down indexes by these names.
iris.columns

Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [20]:
# Count missing values per column.
iris.isnull().sum()

Sepal.Length    0
Sepal.Width     0
Petal.Length    0
Petal.Width     0
Species         0
dtype: int64

###  Visualization

In [22]:
# Scatter plot of petal length against petal width, one colour per species,
# drawn from Bokeh's bundled copy of the Iris data (`flowers`).
from bokeh.sampledata.iris import flowers
from bokeh.plotting import figure, show, output_file

# HTML file the rendered figure is written to
output_file("gfg.html")

# instantiating the figure object
graph = figure(title="Iris Visualization")

# labeling the x-axis and the y-axis
graph.xaxis.axis_label = "Petal Length (in cm)"
graph.yaxis.axis_label = "Petal Width (in cm)"

# One glyph series per species; a single loop replaces three copy-pasted stanzas.
SPECIES_COLORS = [
    ("setosa", "blue"),
    ("versicolor", "yellow"),
    ("virginica", "red"),
]
for species_name, species_color in SPECIES_COLORS:
    species_rows = flowers[flowers["species"] == species_name]
    graph.circle(
        species_rows["petal_length"],
        species_rows["petal_width"],
        color=species_color,
        legend_label="{} petals".format(species_name),
    )

# relocating the legend to the top-left corner
# to avoid obstruction of the graph
graph.legend.location = "top_left"

# displaying the figure
show(graph)


# DATA TRAIN AND TEST SPLIT

In [7]:
# Feature matrix: the four measurement columns of `iris` as a NumPy array.
feature_columns = ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']
X = np.array(iris.loc[:, feature_columns])
X[:5]

array([[6.4, 2.8, 5.6, 2.2],
       [5.7, 3.8, 1.7, 0.3],
       [7.7, 2.6, 6.9, 2.3],
       [5.2, 2.7, 3.9, 1.4],
       [4.8, 3.4, 1.9, 0.2]])

In [8]:
from sklearn.preprocessing import OneHotEncoder

# Request a dense array from the encoder. The keyword is `sparse_output` in
# scikit-learn >= 1.2 and `sparse` in older releases (the old spelling was later
# removed), so try the new name first and fall back to the old one.
try:
    one_hot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    one_hot_encoder = OneHotEncoder(sparse=False)

# One-hot encode the species label: the encoder expects a 2-D column, hence the reshape.
Y = iris.Species
Y = one_hot_encoder.fit_transform(np.array(Y).reshape(-1, 1))
Y[:5]

array([[0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.]])

In [9]:
from sklearn.model_selection import train_test_split

# Fixed seed so the same rows land in each subset on every run.
SPLIT_SEED = 42

# Hold out 15% of the samples as the final test set ...
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.15, random_state=SPLIT_SEED)
# ... then set aside 10% of the remaining samples for validation.
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train, test_size=0.1, random_state=SPLIT_SEED)

## Implementation

* `X_train`, `Y_train`: The training data and target values.
* `X_val`, `Y_val`: The validation data and target values. These are optional parameters.
* `epochs`: Number of epochs. Defaults to 10.
* `nodes`: A list of integers, one per layer (input and output layers included), giving the number of nodes in that layer. The length of the list is therefore the number of layers.
* `lr`: The learning rate of the back-propagation training algorithm. Defaults to 0.15.

In [10]:
def NeuralNetwork(X_train, Y_train, X_val=None, Y_val=None, epochs=10, nodes=[], lr=0.15,
                  report_every=20):
    """Train a network for `epochs` passes over the training data and return its weights.

    Parameters
    ----------
    X_train, Y_train : training samples and their target vectors.
    X_val, Y_val : optional validation samples and targets. When either is
        missing (or the validation set is empty) only the training accuracy
        is reported.
    epochs : number of passes over the training data.
    nodes : number of nodes in each layer; handed to `InitializeWeights`.
    lr : learning rate handed to `Train`.
    report_every : print the accuracy every this many epochs. A falsy value
        (0 or None) turns the progress report off.

    Returns
    -------
    The weights returned by the last call to `Train`.
    """
    weights = InitializeWeights(nodes)

    # The validation arguments default to None, so test for presence explicitly
    # instead of calling a method on them.
    has_validation = X_val is not None and Y_val is not None and len(X_val) > 0

    for epoch in range(1, epochs + 1):
        weights = Train(X_train, Y_train, lr, weights)

        if report_every and epoch % report_every == 0:
            print("Epoch {}".format(epoch))
            print("Training Accuracy:{}".format(Accuracy(X_train, Y_train, weights)))
            if has_validation:
                print("Validation Accuracy:{}".format(Accuracy(X_val, Y_val, weights)))

    return weights

In [11]:
def InitializeWeights(nodes):
    """Create one random weight matrix for each pair of consecutive layers.

    The matrix feeding a layer has one row per node of that layer and one
    column per node of the layer before it, plus one extra column for the
    bias. Every entry is drawn with ``np.random.uniform(-1, 1)``.
    """
    weights = []

    for fan_in, fan_out in zip(nodes[:-1], nodes[1:]):
        rows = []
        for _ in range(fan_out):
            # fan_in inputs plus one bias weight per node
            rows.append([np.random.uniform(-1, 1) for _ in range(fan_in + 1)])
        weights.append(np.matrix(rows))

    return weights

**Forward Propagation:**

* Each layer receives an input and computes an output. The output is computed by first calculating the dot product between the input and the weights of the layer and then passing this dot product through an activation function (in this case, the sigmoid function).
* The output of each layer is the input of the next.
* The input of the first layer is the feature vector.
* The output of the final layer is the prediction of the network.

In [12]:
def ForwardPropagation(x, weights, layers):
    """Push `x` through the first `layers` weight matrices.

    `x` is used as given (callers in this notebook prepend the bias 1 before
    calling). Returns a list whose first element is `x` and whose following
    elements are the sigmoid outputs of each layer, in order.
    """
    activations = [x]
    current_input = x

    for layer_index in range(layers):
        layer_weights = weights[layer_index]
        layer_output = Sigmoid(np.dot(current_input, layer_weights.T))
        activations.append(layer_output)
        # The next layer's input is this output with a leading 1 for the bias.
        current_input = np.append(1, layer_output)

    return activations

**Backward Propagation:**

* Calculate error at final output.
* Propagate error backwards through the layers and perform corrections.
    * Calculate Delta: Back-propagated error of current layer *times* Sigmoid derivative of current layer activation.
    * Update Weights between current layer and previous layer: Multiply delta with activation of previous layer and learning rate, and add this product to weights of previous layer.
    * Calculate error for current layer. Remove the bias from the weights of the previous layer and multiply the result with delta to get error.


In [13]:
def BackPropagation(y, activations, weights, layers, lr=None):
    """Update `weights` in place from one sample's activations and return them.

    Parameters
    ----------
    y : target vector for the sample.
    activations : list returned by `ForwardPropagation`; element 0 is the
        network input and the last element is the network output.
    weights : list of weight matrices, one per layer; modified in place.
    layers : number of weight matrices to update.
    lr : learning rate. When omitted, the module-level name `lr` is used so
        that existing four-argument calls keep working; pass it explicitly to
        avoid depending on that global.

    Raises
    ------
    NameError : if `lr` is not passed and no module-level `lr` exists.
    """
    if lr is None:
        # Backward compatibility: this function used to read `lr` as a global.
        if "lr" not in globals():
            raise NameError("name 'lr' is not defined; pass `lr` to BackPropagation")
        lr = globals()["lr"]

    outputFinal = activations[-1]
    error = np.matrix(y - outputFinal) # Error at output

    # Walk from the output layer back to the first layer.
    for j in range(layers, 0, -1):
        currActivation = activations[j]

        if(j > 1):
            # Augment previous activation with the bias input
            prevActivation = np.append(1, activations[j-1])
        else:
            # First layer: activations[0] is the network input and is used as is
            prevActivation = activations[0]

        delta = np.multiply(error, SigmoidDerivative(currActivation))
        weights[j-1] += lr * np.multiply(delta.T, prevActivation)

        # Note: the error passed to the previous layer is computed from the
        # weights after the update above.
        w = np.delete(weights[j-1], [0], axis=1) # Remove bias from weights
        error = np.dot(delta, w) # Calculate error for current layer

    return weights

In our implementation we will pass each sample of our dataset through the network, performing first the forward pass and then the weight updating via the back-propagation algorithm. Finally, the newly calculated weights will be returned.

In [14]:
def Train(X, Y, lr, weights):
    """Run one pass over the samples, updating the weights after each one.

    For every sample the feature vector is prefixed with a 1 (bias input),
    sent through `ForwardPropagation`, and the resulting activations are
    handed to `BackPropagation`. The weights returned by the last
    `BackPropagation` call are returned.

    NOTE(review): `lr` is accepted here but is not forwarded in the
    `BackPropagation` call below.
    """
    layers = len(weights)
    sample_count = len(X)

    for row in range(sample_count):
        features, target = X[row], Y[row]
        # 1 x (n_features + 1) row matrix with the bias input in front
        augmented = np.matrix(np.append(1, features))

        activations = ForwardPropagation(augmented, weights, layers)
        weights = BackPropagation(target, activations, weights, layers)

    return weights

In [15]:
def Sigmoid(x):
    """Logistic function 1 / (1 + e^(-x)), applied element-wise to array input."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def SigmoidDerivative(x):
    """Return x * (1 - x) element-wise.

    This is the sigmoid's derivative expressed in terms of the sigmoid's
    output, so `x` is expected to be an activation rather than a raw input.
    """
    complement = 1 - x
    return np.multiply(x, complement)

When we want to make a prediction for an item, we first need to pass it through the network. The output of the network (in the case of three different classes, as in the Iris problem) will be in the form `[x, y, z]`, where `x, y, z` are real numbers in the range [0, 1]. The higher the value of an element, the more confident the network is that it is the correct class. We need to convert this output to the one-hot format we mentioned earlier. Thus, we will take the largest of the outputs and set the corresponding index to 1, while the rest are set to 0. This means the predicted class is the one the network is most confident in (i.e., the one with the greatest value).

So, a prediction involves the forward propagation and the conversion of the output to one-hot encoding, with the 1 denoting the predicted class.

In [16]:
def Predict(item, weights):
    """Return the network's prediction for `item` as a one-hot list.

    The item is prefixed with a 1 (bias input) and forwarded through the
    network; the position chosen by `FindMaxActivation` on the final output
    is set to 1 and every other position to 0.
    """
    layers = len(weights)
    augmented_item = np.append(1, item)  # bias input in front

    # Final layer output, flattened to a 1-D array
    network_output = ForwardPropagation(augmented_item, weights, layers)[-1].A1
    winner = FindMaxActivation(network_output)

    prediction = [0] * len(network_output)
    prediction[winner] = 1
    return prediction


def FindMaxActivation(output):
    """Return the index of the largest value in `output` (first one on ties)."""
    best_index = 0
    best_value = output[0]

    for position, value in enumerate(output[1:], start=1):
        # Strictly greater, so an equal later value never replaces an earlier one
        if value > best_value:
            best_index, best_value = position, value

    return best_index

Finally, we need a way to evaluate our network. For this, we will write the `Accuracy` function which, given the computed weights, predicts the class of each object in its input and checks it against the actual class, returning the percentage of correct predictions.

In [17]:
def Accuracy(X, Y, weights):
    """Fraction of samples in `X` whose prediction equals the matching row of `Y`.

    Each sample is classified with `Predict` and the one-hot result is
    compared with the target row converted to a list.
    """
    sample_count = len(X)
    hits = sum(
        1
        for i in range(sample_count)
        if list(Y[i]) == Predict(X[i], weights)
    )
    return hits / sample_count

We have now completed our implementation and we can check the results! Below we build a network by passing to the main function (`NeuralNetwork`) the training/validation sets, the number of epochs, the learning rate and the number of nodes in each layer.

During training, the accuracy of the network on the training and validation sets is printed after every 20th epoch.

In [18]:
# Seed NumPy's global generator so the random weight initialisation
# (InitializeWeights draws from np.random) is the same on every run.
np.random.seed(42)

f = len(X[0]) # Number of features
o = len(Y[0]) # Number of outputs / classes

layers = [f, 5, 10, o] # Number of nodes in layers (input, two hidden, output)
# `lr` must remain a module-level name: BackPropagation is called without a
# learning rate during training and reads this one.
lr, epochs = 0.15, 100

weights = NeuralNetwork(X_train, Y_train, X_val, Y_val, epochs=epochs, nodes=layers, lr=lr);

Epoch 20
Training Accuracy:0.7368421052631579
Validation Accuracy:0.7692307692307693
Epoch 40
Training Accuracy:0.9649122807017544
Validation Accuracy:1.0
Epoch 60
Training Accuracy:0.9473684210526315
Validation Accuracy:0.9230769230769231
Epoch 80
Training Accuracy:0.9649122807017544
Validation Accuracy:0.9230769230769231
Epoch 100
Training Accuracy:0.9649122807017544
Validation Accuracy:0.9230769230769231


For the grand finale, we will test the network against the testing dataset:

In [19]:
# Score the trained network on the held-out test set.
test_accuracy = Accuracy(X_test, Y_test, weights)
print("Testing Accuracy: {}".format(test_accuracy))

Testing Accuracy: 0.9565217391304348
