In [9]:
import pandas as pd
import numpy as np

iris = pd.read_csv("iris.data", names=["sepal_length", "sepal_width", "petal_length", "petal_width", "class"])
#splitting dataset into X (iris_data) and y (iris_types)
iris_data = iris[["sepal_length", "sepal_width", "petal_length", "petal_width"]]
iris_types = iris[["class"]]
#preparing data
#changing the types of iris into numbers [0, 1, 2]
#normalizing iris_data so the values will be between (0,1)
#concatenating prepared data with indexes
iris_types = iris_types.replace(["Iris-setosa", "Iris-versicolor", "Iris-virginica"], [0, 1, 2])
normalized_iris_data = (iris_data-iris_data.min())/(iris_data.max()-iris_data.min())
preprocessed_data = pd.concat([normalized_iris_data, iris_types], axis=1)

#Shuffling and splitting data for model training and testing in n (ratio of data distribution)
preprocessed_data = preprocessed_data.sample(frac=1).reset_index(drop=True)
n = 0.3
#for 0.3 it will be 105 elements for training and 45 for testing
data_ratio = int(len(preprocessed_data)*n)
test_data = preprocessed_data.iloc[:data_ratio]
train_data = preprocessed_data.iloc[data_ratio:]
#separating training data into X and one hot encoded y
#X preparing
X_train = train_data.values[:, :4]
X_test = test_data.values[:, :4]
#y preparing
one_hot_encoding = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
y_train = np.array([one_hot_encoding[int(x)] for x in train_data.values[:, 4]])
y_test =  np.array([one_hot_encoding[int(x)] for x in test_data.values[:, 4]])

In [10]:
#hid1/hid2 stands for first/second hidden layer

#Setting number of neurons in hidden layers
#Setting learning rate for weights and biases and number of epochs
neurons_number_hid1 = 20
neurons_number_hid2 = 20
learning_rate = 0.1

# First randomized weights for all layers of NN (min val, max val, size)
w1 = np.random.uniform(-1, 1, (len(X_train[0]), neurons_number_hid1)) 
w2 = np.random.uniform(-1, 1, (neurons_number_hid1, neurons_number_hid2))
w3 = np.random.uniform(-1, 1, (neurons_number_hid2, len(y_train[0])))
#First biases for hidden layers and output
bias_hid1 = np.zeros((neurons_number_hid1))
bias_hid2 = np.zeros((neurons_number_hid2))
output_layer_bias = np.zeros((len(y_train[0])))

In [11]:
#Defining activation functions
#RELu activation function setting all negative functions to 0
# if x > 0 => x, else 0
def relu(x):
    return np.maximum(x, 0)
#Softmax activation function
def softmax(output_array):
    exp = np.exp(output_array)
    return exp / np.sum(exp, axis=1, keepdims=True)

In [12]:
#creating a model of NN with forward and back propagation
epochs = 100
for epoch in range(epochs):
    
    #creating of model with input layer, 2 hidden layers, output layer
    input_layer = np.dot(X_train, w1) + bias_hid1
    hid1_layer_activation = relu(input_layer)
    hid2_layer = np.dot(hid1_layer_activation, w2) + bias_hid2
    hid2_layer_activation = relu(hid2_layer)
    output_layer = np.dot(hid2_layer_activation, w3) + output_layer_bias
    output_layer_activation = softmax(output_layer) 
    
    # Error calculation in hidden layers
    output_error = (output_layer_activation - y_train) / len(output_layer_activation)
    error_hid2 = np.dot(output_error, w3.T)
    error_hid1 = np.dot(error_hid2, w2.T)

    # Back propagation to adjust the weight and bias values 
    grad_output_w = np.dot(hid2_layer_activation.T, output_error)
    grad_output_bias = np.sum(output_error, axis=0, keepdims=True)
    w3 = w3 - learning_rate * grad_output_w
    output_layer_bias = output_layer_bias - learning_rate * grad_output_bias
    
    grad_h2_w = np.dot(hid1_layer_activation.T, error_hid2)
    grad_h2_bias = np.sum(error_hid2, axis=0, keepdims=True)
    w2 = w2 - learning_rate * grad_h2_w
    bias_hid2 = bias_hid2 - learning_rate * grad_h2_bias
    
    grad_h1_w = np.dot(X_train.T, error_hid1)
    grad_h1_bias = np.sum(error_hid1, axis=0, keepdims=True)
    w1 = w1 - learning_rate * grad_h1_w
    bias_hid1 = bias_hid1 - learning_rate * grad_h1_bias

In [13]:
# Calculate the accuracy of test run
def accuracy(predicted, actual):
    is_correct = np.argmax(predicted, axis=1) == np.argmax(actual, axis=1)
    correct_predictions = np.sum(is_correct)
    accuracy = correct_predictions / len(predicted) * 100.0
    return accuracy

input_layer = np.dot(X_test, w1) + bias_hid1
hid1_layer_activation = relu(input_layer)
hid2_layer = np.dot(hid1_layer_activation, w2) + bias_hid2
hid2_layer_activation = relu(hid2_layer)
output_layer = np.dot(hid2_layer_activation, w3) + output_layer_bias
prediction = softmax(output_layer)

print(f'Test accuracy: {round(accuracy(prediction, y_test), 2)}%')

Test accuracy: 93.33%


In [14]:
setosa_num = 0
vergicolor_num = 0
virginica_num = 0

for i in range(len(X_test)):
    if np.argmax(y_test[i]) != np.argmax(prediction[i]):
        print(f'For index number: "{i}" of test data the model was mistaken.')
        print(f'Actual type: {np.argmax(y_test[i])} Predicted type: {np.argmax(prediction[i])}')
        if np.argmax(y_test[i]) == 0:
            setosa_num = setosa_num + 1
        elif np.argmax(y_test[i]) == 1:
            vergicolor_num = vergicolor_num + 1
        elif np.argmax(y_test[i]) == 2:
            virginica_num = virginica_num + 1
print(f'Model was mistaken {setosa_num} times for setosa, {vergicolor_num} times for vergicolor')
print(f'and {virginica_num} times for virginica.')

For index number: "26" of test data the model was mistaken.
Actual type: 1 Predicted type: 2
For index number: "31" of test data the model was mistaken.
Actual type: 2 Predicted type: 1
For index number: "39" of test data the model was mistaken.
Actual type: 2 Predicted type: 1
Model was mistaken 0 times for setosa, 1 times for vergicolor
and 2 times for virginica.


In [15]:
#checking of flower types data distribution
iris_type = test_data.values[:, 4]
setosa = 0
vergicolor = 0
virginica = 0
for x in range(len(test_data)):
    if iris_type[x] == 0:
        setosa += 1
    elif iris_type[x] == 1:
        vergicolor += 1
    else:
        virginica += 1
    x += 1
    
print(f'Number of records for flower type in training dataset:')
print(f'setosa = {setosa}')
print(f'vergicolor = {vergicolor}')
print(f'virginica = {virginica}')

Number of records for flower type in training dataset:
setosa = 13
vergicolor = 16
virginica = 16
