# BT3017 Tutorial 3

- There is an online copy<sup>+</sup> of this tutorial on github available [here](https://github.com/KohSiXing/Feature-Engineering-for-Machine-Learning/blob/master/BT3017%20Tutorial%203.ipynb)
- Dataset and codes referenced from Machine Learning Mastery: [How to Code a Neural Network with Backpropagation In Python](https://machinelearningmastery.com/implement-backpropagation-algorithm-scratch-python/)

<sup>+</sup> Online copy will only be published after Wednesday 1000 of that week to prevent plagiarism.

### Preprocessing

In [1]:
from random import seed
from random import randrange
from random import random
from csv import reader
from math import exp
from functools import reduce
import pandas as pd

# Load a CSV file
#dataset = pd.read_csv("wheat-seed.csv", header = None)
#dataset

### original
def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset


### Other helper methods for changing datatypes if there is a need
# Convert string column to float
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())
 
# Convert string column to integer
def str_column_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = dict()
    for i, value in enumerate(unique):
        lookup[value] = i
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

dataset = load_csv("wheat-seed.csv")

# Clean the dataset from string to numeric values
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
str_column_to_int(dataset, len(dataset[0])-1)

pd.DataFrame(dataset)

Unnamed: 0,0,1,2,3,4,5,6,7
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
205,12.19,13.20,0.8783,5.137,2.981,3.631,4.870,2
206,11.23,12.88,0.8511,5.140,2.795,4.325,5.003,2
207,13.20,13.66,0.8883,5.236,3.232,8.315,5.056,2
208,11.84,13.21,0.8521,5.175,2.836,3.598,5.044,2


### 1a

- Maximum value of the dataset
    - Overall Maximum 21.18 [column 1]
    
### 1b
- Minimum value of the dataset
    - Overall Minimum 0.7651 [column 6]
    
|Columns | Max Values | Min Values |
|---     |---         |---         |
|1       | 21.18      | 10.59      |
|2       | 17.25      | 12.41      |
|3       | 0.9183     | 0.8081     |
|4       | 6.675      | 4.899      |
|5       | 4.033      | 2.63       |
|6       | 8.456      | 0.7651     |
|7       | 6.55       | 4.519      |

- column 8 is ignored since that is the label (category)
    

In [2]:
# Find the min and max values for each column
def dataset_minmax(dataset):
    minmax = list()
    stats = [[min(column), max(column)] for column in zip(*dataset)]
    return stats

dataset_minmax(dataset)

[[10.59, 21.18],
 [12.41, 17.25],
 [0.8081, 0.9183],
 [4.899, 6.675],
 [2.63, 4.033],
 [0.7651, 8.456],
 [4.519, 6.55],
 [0, 2]]

### 1c

- After scaling, the min and max values for all columns (except the label column) will be between 0 and 1 inclusive
- column 8 is ignored since that is the label (category)

- Formula used for scaling (Normalizing to be specific in this scenario):

$\frac{x - min(x)}{max(x) - min(x)}$

- where minmax[i][0] is the min value of the column, minmax[i][1] is the max value of the column

In [3]:
# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    for row in dataset:
        for i in range(len(row)-1):
            row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

            

minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

# Dataset Min and Max after scaling
dataset_minmax(dataset)

[[0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0.0, 1.0],
 [0, 2]]

### 1d

- The `cross_validation_split` will divide the dataset into n number of folds. Each with approximately the same number of data. There are 210 observations and if fold is set to 5 for instance, there will be 5 folds of 42 observations in the result of `cross_validation_split`

- This is mainly used to tackle overfitting errors through the use of cross-validation. At the testing phase the model will be tested with the various folds over n times. For example, the first run, the model could be trained with the 2nd to 4th fold and the 1st is used for testing. The second run, the 2nd fold is withheld for testing while the other folds are used to train the model, so on and so forth for the subsequent runs.

In [4]:
# Split a dataset into n folds
def cross_validation_split(dataset, n_folds):
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for i in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        dataset_split.append(fold)
    return dataset_split

# try 5 folds
pd.DataFrame(cross_validation_split(dataset, 5))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
0,"[0.3068932955618508, 0.31611570247933873, 0.79...","[0.15297450424929188, 0.21900826446281002, 0.3...","[0.5703493862134088, 0.6301652892561985, 0.604...","[0.5609065155807367, 0.6053719008264462, 0.673...","[0.4523135033050048, 0.46487603305785125, 0.82...","[0.91123701605288, 0.9297520661157025, 0.74047...","[0.8083097261567516, 0.8347107438016528, 0.734...","[0.5930122757318226, 0.6694214876033059, 0.514...","[0.8300283286118979, 0.8904958677685948, 0.576...","[0.13503305004721433, 0.19008264462809915, 0.3...",...,"[0.4523135033050048, 0.48760330578512384, 0.70...","[0.3654390934844194, 0.4008264462809916, 0.668...","[0.11709159584513694, 0.16942148760330586, 0.3...","[0.5835694050991501, 0.6632231404958676, 0.505...","[0.7610953729933899, 0.8264462809917356, 0.559...","[0.635505193578848, 0.7231404958677686, 0.4700...","[0.4995278564683665, 0.5144628099173554, 0.823...","[0.3493862134088762, 0.3471074380165289, 0.879...","[0.3975448536355053, 0.4359504132231404, 0.673...","[0.3371104815864023, 0.4111570247933885, 0.456..."
1,"[0.20868744098205863, 0.21900826446281002, 0.7...","[0.06137865911237019, 0.12190082644628096, 0.2...","[0.8111425873465533, 0.8719008264462808, 0.577...","[0.20774315391879125, 0.2314049586776858, 0.63...","[0.13786591123701614, 0.2066115702479339, 0.30...","[0.07743153918791315, 0.11157024793388412, 0.4...","[0.6166194523135035, 0.6487603305785126, 0.735...","[0.06043437204910298, 0.09710743801652906, 0.3...","[0.14353163361661941, 0.21900826446281002, 0.2...","[0.16713881019830024, 0.1611570247933883, 0.76...",...,"[0.4268177525967894, 0.440082644628099, 0.8212...","[0.37110481586402266, 0.45247933884297514, 0.4...","[0.789423984891407, 0.8285123966942152, 0.6787...","[0.35316336166194523, 0.3863636363636362, 0.68...","[0.5288007554296508, 0.5681818181818182, 0.696...","[0.7403210576015109, 0.7355371900826447, 0.903...","[0.5221907459867801, 0.5351239669421487, 0.833...","[0.06326723323890462, 0.12396694214876026, 0.2...","[0.4702549575070822, 0.566115702479339, 0.4047...","[0.3210576015108593, 0.2933884297520661, 1.0, ..."
2,"[0.30311614730878195, 0.33677685950413205, 0.6...","[0.715769593956563, 0.7954545454545457, 0.5045...","[0.19924457034938617, 0.2066115702479339, 0.71...","[0.9556185080264401, 0.9958677685950414, 0.618...","[0.5269121813031163, 0.6136363636363638, 0.460...","[0.5524079320113315, 0.5867768595041322, 0.725...","[0.18413597733711043, 0.26033057851239666, 0.3...","[0.7677053824362605, 0.8119834710743802, 0.661...","[0.24268177525967896, 0.23553719008264476, 0.8...","[0.7516525023607178, 0.7871900826446279, 0.711...",...,"[0.7762039660056657, 0.8016528925619832, 0.748...","[0.7714825306893297, 0.7830578512396693, 0.819...","[0.7998111425873464, 0.8347107438016528, 0.701...","[0.19641170915958453, 0.18801652892561987, 0.8...","[0.1916902738432483, 0.26033057851239666, 0.36...","[0.10953729933899907, 0.2293388429752065, 0.00...","[0.33238904627006605, 0.34917355371900816, 0.7...","[0.2747875354107649, 0.2975206611570247, 0.699...","[0.7252124645892352, 0.7603305785123966, 0.715...","[0.23418319169027388, 0.3119834710743801, 0.36..."
3,"[0.22946175637393765, 0.2789256198347107, 0.50...","[0.2464589235127478, 0.2582644628099174, 0.727...","[0.07271010387157692, 0.1322314049586778, 0.27...","[0.1850802644003778, 0.23966942148760334, 0.43...","[0.07648725212464594, 0.13842975206611569, 0.2...","[0.40509915014164316, 0.44628099173553726, 0.6...","[0.7705382436260624, 0.7789256198347106, 0.833...","[0.38810198300283283, 0.37190082644628114, 0.9...","[0.7554296506137866, 0.7520661157024795, 0.893...","[0.040604343720491, 0.12190082644628096, 0.098...",...,"[0.33238904627006605, 0.3657024793388429, 0.67...","[0.16147308781869696, 0.19214876033057846, 0.5...","[0.35410764872521244, 0.40495867768595023, 0.5...","[0.5259678942398489, 0.6033057851239669, 0.510...","[0.02266288951841362, 0.11363636363636379, 0.0...","[0.11614730878186973, 0.20454545454545459, 0.1...","[0.408876298394712, 0.4173553719008264, 0.8393...","[0.12086874409820579, 0.12603305785123955, 0.6...","[0.7799811142587348, 0.7768595041322317, 0.884...","[0.0, 0.0, 0.514519056261343, 0.0, 0.111903064..."
4,"[0.348441926345609, 0.3636363636363636, 0.7831...","[1.0, 0.9917355371900828, 0.8239564428312162, ...","[0.14730878186968843, 0.21487603305785108, 0.3...","[0.6298394711992448, 0.6859504132231405, 0.618...","[0.20113314447592076, 0.23966942148760334, 0.5...","[0.8479697828139755, 0.8946280991735533, 0.633...","[0.7677053824362605, 0.7809917355371904, 0.813...","[0.06421152030217184, 0.0929752066115701, 0.43...","[0.06421152030217184, 0.11570247933884308, 0.3...","[0.06043437204910298, 0.04545454545454559, 0.6...",...,"[0.6647780925401321, 0.7128099173553718, 0.652...","[0.15108593012275728, 0.16322314049586759, 0.6...","[0.1425873465533522, 0.15289256198347112, 0.64...","[0.3871576959395656, 0.4297520661157025, 0.651...","[0.06043437204910298, 0.08471074380165293, 0.4...","[0.8375826251180359, 0.8450413223140496, 0.820...","[0.4636449480642115, 0.5061983471074378, 0.670...","[0.1765816808309727, 0.2066115702479339, 0.567...","[0.7903682719546743, 0.7830578512396693, 0.903...","[0.25779036827195473, 0.31611570247933873, 0.4..."


### 1e

- The function `accuracy_metric` checks for the accuracy score of the model, i.e. rate of predicted is the same as actual

In [5]:
# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
	folds = cross_validation_split(dataset, n_folds)
	scores = list()
	for fold in folds:
		train_set = list(folds)
		train_set.remove(fold)
		train_set = sum(train_set, [])
		test_set = list()
		for row in fold:
			row_copy = list(row)
			test_set.append(row_copy)
			row_copy[-1] = None
		predicted = algorithm(train_set, test_set, *args)
		actual = [row[-1] for row in fold]
		accuracy = accuracy_metric(actual, predicted)
		scores.append(accuracy)
	return scores
 
# Calculate neuron activation for an input
def activate(weights, inputs):
	activation = weights[-1]
	for i in range(len(weights)-1):
		activation += weights[i] * inputs[i]
	return activation
 
# Transfer neuron activation
def transfer(activation):
	return 1.0 / (1.0 + exp(-activation))
 
# Forward propagate input to a network output
def forward_propagate(network, row):
	inputs = row
	for layer in network:
		new_inputs = []
		for neuron in layer:
			activation = activate(neuron['weights'], inputs)
			neuron['output'] = transfer(activation)
			new_inputs.append(neuron['output'])
		inputs = new_inputs
	return inputs
 
# Calculate the derivative of an neuron output
def transfer_derivative(output):
	return output * (1.0 - output)
 
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
	for i in reversed(range(len(network))):
		layer = network[i]
		errors = list()
		if i != len(network)-1:
			for j in range(len(layer)):
				error = 0.0
				for neuron in network[i + 1]:
					error += (neuron['weights'][j] * neuron['delta'])
				errors.append(error)
		else:
			for j in range(len(layer)):
				neuron = layer[j]
				errors.append(neuron['output'] - expected[j])
		for j in range(len(layer)):
			neuron = layer[j]
			neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])
 
# Update network weights with error
def update_weights(network, row, l_rate):
	for i in range(len(network)):
		inputs = row[:-1]
		if i != 0:
			inputs = [neuron['output'] for neuron in network[i - 1]]
		for neuron in network[i]:
			for j in range(len(inputs)):
				neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
			neuron['weights'][-1] -= l_rate * neuron['delta']
 
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
 
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights':[random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights':[random() for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network
 
# Make a prediction with a network
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))
 
# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden):
    n_inputs = len(train[0]) - 1
    n_outputs = len(set([row[-1] for row in train]))
    network = initialize_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs)
    predictions = list()
    for row in test:
        prediction = predict(network, row)
        predictions.append(prediction)
    return(predictions)

In [6]:
# Test Backprop on Seeds dataset
seed(1)

# load and prepare data
filename = 'wheat-seed.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
    
# convert class column to integers
str_column_to_int(dataset, len(dataset[0])-1)

# normalize input variables
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

# evaluate algorithm
n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 5
scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

Scores: [92.85714285714286, 92.85714285714286, 97.61904761904762, 92.85714285714286, 90.47619047619048]
Mean Accuracy: 93.333%


### 1f

- The non-linear perceptron activation function used is the sigmoid activation function. The function in the code that performs this function is `transfer()`

In [7]:
# Transfer neuron activation
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

### 1g

- The reason the return value of the `transfer_derivative` function is `output * (1 - output)` as that is the partial derivative of the sigmoid function
- Recall the lecture notes Lecture 3 Page 30:

y (i.e. the output) = $\frac{1}{1 + e^-x}$

$\frac{\partial y}{\partial x}$ = $\frac{1}{1 + e^-x}$ - $\frac{1}{(1 + e^-x)^2}$ = y(1 - y)

### 1h

- In the function `backward_propagate_error` the error here means how far the output and the expected results differ. Ideally, neuron['output'] - expected[j] should be as small as possible, which means less errors in the neuron.

### 1i
- neuron['delta'] means the error signal calculated for a particular neuron

### 1j

- The total training error (a scalar) for epochs at 100, 200, 300, 400, 500 will be printed out for each fold
- total training error here means the accumulated error of **one** epoch (i.e. at 100, 200, ..., 500)

In [8]:
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    all_errors = 0
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(neuron['output'] - expected[j])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])
        all_errors += reduce(lambda x,y : x+y, errors)
    return all_errors

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    count = 1
    for fold in folds:
        print("----- Fold ",count," -----")
        train_set = list(folds)
        train_set.remove(fold)
        train_set = sum(train_set, [])
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            row_copy[-1] = None
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
        count += 1
    return scores
 
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    total_training_errors = 0
    for epoch in range(n_epoch):
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            total_training_errors = backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        
        # that the total training error for epochs at 100, 200, 300, 400, 500 will be printed out.
        if((epoch + 1) % 100 == 0) :
            print("Epoch ", (epoch + 1) ," : ", total_training_errors)

In [9]:
# Test Backprop on Seeds dataset
seed(1)

# load and prepare data
filename = 'wheat-seed.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
    
# convert class column to integers
str_column_to_int(dataset, len(dataset[0])-1)

# normalize input variables
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

# evaluate algorithm
n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 5
scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

----- Fold  1  -----
Epoch  100  :  0.006966376141469076
Epoch  200  :  0.005911027598791276
Epoch  300  :  0.007930595286578732
Epoch  400  :  0.010760308338063618
Epoch  500  :  0.00863231298482717
----- Fold  2  -----
Epoch  100  :  0.0007019949630168746
Epoch  200  :  -0.0003608756994039671
Epoch  300  :  -0.0035311857359784345
Epoch  400  :  -0.010418288422241462
Epoch  500  :  -0.006082865584776025
----- Fold  3  -----
Epoch  100  :  0.006213380653722034
Epoch  200  :  0.0052927792552067586
Epoch  300  :  0.00304944880446246
Epoch  400  :  0.0013701643944341595
Epoch  500  :  0.000392951966078362
----- Fold  4  -----
Epoch  100  :  0.0029852051345457607
Epoch  200  :  0.001776387739310207
Epoch  300  :  0.0018799401476959836
Epoch  400  :  0.0017175429539026473
Epoch  500  :  0.0014618546428714374
----- Fold  5  -----
Epoch  100  :  -0.054530093703318405
Epoch  200  :  0.012259065565953724
Epoch  300  :  0.0043189258248682435
Epoch  400  :  0.0025625841122598244
Epoch  500  :  0.

### 2

- The neural network has two hidden layers. 
    - added hidden layer will be in-between the existing hidden layer and the output layer.
- The added hidden layer has 3 neurons

In [10]:
# Load a CSV file
def load_csv(filename):
	dataset = list()
	with open(filename, 'r') as file:
		csv_reader = reader(file)
		for row in csv_reader:
			if not row:
				continue
			dataset.append(row)
	return dataset
 
# Convert string column to float
def str_column_to_float(dataset, column):
	for row in dataset:
		row[column] = float(row[column].strip())
 
# Convert string column to integer
def str_column_to_int(dataset, column):
	class_values = [row[column] for row in dataset]
	unique = set(class_values)
	lookup = dict()
	for i, value in enumerate(unique):
		lookup[value] = i
	for row in dataset:
		row[column] = lookup[row[column]]
	return lookup
 
# Find the min and max values for each column
def dataset_minmax(dataset):
	minmax = list()
	stats = [[min(column), max(column)] for column in zip(*dataset)]
	return stats
 
# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
	for row in dataset:
		for i in range(len(row)-1):
			row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])
 
# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
	dataset_split = list()
	dataset_copy = list(dataset)
	fold_size = int(len(dataset) / n_folds)
	for i in range(n_folds):
		fold = list()
		while len(fold) < fold_size:
			index = randrange(len(dataset_copy))
			fold.append(dataset_copy.pop(index))
		dataset_split.append(fold)
	return dataset_split
 
# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
	correct = 0
	for i in range(len(actual)):
		if actual[i] == predicted[i]:
			correct += 1
	return correct / float(len(actual)) * 100.0
 
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
	folds = cross_validation_split(dataset, n_folds)
	scores = list()
	for fold in folds:
		train_set = list(folds)
		train_set.remove(fold)
		train_set = sum(train_set, [])
		test_set = list()
		for row in fold:
			row_copy = list(row)
			test_set.append(row_copy)
			row_copy[-1] = None
		predicted = algorithm(train_set, test_set, *args)
		actual = [row[-1] for row in fold]
		accuracy = accuracy_metric(actual, predicted)
		scores.append(accuracy)
	return scores
 
# Calculate neuron activation for an input
def activate(weights, inputs):
	activation = weights[-1]
	for i in range(len(weights)-1):
		activation += weights[i] * inputs[i]
	return activation
 
# Transfer neuron activation
def transfer(activation):
	return 1.0 / (1.0 + exp(-activation))
 
# Forward propagate input to a network output
def forward_propagate(network, row):
	inputs = row
	for layer in network:
		new_inputs = []
		for neuron in layer:
			activation = activate(neuron['weights'], inputs)
			neuron['output'] = transfer(activation)
			new_inputs.append(neuron['output'])
		inputs = new_inputs
	return inputs
 
# Calculate the derivative of an neuron output
def transfer_derivative(output):
	return output * (1.0 - output)
 
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
	for i in reversed(range(len(network))):
		layer = network[i]
		errors = list()
		if i != len(network)-1:
			for j in range(len(layer)):
				error = 0.0
				for neuron in network[i + 1]:
					error += (neuron['weights'][j] * neuron['delta'])
				errors.append(error)
		else:
			for j in range(len(layer)):
				neuron = layer[j]
				errors.append(neuron['output'] - expected[j])
		for j in range(len(layer)):
			neuron = layer[j]
			neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])
 
# Update network weights with error
def update_weights(network, row, l_rate):
	for i in range(len(network)):
		inputs = row[:-1]
		if i != 0:
			inputs = [neuron['output'] for neuron in network[i - 1]]
		for neuron in network[i]:
			for j in range(len(inputs)):
				neuron['weights'][j] -= l_rate * neuron['delta'] * inputs[j]
			neuron['weights'][-1] -= l_rate * neuron['delta']
 
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
	for epoch in range(n_epoch):
		for row in train:
			outputs = forward_propagate(network, row)
			expected = [0 for i in range(n_outputs)]
			expected[row[-1]] = 1
			backward_propagate_error(network, expected)
			update_weights(network, row, l_rate)
 
## Edited codes to add in a second hidden layer between the first hidden layer and the output
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_sec_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights':[random() for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    sec_hidden_layer = [{'weights':[random() for i in range(n_hidden + 1)]} for i in range(n_sec_hidden)]
    network.append(sec_hidden_layer)
    output_layer = [{'weights':[random() for i in range(n_sec_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    return network
 
# Make a prediction with a network
def predict(network, row):
	outputs = forward_propagate(network, row)
	return outputs.index(max(outputs))
 
## Edited codes to add in a second hidden layer between the first hidden layer and the output
# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, n_epoch, n_hidden, n_sec_hidden):
    n_inputs = len(train[0]) - 1
    n_outputs = len(set([row[-1] for row in train]))
    network = initialize_network(n_inputs, n_hidden, n_sec_hidden, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs)
    predictions = list()
    for row in test:
        prediction = predict(network, row)
        predictions.append(prediction)
    return(predictions)
 

# Test Backprop on Seeds dataset
seed(1)

# load and prepare data
filename = 'wheat-seed.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
    
# convert class column to integers
str_column_to_int(dataset, len(dataset[0])-1)

# normalize input variables
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)

## Edited codes to add in neurons for second hidden layer
# evaluate algorithm
n_folds = 5
l_rate = 0.3
n_epoch = 500
n_hidden = 5
n_sec_hidden = 3 # the second hidden layer between first hidden and output has 3 neurons
scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, n_epoch, n_hidden, n_sec_hidden)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))

Scores: [92.85714285714286, 83.33333333333334, 97.61904761904762, 92.85714285714286, 88.09523809523809]
Mean Accuracy: 90.952%


### Other References

<sup>1</sup> Bhardwaj, A. (2020, October 12). What is a Perceptron? – Basics of Neural Networks. Medium. Retrieved February 12, 2022, from https://towardsdatascience.com/what-is-a-perceptron-basics-of-neural-networks-c4cfea20c590#:~:text=A%20perceptron%20works%20by%20taking,known%20as%20the%20weighted%20sum).&amp;text=The%20activation%20function%20takes%20the,and%20returns%20a%20final%20output. 

<sup>2</sup> Diandaru, R. (2021, June 5). A little about perceptrons and activation functions. Medium. Retrieved February 12, 2022, from https://medium.com/mlearning-ai/a-little-about-perceptrons-and-activation-functions-aed19d672656 