In [45]:
import random
from math import exp
# process data label as 0, 1, 2 for training
def loadDataset(dataset): 
    newdata = []
    for x in range(len(dataset)-1):
        for i in range(0,len(dataset[x]),4):
            if dataset[x][i] == "I": # encounter labels, change it
                if dataset[x][-3:-1] == "sa": # Iris-setosa
                    newdata.append(0)
                elif dataset[x][-2:-1] == "r": # Iris-versicolor
                    newdata.append(1)
                elif dataset[x][-3:-1] == "ca": # Iris-virginica
                    newdata.append(2) 
                break
            else:
                attribute = float(dataset[x][i:i+3])
                newdata.append(attribute)
        trainingSet.append(newdata)       
        newdata = [] # clear the package
    return trainingSet

# find the range of data to do normalize
def dataset_minmax(dataset): # zip a(1,2,3) , b(4,5,6) to [(1,4), (2,5), (3,6)] 
    minmax = list()
    stats = [[min(column), max(column)] for column in zip(*dataset)] # unzip the file
    return stats

# rescale data to range 0~1
def normalize_data(dataset, minmax):
    for row in dataset:
        for i in range(len(row)-1):
            row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

# calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights)-1):
        activation += weights[i] * inputs[i]
    return activation

# transfer neuron activation 
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# calculate the derivative of an neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

#forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs # keep updating the neurons
    return inputs

# backpropagate error and sotre in neurons
def backward_propagate_error(network, expected): 
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)): # hidden
                error = 0.0
                for neuron in network[i+1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error) 
        else:
            for j in range(len(layer)): # output
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# update network weights with error
def update_weights(network, row, learning_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i-1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += learning_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += learning_rate * neuron['delta']

def train_network(network, train, learning_rate, n_epoch, n_outputs):
    print("--------------- lrate=%.3f" % learning_rate ,"-----------------")
    prev_MSE = 0
    MSE = 0
    abs_fraction_of_change = 0
    done = False
    for epoch in range(n_epoch):
        MSE = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1 # one hot encoding !!!
            backward_propagate_error(network, expected)
            update_weights(network, row, learning_rate)
            update_outputs = forward_propagate(network, row) # update forward results
            MSE += sum([(expected[i] - update_outputs[i])**2 for i in range(len(expected))])
            MSE /= len(expected)
            #print(MSE)
        if epoch > 0:
            abs_fraction_of_change = abs((MSE - prev_MSE) / prev_MSE )
            if abs_fraction_of_change <= 10e-5:
                print("Epoxh need:%d"%(epoch+1))
                break
        print('>epoch=%d, MSE=%.3f, abs fraction of change=%.6f' % (epoch, MSE, abs_fraction_of_change))
        prev_MSE = MSE
        #print("prev",prev_MSE)
    
    

def init_network(n_inputs, n_hidden, n_outputs): 
# create n_hidden neurons and each neuron in the hidden layer has n_inputs + 1 weights
    network = list()
    hidden_layer1 = [{'weights': [(random.random()-0.5)/5.0 for i in range(n_inputs)]} for i in range(n_hidden)]
    network.append(hidden_layer1) 
    hidden_layer2 = [{'weights': [(random.random()-0.5)/5.0 for i in range(n_inputs)]} for i in range(n_hidden)]
    network.append(hidden_layer2)
    output_layer = [{'weights': [(random.random()-0.5)/5.0 for i in range(n_hidden)]} for i in range(n_outputs)]
    network.append(output_layer) 
    return network

# make a prediction with a network
# It returns the index in the network output that has the largest probability. 
# Assuming that class values have been converted to integers starting at 0. [0,1,2]
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

# backpropagation with stochastic gradient descent
def back_propagate(train, learning_rate, n_epoch, n_hidden):
    n_inputs = len(train[0]) - 1
    n_outputs = len(set([row[-1] for row in train]))
    network = init_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, learning_rate, n_epoch, n_outputs)
    

In [46]:
trainingSet=[]
testSet=[]

f = open('iris.data.txt', "r")
lines = f.readlines()
dataset = list(lines)
trainingSet = loadDataset(dataset)

In [53]:
# normalize inputs

minmax = dataset_minmax(trainingSet)
normalize_data(trainingSet, minmax)

learning_rate = 0.1
n_epoch = 700
n_hidden = 4

back_propagate(trainingSet, learning_rate, n_epoch, n_hidden)

--------------- lrate=0.100 -----------------
>epoch=0, MSE=0.199, abs fraction of change=0.000000
>epoch=1, MSE=0.184, abs fraction of change=0.079050
>epoch=2, MSE=0.181, abs fraction of change=0.015379
>epoch=3, MSE=0.181, abs fraction of change=0.002027
>epoch=4, MSE=0.181, abs fraction of change=0.001072
>epoch=5, MSE=0.181, abs fraction of change=0.001804
>epoch=6, MSE=0.181, abs fraction of change=0.001971
>epoch=7, MSE=0.182, abs fraction of change=0.002002
>epoch=8, MSE=0.182, abs fraction of change=0.002000
>epoch=9, MSE=0.182, abs fraction of change=0.001990
>epoch=10, MSE=0.183, abs fraction of change=0.001978
>epoch=11, MSE=0.183, abs fraction of change=0.001966
>epoch=12, MSE=0.184, abs fraction of change=0.001954
>epoch=13, MSE=0.184, abs fraction of change=0.001942
>epoch=14, MSE=0.184, abs fraction of change=0.001930
>epoch=15, MSE=0.185, abs fraction of change=0.001919
>epoch=16, MSE=0.185, abs fraction of change=0.001907
>epoch=17, MSE=0.185, abs fraction of change=0

>epoch=156, MSE=0.221, abs fraction of change=0.000828
>epoch=157, MSE=0.221, abs fraction of change=0.000823
>epoch=158, MSE=0.222, abs fraction of change=0.000819
>epoch=159, MSE=0.222, abs fraction of change=0.000815
>epoch=160, MSE=0.222, abs fraction of change=0.000811
>epoch=161, MSE=0.222, abs fraction of change=0.000808
>epoch=162, MSE=0.222, abs fraction of change=0.000804
>epoch=163, MSE=0.223, abs fraction of change=0.000800
>epoch=164, MSE=0.223, abs fraction of change=0.000797
>epoch=165, MSE=0.223, abs fraction of change=0.000793
>epoch=166, MSE=0.223, abs fraction of change=0.000790
>epoch=167, MSE=0.223, abs fraction of change=0.000786
>epoch=168, MSE=0.223, abs fraction of change=0.000783
>epoch=169, MSE=0.224, abs fraction of change=0.000780
>epoch=170, MSE=0.224, abs fraction of change=0.000776
>epoch=171, MSE=0.224, abs fraction of change=0.000773
>epoch=172, MSE=0.224, abs fraction of change=0.000770
>epoch=173, MSE=0.224, abs fraction of change=0.000768
>epoch=174