In [106]:
import random
from math import exp
# process data label as 0, 1, 2 for training
def loadDataset(dataset): 
    newdata = []
    for x in range(len(dataset)-1):
        for i in range(0,len(dataset[x]),4):
            if dataset[x][i] == "I": # encounter labels, change it
                if dataset[x][-3:-1] == "sa": # Iris-setosa
                    newdata.append(0)
                elif dataset[x][-2:-1] == "r": # Iris-versicolor
                    newdata.append(1)
                elif dataset[x][-3:-1] == "ca": # Iris-virginica
                    newdata.append(2) 
                break
            else:
                attribute = float(dataset[x][i:i+3])
                newdata.append(attribute)
        trainingSet.append(newdata)       
        newdata = [] # clear the package
    return trainingSet

# find the range of data to do normalize
def dataset_minmax(dataset): # zip a(1,2,3) , b(4,5,6) to [(1,4), (2,5), (3,6)] 
    minmax = list()
    stats = [[min(column), max(column)] for column in zip(*dataset)] # unzip the file
    return stats

# rescale data to range 0~1
def normalize_data(dataset, minmax):
    for row in dataset:
        for i in range(len(row)-1):
            row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

# calculate neuron activation for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights)-1):
        activation += weights[i] * inputs[i]
    return activation

# transfer neuron activation 
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))

# calculate the derivative of an neuron output
def transfer_derivative(output):
    return output * (1.0 - output)

#forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs # keep updating the neurons
    return inputs

# backpropagate error and sotre in neurons
def backward_propagate_error(network, expected): 
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)): # hidden
                error = 0.0
                for neuron in network[i+1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error) 
        else:
            for j in range(len(layer)): # output
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

# update network weights with error
def update_weights(network, row, learning_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i-1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += learning_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += learning_rate * neuron['delta']

def train_network(network, train, learning_rate, n_epoch, n_outputs):
    tmp_MSE = 0
    abs_fraction_of_change = 0
    done = False
    for epoch in range(n_epoch):
        MSE = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1 # one hot encoding !!!
            MSE += sum([(expected[i] - outputs[i])**2 for i in range(len(expected))])
            MSE /= len(expected)
            #print(MSE, tmp_MSE)
            if epoch > 0:
                abs_fraction_of_change = abs((MSE - tmp_MSE) / tmp_MSE )
                if abs_fraction_of_change <= 10e-5:
                    done =True
                    break
            backward_propagate_error(network, expected)
            update_weights(network, row, learning_rate)
            tmp_MSE = MSE
        print('>epoch=%d, lrate=%.3f, MSE=%.3f, abs fraction of change=%.6f' % (epoch, learning_rate, MSE, abs_fraction_of_change))
        if done == True:
            print("Epoxh need:%d"%(epoch+1))
            break


def init_network(n_inputs, n_hidden, n_outputs): 
# create n_hidden neurons and each neuron in the hidden layer has n_inputs + 1 weights
    network = list()
    hidden_layer = [{'weights': [(random.random()-0.5)/5.0 for i in range(n_inputs + 1)]} for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [(random.random()-0.5)/5.0 for i in range(n_hidden + 1)]} for i in range(n_outputs)]
    network.append(output_layer)
    for layer in network:
        print(layer)
    return network

# make a prediction with a network
# It returns the index in the network output that has the largest probability. 
# Assuming that class values have been converted to integers starting at 0. [0,1,2]
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

# backpropagation with stochastic gradient descent
def back_propagate(train, learning_rate, n_epoch, n_hidden):
    n_inputs = len(train[0]) - 1
    n_outputs = len(set([row[-1] for row in train]))
    network = init_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, learning_rate, n_epoch, n_outputs)
    

In [107]:
trainingSet=[]
testSet=[]

f = open('iris.data.txt', "r")
lines = f.readlines()
dataset = list(lines)
trainingSet = loadDataset(dataset)

In [108]:
# normalize inputs

minmax = dataset_minmax(trainingSet)
normalize_data(trainingSet, minmax)

learning_rate = 0.1
n_epoch = 100
n_hidden = 2

back_propagate(trainingSet, learning_rate, n_epoch, n_hidden)

[{'weights': [-0.07077952971949689, 0.04956524385849852, -0.05043512066616671, -0.010603197274186682, 0.0951332435281236]}, {'weights': [0.03448312910232661, -0.024241954480695927, -0.08037234331305618, -0.01652065422969875, 0.07526464274881209]}]
[{'weights': [0.05550420597296146, -0.05529273828875507, 0.0952358778407612]}, {'weights': [-0.004493389878984311, 0.08747949532472939, -0.005285495535512852]}, {'weights': [-0.08598530704483412, 2.133956401406678e-05, 0.014194505018712712]}]
>epoch=0, lrate=0.100, MSE=0.251, abs fraction of change=0.000000
>epoch=1, lrate=0.100, MSE=0.228, abs fraction of change=0.016957
>epoch=2, lrate=0.100, MSE=0.223, abs fraction of change=0.016754
>epoch=3, lrate=0.100, MSE=0.222, abs fraction of change=0.016533
>epoch=4, lrate=0.100, MSE=0.221, abs fraction of change=0.016220
>epoch=5, lrate=0.100, MSE=0.222, abs fraction of change=0.015752
>epoch=6, lrate=0.100, MSE=0.222, abs fraction of change=0.015053
>epoch=7, lrate=0.100, MSE=0.221, abs fraction 