In [1]:
# Backprop on the Vowel Dataset
# Inserting Required Packages
import pandas as pd
import numpy as np
import csv
from random import seed
from random import randrange
from random import random
from csv import reader
from math import exp
from sklearn.metrics import confusion_matrix
from sklearn.metrics import cohen_kappa_score

In [4]:
# Load a CSV file
def loadCsv(file):
    """Read a CSV file into a list of rows, converting the first four fields to float.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A list with one list per non-empty CSV record. Fields 0-3 of every
        record are floats; any remaining fields are left exactly as
        ``csv.reader`` produced them (strings).

    Raises:
        ValueError: If one of the first four fields cannot be parsed as a float.
        IndexError: If a non-empty record has fewer than four fields.
    """
    trainset = []
    # The context manager closes the handle; newline='' is the mode the csv
    # module expects so that it can do its own newline handling.
    with open(file, 'r', newline='') as handle:
        for record in csv.reader(handle):
            if not record:
                # csv.reader yields [] for blank lines; nothing to convert.
                continue
            for j in range(4):
                record[j] = float(record[j])
            # Append once per record, after all four fields are converted.
            trainset.append(record)
    return trainset


def minmax(dataset):
    """Return ``[min, max]`` for every column of ``dataset``.

    Args:
        dataset: Iterable of equally long rows.

    Returns:
        A list with one ``[minimum, maximum]`` pair per column, in column order.
        An empty dataset gives an empty list.
    """
    return [[min(column), max(column)] for column in zip(*dataset)]

In [5]:
# Rescale dataset columns to the range 0-1
def normalize(dataset, minmax):
    """Rescale the feature columns of ``dataset`` to the range 0-1, in place.

    Every field except the last one of each row is rescaled; the last field is
    left untouched.

    Args:
        dataset: List of mutable rows holding numeric values.
        minmax: Sequence of ``[min, max]`` pairs, indexed by column.

    Returns:
        None. ``dataset`` is modified in place.
    """
    for row in dataset:
        for i in range(len(row) - 1):
            low = minmax[i][0]
            span = minmax[i][1] - low
            # A constant column has max == min; dividing by that zero span would
            # raise ZeroDivisionError, so such a column is mapped to 0.0 instead.
            row[i] = (row[i] - low) / span if span else 0.0
            

In [7]:
# Convert string columns to float
def column_to_float(dataset, column):
    """Convert the given column of every row to float, in place.

    A value that ``float`` rejects with ValueError is reported on stdout and
    left unchanged; processing then continues with the next row.

    Args:
        dataset: List of mutable rows.
        column: Index of the field to convert.
    """
    for record in dataset:
        raw = record[column]
        try:
            converted = float(raw)
        except ValueError:
            print("Error with row", column, ":", raw)
            continue
        record[column] = converted

In [8]:
# Convert string column to integer
def column_to_int(dataset, column):
    """Replace the values of one column with integer codes, in place.

    Codes are assigned in order of first appearance (0 for the first distinct
    value seen, 1 for the next, ...). Iterating a ``set`` instead would make
    the value-to-code mapping depend on hash ordering, which for strings can
    differ from one interpreter run to the next.

    Args:
        dataset: List of mutable rows.
        column: Index of the field to encode; its values must be hashable.

    Returns:
        The dict mapping each original value to its integer code.
    """
    lookup = {}
    for row in dataset:
        # setdefault only assigns a new code the first time a value is seen.
        lookup.setdefault(row[column], len(lookup))
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

In [9]:
# Find the min and max values for each column
# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
    """Randomly partition ``dataset`` into ``n_folds`` equally sized folds.

    Rows are drawn without replacement using ``randrange``. Each fold holds
    ``len(dataset) // n_folds`` rows, so up to ``n_folds - 1`` leftover rows are
    not placed in any fold. ``dataset`` itself is not modified.

    Args:
        dataset: Sequence of rows.
        n_folds: Number of folds to create.

    Returns:
        A list of ``n_folds`` lists of rows.
    """
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = len(dataset) // n_folds
    for _ in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        # Store the fold only once it is full, and keep going for the next one.
        dataset_split.append(fold)
    return dataset_split

In [10]:
# Calculate accuracy percentage
def accuracy_met(actual, predicted):
    """Return the percentage of positions at which ``predicted`` equals ``actual``.

    Args:
        actual: Sequence of true labels.
        predicted: Sequence of predicted labels, indexed like ``actual``.

    Returns:
        The share of matching positions as a float between 0.0 and 100.0.
    """
    total = len(actual)
    hits = sum(1 for pos in range(total) if actual[pos] == predicted[pos])
    return hits / float(total) * 100.0

In [11]:
# Evaluate an algorithm using a cross validation split
def _print_fold_metrics(actual, predicted):
    """Print the confusion matrix and the per-class metrics derived from it."""
    cm = confusion_matrix(actual, predicted)
    print('\n'.join([''.join(['{:4}'.format(item) for item in row]) for row in cm]))
    # Per-class counts: column sums minus the diagonal are false positives,
    # row sums minus the diagonal are false negatives.
    FP = cm.sum(axis=0) - np.diag(cm)
    FN = cm.sum(axis=1) - np.diag(cm)
    TP = np.diag(cm)
    TN = cm.sum() - (FP + FN + TP)
    print('False Positivities\n {}'.format(FP))
    print('False Negativities\n {}'.format(FN))
    print('True Positivities\n {}'.format(TP))
    print('True Negativities\n {}'.format(TN))
    # The ratios below are element-wise numpy divisions; a zero denominator
    # produces nan (with a RuntimeWarning) rather than an exception.
    TPR = TP/(TP+FN)
    print('Sensitivity \n {}'.format(TPR))
    TNR = TN/(TN+FP)
    print('Specificity \n {}'.format(TNR))
    Precision = TP/(TP+FP)
    print('Precision \n {}.'.format(Precision))
    Recall = TP/(TP+FN)
    print('Recall \n {}'.format(Recall))
    Acc = (TP+TN)/(TP+TN+FP+FN)
    print('Accuracy \n {}'.format(Acc))
    Fscore = 2*(Precision*Recall)/(Precision+Recall)
    print('FScore \n {}'.format(Fscore))
    k = cohen_kappa_score(actual, predicted)
    print('Cohen Kappa \n {}'.format(k))


def run_algorithm(dataset, algorithm, n_folds, *args):
    """Evaluate ``algorithm`` with k-fold cross validation and print metrics per fold.

    For every fold the remaining folds are concatenated into the training set,
    the held-out fold is copied with its last field (the label) replaced by
    None, and ``algorithm(train_set, test_set, *args)`` is called to obtain the
    predictions. A confusion matrix and derived metrics are printed per fold.

    Args:
        dataset: List of rows whose last field is the class label.
        algorithm: Callable taking ``(train_set, test_set, *args)`` and
            returning one prediction per test row.
        n_folds: Number of cross-validation folds.
        *args: Extra positional arguments forwarded to ``algorithm``.

    Returns:
        A list with the accuracy percentage of every fold, in fold order.
    """
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        # Train on all folds except the held-out one, flattened into one list.
        train_set = list(folds)
        train_set.remove(fold)
        train_set = sum(train_set, [])
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            # Hide the label from the algorithm under test.
            row_copy[-1] = None
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_met(actual, predicted)
        _print_fold_metrics(actual, predicted)
        scores.append(accuracy)
    # Return only after every fold has been evaluated.
    return scores

In [12]:
# Calculate Neuron Activation for an Input
def activate(weights, inputs):
    """Compute a neuron's activation: the bias plus the weighted sum of its inputs.

    Args:
        weights: Sequence of weights whose last element is the bias.
        inputs: Sequence with at least ``len(weights) - 1`` values; any
            further elements are ignored.

    Returns:
        ``weights[-1] + sum(weights[i] * inputs[i])`` over all non-bias weights.
    """
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    # Return after the whole sum is accumulated, not after the first term.
    return activation
# Transfer Neuron Activation
def transfer(activation):
    """Apply the logistic (sigmoid) function to an activation value.

    Args:
        activation: Numeric activation of a neuron.

    Returns:
        ``1 / (1 + e**(-activation))`` as a float in the range 0.0-1.0.
    """
    try:
        return 1.0 / (1.0 + exp(-activation))
    except OverflowError:
        # exp() overflows for very negative activations; the sigmoid's limit
        # there is 0, so return that instead of aborting the computation.
        return 0.0

In [13]:
# Forward propagate an input row to the network output
def forward_propogate(network, row):
    """Feed ``row`` through the network and return the last layer's outputs.

    Each neuron's result is also stored under its ``'output'`` key.

    Args:
        network: List of layers; each layer is a list of neuron dicts with a
            ``'weights'`` entry.
        row: Input values for the first layer.

    Returns:
        The list of outputs of the final layer (``row`` itself if the network
        has no layers).
    """
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        # Switch to this layer's outputs only after every neuron in the layer
        # has been evaluated against the same inputs.
        inputs = new_inputs
    return inputs
# Calculate the derivative of a neuron output
def transfer_derivative(output):
    """Return ``output * (1 - output)``, the slope of the sigmoid at that output.

    Args:
        output: A neuron output value.
    """
    complement = 1.0 - output
    return output * complement

In [14]:
# Backpropagate error and store it in the neurons
def backward_propogate_error(network, expected):
    """Compute the error signal of every neuron and store it under ``'delta'``.

    Layers are processed from last to first. For the last layer the error of
    neuron ``j`` is ``expected[j] - output``; for any other layer it is the sum
    over the next layer's neurons of ``weights[j] * delta``. Each delta is that
    error multiplied by ``transfer_derivative`` of the neuron's output.

    Args:
        network: List of layers of neuron dicts; every neuron must already
            carry an ``'output'`` entry.
        expected: Target values, one per neuron of the last layer.
    """
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                # One accumulated error per neuron of this layer, so that
                # errors[j] lines up with layer[j] below.
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])

In [15]:
# Update network weights with error
def update_weights(network, row, l_rate, momentum=None):
    """Update all weights from the stored deltas, with a momentum term.

    For every weight the step is
    ``l_rate * delta * input + momentum * previous_step``; the bias (last
    weight) uses an input of 1. The applied step is remembered in the neuron's
    ``'prev'`` list for the next call.

    Args:
        network: List of layers of neuron dicts carrying ``'weights'``,
            ``'prev'``, ``'delta'`` and ``'output'`` entries.
        row: The training row; all fields but the last feed the first layer.
        l_rate: Learning rate.
        momentum: Momentum coefficient. When omitted, the module-level ``mu``
            is used, which must then be defined before this is called.
    """
    if momentum is None:
        momentum = mu
    for i in range(len(network)):
        if i == 0:
            # The first layer is driven by the row itself, minus its last field.
            inputs = row[:-1]
        else:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                temp = l_rate * neuron['delta'] * inputs[j] + momentum * neuron['prev'][j]
                neuron['weights'][j] += temp
                neuron['prev'][j] = temp
            # The bias is updated once per neuron, not once per input.
            temp = l_rate * neuron['delta'] + momentum * neuron['prev'][-1]
            neuron['weights'][-1] += temp
            neuron['prev'][-1] = temp

In [16]:
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    """Train ``network`` in place for a fixed number of epochs.

    Every row is processed on its own in each epoch: forward pass, error
    backpropagation against a one-hot target, then a weight update.

    Args:
        network: List of layers of neuron dicts.
        train: Training rows; the last field of each row is an integer class
            index in ``range(n_outputs)``.
        l_rate: Learning rate passed on to ``update_weights``.
        n_epoch: Number of passes over ``train``.
        n_outputs: Length of the one-hot target vector.
    """
    for epoch in range(n_epoch):
        for row in train:
            # Called for its side effect: it stores each neuron's 'output'.
            forward_propogate(network, row)
            # One-hot encode the class label stored in the row's last field.
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            backward_propogate_error(network, expected)
            update_weights(network, row, l_rate)
# Initialize a network
def _new_layer(n_neurons, n_incoming):
    """Build ``n_neurons`` neurons, each with ``n_incoming`` weights plus a bias."""
    return [
        {
            'weights': [random() for _ in range(n_incoming + 1)],
            'prev': [0 for _ in range(n_incoming + 1)],
        }
        for _ in range(n_neurons)
    ]


def initialize_network(n_inputs, n_hidden, n_outputs):
    """Create a network with two hidden layers and one output layer.

    Every neuron is a dict with random ``'weights'`` (one per incoming value
    plus a trailing bias) and a ``'prev'`` list of zeros of the same length.

    Args:
        n_inputs: Number of input features.
        n_hidden: Number of neurons in each of the two hidden layers.
        n_outputs: Number of neurons in the output layer.

    Returns:
        ``[hidden_layer_1, hidden_layer_2, output_layer]``.
    """
    network = list()
    # Only the first hidden layer reads the raw inputs.
    network.append(_new_layer(n_hidden, n_inputs))
    # Later layers read the n_hidden outputs of the layer before them, so
    # their weight vectors must be sized for n_hidden incoming values.
    network.append(_new_layer(n_hidden, n_hidden))
    # One output neuron per class.
    network.append(_new_layer(n_outputs, n_hidden))
    return network
# Make a prediction with a network
def predict(network, row):
    """Return the index of the largest output the network gives for ``row``.

    Args:
        network: List of layers of neuron dicts.
        row: Input row passed to ``forward_propogate``.

    Returns:
        The position of the first maximal value in the output list.
    """
    activations = forward_propogate(network, row)
    strongest = max(activations)
    return activations.index(strongest)

In [17]:
# Backpropagation algorithm with stochastic gradient descent
def back_propogation(train, test, l_rate, n_epoch, n_hidden):
    """Train a fresh network on ``train`` and predict a class for every row of ``test``.

    Args:
        train: Training rows; the last field is a non-negative integer class index.
        test: Rows to classify.
        l_rate: Learning rate.
        n_epoch: Number of training epochs.
        n_hidden: Number of neurons per hidden layer.

    Returns:
        A list with one predicted class index per row of ``test``.
    """
    n_inputs = len(train[0]) - 1
    # Labels are used as indices into the one-hot target vector, so size the
    # output layer by the largest label. Counting distinct labels falls short
    # whenever the training rows happen to miss one of the lower classes.
    n_outputs = max(row[-1] for row in train) + 1
    network = initialize_network(n_inputs, n_hidden, n_outputs)
    train_network(network, train, l_rate, n_epoch, n_outputs)
    return [predict(network, row) for row in test]

In [25]:
# Test Backprop on Seeds Dataset
# Seed the random module so fold assignment and weight initialisation repeat.
seed(1)
# Load and Prepare data
file = "data.csv"
dataset = loadCsv(file)
# Convert every column except the last one (the class) to float.
for i in range(len(dataset[0])-1):
    column_to_float(dataset, i)
# Convert class column to integers
column_to_int(dataset, len(dataset[0])-1)
# Normalize input variables
# Store the result under its own name: assigning it to `minmax` would replace
# the function and break any re-run of this cell.
column_ranges = minmax(dataset)
normalize(dataset, column_ranges)
# Evaluate algorithm
n_folds = 5
l_rate = 0.1
# `mu` is read as a module-level name by update_weights (momentum coefficient).
mu = 0.001
n_epoch = 1500
n_hidden = 4
scores = run_algorithm(dataset, back_propogation, n_folds, l_rate, n_epoch, n_hidden)
# print('Scores: %s' % scores)
# print('Mean Accuracy: %.3f%%' %(sum(scores)/float(len(scores))))
    

FileNotFoundError: [Errno 2] No such file or directory: 'data.csv'