Wine Quality Neural Network

In [1]:
from random import *
from math import *

INITIALIZING THE NETWORK

In [2]:

def neural_net(input_no , hidden_no , output_no):
    network=[] 
    hidden_layer = [] 
    output_layer = []
        
    for i in range(hidden_no):
        neuronA={}
        weight_listA=[]
        for j in range(input_no + 1):
            weight_listA.append(random())
        neuronA['weight']=weight_listA
        hidden_layer.append(neuronA)
        
    for i in range(output_no):
        neuronB={}
        weight_listB=[]
        for j in range(hidden_no + 1):
            weight_listB.append(random())
        neuronB['weight']=weight_listB
        output_layer.append(neuronB)
        
        
    network.append(hidden_layer)
    network.append(output_layer)   
    return network

FORWARD PROPAGATE :
This has split into 3 parts : Neuron activation, Neuron Transfer and Forward Propagation

In [3]:
def activate(weights, inputs):
    activation = weights[-1] 
    for i in range(len(weights)-2):
        activation += weights[i] * inputs[i]
    return activation
 
# Transfer neuron activation
def transfer(activation):
    return max(0.0,activation)

# Forward propagate input to a network output
def forward_propagation(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weight'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs

# Make a prediction with a network
def predict(network, row):
    outputs = forward_propagation(network, row)
    return outputs.index(max(outputs))

In [4]:
# Calculate the derivative of an neuron output
def sigmoid_derivative(output):
    return output * (1.0 - output)

def relu_derivative(output):
0

# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in range(len(network)-1,-1,-1):
        layer = network[i] 
        errors = list()
        if i != len(network)-1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weight'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j] 
                errors.append(neuron['output'] - expected[j])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * sigmoid_derivative(neuron['output'])

In [5]:
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weight'][j] -= l_rate * neuron['delta'] * inputs[j]
            neuron['weight'][-1] -= l_rate * neuron['delta']

In [6]:
#training the network
def train_network(network, train, l_rate, n_epoch, n_outputs):
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagation(network, row)
            desired = [0 for i in range(n_outputs)]
            desired[row[-1]] = 1
            sum_error += sum([(desired[i]-outputs[i])**2 for i in range(len(desired))])
            backward_propagate_error(network, desired)
            update_weights(network, row, l_rate)
        print('epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))

IMPORTING THE DATASET

In [7]:
from csv import *

In [8]:
# Load a CSV file
def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        rec = next(csv_reader)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

In [9]:
# Convert string column to float
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())
 
 #Convert string column to integer
def str_column_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = dict()
    for i, value in enumerate(unique):
        lookup[value] = i
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

In [10]:
# Find the min and max values for each column
def dataset_minmax(dataset):
    minmax = list()
    stats = [[min(column), max(column)] for column in zip(*dataset)]
    return stats

In [11]:
#normalize the data using min-max normalization so that 0-1 is the range
def normalization(dataset, minmax):
    for row in dataset:
        for i in range(len(row)-1):
            row[i] = (row[i] - minmax[i][0]) / (minmax[i][1] - minmax[i][0])

In [12]:
# Backpropagation Algorithm With Stochastic Gradient Descent
def back_propagation(train, test, l_rate, epochs, hidden_no):
    input_no = len(train[0]) - 1
    outputs_no = len(set([row[-1] for row in train]))
    network = neural_net(input_no, hidden_no, outputs_no)
    train_network(network, train, l_rate, epochs, outputs_no)
    predictions = list()
    for row in test:
        prediction = predict(network, row)
        predictions.append(prediction)
    return(predictions)

In [13]:
# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for i in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        dataset_split.append(fold)
    return dataset_split
 
# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0
 
# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        train_set = list(folds)
        train_set.remove(fold)
        train_set = sum(train_set, [])
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            row_copy[-1] = None
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

In [14]:
seed(1) 
# load and prepare data
filename = 'winequality-red.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i) 
    
# convert class column to integers
str_column_to_int(dataset, len(dataset[0])-1)
minmax = dataset_minmax(dataset)
normalization(dataset, minmax)


# evaluate algorithm
n_folds = 5
l_rate = 0.2
epochs = 5
hidden_no = 10
scores = evaluate_algorithm(dataset, back_propagation, n_folds, l_rate, epochs, hidden_no)
print('Scores:' , scores)
print('Mean Accuracy: ' , ( sum(scores)/float ( len(scores) ) ) )

epoch=0, lrate=0.200, error=1029.784
epoch=1, lrate=0.200, error=851.586
epoch=2, lrate=0.200, error=849.260
epoch=3, lrate=0.200, error=846.186
epoch=4, lrate=0.200, error=841.583
epoch=0, lrate=0.200, error=1109.749
epoch=1, lrate=0.200, error=840.281
epoch=2, lrate=0.200, error=837.410
epoch=3, lrate=0.200, error=833.305
epoch=4, lrate=0.200, error=826.627
epoch=0, lrate=0.200, error=1106.368
epoch=1, lrate=0.200, error=835.645
epoch=2, lrate=0.200, error=828.627
epoch=3, lrate=0.200, error=816.843
epoch=4, lrate=0.200, error=800.953
epoch=0, lrate=0.200, error=1295.441
epoch=1, lrate=0.200, error=841.568
epoch=2, lrate=0.200, error=836.887
epoch=3, lrate=0.200, error=829.238
epoch=4, lrate=0.200, error=817.457
epoch=0, lrate=0.200, error=1266.165
epoch=1, lrate=0.200, error=838.949
epoch=2, lrate=0.200, error=830.699
epoch=3, lrate=0.200, error=817.060
epoch=4, lrate=0.200, error=799.298
Scores: [40.43887147335423, 32.9153605015674, 39.811912225705335, 42.31974921630094, 46.0815047