# Task 2

## Import libraries, data, and set variables

In [2]:
import pandas as pd
import numpy as np
import random

# Seed both random number generators. The initial perceptron weights are drawn
# with np.random.rand, which is driven by NumPy's global generator and is not
# affected by random.seed(); without the second call every run differs.
random.seed(2809)
np.random.seed(2809)

In [3]:
# Import data: read the four CSV files without treating the first row as a header
train_in, train_out, test_in, test_out = (
    pd.read_csv(csv_name, header=None)
    for csv_name in ("train_in.csv", "train_out.csv", "test_in.csv", "test_out.csv")
)

# Add a constant column named "bias" (all ones) to the training and test inputs
train_in, test_in = (frame.assign(bias=1) for frame in (train_in, test_in))

# Variables
# nodes: number of output nodes (one weight column per node)
# features: number of input columns, including the bias column added above
nodes = 10
features = len(train_in.columns)

## Functions for training the perceptron and testing results

In [4]:
# Training function
def train_perceptron(weights, train_in, train_out, eta, max_iter=None):
    """Train a multi-class perceptron until no training example is misclassified.

    Each pass predicts every example with the current weights (class = argmax
    of ``train_in @ weights``). Then, for every example that was misclassified
    at the start of the pass, ``eta * x`` is added to the weight column of the
    true class and subtracted from the column of the wrongly predicted class.

    Parameters
    ----------
    weights : array-like, shape (n_features, n_classes)
        Initial weights. The argument is not modified; a copy is trained.
    train_in : array-like, shape (n_samples, n_features)
        Training inputs, one example per row.
    train_out : array-like with n_samples entries
        Integer class labels, used as column indices into ``weights``.
    eta : float
        Learning rate that scales every weight update.
    max_iter : int or None, default None
        Maximum number of passes over the data. ``None`` keeps looping until
        no example is misclassified, which never terminates if the data cannot
        be separated by the model.

    Returns
    -------
    weights : pandas.DataFrame or numpy.ndarray
        Trained weights; a DataFrame (same labels) if ``weights`` was one.
    it : int
        Number of passes performed.
    accuracy : float
        Proportion of training examples classified correctly at the end.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of input rows.
    """
    # Work on plain arrays so everything is positional: no dependence on the
    # frames' index/column labels and no per-example transpose of the input.
    inputs = np.asarray(train_in, dtype=float)
    labels = np.asarray(train_out).ravel().astype(int)
    trained = np.array(weights, dtype=float)  # np.array copies: caller's weights stay untouched

    if labels.shape[0] != inputs.shape[0]:
        raise ValueError("train_in and train_out must contain the same number of examples")

    # Generate output with the initial weights, and determine classes
    pred_class = np.argmax(np.dot(inputs, trained), axis=1)
    it = 0

    # For each misclassified example, update weights, as long as there are misclassified examples
    while np.any(pred_class != labels) and (max_iter is None or it < max_iter):
        # The misclassified set and the predicted classes are fixed for the whole pass
        for i in np.flatnonzero(pred_class != labels):
            step = eta * inputs[i]
            trained[:, labels[i]] += step      # Increase weight for true value
            trained[:, pred_class[i]] -= step  # Decrease weight for the current, incorrect, result

        # Determine new predicted classes based on the new weights
        pred_class = np.argmax(np.dot(inputs, trained), axis=1)
        it += 1

    # Determine accuracy: proportion of correctly classified examples
    accuracy = np.count_nonzero(pred_class == labels) / len(labels)

    # Hand back the same container type the caller passed in
    if isinstance(weights, pd.DataFrame):
        trained = pd.DataFrame(trained, index=weights.index, columns=weights.columns)
    return trained, it, accuracy


In [5]:
# Function to test perceptron on test data
def predict(input, output, weights):
    """Classify every row of ``input`` and score the result against ``output``.

    The predicted class of a row is the index of the largest entry of
    ``input @ weights`` for that row. Predictions and labels are compared by
    position, so ``output`` may be a DataFrame (any index), a Series, a list
    or a NumPy array.

    Parameters
    ----------
    input : array-like, shape (n_samples, n_features)
        Examples to classify, one per row.
    output : array-like with n_samples entries
        True class labels.
    weights : array-like, shape (n_features, n_classes)
        Perceptron weights.

    Returns
    -------
    predictions : pandas.DataFrame
        Single-column frame holding the predicted class of each row.
    accuracy : float
        Proportion of rows whose prediction equals the true label.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of input rows.
    """
    scores = np.dot(np.asarray(input), np.asarray(weights))
    predicted = np.argmax(scores, axis=1)

    # Flatten the labels so the comparison is positional rather than aligned on
    # pandas index/column labels.
    labels = np.asarray(output).ravel()
    if labels.shape != predicted.shape:
        raise ValueError("input and output must contain the same number of examples")

    accuracy = np.count_nonzero(predicted == labels) / len(labels)
    return pd.DataFrame(predicted), accuracy

## Results

In [6]:
# Function to get all results: algorithm and its accuracy
def results(train_in, train_out, test_in, test_out):
    """Train a perceptron from random initial weights and print its accuracies.

    Draws a fresh weight matrix with np.random.rand, trains it on
    (train_in, train_out) with a learning rate of 0.1, evaluates the trained
    weights on (test_in, test_out), and prints the number of training
    iterations, the training accuracy and the test accuracy. Returns nothing.
    """
    # Randomly initialize weights: one row per input column of the data passed
    # in (rather than the notebook-level `features`), one column per output node
    weights_init = pd.DataFrame(np.random.rand(train_in.shape[1], nodes))

    # Train perceptron, and determine test and training accuracy
    weights_results, iterations, train_acc = train_perceptron(weights_init, train_in, train_out, eta = 0.1)
    test_preds, test_acc = predict(test_in, test_out, weights_results)

    print("Training converged in ", iterations, "iterations, with an accuracy of ", train_acc)
    # test_acc comes from predict() on the test data, so label it as such
    print("The accuracy on the test set is", test_acc)

In [7]:
# Repeat the experiment ten times; every call to results() draws new random
# initial weights, so the spread of the printed accuracies shows how much the
# outcome depends on the initialization
for i in range(10):
    print("Run", i + 1)
    results(train_in, train_out, test_in, test_out)

Run 1
Training converged in  273 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.872
Run 2
Training converged in  293 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.871
Run 3
Training converged in  324 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.871
Run 4
Training converged in  271 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.867
Run 5
Training converged in  250 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.867
Run 6
Training converged in  251 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.87
Run 7
Training converged in  323 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.87
Run 8
Training converged in  321 iterations, with an accuracy of  1.0
The accuracy on the training set is 0.87
Run 9
Training converged in  314 iterations, with an accuracy of  1.0
The accuracy on the training set is 0