# Perceptron Learning Algorithm (PLA)

## Learning from Data: Homework 1

### Questions 7, 8, 9, 10

In [69]:
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [70]:
# Load the data set
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open a dataset.pickle that comes from a trusted source.
# Later cells call np.dot(dataset, w) with 3-element weight vectors and read
# dataset.shape[1], so the unpickled object is presumably a 2-D NumPy array
# with three columns - TODO confirm against the file that produced it.
with open('dataset.pickle', 'rb') as f:
    dataset = pickle.load(f)

In [71]:
# Generate a random target function
def generate_target():
    """Draw a random target line and label every row of ``dataset`` with it.

    Two points are sampled uniformly from [-1, 1] x [-1, 1] and the line
    ``y = a*x + b`` through them is used as the decision boundary.

    Returns:
        (w, Ys): ``w`` is the weight list ``[-b, -a, 1]`` and ``Ys`` is
        ``np.sign(np.dot(dataset, w))``, i.e. one label per row of the
        module-level ``dataset``.
    """
    p = np.random.uniform(-1, 1, (2, 2))

    # Solve [[x1, 1], [x2, 1]] @ [a, b] = [y1, y2] for slope and intercept.
    x = [[z, 1] for z in p[:, 0]]
    y = p[:, 1]
    a, b = np.linalg.solve(x, y)

    # For a 3-column row r, dot(r, w) = -b - a*r[1] + r[2].
    # NOTE(review): this reads as "y - (a*x + b)" only if rows are laid out
    # as (1, x, y) - TODO confirm the dataset layout.
    w = [-b, -a, 1]

    Ys = np.sign(np.dot(dataset, w))
    return (w, Ys)

In [72]:
# Pick a random data set of the required size
def pick_data(targets, N, dim):
    """Sample ``N`` rows of ``dataset`` (with replacement) plus their labels.

    Args:
        targets: labels indexed in parallel with the module-level ``dataset``.
        N: number of samples to draw.
        dim: number of columns allocated for each sampled row.

    Returns:
        (Xs, Ys): an ``(N, dim)`` float array of the sampled rows and a
        length-``N`` float array holding the matching labels.
    """
    sample_X = np.zeros((N, dim))
    sample_Y = np.zeros(N)

    # One independent uniform index per sample, so the same row may repeat.
    chosen = [np.random.randint(0, len(dataset)) for _ in range(N)]
    for slot, source in enumerate(chosen):
        sample_X[slot] = dataset[source]
        sample_Y[slot] = targets[source]
    return (sample_X, sample_Y)

In [73]:
def error_rate(w, w_est):
    """Fraction of ``dataset`` rows on which two weight vectors disagree.

    Args:
        w: reference (target) weights.
        w_est: estimated weights to compare against ``w``.

    Returns:
        The mean of the element-wise inequality between
        ``sign(dot(dataset, w))`` and ``sign(dot(dataset, w_est))``.
    """
    def classify(weights):
        # Sign of the linear score for every row of the module-level dataset.
        return np.sign(np.dot(dataset, weights))

    disagreement = classify(w) != classify(w_est)
    return np.mean(disagreement)

In [77]:
# Main PLA Algorithm
def PLA(iterations, N, dim):
    """Run the perceptron learning algorithm over repeated random experiments.

    Each experiment draws a fresh target with ``generate_target``, samples
    ``N`` training points with ``pick_data`` and trains from all-zero weights
    until every sampled point is classified correctly.

    Args:
        iterations: number of independent experiments to average over.
        N: number of training points per experiment.
        dim: length of the weight vector (columns per data row).

    Returns:
        (mean_passes, mean_error): the average pass count per experiment and
        the average ``error_rate`` between the target and learned weights.
        The pass count includes the final pass that only confirms
        convergence, so it equals the number of weight updates plus one.
    """
    convergence = np.zeros(iterations)
    error_rates = np.zeros(iterations)

    for i in range(iterations):
        w, Y = generate_target()
        Xs, Ys = pick_data(Y, N, dim)
        w_est = np.zeros(dim)
        ctr = 0
        while True:
            ctr += 1
            # Evaluate the current weights once per pass. A zero score gives
            # sign 0, which never equals a +/-1 label, so the all-zero start
            # counts such points as misclassified.
            misclassified = np.sign(np.dot(Xs, w_est)) != Ys
            if not misclassified.any():
                break

            # Randomly pick one misclassified point and update the weights.
            candidates = np.flatnonzero(misclassified)
            pick = candidates[np.random.randint(len(candidates))]
            w_est = w_est + Ys[pick] * Xs[pick]
        convergence[i] = ctr
        error_rates[i] = error_rate(w, w_est)
    return (np.mean(convergence), np.mean(error_rates))

In [78]:
# Run the PLA algorithm for X iterations, with Y data points
def main(iterations, N):
    """Run ``PLA`` with the weight dimension taken from ``dataset``.

    Args:
        iterations: number of independent experiments passed to ``PLA``.
        N: number of training points per experiment.

    Returns:
        The ``(convergence, error_rate)`` averages produced by ``PLA``.
    """
    # The weight vector needs one entry per column of the data set.
    avg_passes, avg_error = PLA(iterations, N, dataset.shape[1])
    return (avg_passes, avg_error)

In [83]:
# Experiment with N = 10: main(1000, 10) repeats the PLA experiment 1000
# times, each on 10 sampled points, and returns the two averages printed below.
# conv counts perceptron passes per experiment (weight updates plus the final
# pass that confirms convergence).
# No random seed is set in the visible cells, so the printed numbers vary
# from run to run.
conv, err_rate = main(1000,10)
print("PLA run for 1000 iterations")
print("Dataset of size 10")
print("Average number of iterations needed for convergence is", conv)
print("Average error rate is", err_rate)

PLA run for 1000 iterations
Dataset of size 10
Average number of iterations needed for convergence is 10.374
Average error rate is 0.108771


In [84]:
# Experiment with N = 100: main(1000, 100) repeats the PLA experiment 1000
# times, each on 100 sampled points, and returns the two averages printed below.
# conv counts perceptron passes per experiment (weight updates plus the final
# pass that confirms convergence).
# No random seed is set in the visible cells, so the printed numbers vary
# from run to run.
conv, err_rate = main(1000,100)
print("PLA run for 1000 iterations")
print("Dataset of size 100")
print("Average number of iterations needed for convergence is", conv)
print("Average error rate is", err_rate)

PLA run for 1000 iterations
Dataset of size 100
Average number of iterations needed for convergence is 123.62
Average error rate is 0.01259
