#### Imports

In [236]:
import numpy as np
import matplotlib.pylab as plt

#### Functions for graph plotting

In [237]:
def plotting_classification_result10(Sp, Sn, w_vector, title):
    """
    Plot the samples together with the decision boundary of a weight vector.

    The boundary is the line ``w0 + w1*x + w2*y = 0`` drawn across a square
    window of side ``scale`` (10) centred on the origin.

    Parameters
    ----------
    Sp : iterable of sequences
        Positive samples. Element 0 is plotted as x and element 1 as y;
        any further elements (e.g. a label) are ignored.
    Sn : iterable of sequences
        Negative samples, same layout as ``Sp``.
    w_vector : sequence of three numbers
        ``(w0, w1, w2)``: bias followed by the x and y weights.
    title : str
        Title shown above the plot.

    Notes
    -----
    If both ``w1`` and ``w2`` are zero the equation does not describe a line,
    so only the samples are drawn (previously this case crashed because the
    line endpoints were never assigned).
    """
    scale = 10  # Side length of the square plotting window
    half = scale / 2

    # Split the samples into coordinate lists for scatter plotting
    Sp_x = [sample[0] for sample in Sp]
    Sp_y = [sample[1] for sample in Sp]
    Sn_x = [sample[0] for sample in Sn]
    Sn_y = [sample[1] for sample in Sn]

    w0 = w_vector[0]
    w1 = w_vector[1]
    w2 = w_vector[2]

    boundary = None  # (X, Y) endpoints of the decision line, if one exists
    if w1 != 0:
        # Solve for x at the bottom and top edges of the window
        x_bottom = -(w0 - w2 * half) / w1
        x_top = -(w0 + w2 * half) / w1
        boundary = (np.array([x_bottom, x_top]), np.array([-half, half]))
    elif w2 != 0:
        # w1 == 0: solve for y at the left and right edges of the window
        y_left = -(w0 - w1 * half) / w2
        y_right = -(w0 + w1 * half) / w2
        boundary = (np.array([-half, half]), np.array([y_left, y_right]))

    _, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(Sn_x, Sn_y, c='b', label='Sn : {} elements'.format(len(Sn_x)))
    ax.scatter(Sp_x, Sp_y, c='r', marker="^", label='Sp : {} elements'.format(len(Sp_x)))
    if boundary is not None:
        ax.plot(boundary[0], boundary[1], c='green', label='Perceptron Decision Boundary')
    ax.set_title(title)
    ax.legend(loc="upper right")
    ax.set_ylim([-half, half])
    ax.set_xlim([-half, half])
    plt.show()


def plotting_misclassification_over_updates(w_history, missed):
    """
    Draw the misclassification count as a line plot.

    The x-axis runs over ``0 .. len(w_history)`` and the y-values are the
    entries of ``missed`` followed by one extra trailing ``0``, so ``missed``
    must be a list with the same length as ``w_history``.
    """
    x_values = range(len(w_history) + 1)
    _, axes = plt.subplots(figsize=(5, 5))

    # One extra zero is appended so the y-series matches the x-range length
    y_values = missed + [0]
    axes.plot(x_values, y_values, c='green')

    plt.ylabel('Number of Misclassifications')
    plt.xlabel('Number of Epochs')

    # Anchor both axes at the origin
    plt.ylim(bottom=0)
    plt.xlim(left=0)
    plt.show()

#### Training perceptron learning algorithm (PLA)

In [238]:
def activation_fn(x):
    """
    Sign-style step activation.

    Returns 1 (positive sample set) when ``x >= 0`` and -1 (negative sample
    set) otherwise.
    """
    return 1 if x >= 0 else -1


def misclassified(dataset, w_vector):
    """
    Count the samples in ``dataset`` that the weight vector labels wrongly.

    Each sample is ``[x, y, label]``; the prediction is the activation of
    ``w0 + x*w1 + y*w2`` and it is compared against ``label``.
    """
    return sum(
        1
        for point in dataset
        if activation_fn(
            w_vector[0] + (point[0] * w_vector[1]) + (point[1] * w_vector[2])
        ) != point[2]
    )

In [239]:
def perceptron_training(w_vec, training_samples, max_epochs=None):
    """
    Train a perceptron with the perceptron learning algorithm (PLA).

    The samples are swept repeatedly; whenever a sample is misclassified the
    weights are moved towards it (``w += label * [1, x1, x2]``). Training
    stops once no training sample is misclassified.

    Parameters
    ----------
    w_vec : list (or other mutable sequence with ``.copy()``) of 3 numbers
        Initial weights ``[w0, w1, w2]``. It is updated IN PLACE, so after
        the call it holds the final weights.
    training_samples : sequence of ``[x1, x2, label]``
        Training data with ``label`` in ``{1, -1}``. It is iterated several
        times, so it must not be a one-shot iterator.
    max_epochs : int or None, optional
        Maximum number of full passes over ``training_samples``. ``None``
        (the default) means no limit, which only terminates when the data
        is linearly separable. When the limit is hit, ``missed[-1]`` may
        still be greater than zero.

    Returns
    -------
    w_vectors : list
        Snapshot of the weights before training and after every update.
    missed : list of int
        Number of misclassified training samples for each snapshot in
        ``w_vectors`` (same length).
    """
    # Snapshot the initial weights: storing ``w_vec`` itself would alias the
    # list that is mutated below and silently overwrite this first entry.
    w_vectors = [w_vec.copy()]
    missed = [misclassified(training_samples, w_vec)]

    epochs_run = 0
    while missed[-1] > 0 and (max_epochs is None or epochs_run < max_epochs):
        for x1, x2, op_label in training_samples:
            net_input = w_vec[0] + (x1 * w_vec[1]) + (x2 * w_vec[2])
            if activation_fn(net_input) != op_label:
                # PLA update rule: shift the boundary towards the sample
                w_vec[0] += op_label
                w_vec[1] += op_label * x1
                w_vec[2] += op_label * x2
                # Record the new weights and how well they fit the data
                w_vectors.append(w_vec.copy())
                missed.append(misclassified(training_samples, w_vec))
        epochs_run += 1

    return w_vectors, missed


In [240]:
def generate_samples(subset):
    """
    Generate a random, linearly separable 2-D dataset and split it.

    Points are drawn uniformly from ``[-5, 5) x [-5, 5)`` until there are 20
    positive samples (``x + y >= 0``, label ``1``) and 20 negative samples
    (``x + y < 0``, label ``-1``). Per class, the first 10 samples form the
    training pool and the last 10 the testing set.

    Parameters
    ----------
    subset : int
        Number of samples per class taken from the training pool
        (``0 <= subset <= 10``).

    Returns
    -------
    training_samples : list of ``[x, y, label]``
        ``subset`` positive samples followed by ``subset`` negative samples.
    testing_samples : list of ``[x, y, label]``
        10 positive samples followed by 10 negative samples.

    Raises
    ------
    ValueError
        If ``subset`` is outside ``0..10``; such a value would make the
        training slice overlap the testing samples.
    """
    samples_per_class = 20  # Total samples generated for each label
    train_pool_size = 10    # Per class: first 10 for training, rest for testing

    if not 0 <= subset <= train_pool_size:
        raise ValueError(
            "subset must be between 0 and {}, got {!r}".format(train_pool_size, subset)
        )

    Sn = []  # Negative samples
    Sp = []  # Positive samples

    # Rejection sampling: keep drawing until both classes are full
    while len(Sp) < samples_per_class or len(Sn) < samples_per_class:
        i1, i2 = np.random.uniform(-5, 5, 2)
        if i1 + i2 >= 0 and len(Sp) < samples_per_class:
            Sp.append([i1, i2, 1])
        elif i1 + i2 < 0 and len(Sn) < samples_per_class:
            Sn.append([i1, i2, -1])

    # Held-out samples: never part of the training pool
    testing_samples = Sp[train_pool_size:] + Sn[train_pool_size:]

    # Training samples: the requested number from the front of each pool
    training_samples = Sp[:subset] + Sn[:subset]

    return training_samples, testing_samples

In [255]:
# Experiment: compare the test accuracy of the PLA when trained on all 10
# training samples per class ("100%") versus only 5 per class ("50%").
# Each of the 100 trials draws a fresh dataset and fresh random initial
# weights for each configuration.
acc_100_list = []  # Test accuracy of every trial with 100% of the training samples
acc_50_list = []  # Test accuracy of every trial with 50% of the training samples

for _ in range(100):
    # ================================
    # Using 100% of the training samples
    training_data, testing_data = generate_samples(10)

    # Train the PLA boundary using the training samples.
    # Initial weights: bias in [-1/4, 1/4), both input weights in [-1, 1).
    w_vector = [np.random.uniform(-1/4, 1/4), np.random.uniform(-1, 1), np.random.uniform(-1, 1)]
    w_history, missed = perceptron_training(w_vector, training_data)

    # Test the accuracy on the unseen testing samples, using the last
    # (final) weight vector recorded during training
    num_misclassified = misclassified(testing_data, w_history[-1])
    accuracy_100 = (len(testing_data) - num_misclassified) / len(testing_data)

    acc_100_list.append(accuracy_100)

    # ================================
    # Using 50% of the training samples
    # NOTE: this draws a new dataset, so the two configurations are not
    # evaluated on the same samples within a trial.
    training_data, testing_data = generate_samples(5)

    # Train the PLA boundary using the training samples
    w_vector = [np.random.uniform(-1/4, 1/4), np.random.uniform(-1, 1), np.random.uniform(-1, 1)]
    w_history, missed = perceptron_training(w_vector, training_data)

    # Test the accuracy on the unseen testing samples
    num_misclassified = misclassified(testing_data, w_history[-1])
    accuracy_50 = (len(testing_data) - num_misclassified) / len(testing_data)

    acc_50_list.append(accuracy_50)

# Report mean and standard deviation of the test accuracy over all trials
print('Using 100% training samples')
print('Accuracy : %s' % np.mean(acc_100_list))
print('Standard deviation: %s' % np.std(acc_100_list))
print('===============================')
print('Using 50% training samples')
print('Accuracy : %s' % np.mean(acc_50_list))
print('Standard deviation: %s' % np.std(acc_50_list))

Using 100% training samples
Accuracy : 0.9420000000000001
Standard deviation: 0.06772001181334804
Using 50% training samples
Accuracy : 0.912
Standard deviation: 0.08692525524840292
