In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
# synthetic two-class data set (seeded so every run sees the same points)
np.random.seed(1)

# Two draws are made, in this order, from the global random stream:
#   1. 100 points around (1, 1), shifted left by 0.5 -- only the first 50 are kept
#   2.  50 points around (2, 2), shifted right by 1.5
x = np.vstack([
    (np.random.normal(loc=1, scale=1, size=(100, 2)) - np.array([0.5, 0]))[:50],
    np.random.normal(loc=2, scale=1, size=(50, 2)) + np.array([1.5, 0]),
])

# labels: -1 for the first 50 rows, +1 for the last 50 rows
y = np.concatenate([np.full(50, -1.0), np.full(50, 1.0)])

In [3]:
def perceptron_single_step_update(
        feature_vector,
        label,
        current_theta,
        current_theta_0):
    """
    Updates theta and theta_0 on a single step of the perceptron algorithm.

    The parameters are changed only when the point is misclassified or lies
    exactly on the boundary, i.e. when
    label * (feature_vector @ current_theta + current_theta_0) <= 0.

    The inputs are never modified: when an update happens, new objects are
    returned; otherwise the inputs themselves are returned unchanged.

    Arguments:
        feature_vector - A numpy array describing a single data point.
        label - The correct classification of the feature vector.
        current_theta - The current theta being used before this update.
        current_theta_0 - The current theta_0 being used before this update.

    Returns: A tuple where the first element is the theta after the update and 
             the second element is the theta_0 after the update.
    """
    agreement = label*(feature_vector@current_theta + current_theta_0)

    if agreement <= 0:
        # Build new values instead of using `+=`: in-place addition would
        # silently overwrite the caller's arrays and fails outright when
        # current_theta has an integer dtype and the features are floats.
        updated_theta = current_theta + label*feature_vector
        updated_theta_0 = current_theta_0 + label
        return (updated_theta, updated_theta_0)

    return (current_theta, current_theta_0)

In [4]:
def pegasos_single_step_update(
        feature_vector,
        label,
        L,
        eta,
        current_theta,
        current_theta_0):
    """
    Performs one Pegasos update of theta and theta_0 for a single data point.

    theta is always scaled by (1 - eta*L). When the point's margin
    label * (feature_vector @ current_theta + current_theta_0) is at most 1,
    eta*label*feature_vector is added to the scaled theta and eta*label is
    added to theta_0; otherwise theta_0 is returned as it was passed in.

    Arguments:
        feature_vector - A numpy array describing a single data point.
        label - The correct classification of the feature vector.
        L - The lambda (regularization) value used in the update.
        eta - The step size used in the update.
        current_theta - The current theta being used before this update.
        current_theta_0 - The current theta_0 being used before this update.

    Returns: A tuple where the first element is the theta after the update and 
             the second element is the theta_0 after the update.
    """
    margin = label*(feature_vector@current_theta + current_theta_0)

    # the regularization shrink is applied in both cases
    decayed_theta = (1 - eta*L)*current_theta

    if margin <= 1:
        # inside the margin (or misclassified): also step towards the point
        return (decayed_theta + eta*label*feature_vector,
                current_theta_0 + eta*label)

    return (decayed_theta, current_theta_0)

In [5]:
def perceptron(feature_matrix, labels, T, L):
    """
    Trains the perceptron, the average perceptron and Pegasos side by side on
    the same data, redrawing all three boundary lines after every epoch.

    Besides its arguments, this function relies on the module-level names
    `draw_perceptron_line`, `text` and `fig` to update the plot.

    Arguments:
        feature_matrix - A numpy matrix describing the given data, one row per data point.
        labels - The correct classification of each row of the feature matrix.
        T - An integer indicating how many times the algorithms iterate through the entire feature matrix.
        L - The lambda value being used to update the Pegasos parameters.

    Returns: A tuple (theta, theta_0, sum_theta, sum_theta_0, pegasos_theta, pegasos_theta_0)
             after T iterations through the feature matrix. sum_theta and sum_theta_0 are the
             running SUMS of the perceptron parameters over all single-step updates; they are
             not divided by the number of updates (T * number of rows).
    """
    n_rows = feature_matrix.shape[0]
    n_features = feature_matrix.shape[1]

    # perceptron parameters start at all-ones weights and a zero offset
    theta, theta_0 = np.ones(n_features), np.zeros(1)

    # accumulators for the average perceptron
    sum_theta, sum_theta_0 = np.zeros(n_features), np.zeros(1)

    # Pegasos parameters start at zero
    pegasos_theta, pegasos_theta_0 = np.zeros(n_features), np.zeros(1)

    # number of single-point updates performed so far
    update_counter = 0

    for t in range(T):
        for i in range(n_rows):
            update_counter += 1
            point, target = feature_matrix[i], labels[i]

            theta, theta_0 = perceptron_single_step_update(
                point, target, theta, theta_0)
            sum_theta += theta
            sum_theta_0 += theta_0

            # Pegasos step size shrinks with the number of updates
            eta = 1/np.sqrt(update_counter)
            pegasos_theta, pegasos_theta_0 = pegasos_single_step_update(
                point, target, L, eta, pegasos_theta, pegasos_theta_0)

        # refresh the three boundary lines once per epoch
        draw_perceptron_line(theta, theta_0, 'perceptron')
        draw_perceptron_line(sum_theta/update_counter,
                             sum_theta_0/update_counter,
                             'average_perceptron')
        draw_perceptron_line(pegasos_theta, pegasos_theta_0, 'pegasos')

        # show the current epoch number on the plot
        text.set_text(f'$epoch:{t+1}$')

        # push the changes to the window
        canvas = fig.canvas
        canvas.draw()
        canvas.flush_events()

        # short pause so the animation is visible
        time.sleep(0.025)

    return (theta, theta_0, sum_theta, sum_theta_0, pegasos_theta, pegasos_theta_0)

In [6]:
def draw_perceptron_line(theta, theta_0, algorithm='perceptron'):
    """
    Computes the boundary line theta[0]*x1 + theta[1]*x2 + theta_0 = 0 and
    pushes it to the matching line on the plot.

    The plotted lines are the module-level objects `perceptron_line`,
    `average_perceptron_line` and `pegasos_line`. Any other value of
    `algorithm` leaves all of them untouched.

    Arguments:
        theta - The theta being used.
        theta_0 - The theta_0 being used.
        algorithm - Indicating which line to draw: 'perceptron',
                    'average_perceptron' or 'pegasos'.

    Returns: A tuple (line_x1, line_x2) with the data points of the boundary line.
             Both are np.array([0]) when theta[0] and theta[1] are both zero.
    """
    w2 = theta[1]
    w1 = theta[0]
    span = np.linspace(-5,10,2)

    # generate the data points of the boundary line
    if w2 != 0:
        # solve for x2 over a fixed range of x1
        line_x1 = span
        line_x2 = (-theta_0-(w1*line_x1))/w2
    elif w1 != 0:
        # w2 is zero here: solve for x1 over a fixed range of x2
        line_x2 = span
        line_x1 = (-theta_0-(w2*line_x2))/w1
    else:
        # no direction defined: collapse the line to a single point
        line_x1 = np.array([0])
        line_x2 = np.array([0])

    # pick the plotted line that belongs to this algorithm
    if algorithm == 'perceptron':
        target_line = perceptron_line
    elif algorithm == 'average_perceptron':
        target_line = average_perceptron_line
    elif algorithm == 'pegasos':
        target_line = pegasos_line
    else:
        return line_x1, line_x2

    target_line.set_xdata(line_x1)
    target_line.set_ydata(line_x2)

    return line_x1, line_x2

In [7]:
# pop-out window
%matplotlib tk

# create matplotlib figure and axes
plt.style.use('seaborn-whitegrid')
fig, ax = plt.subplots(1, figsize=(7, 7))

# scatter plotting the given data
plt.plot(x[:int(x.shape[0]/2),0],x[:int(x.shape[0]/2),1], color='#97cbff', lw=0, marker='o', markersize=12)
plt.plot(x[int(x.shape[0]/2):,0],x[int(x.shape[0]/2):,1], color='#ffc1e0', lw=0, marker='o', markersize=12)

# draw classification boundary line
# perceptron
perceptron_line_x1 = np.array([0])
perceptron_line_x2 = np.array([0])
perceptron_line, = ax.plot(perceptron_line_x1, 
                           perceptron_line_x2, 
                           color='#ff0080', 
                           lw=6, 
                           zorder= 4, 
                           label='$perceptron$')

# average perceptron
average_perceptron_line_x1 = np.array([0])
average_perceptron_line_x2 = np.array([0])
average_perceptron_line, = ax.plot(average_perceptron_line_x1, 
                                   average_perceptron_line_x2, 
                                   color='#ffd306', 
                                   lw=6, 
                                   zorder= 3, 
                                   label='$average \ perceptron$')

# pegasos
pegasos_line_x1 = np.array([0])
pegasos_line_x2 = np.array([0])
pegasos_line, = ax.plot(pegasos_line_x1, 
                        pegasos_line_x2, 
                        color='#8cea00', 
                        lw=6, 
                        zorder= 2, 
                        label='$pegasos$')

# display epoches on the plot
text = plt.text(4, 5, '$epoch:1$', fontsize=15)

# basic formatting for the axes
plt.grid(True, linewidth=0.3)
plt.legend(loc='lower left', prop={'size': 15})
plt.setp(ax.get_xticklabels(), visible=False)
plt.setp(ax.get_yticklabels(), visible=False)

ax.set_ylim(-3.5, 6.5)
ax.set_xlim(-3, 7)

ax.set_xlabel('$x_{1}$', fontsize=20)
ax.set_ylabel('$x_{2}$', fontsize=20) 

ax.set_title('$Classification \ by \ Perceptron \ Algorithm$', fontsize=20)

# draw the figure
fig.canvas.draw()
fig.canvas.flush_events()   
plt.show()

In [8]:
def main():
    """Run the animated training on the module-level data x, y: 100 epochs, Pegasos lambda 0.2."""
    epochs, pegasos_lambda = 100, 0.2
    perceptron(x, y, epochs, pegasos_lambda)


if __name__ == '__main__':
    main()