In [10]:
import numpy as np
import random

def perceptron_single_step_update(
        feature_vector,
        label,
        current_theta,
        current_theta_0):
    """
    Performs one step of the perceptron algorithm for a single labelled
    data point and returns the resulting classification parameters.

    The point is treated as misclassified when
    label * (theta . feature_vector + theta_0) <= 0, so a point lying exactly
    on the decision boundary also triggers an update.

    The inputs are never modified: when an update happens, new objects are
    returned; when no update happens, the objects passed in are returned
    unchanged.

    Args:
        feature_vector - A numpy array describing a single data point.
        label - The correct classification of the feature vector.
        current_theta - The current theta being used by the perceptron
            algorithm before this update.
        current_theta_0 - The current theta_0 being used by the perceptron
            algorithm before this update.

    Returns: A tuple where the first element is a numpy array with the value of
    theta after the current update has completed and the second element is a
    real valued number with the value of theta_0 after the current update has
    completed.
    """
    agreement = label * (np.dot(feature_vector, current_theta) + current_theta_0)

    if agreement <= 0:
        # Build new values instead of using `+=`: an in-place add would
        # silently change the caller's array (and anything aliasing it), and
        # fails outright when theta has an integer dtype but the features are
        # floats.
        return current_theta + label * feature_vector, current_theta_0 + label

    return current_theta, current_theta_0

In [11]:
def get_order(n_samples):
    """
    Returns the order in which the data points should be visited.

    If a file named '<n_samples>.txt' can be opened in the working directory,
    its first line is parsed as comma-separated integers and returned as a
    list. Otherwise the global `random` generator is reseeded with 1 and a
    shuffled list of the indices 0 .. n_samples - 1 is returned.

    Args:
        n_samples - The number of data points to order.

    Returns: A list of integer indices.
    """
    order_path = str(n_samples) + '.txt'

    try:
        with open(order_path) as order_file:
            first_line = order_file.readline()
    except FileNotFoundError:
        # No stored order: fall back to a fixed-seed shuffle so that the
        # result is the same on every call.
        random.seed(1)
        shuffled = list(range(n_samples))
        random.shuffle(shuffled)
        return shuffled

    return [int(token) for token in first_line.split(',')]

In [12]:
# Scratch vector of four zeros; only its length is used to try out get_order.
a = np.zeros(4)
a

array([0., 0., 0., 0.])

In [13]:
# Preview the visiting order produced for a data set with as many rows as `a`.
get_order(len(a))

[3, 0, 2, 1]

In [14]:
def perceptron(feature_matrix, labels, T):
    """
    Trains a linear classifier with the perceptron algorithm.

    Makes exactly T passes over the data set (no early stopping). In every
    pass the rows are visited in the order returned by
    get_order(feature_matrix.shape[0]), and each visited row is handed to
    perceptron_single_step_update together with the current parameters.

    Args:
        feature_matrix -  A numpy matrix describing the given data. Each row
            represents a single data point.
        labels - A numpy array where the kth element of the array is the
            correct classification of the kth row of the feature matrix.
        T - An integer indicating how many times the perceptron algorithm
            should iterate through the feature matrix.

    Returns: A tuple where the first element is a numpy array with the value of
    theta, the linear classification parameter, after T iterations through the
    feature matrix and the second element is a real number with the value of
    theta_0, the offset classification parameter, after T iterations through
    the feature matrix.
    """
    # Both parameters start at zero.
    weights = np.zeros((feature_matrix.shape[1],))
    offset = 0

    for _ in range(T):
        # The visiting order is requested afresh at the start of every pass.
        for row in get_order(feature_matrix.shape[0]):
            # The parameters only change if this row is misclassified.
            weights, offset = perceptron_single_step_update(
                feature_matrix[row], labels[row], weights, offset)

    return weights, offset

In [15]:
# Seed both generators. `random` drives the label draw in a later cell, while
# the feature matrix below comes from NumPy's own global generator, which
# random.seed() does not affect.
# NOTE: outputs recorded before the NumPy seed was added will not match a
# fresh run of this cell.
random.seed(145)
np.random.seed(145)
arr = np.random.rand(10, 5)
arr[0]

array([0.66068391, 0.23439329, 0.1229993 , 0.16931117, 0.32450537])

In [16]:
# Show the full 10x5 sample feature matrix created in the previous cell.
arr

array([[0.66068391, 0.23439329, 0.1229993 , 0.16931117, 0.32450537],
       [0.91523552, 0.43526343, 0.14347812, 0.44896918, 0.64246545],
       [0.64421619, 0.57417166, 0.52672421, 0.5132716 , 0.03183599],
       [0.30688028, 0.96658939, 0.82090175, 0.26814157, 0.68124049],
       [0.17870579, 0.63961194, 0.71939167, 0.56526994, 0.95916421],
       [0.77551339, 0.31132147, 0.24392278, 0.24980905, 0.14035603],
       [0.34612887, 0.30736495, 0.85341838, 0.16920917, 0.23966649],
       [0.83504334, 0.09657253, 0.03456458, 0.74030113, 0.56790701],
       [0.25938515, 0.90220747, 0.57045712, 0.88516722, 0.2088193 ],
       [0.31164959, 0.44499719, 0.23211174, 0.52554501, 0.66963259]])

In [17]:
# Number of passes over the data used by the training call that follows.
T = 50

from itertools import combinations_with_replacement

# The two possible class labels.
lab = [-1, 1]

# Pick one length-10 labeling at random. combinations_with_replacement keeps
# the elements of each tuple in input order, so every candidate is a run of
# -1s followed by a run of 1s.
labels = np.array(
    random.choice(list(combinations_with_replacement(lab, 10)))
)
labels

array([-1, -1, -1, -1,  1,  1,  1,  1,  1,  1])

In [18]:
# Train once and unpack the result; calling perceptron() separately for each
# print would repeat the whole T-pass run just to pick one tuple element.
fitted_theta, fitted_theta_0 = perceptron(arr, labels, T)
print(f'Theta: \n{fitted_theta}')
print(f'\nTheta 0: {fitted_theta_0}')

Theta: 
[-4.95094634 -7.76079069  1.9613172   6.20813238 -1.35680462]

Theta 0: 2


In [19]:
def average_perceptron(feature_matrix, labels, T):
    """
    Runs the average perceptron algorithm on a given set of data. Runs T
    iterations through the data set, there is no need to worry about
    stopping early.

    In every pass the rows are visited in the order returned by
    get_order(feature_matrix.shape[0]) and each row is handed to
    perceptron_single_step_update. After every single step (whether or not it
    changed the parameters) the current theta and theta_0 are added to running
    sums; the sums are divided by the number of steps at the end.

    Args:
        feature_matrix -  A numpy matrix describing the given data. Each row
            represents a single data point.
        labels - A numpy array where the kth element of the array is the
            correct classification of the kth row of the feature matrix.
        T - An integer indicating how many times the perceptron algorithm
            should iterate through the feature matrix.

    Returns: A tuple where the first element is a numpy array with the value of
    the average theta, the linear classification parameter, found after T
    iterations through the feature matrix and the second element is a real
    number with the value of the average theta_0, the offset classification
    parameter, found after T iterations through the feature matrix. If no
    step was performed at all (for example T == 0 or an empty feature matrix)
    the zero-initialised parameters are returned instead of dividing by zero.
    """
    n_features = feature_matrix.shape[1]

    # Initialization of theta and theta_0
    theta_0 = 0
    theta = np.zeros((n_features,))

    # The running sums get storage of their own. Sharing the array object
    # with `theta` would double-count the first iterate whenever the
    # single-step update modifies theta in place.
    theta_sum = np.zeros((n_features,))
    theta_0_sum = 0.0
    n_steps = 0

    for _ in range(T):
        for i in get_order(feature_matrix.shape[0]):
            # Update of theta if there is a misclassification
            theta, theta_0 = perceptron_single_step_update(
                feature_matrix[i], labels[i], theta, theta_0)
            # `a = a + b` (not `+=`) so the sum never aliases theta.
            theta_sum = theta_sum + theta
            theta_0_sum += theta_0
            n_steps += 1

    if n_steps == 0:
        # Nothing to average over.
        return theta, float(theta_0)

    return theta_sum / n_steps, theta_0_sum / n_steps