# Pocket Algorithm in Python

In this project, I will be using the MNIST data set in order to try and successfully classify 1's and 5's. This will be done using a Pocket Perceptron Learning Algorithm

In [1]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt

In [2]:
mnist_train = pd.read_csv('mnist_train.csv')
mnist_test = pd.read_csv('mnist_test.csv')

In [3]:
# The testing and training data have the same format, which as it turns out, is exactly how I wanted it.
# Each row is one picture, and the column vectors are stacked on top of each other and that 756x1 vector is each row
mnist_test.head(3)

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Taking all of the data and extracting all of the data for just the two numbers I want to differentiate from
# This way, I can change these two numbers to any two numbers from the interval [1,9] and it should work
# throughout the entire program
test_num1 = 1
test_num2 = 5

test_data = mnist_test[(mnist_test['label']==test_num1) | (mnist_test['label']==test_num2)]
train_data = mnist_train[(mnist_train['label']==test_num1) | (mnist_train['label']==test_num2)]

test_labels = test_data['label']
test_outputs = []
for i in test_labels:
    if i == test_num2:
        test_outputs.append(-1)
    else:
        test_outputs.append(1)
test_data['Expected Output'] = test_outputs

train_labels = train_data['label']
train_outputs = []
for i in train_labels:
    if i == test_num2:
        train_outputs.append(-1)
    else:
        train_outputs.append(1)
train_data['Expected Output'] = train_outputs

train_data.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28,Expected Output
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,-1
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [5]:
def h_sum(weight_vec, input_vec):
    """
    Function to add up the sum(weights_i x input_i), 
    essentially finding the dot product of the two vectors
    INPUTS:
        - weights_vec, weights vector passed as an array
        - input_vec, input vector also passed as an array. Should be
                    the same length as weights_vec
    OUTPUTS:
        - the dot product of weights_vec and input vec, a scalar float value
    """
    return np.dot(weight_vec, input_vec)

def sign(h, threshold = 0):
    """
    Takes the sum(weights x input) and decides whether the perceptron
    fires (returns 1) or doesn't fire (returns -1)
    INPUTS:
        - h, a scalar float value
        - threshold, the threshold that h should be greater than
                for the perceptron to fire. Default is zero.
    OUTPUTS:
        - Either 1 or -1, depending on whether the perceptron
            fires or doesn't fire.
    """
    if h > threshold:
        return 1
    if h < threshold:
        return -1
    
def errors(weight_vec, input_df, input_labels):
    """
    Calculates the decimal error of the given weights to accurately
    classify the data in the given DataFrame
    INPUTS:
        - weight_vec, weights vector passed as an array
        - input_df, an m x n Pandas DataFrame where each row is an input vector
        - input_labels, a List where each input_labels[i] corresponds to the 
                    correct label for input_df[i]
    OUTPUTS:
        - error, a scalar float value in the interval [0,1]
    """
    counter = 0
    n = len(input_df)
    for i in input_df.index.values:
        x_i = np.array(input_df.loc[i])
        h = h_sum(weight_vec, x_i)
        if sign(h) != input_labels[i]:
            counter += 1
    return counter / n
    
def pocket_perceptron(input_df, input_labels, weight_vec, num_iterations = 100):
    """
    Perceptron Algorithm that trains itself to recognize the difference 
    between linearly seperable data
    INPUTS:
        - input_df, an m x n Pandas DataFrame where each row is an input vector
        - input_labels, a List where each input_labels[i] is the correct
                    label for input_df[i]
        - weight_vec, a numpy array containing an array of weights to start with.
                    Should have length = n
        - num_iterations, an integer. This should be the number of times the algorithm
                    goes through the entire data set and updates the weights.
    OUTPUTS:
        - weights, a numpy array with the updated weights. Should be length = n.
    """
    w_hat = 0
    w_hat = weight_vec
    for i in range(num_iterations):
        j = np.random.choice(input_df.index.values)
        x_j = np.array(input_df.loc[j])
        h = h_sum(weight_vec, x_j)
        if sign(h) != input_labels[j]:
            w_new = w_hat + (x_j * input_labels[j])
        if errors(w_new, input_df, input_labels) < errors(w_hat, input_df, input_labels):
            w_hat = w_new
        print(errors(w_hat, input_df, input_labels))
    return weights

def predict(test_vec, weight_vec):
    """
    This function predicts whether a given test_vec would be classified as 
    test_num1 or test_num2 based on the given trained weight_vec.
    INPUTS:
        - test_vec, a numpy array of length = n that contains input data
        - weight vec, a numpy array also of length = n containing
                    weights
    OUTPUTS:
        - returns test_num1 or test_num2
    """
    if sign(h_sum(weight_vec, test_vec)) == 1:
        return test_num1
    else:
        return test_num2

# Setting up the training and testing data as dataframes.
train_input = train_data.drop(['label', 'Expected Output'], axis=1)
train_input['Bias'] = [1 for i in range(len(train_input))]
test_input = test_data.drop(['label', 'Expected Output'], axis=1)
test_input['Bias'] = [1 for i in range(len(test_input))]

In [6]:
# The weights array is a numpy array with random floating point values in the interval [0,1].
weights = [round(np.random.normal(), 2) for i in range(len(train_input.columns))]

# This chunk actually trains the weights and gives us the amount of time the function took to execute.
# It also tells us how well the trained weights classified the test data.
# I found that, generally, the amount of time to run this code chunk is equal to (3 x num_iterations) seconds
start_time = time.time()
trained_weights = pocket_perceptron(train_input, train_labels, weights, num_iterations=20)
error = errors(trained_weights, test_input, test_labels)
elapsed = round(time.time() - start_time, 2)
print("Execution completed in {} seconds".format(elapsed))
print("Errors = ", round(error*100, 2), '%', sep='')

0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
0.44569596316698185
Execution completed in 101.96 seconds
Errors = 80.41%


In [7]:
num = np.random.choice(test_input.index.values)

print(predict(np.array(test_input.loc[num]), trained_weights))
print(test_labels[num])

1
1


In [8]:
print(errors(trained_weights, test_input, test_labels))
print(errors(weights, test_input, test_labels))

0.8041440552540701
0.8041440552540701


In [16]:
ran_weights = [round(np.random.normal(), 2) for i in range(len(train_input.columns))]
ran_loc = np.random.choice(train_input.index.values)
print(errors(ran_weights, train_input, train_labels)*100)
print(errors(ran_weights + np.array(train_input.loc[ran_loc]) * train_labels[ran_loc], train_input, train_labels)*100)

71.74216887281098
44.56959631669818
