# Perceptron Learning Algorithm, Supervised learning method: 
### A breakdown of Rosenblatt's simple, crude Perceptron in Python, with visualizations in Matplotlib and Bokeh

In [67]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
%matplotlib
# To render plots inside the notebook cell, use '%matplotlib inline' instead of '%matplotlib'.

Using matplotlib backend: TkAgg


In [68]:
# 'χ' is the training set the machine learns from, stored as a 3 x 20 matrix:
# one row per component and one column per record (observation, event, etc.).
#   row 0 (x0): the constant 1, multiplied by the bias weight w0
#   row 1 (x1): feature 1 of every record, random sample scaled by 10
#   row 2 (x2): feature 2 of every record, random sample scaled by 10
# χ = {1} x ℝ^2 = {[x0,x1,...,xd]T | x0=1, x1 ∈ ℝ,..., xd ∈ ℝ }
# Record i is therefore the column (X[0,i], X[1,i], X[2,i]).
X = np.matrix(
    np.vstack(
        (
            np.ones(20),
            np.random.sample(size=20) * 10,
            np.random.sample(size=20) * 10,
        )
    )
)

In [69]:
# Since this is supervised learning, each record needs an associated 'answer' or target which tells the
# Perceptron what the correct solution is for that record. This 'guides' the machine to infer a model that
# reproduces the given targets and can then make predictions for new, unseen records.
# The class of each record is assigned with an arbitrarily chosen rule:
#   x1 + x2 >= 8  ->  class  1 (red)
#   x1 + x2 <  8  ->  class -1 (blue)
# The colors exist only for visualization.

# Target (+1 / -1) and matching matplotlib color of every record, in record order.
targets = []
target_colors = []

# Feature values split by class, used by the interactive bokeh plot.
x1_class_red = []
x1_class_blue = []
x2_class_red = []
x2_class_blue = []

# Convert both feature rows to plain lists once instead of once per record.
x1_values = X[1].tolist()[0]
x2_values = X[2].tolist()[0]

# This loop sets up the true conditions/decision criteria of 'reality'.
# Element-wise x1 + x2 per record (explicit matrix addition rather than the
# builtin sum(), whose second argument is a start value, not a second operand).
sum_ = (X[1] + X[2]).tolist()[0]
for x1_value, x2_value, feature_sum in zip(x1_values, x2_values, sum_):
    if feature_sum >= 8:
        targets.append(1)
        target_colors.append('r')
        x1_class_red.append(x1_value)
        x2_class_red.append(x2_value)
    else:
        targets.append(-1)
        target_colors.append('b')
        x1_class_blue.append(x1_value)
        x2_class_blue.append(x2_value)

In [70]:
# Create the 1 x 3 row vector of weights w0, w1, w2 (w0 is the bias).
# The perceptron 'learns' these weights from the features and targets above.
# Weights are also called coefficients: they give a feature more or less
# influence on the result (a common simple example is house price, where house
# size and lot size would be weighted heavily in determining the price/target).
weights = np.matrix([np.random.sample(size=3) * 10])
# Set the bias w0 to an arbitrary value of 2.5; the remaining two weights
# scale the two feature dimensions of X.
weights[0, 0] = 2.5

# Tabulate the starting weights next to every record via a pandas DataFrame.
# The weight row is repeated once per record (X.shape[1] columns of X) so that
# all DataFrame columns have the same length.
# NOTE: the trailing ';' discards the expression result; in IPython/Jupyter that
# suppresses the table display - remove it to actually see the table.
pd.DataFrame(
    {'[w0 (bias), w1, w2]': [weights.tolist()[0] for _ in range(X.shape[1])],
     'x0': X.tolist()[0],
     'x1': X.tolist()[1],
     'x2': X.tolist()[2]
    }
);

In [71]:
# Print every record as "x0  x1  x2  target", one line per record.
for i in range(np.size(X[0])):
    print(X[0, i], X[1, i], X[2, i], targets[i], sep="  ")

# Draw all records once, colored by class. The scatter call does not depend on
# the record index, so it belongs outside the loop rather than being redrawn
# for every record.
plt.scatter(
    X[1].tolist()[0],
    X[2].tolist()[0],
    c=target_colors
);
# Can you SEE in the plotted graph that the 2 classes of data are linearly separable?

1.0  1.913002791702505  9.98473162267954  1
1.0  5.626597220698795  8.011360240204276  1
1.0  1.9445233606166168  1.9658301965762748  -1
1.0  4.986459818910354  0.9804553992221232  -1
1.0  0.48590019060458567  3.237372282400851  -1
1.0  3.797791704107901  5.299340749282509  1
1.0  8.590531462847837  1.4843454133065181  1
1.0  2.6519319533550023  0.18999160035520934  -1
1.0  8.003198978088289  2.5504746575508697  1
1.0  7.386994206308821  0.9289506075971155  1
1.0  7.249296046131306  7.131244595178874  1
1.0  5.776402969307491  4.123316850019436  1
1.0  5.749178372647771  1.492239446173813  -1
1.0  2.167316754797255  9.856051117513832  1
1.0  1.1816572846018736  7.967764875779554  1
1.0  7.165791572909839  5.962844091576772  1
1.0  9.26237823474912  9.105485307500018  1
1.0  1.3117471364588307  5.956723862086998  -1
1.0  6.023537163268524  9.37646101683574  1
1.0  9.960491330259241  7.560986691294964  1


In [74]:
def predict(X, weights):
    """Classify every record of X with the current weights (the 'hypothesis').

    X holds one record per column; the number of records is the number of
    elements in its first row. For each record the dot product of `weights`
    and that record's column is computed.

    Returns a list with one entry per record: 1 (red) when the dot product is
    non-negative, otherwise -1 (blue). The decision boundary is at zero.
    """
    record_count = np.size(X[0])
    records = X.T  # row k of the transpose is record k
    return [
        1 if np.dot(weights, records[k].T) >= 0 else -1
        for k in range(record_count)
    ]

def adjust_weights(X, predictions, weights, targets):
    """Apply the perceptron update for every misclassified record.

    For each record whose prediction differs from its target, every weight j
    is changed by target * X[j, record].

    The update is done in place: the returned object is the very same
    `weights` object that was passed in, not a copy.
    """
    updated = weights  # alias of the caller's weights, deliberately not a copy
    weight_count = np.size(weights)
    for record in range(np.size(targets)):
        if predictions[record] == targets[record]:
            continue  # correctly classified, nothing to learn from this record
        for k in range(weight_count):
            updated[0, k] += targets[record] * X[k, record]
    return updated

def graph(weights,X, x_coord_hist,y_coord_hist):
    """Redraw the current decision boundary together with the data points.

    The boundary is the line w0*x0 + w1*x1 + w2*x2 = 0 (with x0 = 1), drawn
    between its two axis intercepts. The x1-axis intercept is appended to
    `x_coord_hist` and the x2-axis intercept to `y_coord_hist`, so callers can
    replay the sequence of boundaries later.

    Reads the module-level `target_colors` for the point colors and draws on
    the current matplotlib figure.

    NOTE(review): there is no guard for w1 == 0 or w2 == 0, which makes the
    intercept divisions below divide by zero.
    """
    w0, w1, w2 = weights[0,0],weights[0,1],weights[0,2]
    # Intercept on the horizontal (x1) axis: set x2 to zero and solve for x1.
    #   b + w1x1 + w2(0) = 0   (b = w0x0 is the bias)
    #   x1 = -b / w1
    x1 = (-w0) / w1
    # Intercept on the vertical (x2) axis: set x1 to zero and solve for x2.
    #   b + w1(0) + w2x2 = 0
    #   x2 = -b / w2
    x2 = (-w0) / w2
    # Slope-intercept form y = m*x + b of the same line, with x1 on the
    # horizontal axis and x2 on the vertical axis:
    #   y-intercept: b = x2        (the point (0, x2))
    #   slope:       m = -x2 / x1  (the line also passes through (x1, 0))
    b = x2
    m = -x2 / x1
    x_coords = np.array([0,x1])
    y_coords = m * x_coords + b
    # now we have the two points (0, x2) and (x1, 0)

    # Clear the previous frame FIRST, then fix the viewport; setting the limits
    # before plt.clf() would throw them away together with the old axes.
    plt.clf()
    axes = plt.gca()
    axes.set_xlim([-1,13])
    axes.set_ylim([-1,13])
    plt.plot(x_coords, y_coords)
    x_coord_hist.append(x_coords[1])
    y_coord_hist.append(y_coords[0])
    plt.scatter(
        X[1].tolist()[0],
        X[2].tolist()[0],
        c=target_colors
    )
    # pause so humans can see some of the process
    plt.pause(.1)
    plt.show()

In [75]:
# Number of learning steps (epochs) performed so far.
epochs = 0
predictions = []
# Axis intercepts of every plotted decision boundary, one entry per epoch,
# kept for the interactive bokeh plot.
x_coord_hist = []
y_coord_hist = []

# After each learning step (an 'epoch' in machine learning jargon) the current
# state is plotted, so you can watch the decision boundary move and eventually
# settle between the two classes (between the blue and the red dots).
# The decision boundary is the equation of a line: x0w0 + x1w1 + x2w2 = 0.
# 'Reality' was generated with x0(-8) + x1(1) + x2(1) = 0; the machine does not
# know this and instead infers its own weights from the data, so the result is
# only similar to the true boundary. Given more data points, in particular
# points near the true boundary (the line from (0,8) to (8,0)), the Perceptron
# would learn a boundary closer to the true one.

# Perceptron 'learning': 1. predict, 2. adjust the weights, and repeat until
# every predicted value equals its target.
while not np.array_equal(predictions, targets):
    predictions = predict(X, weights)
    # adjust_weights updates the weights in place and hands the same object back
    weights = adjust_weights(X, predictions, weights, targets)
    epochs = epochs + 1
    plt.show()
    graph(weights, X, x_coord_hist, y_coord_hist)

In [76]:
# Report how many epochs were needed and the learned weights.
print(
    "Perceptron's calculated weights in", epochs, "epochs: \nw0:", weights[0, 0],
    "\nw1:", weights[0, 1],
    "\nw2:", weights[0, 2]
)
# Show the learned weights as a formula in x1 and x2.
print(
    "\nThe resulting formula (plug in the existing points (x1,x2) to verify "
    "or use new data to classify new data):"
)
print(weights[0, 0], " + x1(", weights[0, 1], ") + x2(", weights[0, 2], ")")
# Explain how the sign of the formula's value maps to the two classes.
print(
    "\nValues less than zero are below the decision boundary and are of "
    "classified as class -1 (blue) while "
    "values greater than or equal to zero are of class +1 (red)"
)

Perceptron's calculated weights in 62 epochs: 
w0: -112.5 
w1: 14.033955352694212 
w2: 14.442091798738941

The resulting formula (plug in the existing points (x1,x2) to verify or use new data to classify new data):
-112.5  + x1( 14.033955352694212 ) + x2( 14.442091798738941 )

Values less than zero are below the decision boundary and are of classified as class -1 (blue) while values greater than or equal to zero are of class +1 (red)


In [77]:
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, Slider
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Button
from bokeh.events import ButtonClick

# `column` builds the final layout below but is missing from this cell's
# imports, so it is imported here to avoid a NameError.
from bokeh.layouts import column

# Data sources for the interactive plot:
#   source_decision_boundary - the line currently drawn, as the two points
#                              (x[0], y[0]) and (x[1], y[1]); starts as a placeholder
#   source_update_boundary   - per-epoch axis intercepts recorded during training
#   source_class_red/blue    - the data points of each class
source_decision_boundary = ColumnDataSource(data=dict(x=[2,0], y=[0,4]))
source_update_boundary   = ColumnDataSource(data=dict(x=x_coord_hist, y=y_coord_hist))
source_class_red         = ColumnDataSource(data=dict(x=x1_class_red,     y=x2_class_red))
source_class_blue        = ColumnDataSource(data=dict(x=x1_class_blue,    y=x2_class_blue))

# Same viewport as the matplotlib animation: both axes from -1 to 13.
plot = figure(y_range=(-1, 13), x_range=(-1,13), plot_width=400, plot_height=400)
plot.line('x', 'y', source=source_decision_boundary, line_width=3, line_alpha=0.6)
plot.circle(x='x', y='y',source=source_class_red,  size=10, color="red",  alpha=0.5)
plot.circle(x='x', y='y',source=source_class_blue, size=10, color="blue", alpha=0.5)

# Browser-side callback: when the slider moves to epoch t, replace the line's
# horizontal intercept (x[0]) and vertical intercept (y[1]) with the values
# recorded for that epoch, then trigger a redraw.
callback = CustomJS(args=dict(source_db  = source_decision_boundary,
                              source_db_update  = source_update_boundary,
                              source_red = source_class_red,
                              source_blue= source_class_blue
                             ),
                    code="""
                    var data = source_db.data;
                    var data_update = source_db_update.data;
                    var t = epoch_slider.value;
                    var x = data['x']
                    var y = data['y']
                    x[0] = data_update['x'][t]
                    y[1] = data_update['y'][t]
                    source_db.change.emit();
                    """)


# One slider position per recorded epoch (0 .. epochs-1).
epoch_slider = Slider(start=0, end=epochs-1, value=0, step=1,
                       title="Accelerate Epoch", callback=callback)

# The JS code refers to the slider by name, so it is added to the callback's
# arguments once the slider exists.
callback.args["epoch_slider"] = epoch_slider
# The widget box previously also received a name `div` that is never defined in
# this file (NameError); add a Div widget here if a caption is wanted.
layout = column(
    plot,
    widgetbox(epoch_slider)
)

output_file("Primitive_Perceptron_model.html", title="'Primitive' (1960's) Perceptron model")
show(layout)