### Towards Data Science's version of the perceptron algorithm

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.axes as ax

"""def get_line_point(W, delimit = 0):
    shape = np.shape(W)   
    W = W / W[0].item(0)
    delimit_1_np = np.array([delimit])                 
    return np.dot(W,np.hstack((delimit_1_np, np.random.random(shape).transpose()))).item(0)
"""
def load_data(path='iris.data'):
    """Load the first 100 Iris records as a float matrix with binary labels.

    The file is read as a header-less CSV. Only the first 100 rows are kept
    (presumably the setosa and versicolor samples, which are linearly
    separable -- this assumes the standard UCI row ordering). The last column
    is replaced by 0 for 'Iris-setosa' and 1 for anything else.

    Args:
        path: Location of the CSV file. Defaults to 'iris.data' in the
            current working directory.

    Returns:
        np.matrix of dtype float64 with one row per sample; the last column
        holds the 0/1 label.
    """
    frame = pd.read_csv(path, header=None)

    # Work on an independent copy: assigning a column into a plain slice of
    # `frame` is a chained assignment that pandas warns about.
    subset = frame.iloc[:100].copy()

    # Encode the class name (last column) as a numeric label in place.
    label_column = subset.columns[-1]
    subset[label_column] = np.where(subset[label_column] == 'Iris-setosa', 0, 1)

    # np.matrix is kept on purpose: the code that consumes this result
    # relies on every row slice staying two-dimensional.
    return np.asmatrix(subset, dtype='float64')

# Load the two-class subset once; this binds the module-level `data`.
data = load_data()

# Rows 0-49 carry label 0 (setosa) and rows 50-99 label 1 (versicolor).
# Column 0 is plotted on x and column 2 on y; in the standard UCI iris file
# those are sepal length and petal length respectively, so the axis labels
# below follow that order.
plt.scatter(np.array(data[:50,0]), np.array(data[:50,2]), marker='o', label='setosa')
plt.scatter(np.array(data[50:,0]), np.array(data[50:,2]), marker='x', label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend()
plt.show()

def perceptron(data, num_iter):
    """Train a perceptron with the classic error-driven update rule.

    Args:
        data: 2-D array-like (np.matrix or np.ndarray) whose last column is
            the 0/1 label and whose remaining columns are the features.
        num_iter: Number of full passes (epochs) over the data.

    Returns:
        A tuple ``(w, misclassified_, W_saved)`` where
        ``w`` is the final weight array of shape (1, n_features + 1) with the
        bias weight first, ``misclassified_`` is a list with the number of
        misclassified samples per epoch, and ``W_saved`` is a list holding an
        independent snapshot of the weights at the end of every epoch.
    """
    # Plain ndarrays keep row iteration simple and accept matrix or array input.
    samples = np.asarray(data, dtype=float)
    labels = samples[:, -1]
    # Prepend a constant 1 to every sample so the bias is learned as w[0, 0].
    augmented = np.insert(samples[:, :-1], 0, 1.0, axis=1)

    # set weights to zero
    w = np.zeros(shape=(1, augmented.shape[1]))
    misclassified_ = []
    W_saved = []
    for _ in range(num_iter):
        misclassified = 0
        for x, label in zip(augmented, labels):
            activation = np.dot(w[0], x)
            target = 1.0 if activation > 0 else 0.0
            delta = label - target

            if delta != 0:  # misclassified
                misclassified += 1
                w += delta * x
        # `w` is updated in place, so store a copy; appending `w` itself
        # would make every saved entry alias the final weights.
        W_saved.append(w.copy())
        misclassified_.append(misclassified)

    return (w, misclassified_, W_saved)

# Train for a fixed number of epochs and keep the per-epoch error counts.
num_iter = 10
w, misclassified_, W_saved = perceptron(data, num_iter)
#print(w)
#print(get_line_point(w))

# Re-draw the two classes. Column 0 is on x and column 2 on y; in the
# standard UCI iris file those are sepal length and petal length, so the
# axis labels below follow that order.
plt.scatter(np.array(data[:50,0]), np.array(data[:50,2]), marker='o', label='setosa')
plt.scatter(np.array(data[50:,0]), np.array(data[50:,2]), marker='x', label='versicolor')

plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend()
plt.show()


# Learning curve: number of misclassified samples after each epoch (1-based).
epochs = np.arange(1, num_iter+1)
plt.plot(epochs, misclassified_)
plt.xlabel('iterations')
plt.ylabel('misclassified')
plt.show()


import matplotlib.pyplot as plt
import matplotlib.lines as mlines

def newline(p1, p2):
    """Draw the line through two points across the current axes.

    The line is extended to the current x-bounds of the active axes. When the
    two points share the same x coordinate, a vertical segment spanning the
    current y-bounds is drawn instead.

    Args:
        p1: Indexable pair ``(x, y)`` of the first point.
        p2: Indexable pair ``(x, y)`` of the second point.

    Returns:
        The ``Line2D`` artist that was added to the axes.
    """
    axes = plt.gca()
    left, right = axes.get_xbound()
    x1, y1 = p1[0], p1[1]
    x2, y2 = p2[0], p2[1]

    if x2 == x1:
        # Vertical line: the slope is undefined, so pin x and span the y range.
        left = right = x1
        bottom, top = axes.get_ybound()
    else:
        # Point-slope form evaluated at both horizontal bounds.
        slope = (y2 - y1) / (x2 - x1)
        top = y1 + slope * (right - x1)
        bottom = y1 + slope * (left - x1)

    segment = mlines.Line2D([left, right], [bottom, top])
    axes.add_line(segment)
    return segment

# Demo of newline(): plot the identity line, then overlay the line through
# two sample points.
x = y = np.linspace(0, 10)

p1, p2 = [4.5, 20], [6,70]

plt.plot(x, y)
newline(p1, p2)
plt.show()

In [None]:
# Re-read the raw file as a DataFrame, rebinding the module-level `data`.
# NOTE(review): no header=None is passed here (load_data above does pass it),
# so pandas takes the first line of the file as column names -- confirm this
# is intended, since it shifts every positional row index by one.
data = pd.read_csv("iris.data")

# Print the column dtypes and non-null counts.
data.info()

In [None]:
# Preview the first rows of the frame.
data.head()

In [None]:
# Preview the last 100 rows of the frame.
data.tail(100)

In [None]:

"""print(data[49:98, [0]])
print(type(data))
print("="*50)
print(data[98:149, 2:3])
"""
#data.iloc[90:105]
# Count the rows selected by the positional slice 99:148 of column 2
# (the end index is exclusive).
len(data.iloc[99:148, 2])

In [None]:
# `data` was read without header=None, so (assuming the file itself has no
# header line) its first record became the column names and every row index is
# shifted down by one: versicolor occupies positions 49-98 and virginica
# 99-148. iloc end bounds are exclusive, hence 49:99 and 99:149.
plt.scatter(np.array(data.iloc[49:99, 0]), np.array(data.iloc[49:99, 2]), marker='o', label='versicolor')
plt.scatter(np.array(data.iloc[99:149, 0]), np.array(data.iloc[99:149, 2]), marker='x', label='virginica')

# Column 0 is sepal length and column 2 is petal length in the standard UCI
# iris file.
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend()
plt.show()

### Udacity's version of the perceptron algorithm 

In [None]:


import numpy as np

# Seed NumPy's global RNG so the np.random.rand draws used for the initial
# weights and bias below are reproducible.
np.random.seed(42)

def stepFunction(t):
    """ Apply the unit step activation with threshold 0.
    -----------
    Args:
        t (number): The linear score of one training example
                    (weights times features plus bias)

    Returns:
           int: 1 if t is greater than or equal to 0, 0 otherwise."""
    return 1 if t >= 0 else 0

def prediction(X, W, b):
    """ Classify one training example with the current weights and bias.
    -----------
    Args:
         X (np array): A numpy array with shape (m,) that represents one training example of the features matrix

         W (np array): A numpy array with shape (m,1) holding the weights of the classifier (the product
                       with X must be indexable, so a flat (m,) vector does not work here)

         b (float): Represents the bias

    Returns:
           int: 0 or 1 depending on the step function. See stepFunction()"""
    # (m,) @ (m,1) gives a one-element array; add the bias, then unwrap it.
    score = np.matmul(X, W) + b
    return stepFunction(score[0])

def perceptronStep(X, y, W, b, learn_rate = 0.01):
    """ Run one pass of the perceptron update rule over all samples.

    Every misclassified sample moves the weights and bias by learn_rate in
    the direction that reduces its error. The update covers every feature, so
    it works for any number of columns in X, not just two.
    -----------
    Args:
         X (np array): A numpy array/matrix of features of the data with shape (n,m) such that m is the same
                       as the number of weights in W.

         y (np array): The labels (1 or 0) of the training samples, one per row of X

         W (np array): A float array of shape (m,1) with the current weights. It is updated IN PLACE.

         b (float): Represents the bias

         learn_rate (float): Learning rate used for each correction

    Returns:
           (np array): The same W object that was passed in, after the updates

           (float) : The updated bias of the classifier"""
    for sample, label in zip(X, y):
        error = label - prediction(sample, W, b)
        if error == 1:
            # Predicted 0 but the label is 1: push the score up.
            direction = 1
        elif error == -1:
            # Predicted 1 but the label is 0: push the score down.
            direction = -1
        else:
            continue
        # Shape the scaled sample like W so the in-place add touches every
        # weight (and raises if the feature count does not match W).
        step = (np.asarray(sample) * learn_rate).reshape(np.shape(W))
        W += direction * step
        b += direction * learn_rate
    return W, b

def trainPerceptronAlgorithm(X, y, learn_rate = 0.01, num_epochs = 25):
    """ Train a two-feature perceptron and record the boundary after each epoch.

    The weights start as random values in [0, 1) and the bias as a random
    value in [0, 1) plus the largest value of the first feature. Both draws
    come from NumPy's global RNG.
    -----------
    Args:

         X (np array): A numpy array/matrix of features of the data with shape (n,2)

         y (np array): The labels (1 or 0) of the training samples, one per row of X

         learn_rate (float): Learning rate used for each correction

         num_epochs (int): number of times the perceptron step is applied to the whole data set

    Returns:

        (list): boundary_lines, a list with one tuple (m,b) per epoch where m is the slope and b is the
                intercept of the boundary in slope-intercept form. Because the weights have shape (2,1),
                each m and b is a one-element numpy array."""
    # Only the maximum of the first feature is needed, to offset the initial bias.
    x_max = np.max(np.asarray(X)[:, 0])
    # Draw the weights before the bias so the RNG is consumed in a fixed order.
    W = np.random.rand(2, 1)
    b = np.random.rand(1)[0] + x_max
    # These are the solution lines that get plotted below.
    boundary_lines = []
    for _ in range(num_epochs):
        # In each epoch, we apply the perceptron step.
        W, b = perceptronStep(X, y, W, b, learn_rate)
        # W[0]*x + W[1]*y + b = 0  =>  y = (-W[0]/W[1])*x + (-b/W[1])
        boundary_lines.append((-W[0]/W[1], -b/W[1]))
    return boundary_lines
