# Perceptron class in Python

In [3]:
import numpy as np

class Perceptron(object):
    """Perceptron classifier.

    The bias unit is stored as the first entry of the weight vector
    (``w_[0]``); the remaining entries (``w_[1:]``) are the per-feature
    weights.

    Parameters
    ----------
    eta : float
        Learning rate (between 0.0 and 1.0)
    n_iter : int
        Passes over the training dataset.

    Attributes
    ----------
    w_ : 1d-array
        Weights after fitting, shape = [1 + n_features].
        ``w_[0]`` is the bias unit.
    errors_ : list
        Number of misclassifications in every epoch.

    """
    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the
            number of samples and n_features is the
            number of features.
        y : array-like, shape = [n_samples]
            Target values. ``predict`` only ever returns 1 or -1, so
            the labels are expected to use the same encoding.

        Returns
        -------
        self : object

        """
        # Accept any array-like (e.g. nested lists), as documented above.
        X = np.asarray(X)

        # One weight per feature plus one extra slot for the bias unit.
        self.w_ = np.zeros(1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # update is zero when the sample is classified correctly,
                # so the weights only move on misclassifications.
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input or (w^t)x

        Adding the bias ``w_[0]`` to the dot product with ``w_[1:]`` is
        equivalent to prepending a constant 1 to every sample and taking
        the dot product with the full weight vector.
        """
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step

        Returns 1 where the net input is >= 0.0 and -1 otherwise.
        """
        return np.where(self.net_input(X) >= 0.0, 1, -1)


## Explanation for net_input

`self.w_[0]` is the "threshold", also called the "bias unit." I simply included the bias unit in the weight vector, which makes the math easier; on the other hand, it may make the code more confusing, as you mentioned.

Let's say we have a 3x2-dimensional dataset X (3 training samples with 2 features). Let's also assume we have a weight of 2 for feature 1 and a weight of 3 for feature 2, and we set the bias unit to 4. Normally, you would set this up as follows:

In [23]:
# Toy setup: 3 samples x 2 features; the bias unit sits in front of the weights.
bias = 4
X = np.array([
    [2., 3.],
    [4., 5.],
    [6., 7.],
])
w = np.array([bias, 2., 3.])
for label, arr in (("X =\n", X), ("w =\n", w)):
    print(label, arr)

X =
 [[ 2.  3.]
 [ 4.  5.]
 [ 6.  7.]]
w =
 [ 4.  2.  3.]


In order to match the mathematical notation, we would have to prepend a column of 1s to X so that the dot product with w also picks up the bias unit:

In [25]:
# Build a column of ones (one per sample) and put it in front of X,
# so the full weight vector w, bias included, can be applied in one dot product.
ones = np.ones((len(X), 1))
print("vector of ones = \n", ones)

X_with1 = np.concatenate((ones, X), axis=1)
print("X with 1 = \n", X_with1)

dotProd = X_with1.dot(w)
print("value from dot product = \n", dotProd)


vector of ones = 
 [[ 1.]
 [ 1.]
 [ 1.]]
X with 1 = 
 [[ 1.  2.  3.]
 [ 1.  4.  5.]
 [ 1.  6.  7.]]
value from dot product = 
 [ 17.  27.  37.]


However, I thought that adding a vector of 1s to the training array each time we want to make a prediction would be fairly inefficient. So, instead, we can just "add" the bias unit (`w[0]`) to the dot product (it's equivalent, since `1.0 * w[0] = w[0]`):


In [26]:
# Same result without the ones column: dot the features with w[1:],
# then add the bias w[0] to every entry.
weighted_sum = X.dot(w[1:])
efficient = weighted_sum + w[0]
print(efficient)

[ 17.  27.  37.]


#### matrix operation