In [14]:
import pandas as pd

df = pd.read_csv('bank.csv', header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
1,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
2,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
3,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
4,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no


In [33]:
import numpy as np


class Perceptron(object):
    """Perceptron classifier.

    Parameters
    ------------
    eta : float
      Learning rate (between 0.0 and 1.0)
    n_iter : int
      Passes over the training dataset.
    random_state : int
      Random number generator seed for random weight
      initialization.

    Attributes
    -----------
    w_ : 1d-array
      Weights after fitting.
    errors_ : list
      Number of misclassifications (updates) in each epoch.

    """
    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit training data.

        Parameters
        ----------
        X : {array-like}, shape = [n_samples, n_features]
          Training vectors, where n_samples is the number of samples and
          n_features is the number of features.
        y : array-like, shape = [n_samples]
          Target values.

        Returns
        -------
        self : object

        """
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        """Calculate net input"""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return class label after unit step"""
        return np.where(self.net_input(X) >= 0.0, 1, 0)

In [42]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# select setosa and versicolor
y = df.iloc[1:100,16].values
y = np.where(y == 'no', 0, 1)

# extract sepal length and petal length
X = df.iloc[1:100, [0,4,5,6,7]].values
X[:,0] =  map(int,X[:,0])
X[:,1] = np.where(X[:,1] == 'no',0,1)
X[:,2] =  map(int,X[:,2])
X[:,3] = np.where(X[:,3] == 'no',0,1)
X[:,4] = np.where(X[:,4] == 'no',0,1)


In [43]:
ppn = Perceptron(eta=0.1, n_iter=10)

ppn.fit(X, y)

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')

# plt.savefig('images/02_07.png', dpi=300)
plt.show()

TypeError: ufunc 'add' output (typecode 'O') could not be coerced to provided output parameter (typecode 'd') according to the casting rule ''same_kind''

In [44]:
X

array([[30, 0, 1787, 0, 0],
       [33, 0, 4789, 1, 1],
       [35, 0, 1350, 1, 0],
       [30, 0, 1476, 1, 1],
       [59, 0, 0, 1, 0],
       [35, 0, 747, 0, 0],
       [36, 0, 307, 1, 0],
       [39, 0, 147, 1, 0],
       [41, 0, 221, 1, 0],
       [43, 0, -88, 1, 1],
       [39, 0, 9374, 1, 0],
       [43, 0, 264, 1, 0],
       [36, 0, 1109, 0, 0],
       [20, 0, 502, 0, 0],
       [31, 0, 360, 1, 1],
       [40, 0, 194, 0, 1],
       [56, 0, 4073, 0, 0],
       [37, 0, 2317, 1, 0],
       [25, 0, -221, 1, 0],
       [31, 0, 132, 0, 0],
       [38, 0, 0, 1, 0],
       [42, 0, 16, 0, 0],
       [44, 0, 106, 0, 0],
       [44, 0, 93, 0, 0],
       [26, 0, 543, 0, 0],
       [41, 0, 5883, 0, 0],
       [55, 0, 627, 1, 0],
       [67, 0, 696, 0, 0],
       [56, 0, 784, 0, 1],
       [53, 0, 105, 0, 1],
       [68, 0, 4189, 0, 0],
       [31, 0, 171, 0, 0],
       [59, 0, 42, 0, 0],
       [32, 0, 2536, 1, 0],
       [49, 0, 1235, 0, 0],
       [42, 0, 1811, 1, 0],
       [78, 0, 229, 0