In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading the iris dataset with only two classes - Setosa and Versicolor (the first 100 rows) - which are linearly separable

In [2]:

# Iris data set from the UCI Machine Learning Repository (the file has no header row).
URL_ = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Keep only the first 100 rows. The UCI file is ordered by class with 50 rows per
# class, so this selects Iris-setosa and Iris-versicolor.
# .copy() makes `data` an independent frame, so the column assignment below
# does not raise SettingWithCopyWarning.
data = pd.read_csv(URL_, header=None).iloc[:100].copy()

# The 0/1 encoding below only makes sense if exactly two classes were kept.
assert data.iloc[:, -1].nunique() == 2, "expected exactly two classes in the first 100 rows"

# Binary target stored in column 4: 0 for Iris-setosa, 1 for the other class.
data[4] = np.where(data.iloc[:, -1] == 'Iris-setosa', 0, 1)

data.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Feature matrix: the first four columns; target vector: the last column.
# Both are converted straight to NumPy arrays.
X = data.iloc[:, :4].to_numpy()
Y = data.iloc[:, -1].to_numpy()

# Quick check of the resulting dimensions.
print(X.shape, Y.shape)

(100, 4) (100,)


## Creating a Perceptron model

In [4]:
from sklearn.linear_model import Perceptron

# Create a Perceptron with default settings and train it on all samples
# (fit returns the estimator itself, so the call can be chained).
pctr = Perceptron().fit(X, Y)

# Score on the same samples that were used for training.
train_accuracy = pctr.score(X, Y)
print("Accuracy Score:", train_accuracy)


Accuracy Score: 1.0


**The score is 1.0 because these two classes are perfectly linearly separable.**

## Checking on other data

In [5]:
from sklearn.datasets import make_classification

# Synthetic classification problem: 1000 samples, 10 features, all of them
# informative and none redundant. The fixed seed keeps the data reproducible.
# Note: this replaces the iris X and Y defined earlier.
X, Y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    random_state=1,
)

print(X.shape, Y.shape)

(1000, 10) (1000,)


In [6]:
# Train a fresh default Perceptron on the synthetic data
# (fit returns the estimator itself, so the call can be chained).
pctr = Perceptron().fit(X, Y)

# Score on the same samples that were used for training.
print("Accuracy Score:", pctr.score(X, Y))

Accuracy Score: 0.863


**Since this data is not perfectly linearly separable, the accuracy is lower. Let's use cross-validation to tune some hyperparameters.**

In [7]:
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold

# Estimator whose learning rate is being tuned.
model = Perceptron()

# Evaluation scheme: stratified 10-fold cross-validation, repeated 3 times, fixed seed.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# Candidate values for eta0, the learning rate.
grid = {'eta0': [0.0001, 0.001, 0.01, 0.1, 1.0]}

# Search every candidate in the grid, scoring each by accuracy.
gSearch = GridSearchCV(estimator=model, param_grid=grid, scoring='accuracy', cv=cv, n_jobs=-1)
results = gSearch.fit(X, Y)

# Best configuration found.
print('Mean Accuracy: ', round(results.best_score_, 3))
print('Config: ', results.best_params_)

# Mean cross-validated score for every candidate.
means = results.cv_results_['mean_test_score']
params = results.cv_results_['params']
for mean, param in zip(means, params):
    print(f"{mean:.3f} with: {param!r}")

Mean Accuracy:  0.857
Config:  {'eta0': 0.0001}
0.857 with: {'eta0': 0.0001}
0.857 with: {'eta0': 0.001}
0.853 with: {'eta0': 0.01}
0.847 with: {'eta0': 0.1}
0.847 with: {'eta0': 1.0}
