In [1]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the bundled Iris dataset (feature matrix, class labels and metadata).
iris_data = load_iris()

# Build the working frame in one expression: the measurement columns first,
# then the integer class label appended as `target`.
data = pd.DataFrame(
    data=iris_data.data,
    columns=iris_data.feature_names,
).assign(target=iris_data.target)

In [3]:
# Spot-check three rows by position (0, 90, 149); per the output below they
# carry one example of each of the three class labels.
data.iloc[[0, 90, 149]]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
90,5.5,2.6,4.4,1.2,1
149,5.9,3.0,5.1,1.8,2


In [4]:
# (number of rows, number of columns) of the assembled frame, label included.
data.shape

(150, 5)

In [5]:
# Count the samples per class label to check how balanced the classes are.
data['target'].value_counts()

target
0    50
1    50
2    50
Name: count, dtype: int64

### The Perceptron can only converge on a linearly separable dataset, so we have to turn the Iris dataset into a linearly separable problem.
The Iris dataset contains 3 classes:

1. Iris Setosa (class 0)

2. Iris Versicolor (class 1)

3. Iris Virginica (class 2) 

#### Class 0 (Setosa) is linearly separable from classes 1 and 2, so we will label Setosa as 1 and the others as 0. Our main objective is to tell whether a given input flower is Setosa or not.

In [6]:
# Binary label: 1 for Setosa (original class 0), 0 for Versicolor/Virginica.
# The label is derived from the untouched `iris_data.target` instead of
# rewriting `data['target']` in place through a temporary sentinel value.
# The in-place swap flipped the labels every time the cell was re-run; this
# form always yields the same result, whatever `data['target']` held before.
data['target'] = (iris_data.target == 0).astype(iris_data.target.dtype)

In [7]:
# Re-display the same three rows (positions 0, 90, 149) to confirm the
# relabelling of the `target` column.
data.iloc[[0, 90, 149]]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,1
90,5.5,2.6,4.4,1.2,0
149,5.9,3.0,5.1,1.8,0


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Feature matrix: every column except the label. `columns=` already names
# the axis, so no separate `axis` argument is needed.
x = data.drop(columns=['target'])
# Binary label column.
y = data['target']

# Hold out 10% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(135, 4) (15, 4) (135,) (15,)


In [10]:
from sklearn.linear_model import Perceptron

In [11]:
# Perceptron capped at 1000 passes over the training data, with a learning
# rate (eta0) of 0.1 and a fixed random_state so the run is reproducible.
clf = Perceptron(max_iter=1000, eta0=0.1, random_state=42)

# Train on the training split; as the last expression in the cell, the
# fitted estimator returned by fit() is also what gets displayed.
clf.fit(x_train, y_train)

In [12]:
# Predict the binary label for each row of the held-out test features.
y_pred = clf.predict(x_test)

In [13]:
# Side-by-side view of true vs. predicted labels. Starting from `y_test`
# keeps its original row index, so each prediction can be traced back to
# its row in `data`.
comparision = y_test.to_frame(name='Actual').assign(Predicted=y_pred)

comparision

Unnamed: 0,Actual,Predicted
73,0,0
18,1,1
118,0,0
78,0,0
76,0,0
31,1,1
64,0,0
141,0,0
68,0,0
82,0,0


In [14]:
# Learned weights of the fitted perceptron, one per input feature column.
clf.coef_

array([[ 0.14,  0.36, -0.8 , -0.44]])

In [15]:
# Learned bias (intercept) term of the fitted perceptron.
clf.intercept_

array([0.1])

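### We can see here that petal length and petal width are the most influential features for the perceptron's output, as shown by the magnitude of their weights (-0.8 and -0.44); the negative sign means that larger petals push the prediction towards "not Setosa". Sepal length and sepal width (weights 0.14 and 0.36) are the least influential ones. Note that the features were not standardised, so comparing weight magnitudes is only a rough guide to importance.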