In [18]:
# conventional way to import pandas
import pandas as pd
import numpy as np

# read CSV file from the 'data' subdirectory using a relative path
cols=['sepal length','sepal width','petal length','petal width','class']
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None,names=cols)

In [19]:
df.columns

Index(['sepal length', 'sepal width', 'petal length', 'petal width', 'class'], dtype='object')

In [20]:
features=['sepal length', 'sepal width', 'petal length', 'petal width']

X=df[features]
Y=df['class']

# Splitting X and Y into training and testing sets

In [21]:

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, random_state=1,test_size=0.3)

Apply Scaling to train set and test set both Xs

In [22]:
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
sc.fit(X_train)
X_train_std=sc.transform(X_train)
X_test_std=sc.transform(X_test)

# 1. Perceptron Model

In [27]:
from sklearn.linear_model import Perceptron
ppn=Perceptron(n_iter=40,eta0=0.1,random_state=0)
ppn.fit(X_train_std,y_train)

Perceptron(alpha=0.0001, class_weight=None, eta0=0.1, fit_intercept=True,
      max_iter=None, n_iter=40, n_jobs=1, penalty=None, random_state=0,
      shuffle=True, tol=None, verbose=0, warm_start=False)

# Make Classification on Test Set

In [28]:
y_pred=ppn.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test!=y_pred).sum())

Misclassified Samples: 2


In [30]:
from sklearn.metrics import accuracy_score
print('Accuracy of Perceptron Model: %.2f' % accuracy_score(y_test,y_pred))

Accuracy of Perceptron Model: 0.96


In [35]:
mods=[]
vals=[]
mods.append('Perceptron Model')
vals.append(accuracy_score(y_test,y_pred).round(2))

# 2. Logistic Regression

In [38]:
from sklearn.linear_model import LogisticRegression

lr=LogisticRegression(C=1000.0,random_state=0)
lr.fit(X_train_std,y_train)

y_pred=lr.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test!=y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy of Logistic Regression Model: %.2f' % accuracy_score(y_test,y_pred))

Misclassified Samples: 2
Accuracy of Logistic Regression Model: 0.96


In [39]:
mods.append('Logistic Regression Model')
vals.append(accuracy_score(y_test,y_pred).round(2))

# 3. Support Vector Machine

In [43]:
from sklearn.svm import SVC

svm=SVC(kernel='linear',random_state=0,C=1.0)
svm.fit(X_train_std,y_train)

y_pred=svm.predict(X_test_std)
print('Misclassified Samples: %d' % (y_test!=y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy of Support Vector Machine Model: %.2f' % accuracy_score(y_test,y_pred))


Misclassified Samples: 1
Accuracy of Support Vector Machine Model: 0.98


In [44]:
mods.append('Support Vector Machine')
vals.append(accuracy_score(y_test,y_pred).round(2))

In [45]:
# For visualization
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Spectral6, brewer
from bokeh.transform import factor_cmap

ser_df=pd.DataFrame({'Model': mods, 'Accuracy_Value': vals})

In [46]:
source = ColumnDataSource(ser_df)

p = figure(x_range=mods, plot_width=700, plot_height=500)
color_map = factor_cmap(field_name='Model', palette=Spectral6, factors=mods)
p.vbar(x='Model', top='Accuracy_Value', source=source, width=0.70, color=color_map)

p.title.text ='Comparison of Models'
p.xaxis.axis_label = 'Model Types'
p.yaxis.axis_label = "Accuracy value of different models"

show(p)