In [2]:
import pandas as pd 
import numpy as np 

df = pd.read_csv('iris.csv', header = None)
df[4] = df[4].replace({"Iris-setosa": 0, 'Iris-versicolor': 1, 'Iris-virginica':2})
columns = {0:'sepal length', 1:'sepal width', 2:'petal length', 3:'petal width', 4:'species'}
df = df.rename(columns=columns)
df

Unnamed: 0,sepal length,sepal width,petal length,petal width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


# Key
0 = 'Iris-setosa'
1 = 'Iris-versicolor'
2 = 'Iris-virginica'

In [3]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

XX = df[['sepal length', 'sepal width']] # features
YY = df['species'] # output

XX_train, XX_test, YY_train, YY_test = train_test_split(XX, YY, test_size=0.20, random_state=662)

In [4]:
from sklearn.linear_model import SGDClassifier

# SGD Classifier with Hinge Function
SGDclf = SGDClassifier(loss='hinge')
SGDclf.fit(XX_train,YY_train)
test_predictions = SGDclf.predict(XX_test)
train_predictions = SGDclf.predict(XX_train)
test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"SGD Classifier with Hinge Function Test Accuracy: {test_accuracy}")
print(f"SGD Classifier with Hinge Function Train Accuracy: {train_accuracy}")

SGD Classifier with Hinge Function Test Accuracy: 60.0
SGD Classifier with Hinge Function Train Accuracy: 69.16666666666667


In [30]:
# SGD Classifier with Modified Huber Function
SGDclf = SGDClassifier(loss='modified_huber')
SGDclf.fit(XX_train,YY_train)
test_predictions = SGDclf.predict(XX_test)
train_predictions = SGDclf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"SGD Classifier with Modified Huber Function Test Accuracy: {test_accuracy}")
print(f"SGD Classifier with Modified Huber Function Train Accuracy: {train_accuracy}")

SGD Classifier with Modified Huber Function Test Accuracy: 63.33333333333333
SGD Classifier with Modified Huber Function Train Accuracy: 70.0


In [29]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression
logistic_clf = LogisticRegression().fit(XX_train, YY_train)

test_predictions = logistic_clf.predict(XX_test)
train_predictions = logistic_clf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"Logistic Regression Test Accuracy: {test_accuracy}")
print(f"Logistic Regression Train Accuracy: {train_accuracy}")

Logistic Regression Test Accuracy: 80.0
Logistic Regression Train Accuracy: 82.5


In [28]:
# Support Vector Machine with Linear Kernel 
from sklearn.svm import SVC
svc_clf = SVC(kernel = 'linear')
svc_clf.fit(XX_train, YY_train)

test_predictions = svc_clf.predict(XX_test)
train_predictions = svc_clf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"Support Vector Machine with Linear Kernel Test Accuracy: {test_accuracy}")
print(f"Support Vector Machine with Linear Kernel Train Accuracy: {train_accuracy}")

Support Vector Machine with Linear Kernel Test Accuracy: 83.33333333333334
Support Vector Machine with Linear Kernel Train Accuracy: 83.33333333333334


In [32]:
# Support Vector Machine with RBF Kernel 
from sklearn.svm import SVC
svc_clf = SVC(kernel = 'rbf')
svc_clf.fit(XX_train, YY_train)

test_predictions = svc_clf.predict(XX_test)
train_predictions = svc_clf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"Support Vector Machine with RBF Kernel Test Accuracy: {test_accuracy}")
print(f"Support Vector Machine with RBF Kernel Train Accuracy: {train_accuracy}")

Support Vector Machine with RBF Kernel Test Accuracy: 83.33333333333334
Support Vector Machine with RBF Kernel Train Accuracy: 83.33333333333334


In [41]:
# Decision Tree with Gini Impurity as Impurity Measure
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier(criterion='gini')
tree_clf.fit(XX_train, YY_train)

test_accuracy = tree_clf.predict(XX_test)
train_accuracy = tree_clf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"Decision Tree with Gini Impurity as Impurity Measure Test Accuracy: {test_accuracy}")
print(f"Decision Tree with Gini Impurity as Impurity Measure Train Accuracy: {train_accuracy}")

Decision Tree with Gini Impurity as Impurity Measure Test Accuracy: 83.33333333333334
Decision Tree with Gini Impurity as Impurity Measure Train Accuracy: 83.33333333333334


In [49]:
# Random Forest 
from sklearn.ensemble import RandomForestClassifier

randomForest_clf = RandomForestClassifier()
randomForest_clf.fit(XX_train, YY_train)

test_predictions = randomForest_clf.predict(XX_test)
train_predictions = randomForest_clf.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"Random Forest Test Accuracy: {test_accuracy}")
print(f"Random Forest Train Accuracy: {train_accuracy}")

Random Forest Test Accuracy: 76.66666666666667
Random Forest Train Accuracy: 91.66666666666666


In [51]:
# KNN with 10 as K value 
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(XX_train, YY_train)

test_predictions = knn.predict(XX_test)
train_predictions = knn.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"KNN with 10 as K value Test Accuracy: {test_accuracy}")
print(f"KNN with 10 as K value Train Accuracy: {train_accuracy}")

KNN with 10 as K value Test Accuracy: 76.66666666666667
KNN with 10 as K value Train Accuracy: 82.5


In [67]:
# KNN with 3 as K value 
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(XX_train, YY_train)

test_predictions = knn.predict(XX_test)
train_predictions = knn.predict(XX_train)

test_accuracy = accuracy_score(YY_test, test_predictions)*100
train_accuracy = accuracy_score(YY_train, train_predictions)*100

print(f"KNN with 35 as K value Test Accuracy: {test_accuracy}")
print(f"KNN with 35 as K value Train Accuracy: {train_accuracy}")

KNN with 35 as K value Test Accuracy: 73.33333333333333
KNN with 35 as K value Train Accuracy: 86.66666666666667


# Resources
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
https://www.tutorialspoint.com/scikit_learn/scikit_learn_support_vector_machines.htm
https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html