In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred

In [3]:
iris = convert_to_dataframe(load_iris())

In [4]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
140,6.7,3.1,5.6,2.4,virginica
87,6.3,2.3,4.4,1.3,versicolor
86,6.7,3.1,4.7,1.5,versicolor
97,6.2,2.9,4.3,1.3,versicolor
143,6.8,3.2,5.9,2.3,virginica
101,5.8,2.7,5.1,1.9,virginica
93,5.0,2.3,3.3,1.0,versicolor
25,5.0,3.0,1.6,0.2,setosa
66,5.6,3.0,4.5,1.5,versicolor
130,7.4,2.8,6.1,1.9,virginica


In [5]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [7]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
143,6.8,3.2,5.9,2.3
123,6.3,2.7,4.9,1.8
55,5.7,2.8,4.5,1.3
146,6.3,2.5,5.0,1.9
18,5.7,3.8,1.7,0.3


In [8]:
clf = SVC(kernel='linear')

In [9]:
clf = clf.fit(x_train, y_train)

In [10]:
y_pred = clf.predict(x_test)

In [11]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0      69  versicolor  versicolor
1     147   virginica   virginica
2      65  versicolor  versicolor
3      63  versicolor  versicolor
4     106   virginica   virginica
5      19      setosa      setosa
6      59  versicolor  versicolor
7      83  versicolor   virginica
8      80  versicolor  versicolor
9     103   virginica   virginica
10    131   virginica   virginica
11     12      setosa      setosa
12    140   virginica   virginica
13     57  versicolor  versicolor
14     21      setosa      setosa
15     44      setosa      setosa
16     89  versicolor  versicolor
17      7      setosa      setosa
18      0      setosa      setosa
19    148   virginica   virginica
20    144   virginica   virginica
21    109   virginica   virginica
22     37      setosa      setosa
23     51  versicolor  versicolor
24     34      setosa      setosa
25    100   virginica   virginica
26     35      setosa      setosa
27     15      setosa      setosa
28     20     

In [12]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      0.90      0.95        10
   virginica       0.90      1.00      0.95         9

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30



In [14]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[11  0  0]
 [ 0  9  1]
 [ 0  0  9]]


In [15]:
accuracy_test = metrics.accuracy_score(y_test, y_pred) * 100
accuracy_train = metrics.accuracy_score(y_train, clf.predict(x_train)) * 100

print(f"Accuracy: {round(accuracy_test, 2)}% on Test Data")
print(f"Accuracy: {round(accuracy_train, 2)}% on Training Data")

Accuracy: 96.67% on Test Data
Accuracy: 100.0% on Training Data


In [16]:
clf.score(x_test, y_test)

0.9666666666666667