In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred

In [2]:
iris = convert_to_dataframe(load_iris())

In [3]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
61,5.9,3.0,4.2,1.5,versicolor
112,6.8,3.0,5.5,2.1,virginica
2,4.7,3.2,1.3,0.2,setosa
85,6.0,3.4,4.5,1.6,versicolor
134,6.1,2.6,5.6,1.4,virginica
56,6.3,3.3,4.7,1.6,versicolor
33,5.5,4.2,1.4,0.2,setosa
149,5.9,3.0,5.1,1.8,virginica
26,5.0,3.4,1.6,0.4,setosa
142,5.8,2.7,5.1,1.9,virginica


In [4]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [6]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
44,5.1,3.8,1.9,0.4
109,7.2,3.6,6.1,2.5
148,6.2,3.4,5.4,2.3
76,6.8,2.8,4.8,1.4
19,5.1,3.8,1.5,0.3


In [7]:
clf = AdaBoostClassifier()

In [8]:
clf = clf.fit(x_train, y_train)

In [9]:
y_pred = clf.predict(x_test)

In [10]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0       4      setosa      setosa
1      41      setosa      setosa
2     146   virginica   virginica
3      96  versicolor  versicolor
4     139   virginica   virginica
5      80  versicolor  versicolor
6      23      setosa      setosa
7      58  versicolor  versicolor
8     134   virginica  versicolor
9      29      setosa      setosa
10    101   virginica   virginica
11    120   virginica   virginica
12     69  versicolor  versicolor
13     89  versicolor  versicolor
14    149   virginica   virginica
15      3      setosa      setosa
16    144   virginica   virginica
17     34      setosa      setosa
18     56  versicolor  versicolor
19     63  versicolor  versicolor
20     18      setosa      setosa
21      8      setosa      setosa
22     37      setosa      setosa
23    145   virginica   virginica
24     57  versicolor  versicolor
25     22      setosa      setosa
26     43      setosa      setosa
27     60  versicolor  versicolor
28     25     

In [11]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        12
  versicolor       0.91      1.00      0.95        10
   virginica       1.00      0.88      0.93         8

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30



In [12]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[12  0  0]
 [ 0 10  0]
 [ 0  1  7]]


In [13]:
accuracy_test = metrics.accuracy_score(y_test, y_pred) * 100
accuracy_train = metrics.accuracy_score(y_train, clf.predict(x_train)) * 100

print(f"Accuracy: {round(accuracy_test, 2)}% on Test Data")
print(f"Accuracy: {round(accuracy_train, 2)}% on Training Data")

Accuracy: 96.67% on Test Data
Accuracy: 95.83% on Training Data


In [14]:
clf.score(x_test, y_test)

0.9666666666666667

In [15]:
feature_imp = pd.Series(clf.feature_importances_, index=feature_names)
feature_imp

sepallength    0.02
sepalwidth     0.00
petallength    0.52
petalwidth     0.46
dtype: float64