In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred


In [2]:
iris = convert_to_dataframe(load_iris())

In [3]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
125,7.2,3.2,6.0,1.8,virginica
64,5.6,2.9,3.6,1.3,versicolor
89,5.5,2.5,4.0,1.3,versicolor
72,6.3,2.5,4.9,1.5,versicolor
71,6.1,2.8,4.0,1.3,versicolor
40,5.0,3.5,1.3,0.3,setosa
147,6.5,3.0,5.2,2.0,virginica
59,5.2,2.7,3.9,1.4,versicolor
24,4.8,3.4,1.9,0.2,setosa
77,6.7,3.0,5.0,1.7,versicolor


In [4]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [6]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
107,7.3,2.9,6.3,1.8
65,6.7,3.1,4.4,1.4
23,5.1,3.3,1.7,0.5
60,5.0,2.0,3.5,1.0
93,5.0,2.3,3.3,1.0


In [7]:
clf = ExtraTreesClassifier(n_estimators=5)

In [8]:
clf = clf.fit(x_train, y_train)

In [9]:
y_pred = clf.predict(x_test)

In [10]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0     108   virginica   virginica
1      19      setosa      setosa
2      85  versicolor  versicolor
3     115   virginica   virginica
4      64  versicolor  versicolor
5      92  versicolor  versicolor
6      38      setosa      setosa
7      42      setosa      setosa
8      24      setosa      setosa
9      18      setosa      setosa
10     51  versicolor  versicolor
11     45      setosa      setosa
12     54  versicolor  versicolor
13    136   virginica   virginica
14    149   virginica   virginica
15    119   virginica   virginica
16     33      setosa      setosa
17    135   virginica   virginica
18     43      setosa      setosa
19    133   virginica  versicolor
20     47      setosa      setosa
21    102   virginica   virginica
22     68  versicolor  versicolor
23     46      setosa      setosa
24      8      setosa      setosa
25     59  versicolor  versicolor
26      9      setosa      setosa
27    139   virginica   virginica
28     76  ver

In [11]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        12
  versicolor       0.89      0.89      0.89         9
   virginica       0.89      0.89      0.89         9

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30



In [12]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[12  0  0]
 [ 0  8  1]
 [ 0  1  8]]


In [13]:
accuracy = metrics.accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {round(accuracy, 2)}%")

Accuracy: 93.33%


In [14]:
feature_imp = pd.Series(clf.feature_importances_, index=feature_names)
feature_imp

sepallength    0.137070
sepalwidth     0.054723
petallength    0.397855
petalwidth     0.410353
dtype: float64

In [15]:
clf.score(x_test, y_test)

0.9333333333333333

In [16]:
clf.estimators_

[ExtraTreeClassifier(random_state=236454104),
 ExtraTreeClassifier(random_state=1431782033),
 ExtraTreeClassifier(random_state=1399935665),
 ExtraTreeClassifier(random_state=1132386477),
 ExtraTreeClassifier(random_state=1383393783)]