In [2]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred


In [3]:
iris = convert_to_dataframe(load_iris())

In [4]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
127,6.1,3.0,4.9,1.8,virginica
130,7.4,2.8,6.1,1.9,virginica
106,4.9,2.5,4.5,1.7,virginica
107,7.3,2.9,6.3,1.8,virginica
132,6.4,2.8,5.6,2.2,virginica
64,5.6,2.9,3.6,1.3,versicolor
79,5.7,2.6,3.5,1.0,versicolor
27,5.2,3.5,1.5,0.2,setosa
84,5.4,3.0,4.5,1.5,versicolor
0,5.1,3.5,1.4,0.2,setosa


In [5]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [7]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
113,5.7,2.5,5.0,2.0
91,6.1,3.0,4.6,1.4
127,6.1,3.0,4.9,1.8
21,5.1,3.7,1.5,0.4
44,5.1,3.8,1.9,0.4


In [8]:
clf = ExtraTreesClassifier(n_estimators=5)

In [9]:
clf = clf.fit(x_train, y_train)

In [10]:
y_pred = clf.predict(x_test)

In [11]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0     128   virginica   virginica
1     110   virginica   virginica
2     144   virginica   virginica
3     124   virginica   virginica
4      38      setosa      setosa
5      75  versicolor  versicolor
6      66  versicolor  versicolor
7      99  versicolor  versicolor
8     146   virginica   virginica
9      47      setosa      setosa
10    126   virginica   virginica
11     34      setosa      setosa
12     20      setosa      setosa
13     41      setosa      setosa
14     40      setosa      setosa
15      1      setosa      setosa
16    118   virginica   virginica
17     39      setosa      setosa
18      9      setosa      setosa
19     31      setosa      setosa
20    134   virginica   virginica
21     51  versicolor  versicolor
22    148   virginica   virginica
23     98  versicolor  versicolor
24    116   virginica   virginica
25     85  versicolor  versicolor
26     73  versicolor  versicolor
27     13      setosa      setosa
28    111   vi

In [12]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      1.00      1.00         8
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [13]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[11  0  0]
 [ 0  8  0]
 [ 0  0 11]]


In [14]:
accuracy_test = metrics.accuracy_score(y_test, y_pred) * 100
accuracy_train = metrics.accuracy_score(y_train, clf.predict(x_train)) * 100

print(f"Accuracy: {round(accuracy_test, 2)}% on Test Data")
print(f"Accuracy: {round(accuracy_train, 2)}% on Training Data")

Accuracy: 100.0% on Test Data
Accuracy: 100.0% on Training Data


In [15]:
feature_imp = pd.Series(clf.feature_importances_, index=feature_names)
feature_imp

sepallength    0.126474
sepalwidth     0.082173
petallength    0.320867
petalwidth     0.470486
dtype: float64

In [16]:
clf.score(x_test, y_test)

1.0

In [17]:
clf.estimators_

[ExtraTreeClassifier(random_state=177367023),
 ExtraTreeClassifier(random_state=962988424),
 ExtraTreeClassifier(random_state=1031988102),
 ExtraTreeClassifier(random_state=713523525),
 ExtraTreeClassifier(random_state=1246498134)]