In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred


In [2]:
iris = convert_to_dataframe(load_iris())

In [3]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
117,7.7,3.8,6.7,2.2,virginica
52,6.9,3.1,4.9,1.5,versicolor
51,6.4,3.2,4.5,1.5,versicolor
133,6.3,2.8,5.1,1.5,virginica
5,5.4,3.9,1.7,0.4,setosa
123,6.3,2.7,4.9,1.8,virginica
113,5.7,2.5,5.0,2.0,virginica
55,5.7,2.8,4.5,1.3,versicolor
49,5.0,3.3,1.4,0.2,setosa
142,5.8,2.7,5.1,1.9,virginica


In [4]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [6]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
23,5.1,3.3,1.7,0.5
80,5.5,2.4,3.8,1.1
29,4.7,3.2,1.6,0.2
144,6.7,3.3,5.7,2.5
95,5.7,3.0,4.2,1.2


In [7]:
clf = RandomForestClassifier(n_estimators=5)

In [8]:
clf = clf.fit(x_train, y_train)

In [9]:
y_pred = clf.predict(x_test)

In [10]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0     133   virginica  versicolor
1      48      setosa      setosa
2      91  versicolor  versicolor
3      40      setosa      setosa
4      37      setosa      setosa
5      71  versicolor  versicolor
6     127   virginica   virginica
7      84  versicolor  versicolor
8     130   virginica   virginica
9     146   virginica   virginica
10     99  versicolor  versicolor
11     46      setosa      setosa
12    143   virginica   virginica
13     45      setosa      setosa
14     14      setosa      setosa
15     26      setosa      setosa
16     30      setosa      setosa
17      4      setosa      setosa
18     49      setosa      setosa
19    115   virginica   virginica
20      3      setosa      setosa
21    120   virginica   virginica
22     68  versicolor  versicolor
23    129   virginica  versicolor
24     22      setosa      setosa
25     53  versicolor  versicolor
26     54  versicolor  versicolor
27     69  versicolor  versicolor
28     36     

In [11]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        13
  versicolor       0.82      1.00      0.90         9
   virginica       1.00      0.75      0.86         8

    accuracy                           0.93        30
   macro avg       0.94      0.92      0.92        30
weighted avg       0.95      0.93      0.93        30



In [12]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[13  0  0]
 [ 0  9  0]
 [ 0  2  6]]


In [13]:
accuracy = metrics.accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {round(accuracy, 2)}%")

Accuracy: 93.33%


In [14]:
feature_imp = pd.Series(clf.feature_importances_, index=feature_names)
feature_imp

sepallength    0.090013
sepalwidth     0.027394
petallength    0.581705
petalwidth     0.300887
dtype: float64

In [15]:
clf.score(x_test, y_test)

0.9333333333333333

In [16]:
clf.estimators_

[DecisionTreeClassifier(max_features='sqrt', random_state=1307978660),
 DecisionTreeClassifier(max_features='sqrt', random_state=320430758),
 DecisionTreeClassifier(max_features='sqrt', random_state=1124265001),
 DecisionTreeClassifier(max_features='sqrt', random_state=1740210363),
 DecisionTreeClassifier(max_features='sqrt', random_state=99717368)]