In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from Convert import convert_to_dataframe, single_y_test_pred


In [2]:
iris = convert_to_dataframe(load_iris())

In [3]:
iris.sample(10)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,target
124,6.7,3.3,5.7,2.1,virginica
31,5.4,3.4,1.5,0.4,setosa
75,6.6,3.0,4.4,1.4,versicolor
17,5.1,3.5,1.4,0.3,setosa
84,5.4,3.0,4.5,1.5,versicolor
0,5.1,3.5,1.4,0.2,setosa
97,6.2,2.9,4.3,1.3,versicolor
114,5.8,2.8,5.1,2.4,virginica
35,5.0,3.2,1.2,0.2,setosa
25,5.0,3.0,1.6,0.2,setosa


In [4]:
X = iris.drop(['target'], axis=1)
Y = iris['target']
feature_names = iris.columns.values.tolist()[:-1]
class_names = Y.unique().tolist()
print(feature_names)
print(class_names)

['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
['setosa', 'versicolor', 'virginica']


In [5]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

In [6]:
x_train.head()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
126,6.2,2.8,4.8,1.8
70,5.9,3.2,4.8,1.8
75,6.6,3.0,4.4,1.4
146,6.3,2.5,5.0,1.9
89,5.5,2.5,4.0,1.3


In [7]:
clf = RandomForestClassifier()

In [8]:
clf = clf.fit(x_train, y_train)

In [9]:
y_pred = clf.predict(x_test)

In [10]:
print(single_y_test_pred(y_test, y_pred))

    index      target      y_pred
0     133   virginica  versicolor
1      56  versicolor  versicolor
2       7      setosa      setosa
3      63  versicolor  versicolor
4      12      setosa      setosa
5     118   virginica   virginica
6      43      setosa      setosa
7      90  versicolor  versicolor
8      62  versicolor  versicolor
9      74  versicolor  versicolor
10     39      setosa      setosa
11    145   virginica   virginica
12    108   virginica   virginica
13    109   virginica   virginica
14     37      setosa      setosa
15    111   virginica   virginica
16    117   virginica   virginica
17    107   virginica   virginica
18    134   virginica  versicolor
19    102   virginica   virginica
20     64  versicolor  versicolor
21    130   virginica   virginica
22     30      setosa      setosa
23     50  versicolor  versicolor
24     42      setosa      setosa
25    132   virginica   virginica
26    110   virginica   virginica
27    143   virginica   virginica
28      6     

In [11]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00         9
  versicolor       0.78      1.00      0.88         7
   virginica       1.00      0.86      0.92        14

    accuracy                           0.93        30
   macro avg       0.93      0.95      0.93        30
weighted avg       0.95      0.93      0.93        30



In [12]:
print("Confusion matrix:")
print(metrics.confusion_matrix(y_test, y_pred, labels=class_names))

Confusion matrix:
[[ 9  0  0]
 [ 0  7  0]
 [ 0  2 12]]


In [13]:
accuracy = metrics.accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {round(accuracy, 2)}%")

Accuracy: 93.33%


In [14]:
feature_imp = pd.Series(clf.feature_importances_, index=feature_names)
feature_imp

sepallength    0.089941
sepalwidth     0.033751
petallength    0.352671
petalwidth     0.523637
dtype: float64

In [15]:
clf.score(x_test, y_test)

0.9333333333333333