In [36]:
import numpy as np 
import pandas as pd 
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score ,classification_report , confusion_matrix

In [37]:
# Fetch the Iris dataset and keep the feature matrix / label vector as
# separate arrays for modelling.
iris = load_iris()
X, y = iris.data, iris.target

# Tabular view for inspection: one column per measurement, plus the class
# code from `iris.target` under 'species'.
df = pd.DataFrame(X, columns=iris.feature_names).assign(species=y)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [38]:
# Hold out 30% of the samples for evaluation. Passing the labels to `stratify`
# asks for class proportions to be preserved in both partitions, and the
# fixed seed makes the partition repeatable.
split_options = dict(test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [39]:
# Baseline: a random forest with only the seed set (all other hyperparameters
# left at their defaults), trained on the training split and scored on the
# held-out split.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)

print('accuracy score = ', accuracy_score(y_test, y_pred))

accuracy score =  0.8888888888888888


In [40]:
# Hyperparameter tuning
#
# Exhaustive search over 2 x 2 x 2 x 2 = 16 hyperparameter combinations, each
# evaluated with 5-fold cross-validation on the training split only; the test
# split is not used in this cell.
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [None, 5],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    # Optional extra axis, currently disabled. The parameter is spelled
    # `max_features`; `max_feature` is not a RandomForestClassifier parameter
    # and would be rejected as invalid if it were enabled.
    # 'max_features': ['sqrt'],
}

# Unfitted estimator used as the template for the search (same seed as the
# baseline so the comparison is like-for-like).
rf = RandomForestClassifier(random_state=42)

grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    # Explicit selection metric; this is what a classifier's default `score`
    # reports, so the chosen candidate is unchanged.
    scoring='accuracy',
    cv=5,
    n_jobs=-1,   # use all available cores
    verbose=2,
)
grid.fit(X_train, y_train)


Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [41]:
# Show the hyperparameter combination selected by the grid search.
tuned_params = grid.best_params_
print(tuned_params)

{'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}


In [42]:
# Evaluate the selected configuration on the held-out test split.
# `best_estimator_` is available because the search above was created with
# the default `refit` setting.
best_rf = grid.best_estimator_
y_pred_best = best_rf.predict(X_test)
print("Accuracy after tuning - ", accuracy_score(y_test, y_pred_best))

Sccuracy after tuning -  0.8888888888888888


In [44]:
# Per-class precision / recall / F1 of the tuned model on the test split,
# labelled with the species names instead of integer codes.
report_text = classification_report(
    y_test,
    y_pred_best,
    target_names=iris.target_names,
)
print(report_text)

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.78      0.93      0.85        15
   virginica       0.92      0.73      0.81        15

    accuracy                           0.89        45
   macro avg       0.90      0.89      0.89        45
weighted avg       0.90      0.89      0.89        45



In [45]:
# Confusion matrix of the tuned model on the test split
# (rows: true class, columns: predicted class).
conf_mat = confusion_matrix(y_test, y_pred_best)
print(conf_mat)

[[15  0  0]
 [ 0 14  1]
 [ 0  4 11]]
