In [1]:
## Install Dependencies
!pip install numpy scikit-learn



In [11]:
## Import Dependencies
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

## Dataset preparation

In [3]:
## load iris data
iris_data = load_iris()

## class labels (label) and feature matrix (data) used for training
label = iris_data.target
data = iris_data.data

## Hold out 20% of the samples for validation.
## random_state pins the shuffle so re-running the notebook reproduces the same split;
## stratify=label keeps the class proportions the same in the train and validation parts.
X_train, X_val, y_train, y_val = train_test_split(
    data,
    label,
    test_size=0.2,
    random_state=42,
    stratify=label,
)

## Setting parameters for GridSearchCV

In [5]:
# Search space handed to GridSearchCV for the gradient boosting model:
#   max_depth    -> 5, 6, 7
#   n_estimators -> 130, 140, 150, 160
param_grid = dict(
    max_depth=np.arange(5, 8),
    n_estimators=np.arange(130, 170, 10),
)

In [6]:
## load model instance
## for practice, use the Gradient Boosting classifier model
## random_state fixes the estimator's random seed so results are repeatable across runs
gbc = GradientBoostingClassifier(random_state=42)

## GridSearchCV

In [7]:
## Run the grid search: every combination in param_grid is scored with
## 3-fold cross-validation on the training split. With refit=True the best
## combination is fitted again on all of X_train, which makes
## best_estimator_ available afterwards.
grid_tree = GridSearchCV(
    estimator=gbc,
    param_grid=param_grid,
    cv=3,
    refit=True,
).fit(X_train, y_train)

## report the winning combination and its mean cross-validated score
print('best parameters : ', grid_tree.best_params_)
print('best score : ', grid_tree.best_score_)

best parameters :  {'max_depth': 5, 'n_estimators': 130}
best score :  0.9416666666666668


## Inference

In [9]:
## take the refitted best model found by the grid search
em = grid_tree.best_estimator_
## predict class labels for the held-out validation samples
pred = em.predict(X_val)

## fraction of validation samples whose predicted label matches the true one
print(
    "accuracy performance of best estimator : ",
    accuracy_score(y_true=y_val, y_pred=pred),
)

accuracy performance of best estimator :  0.9


## Additional modelling

In [12]:
## Grid search for a random forest classifier on the same training split.
## The search space gets its own name so that the gradient boosting param_grid
## defined earlier is not overwritten (re-running the earlier search cell would
## otherwise silently pick up this grid).
rfc_param_grid = {
    'n_estimators': np.arange(10, 201, 20),  # 10, 30, ..., 190
    'max_depth': np.arange(3, 15, 2),        # 3, 5, ..., 13
}
## random_state fixes the forest's random seed so results are repeatable across runs
rfc = RandomForestClassifier(random_state=42)
## n_jobs=-1 uses all available processors instead of a hard-coded worker count.
## NOTE(review): cv is left at GridSearchCV's default here, while the gradient
## boosting search uses cv=3 -- align them if the two scores are to be compared.
rfc_grid = GridSearchCV(rfc, param_grid=rfc_param_grid, n_jobs=-1)
rfc_grid.fit(X_train, y_train)

## check out best parameters and scores
print('best parameters of random forest classifier : ', rfc_grid.best_params_)
print('best score of random forest classifier : ', rfc_grid.best_score_)

best parameters of random forest classifier :  {'max_depth': 3, 'n_estimators': 90}
best score of random forest classifier :  0.9666666666666668


In [13]:
## take the refitted best random forest found by the grid search
em = rfc_grid.best_estimator_
## predict class labels for the held-out validation samples
pred = em.predict(X_val)

## fraction of validation samples whose predicted label matches the true one
print(
    "accuracy performance of best estimator : ",
    accuracy_score(y_true=y_val, y_pred=pred),
)

accuracy performance of best estimator :  0.9
