In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import pandas as pd
import mlflow

In [2]:
# Load the Breast Cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')
# Splitting into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating the RandomForestClassifier model
rf = RandomForestClassifier(random_state=42)

# Defining the parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 10, 20, 30]
}

In [3]:
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)


In [13]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV] END ....................max_depth=None, n_estimators=10; total time=   0.0s
[CV] END ....................max_depth=None, n_estimators=10; total time=   0.0s
[CV] END ....................max_depth=None, n_estimators=10; total time=   0.0s
[CV] END ....................max_depth=None, n_estimators=10; total time=   0.0s
[CV] END ....................max_depth=None, n_estimators=10; total time=   0.0s
[CV] END ....................max_depth=None, n_estimators=50; total time=   0.1s
[CV] END ....................max_depth=None, n_estimators=50; total time=   0.2s
[CV] END ....................max_depth=None, n_estimators=50; total time=   0.2s
[CV] END ....................max_depth=None, n_estimators=50; total time=   0.2s
[CV] END ......................max_depth=10, n_estimators=10; total time=   0.0s
[CV] END ......................max_depth=10, n_estimators=10; total time=   0.0s
[CV] END ...................max_depth=None, n_es

In [19]:
grid_search.cv_results_['params']

[{'max_depth': None, 'n_estimators': 10},
 {'max_depth': None, 'n_estimators': 50},
 {'max_depth': None, 'n_estimators': 100},
 {'max_depth': 10, 'n_estimators': 10},
 {'max_depth': 10, 'n_estimators': 50},
 {'max_depth': 10, 'n_estimators': 100},
 {'max_depth': 20, 'n_estimators': 10},
 {'max_depth': 20, 'n_estimators': 50},
 {'max_depth': 20, 'n_estimators': 100},
 {'max_depth': 30, 'n_estimators': 10},
 {'max_depth': 30, 'n_estimators': 50},
 {'max_depth': 30, 'n_estimators': 100}]

In [21]:
grid_search.cv_results_['params'][1]

{'max_depth': None, 'n_estimators': 50}

In [20]:
grid_search.cv_results_['mean_test_score']

array([0.93186813, 0.95384615, 0.95824176, 0.93186813, 0.95384615,
       0.95824176, 0.93186813, 0.95384615, 0.95824176, 0.93186813,
       0.95384615, 0.95824176])

In [16]:
grid_search.cv_results_

{'mean_fit_time': array([0.03153296, 0.15016222, 0.27661319, 0.03724456, 0.1245358 ,
        0.22653737, 0.02906361, 0.12415833, 0.25028124, 0.02330403,
        0.10261817, 0.21784   ]),
 'std_fit_time': array([0.00302029, 0.00886515, 0.07438552, 0.00161451, 0.00845379,
        0.0239249 , 0.00307621, 0.0038278 , 0.05467711, 0.00501224,
        0.00582935, 0.00599469]),
 'mean_score_time': array([0.00394464, 0.00877161, 0.01208763, 0.00525208, 0.00798273,
        0.01307364, 0.00417161, 0.00655241, 0.01016402, 0.00481186,
        0.00523906, 0.00711203]),
 'std_score_time': array([0.0007122 , 0.00210291, 0.00679986, 0.00241712, 0.003115  ,
        0.00465779, 0.00121495, 0.0011171 , 0.00227033, 0.00319497,
        0.00043846, 0.00033582]),
 'param_max_depth': masked_array(data=[None, None, None, 10, 10, 10, 20, 20, 20, 30, 30, 30],
              mask=[False, False, False, False, False, False, False, False,
                    False, False, False, False],
        fill_value='?',
       

In [22]:
grid_search.best_estimator_