In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
#load dataset
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Define the model
rf = RandomForestClassifier()

# Define the parameter grid
param_grid = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)


In [5]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [6]:
#checking the results
grid_search.cv_results_

{'mean_fit_time': array([0.05784726, 0.23785906, 0.4748908 , 0.95288625, 0.04997272,
        0.23287048, 0.48277535, 0.98347011, 0.05880518, 0.2819437 ,
        0.52197576, 1.00821524, 0.05774202, 0.25850253, 0.52982163,
        1.17219687, 0.04926081, 0.28979197, 0.61065493, 1.19648857,
        0.05240707, 0.28114705, 0.58154669, 1.18231168, 0.05838833,
        0.30500321, 0.61396799, 1.18583937, 0.0670537 , 0.31633549,
        0.61179128, 1.23166981, 0.06187139, 0.29296055, 0.61218638,
        1.21987796, 0.06429081, 0.30673046, 0.61322145, 1.24007864,
        0.0687778 , 0.30849357, 0.6309216 , 1.20863953, 0.06743641,
        0.30139179, 0.60667481, 1.04777012]),
 'std_fit_time': array([0.0062481 , 0.00176646, 0.00805033, 0.02299638, 0.00160978,
        0.00735125, 0.01139801, 0.03986627, 0.01108586, 0.01003003,
        0.02912198, 0.08309635, 0.00931909, 0.00861844, 0.0613131 ,
        0.08698354, 0.00298241, 0.01219908, 0.04985912, 0.16852256,
        0.00818773, 0.04988702, 0.084

In [7]:
#creating a table for the results 
import pandas as pd
df = pd.DataFrame(grid_search.cv_results_)

In [8]:
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.057847,0.006248,0.00324,0.005073,,2,10,"{'max_depth': None, 'min_samples_split': 2, 'n...",0.958333,1.0,0.833333,0.958333,0.916667,0.933333,0.056519,46
1,0.237859,0.001766,0.009035,0.003823,,2,50,"{'max_depth': None, 'min_samples_split': 2, 'n...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,33
2,0.474891,0.00805,0.017508,0.004191,,2,100,"{'max_depth': None, 'min_samples_split': 2, 'n...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,33
3,0.952886,0.022996,0.03314,0.003405,,2,200,"{'max_depth': None, 'min_samples_split': 2, 'n...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,33
4,0.049973,0.00161,0.00588,0.001989,,5,10,"{'max_depth': None, 'min_samples_split': 5, 'n...",0.958333,1.0,0.791667,1.0,0.958333,0.941667,0.07728,33
5,0.23287,0.007351,0.008728,0.002878,,5,50,"{'max_depth': None, 'min_samples_split': 5, 'n...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,33
6,0.482775,0.011398,0.017281,0.006857,,5,100,"{'max_depth': None, 'min_samples_split': 5, 'n...",0.958333,1.0,0.833333,1.0,0.958333,0.95,0.061237,7
7,0.98347,0.039866,0.036807,0.002891,,5,200,"{'max_depth': None, 'min_samples_split': 5, 'n...",0.958333,1.0,0.875,1.0,0.958333,0.958333,0.045644,2
8,0.058805,0.011086,0.002518,0.003922,,10,10,"{'max_depth': None, 'min_samples_split': 10, '...",1.0,1.0,0.833333,1.0,0.958333,0.958333,0.06455,2
9,0.281944,0.01003,0.01089,0.002077,,10,50,"{'max_depth': None, 'min_samples_split': 10, '...",0.958333,1.0,0.833333,1.0,0.958333,0.95,0.061237,7


In [9]:
# Evaluating the best model on the test data
best_model = grid_search.best_estimator_
test_score = best_model.score(X_test, y_test)
print(f"Test set accuracy: {test_score}")

Test set accuracy: 1.0


In [10]:
#exploring the parameter 
dir(grid_search)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_doc_link_module',
 '_doc_link_template',
 '_doc_link_url_param_generator',
 '_estimator_type',
 '_format_results',
 '_get_default_requests',
 '_get_doc_link',
 '_get_metadata_request',
 '_get_param_names',
 '_get_routed_params_for_fit',
 '_get_scorers',
 '_get_tags',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run

In [11]:
grid_search.best_score_

0.9666666666666666

In [12]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [13]:
models = {
    'RandomForest': RandomForestClassifier(),
    'SVM': SVC(),
    'LogisticRegression': LogisticRegression()
}

In [14]:
# Parameter grids for each model
param_grids = {
    'RandomForest': {
        'n_estimators': [10, 50, 100],
        'max_depth': [None, 10, 20]
    },
    'SVM': {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf']
    },
    'LogisticRegression': {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'saga']
    }
}


In [15]:
# GridSearchCV for each model
best_estimators = {}

for model_name, model in models.items():
    print(f"Performing GridSearchCV for {model_name}...")
    
    # Creating GridSearchCV for each model
    grid_search = GridSearchCV(estimator=model, param_grid=param_grids[model_name], cv=3, n_jobs=-1, verbose=2)
    
    # Fitting GridSearchCV
    grid_search.fit(X_train, y_train)
    
    # Storing the best estimator for each model
    best_estimators[model_name] = grid_search.best_estimator_
    
    # Printing the best parameters and scores
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best cross-validation score for {model_name}: {grid_search.best_score_}\n")


Performing GridSearchCV for RandomForest...
Fitting 3 folds for each of 9 candidates, totalling 27 fits
Best parameters for RandomForest: {'max_depth': 10, 'n_estimators': 100}
Best cross-validation score for RandomForest: 0.9583333333333334

Performing GridSearchCV for SVM...
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for SVM: {'C': 1, 'kernel': 'linear'}
Best cross-validation score for SVM: 0.975

Performing GridSearchCV for LogisticRegression...
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Best parameters for LogisticRegression: {'C': 1, 'solver': 'saga'}
Best cross-validation score for LogisticRegression: 0.9666666666666667





In [16]:
# Evaluating the models on the test data
for model_name, best_model in best_estimators.items():
    test_score = best_model.score(X_test, y_test)
    print(f"Test accuracy for {model_name}: {test_score}")


Test accuracy for RandomForest: 1.0
Test accuracy for SVM: 1.0
Test accuracy for LogisticRegression: 1.0
