# `GridSearchCV()`

In [6]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


In [None]:
# Load the Iris dataset bundle, then pull out the feature matrix and the labels.
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for the final test; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Base estimator. The hyper-parameters being searched are left at their defaults
# because GridSearchCV sets them for every candidate. random_state IS fixed:
# the tree randomly permutes features at each split and breaks ties between
# equally good splits at random, so an unseeded tree can select different
# "best" parameters and scores from one run to the next.
dt = DecisionTreeClassifier(random_state=42)

# Defining the "Grid" of Parameters to test (2 * 5 * 3 = 30 combinations)
param_grid = {
    'criterion': ['gini', 'entropy'],     # Measure of split quality
    'max_depth': [None, 2, 4, 6, 8],      # Maximum depth of the tree (None = no limit)
    'min_samples_split': [2, 5, 10]       # Min samples needed to split a node
}

# Initialize GridSearchCV
# cv=5: 5-fold Cross-Validation
# scoring='accuracy': We want the highest accuracy
# Keyword arguments are used so each argument's role is explicit; the positional
# form GridSearchCV(dt, param_grid, ...) is equivalent.
grid_search = GridSearchCV(estimator=dt, param_grid=param_grid, cv=5, scoring='accuracy')

# Evaluates every combination with cross-validation on the training split only;
# with the default refit=True the best combination is then refit on all of X_train.
grid_search.fit(X_train, y_train)


# Report the winning hyper-parameter combination and its cross-validation accuracy.
best_params = grid_search.best_params_
cv_accuracy_pct = grid_search.best_score_ * 100
print(f"Best Parameters Found: {best_params}")
print(f"Best Cross-Validation Accuracy: {cv_accuracy_pct:.2f}%")


# Take the best estimator found by the search and score it on the held-out test split.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
test_accuracy_pct = accuracy_score(y_test, y_pred) * 100
print(f"\nAccuracy on Test Set: {test_accuracy_pct:.2f}%")

Best Parameters Found: {'criterion': 'gini', 'max_depth': None, 'min_samples_split': 2}
Best Cross-Validation Accuracy: 94.17%

Accuracy on Test Set: 100.00%
