In [29]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the breast cancer dataset bundled with scikit-learn and pull out the
# feature matrix and the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out a test set. Later cells read X_train / y_train / X_test / y_test,
# but no cell in this notebook created them, so a fresh "Restart & Run All"
# stopped with a NameError.
# test_size=0.2 is consistent with the recorded outputs (cross-validation folds
# of 91 rows -> 455 training rows, and a 114-row test set).
# NOTE(review): random_state=42 is an assumption -- the seed used for the
# recorded outputs is not stored anywhere in the notebook; confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [12]:
# Base estimator handed to the grid search. `dt_classifier` was referenced
# below without being defined in any cell, so this cell raised a NameError on
# a fresh kernel. Default settings agree with the recorded best-estimator
# repr, which lists only the tuned parameters.
dt_classifier = DecisionTreeClassifier()

# Hyperparameter grid explored by GridSearchCV (2 * 5 * 3 * 3 = 90 candidates)
param_grid_dt = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive search scored with 5-fold cross-validation; n_jobs=-1 lets
# scikit-learn use all available processors.
grid_search_dt = GridSearchCV(
    estimator=dt_classifier,
    param_grid=param_grid_dt,
    cv=5,
    n_jobs=-1,
)

# Fit on the training split only, so the test split stays unseen during tuning
grid_search_dt.fit(X_train, y_train)

In [17]:
# Report the hyperparameter combination selected by the grid search
best_params_dt_grid = grid_search_dt.best_params_
print("Best Parameters:", best_params_dt_grid)

Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_leaf': 2, 'min_samples_split': 5}


In [18]:
# Keep a handle on the estimator chosen by the grid search and show its
# configuration; the cells below reuse it for cross-validation and testing.
best_dt_classifier_grid = grid_search_dt.best_estimator_
print("Best Estimator:", best_dt_classifier_grid)

Best Estimator: DecisionTreeClassifier(criterion='entropy', max_depth=5, min_samples_leaf=2,
                       min_samples_split=5)


In [19]:
# 5-fold cross-validation of the selected tree on the training data:
# one score per fold, then their mean.
cv_scores_dt_grid = cross_val_score(
    estimator=best_dt_classifier_grid,
    X=X_train,
    y=y_train,
    cv=5,
)
print("Cross-validation scores:", cv_scores_dt_grid)
print("Average cross-validation score:", cv_scores_dt_grid.mean())

Cross-validation scores: [0.95604396 0.91208791 0.97802198 0.93406593 0.91208791]
Average cross-validation score: 0.9384615384615385


In [20]:
# Predict on the held-out test split and measure plain accuracy
y_pred_dt_grid = best_dt_classifier_grid.predict(X_test)
test_accuracy_dt_grid = accuracy_score(y_true=y_test, y_pred=y_pred_dt_grid)
print("Test Accuracy:", test_accuracy_dt_grid)


Test Accuracy: 0.9473684210526315


In [25]:
# Calculate sensitivity (recall) and specificity from the confusion matrix.
# labels=[0, 1] pins the matrix to 2x2; without it, a test split whose true
# and predicted labels contain a single class yields a 1x1 matrix and the
# four-way unpacking below raises ValueError.
conf_matrix_dt_grid = confusion_matrix(y_test, y_pred_dt_grid, labels=[0, 1])
tn, fp, fn, tp = conf_matrix_dt_grid.ravel()

# Label 1 is treated as the positive class here.
# NOTE(review): per the scikit-learn docs, load_breast_cancer encodes
# 0 = malignant and 1 = benign, so "sensitivity" below is the recall of the
# benign class -- confirm this is the intended clinical reading.
sensitivity_dt_grid = tp / (tp + fn)  # true positives / all actual positives
specificity_dt_grid = tn / (tn + fp)  # true negatives / all actual negatives

print("Sensitivity (Recall):", sensitivity_dt_grid)
print("Specificity:", specificity_dt_grid)

Sensitivity (Recall): 0.971830985915493
Specificity: 0.9069767441860465
