In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

In [2]:
# Fetch the breast cancer dataset that ships with scikit-learn
cancer = load_breast_cancer()

In [3]:
# Feature matrix and target vector taken from the loaded dataset
X, y = cancer.data, cancer.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Define the tune_hyperparameters function
def tune_hyperparameters(name, model, param_grid, X_train, y_train,
                         n_splits=5, n_iter=10, scoring='accuracy', random_state=42):
    """Tune ``model`` with a cross-validated grid search or randomized search.

    Parameters
    ----------
    name : str
        Search strategy: ``'Grid Search'`` or ``'Random Search'``.
    model : estimator
        Unfitted estimator handed to the search object.
    param_grid : dict
        Hyperparameter names mapped to their candidate values. Passed as the
        second positional argument of either search class.
    X_train, y_train
        Training data forwarded to ``search.fit``.
    n_splits : int, default 5
        Number of shuffled K-fold splits.
    n_iter : int, default 10
        Number of candidates sampled by the randomized search; ignored by the
        grid search.
    scoring : str, default 'accuracy'
        Metric used to rank candidates.
    random_state : int, default 42
        Seed used both for shuffling the folds and for sampling candidates in
        the randomized search.

    Returns
    -------
    tuple
        ``(search.best_estimator_, search)`` after fitting.

    Raises
    ------
    ValueError
        If ``name`` is not one of the two supported strategies.
    """
    # Fail fast: reject an unknown strategy before building any CV machinery.
    if name not in ('Grid Search', 'Random Search'):
        raise ValueError('Invalid name. Choose either Grid Search or Random Search.')

    cv = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    # Settings shared by both search strategies.
    shared = dict(cv=cv, scoring=scoring, n_jobs=-1, verbose=1)

    if name == 'Grid Search':
        search = GridSearchCV(model, param_grid, **shared)
    else:
        # Seed the sampler as well; without it the sampled candidates (and
        # therefore the selected model) can differ between runs.
        search = RandomizedSearchCV(
            model, param_grid, n_iter=n_iter, random_state=random_state, **shared
        )

    search.fit(X_train, y_train)
    return search.best_estimator_, search

# Candidate hyperparameter values for the Random Forest
# (3 x 4 x 2 = 24 combinations in total)
param_grid_rf = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    criterion=['gini', 'entropy'],
)

# Run the grid search on the training split; keep the best estimator and the fitted search object
best_rf, search_rf = tune_hyperparameters(
    name='Grid Search',
    model=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    X_train=X_train,
    y_train=y_train,
)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [5]:
# Predict the held-out labels with the tuned Random Forest
y_pred_rf = best_rf.predict(X_test)

# Confusion matrix of true labels (rows) against predicted labels (columns)
cm = confusion_matrix(y_test, y_pred_rf)

# Overall fraction of correct predictions on the test split
accuracy_rf = accuracy_score(y_test, y_pred_rf)

# Per-class rates read off the confusion-matrix rows, with label 1 as the positive class:
#   sensitivity = correctly predicted 1s / all true 1s
#   specificity = correctly predicted 0s / all true 0s
# NOTE(review): in scikit-learn's breast cancer data label 1 appears to be "benign" and
# label 0 "malignant" - confirm that "sensitivity" here is meant as the benign recall.
sensitivity_rf = cm[1, 1] / cm[1].sum()
specificity_rf = cm[0, 0] / cm[0].sum()

# Test-set metrics
print(f'Random Forest Accuracy: {accuracy_rf:.2f}')
print(f'Random Forest Sensitivity: {sensitivity_rf:.2f}')
print(f'Random Forest Specificity: {specificity_rf:.2f}')

# Cross-validation score and hyperparameters of the winning candidate
print(f'Best score: {search_rf.best_score_:.2f}')
print(f'Best parameters: {search_rf.best_params_}')

Random Forest Accuracy: 0.96
Random Forest Sensitivity: 0.99
Random Forest Specificity: 0.93
Best score: 0.96
Best parameters: {'criterion': 'gini', 'max_depth': None, 'n_estimators': 300}
