In [24]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the breast-cancer dataset and pull out the feature matrix and the labels
cancer = datasets.load_breast_cancer()
X, y = cancer.data, cancer.target

In [25]:
# Hold out 20% of the samples as a test set; the split is seeded so it is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base random forest used as the estimator for the search.
# Seeded so that repeated runs of the search score each candidate identically;
# without it the reported best hyperparameters can change from run to run.
rf = RandomForestClassifier(random_state=42)

# Candidate values sampled by the randomized search
hyperparameters = {'n_estimators': [100, 200, 300, 400, 500],
                   'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
                   'min_samples_split': [2, 3, 4, 5],
                   'min_samples_leaf': [1, 2, 3, 4, 5]}

# Randomized search with 5-fold cross-validation; random_state fixes which
# parameter combinations are drawn from the grid above
random_search = RandomizedSearchCV(rf, hyperparameters, cv=5, random_state=42)

# Run the search on the training split only, so the test split stays unseen
random_search.fit(X_train, y_train)

In [26]:
# Print the best hyperparameters
print('Best hyperparameters:', random_search.best_params_)

# Build a fresh, unfitted forest from the winning configuration.
# Unpacking best_params_ keeps this in sync with whatever keys the search space
# contains, and random_state makes the cross-validation and test results below
# repeatable across runs.
rf = RandomForestClassifier(**random_search.best_params_, random_state=42)

Best hyperparameters: {'n_estimators': 400, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_depth': 8}


In [27]:
# 5-fold cross-validation of the tuned model on the training split
scores = cross_val_score(estimator=rf, X=X_train, y=y_train, cv=5)
print('Cross-validation scores:', scores)

# Train on the whole training split, then predict the held-out test split
y_pred = rf.fit(X_train, y_train).predict(X_test)

Cross-validation scores: [0.97802198 0.94505495 0.97802198 0.95604396 0.94505495]


In [28]:

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print('Test set accuracy:', accuracy)

# Calculate the confusion matrix.
# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking below cannot
# fail when the test labels and predictions happen to contain only one class.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

# Calculate sensitivity and specificity, treating label 1 as the positive class.
# NOTE(review): in scikit-learn's breast cancer dataset label 1 is documented as
# 'benign' and label 0 as 'malignant', so "sensitivity" here is recall on benign
# cases -- confirm this is the intended positive class.
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

print('Sensitivity:', sensitivity)
print('Specificity:', specificity)

Test set accuracy: 0.9649122807017544
Sensitivity: 0.9859154929577465
Specificity: 0.9302325581395349
