In [3]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

import pandas as pd
cancer = load_breast_cancer()
X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
y = pd.Series(cancer.target)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

models = {
    'Gradient_boosting': GradientBoostingClassifier(),
    'Random forest': RandomForestClassifier(),
    'SVM':SVC(),
}

for name, model in models.items():
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(f'{name} Mean accuracy = {score.mean():.4f}')

param_grid = {
    'max_depth': [None, 1,2,3,4,5,6,7,8,9,10],
    'n_estimators' : [50,100,200],
    'min_samples_split' : [2,5,10]
}

grid = GridSearchCV(RandomForestClassifier(), param_grid, cv=5, scoring='accuracy')

Gradient_boosting Mean accuracy = 0.9582
Random forest Mean accuracy = 0.9626
SVM Mean accuracy = 0.9714


In [4]:
grid.fit(X_train, y_train)


In [9]:
print("Best Parameters:", grid.best_params_)
print("Best Accuracy:", grid.best_score_)

Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best Accuracy: 0.964835164835165


In [6]:
best_model = grid.best_estimator_
best_model.fit(X_train, y_train)

In [12]:
final_score = cross_val_score(best_model, X_train, y_train, cv=5, scoring='accuracy')
print(f'Final model accuracy after tuning {final_score.mean():.4f}')

Final model accuracy after tuning 0.9582
