In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
# Load dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name="target")

X.head(), y.value_counts()


(   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
 0        17.99         10.38          122.80     1001.0          0.11840   
 1        20.57         17.77          132.90     1326.0          0.08474   
 2        19.69         21.25          130.00     1203.0          0.10960   
 3        11.42         20.38           77.58      386.1          0.14250   
 4        20.29         14.34          135.10     1297.0          0.10030   
 
    mean compactness  mean concavity  mean concave points  mean symmetry  \
 0           0.27760          0.3001              0.14710         0.2419   
 1           0.07864          0.0869              0.07017         0.1812   
 2           0.15990          0.1974              0.12790         0.2069   
 3           0.28390          0.2414              0.10520         0.2597   
 4           0.13280          0.1980              0.10430         0.1809   
 
    mean fractal dimension  ...  worst radius  worst texture  worst perimeter 

In [None]:
# train test split


In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
#  Default model (baseline)

In [4]:
# Choose model (SVM with RBF kernel as example)
default_model = SVC()  # default hyperparameters
default_model.fit(X_train, y_train)

y_pred_default = default_model.predict(X_test)

default_acc = accuracy_score(y_test, y_pred_default)
print("Default accuracy:", default_acc)
print(classification_report(y_test, y_pred_default))
print(confusion_matrix(y_test, y_pred_default))


Default accuracy: 0.9298245614035088
              precision    recall  f1-score   support

           0       0.95      0.86      0.90        42
           1       0.92      0.97      0.95        72

    accuracy                           0.93       114
   macro avg       0.93      0.91      0.92       114
weighted avg       0.93      0.93      0.93       114

[[36  6]
 [ 2 70]]


In [None]:
# Define parameter grid and run GridSearchCV

In [5]:
param_grid = {
    "C": [0.1, 1, 10, 100],
    "gamma": [1, 0.1, 0.01, 0.001],
    "kernel": ["rbf"]
}

grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    cv=5,           # 5-fold cross-validation
    scoring="accuracy",
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)

print("Best params:", grid_search.best_params_)
print("Best CV accuracy:", grid_search.best_score_)
best_model = grid_search.best_estimator_


Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best params: {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
Best CV accuracy: 0.934065934065934


In [6]:
# Evaluate tuned model

In [7]:
y_pred_best = best_model.predict(X_test)

tuned_acc = accuracy_score(y_test, y_pred_best)
print("Tuned accuracy:", tuned_acc)
print(classification_report(y_test, y_pred_best))
print(confusion_matrix(y_test, y_pred_best))


Tuned accuracy: 0.8947368421052632
              precision    recall  f1-score   support

           0       0.81      0.93      0.87        42
           1       0.95      0.88      0.91        72

    accuracy                           0.89       114
   macro avg       0.88      0.90      0.89       114
weighted avg       0.90      0.89      0.90       114

[[39  3]
 [ 9 63]]


In [None]:
# Performance comparison table 

In [8]:
results_table = pd.DataFrame({
    "Model": ["Default SVC", "Tuned SVC (GridSearchCV)"],
    "Test Accuracy": [default_acc, tuned_acc]
})
results_table


Unnamed: 0,Model,Test Accuracy
0,Default SVC,0.929825
1,Tuned SVC (GridSearchCV),0.894737
