In [1]:
import numpy as np
import pandas as pd
from scipy.stats import uniform
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


In [2]:
# Load the breast cancer dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split into training and testing sets
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. SVMs are not scale-invariant: with raw features on
# very different scales, the RBF/poly kernels are dominated by the largest
# features and fitting can become extremely slow.
# The scaler is fitted on the training split only, so no test-set statistics
# leak into training. (During cross-validation the validation folds still
# share these statistics; wrap the scaler and SVC in a Pipeline if that
# matters for your use case.)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [3]:
# Define the base estimator: a support vector classifier created with its
# default hyperparameters. The search cells below pass this object as the
# estimator and vary C, kernel and gamma.
model = SVC()


The `param_grid` is a dictionary where:

- Each key is the name of a hyperparameter of the model.

- Each value is a list of candidate settings to try for that hyperparameter.

Hyperparameters in this grid:

1. `C` (Regularization Parameter)

   - Controls the trade-off between fitting the training data (few misclassifications) and keeping the model simple (a wide margin). The regularization strength is inversely proportional to `C`.

   - Values: `[0.1, 1, 10]`

     - A smaller `C` allows more misclassification (softer margin).

     - A larger `C` enforces stricter classification (harder margin).

2. `kernel` (Kernel Function)

   - Determines the type of decision boundary the SVM will use.

   - Values: `['linear', 'rbf', 'poly']`

     - `'linear'`: Uses a linear decision boundary (good for linearly separable data).

     - `'rbf'` (Radial Basis Function): Uses a non-linear boundary (good for complex data).

     - `'poly'` (Polynomial): Uses a polynomial kernel (its `degree` can be adjusted; it is not tuned in this grid).

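3. `gamma` (Kernel Coefficient)

   - Defines how far the influence of a single training example reaches: a small `gamma` means far-reaching influence (smoother boundary), while a large `gamma` means only nearby points matter. It applies to the `'rbf'` and `'poly'` kernels and has no effect on the `'linear'` kernel.

   - Values: `['scale', 'auto']`

     - `'scale'`: Default, uses `1 / (n_features * X.var())`.

     - `'auto'`: Uses `1 / n_features`.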

In [4]:
# Candidate hyperparameter values for the exhaustive search:
# every combination of C, kernel and gamma listed here is evaluated.
param_grid = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'poly'],
    gamma=['scale', 'auto'],
)


In [None]:
# Exhaustive search: every combination in param_grid is scored with
# 5-fold cross-validation on the training split, using accuracy.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid.fit(X_train, y_train)

# Report the winning combination and its cross-validated score
print("Best Parameters:", grid.best_params_)
print("Best CV Score:", grid.best_score_)


In [None]:
# Score the best estimator found by the grid search on the held-out test split
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)

# Overall accuracy first, then the per-class precision / recall / F1 table
print("Test Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


In [None]:
# RandomizedSearchCV: search space
# (`uniform` is imported in the import cell at the top of the notebook.)
#
# scipy's uniform(loc, scale) samples from [loc, loc + scale], NOT [low, high].
# loc=0.1 with scale=9.9 therefore draws C from [0.1, 10], the same range the
# grid search covers. The lists are sampled uniformly at random.
param_dist = {
    'C': uniform(loc=0.1, scale=9.9),
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

# Randomized search: draw 10 candidate settings from param_dist and score each
# with 5-fold cross-validated accuracy; random_state fixes the draws.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=10,
    scoring='accuracy',
    cv=5,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Report the best sampled combination and its cross-validated score
print("Best Parameters (Randomized):", random_search.best_params_)
print("Best Score (Randomized):", random_search.best_score_)
