<a href="https://colab.research.google.com/github/MelMacLondon/ML/blob/main/HyperParameter_Optimization_20250720.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://www.geeksforgeeks.org/machine-learning/hyperparameter-optimization-based-on-bayesian-optimization/




In [1]:
pip install scikit-optimize


Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.7.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.7.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.7.0 scikit-optimize-0.10.2


In [2]:
import numpy as np
import pandas as pd
import gc
import warnings
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, make_scorer, accuracy_score, recall_score, f1_score
from datetime import timedelta
import time
from skopt import BayesSearchCV

Load the Dataset and Extract Train Test Split
Sometimes dual coefficients or intercepts are not finite and this arises generally in SVMs and leads to the model running for an indefinite amount of time. To address this issue prepocessing of data is necessary. Here we have used the Scaling technique to normalize the data so that they have a similar range.

In [3]:
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=1234)
scaler = StandardScaler()

# Fit the scaler on training data and transform both training and test data
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Training a Machine Learning Model

start_time = time.time()
svc_model = SVC(kernel='rbf')
svc_model.fit(X_train, y_train)

elapsed_time_secs = time.time() - start_time
msg = 'Execution took: %s secs (Wall clock time)' % timedelta(seconds=round(elapsed_time_secs))


svc_pred = svc_model.predict(X_test)

print('Train Accuracy', accuracy_score(y_train, svc_model.predict(X_train)))
print('Test Accuracy', accuracy_score(y_test, svc_model.predict(X_test)))

print('\n')

print('Train Recall Score', recall_score(y_train, svc_model.predict(X_train)))
print('Test Recall Score', recall_score(y_test, svc_model.predict(X_test)))

print('\n')

print('Train F1 Score', f1_score(y_train, svc_model.predict(X_train)))
print('Test F1 Score', f1_score(y_test, svc_model.predict(X_test)))



Train Accuracy 0.989010989010989
Test Accuracy 0.9824561403508771


Train Recall Score 1.0
Test Recall Score 1.0


Train F1 Score 0.991304347826087
Test F1 Score 0.9863013698630136


In [5]:
# Define Hyperparameter Search Space

param_space = {
    'C': (1e-6, 1e+6, 'log-uniform'),
    'gamma': (1e-6, 1e+1, 'log-uniform'),
    'degree': (1, 8),  # integer valued parameter
    'kernel': ['linear', 'poly', 'rbf'],  # categorical parameter
}

In [6]:
# Bayesian Optimization
# Initialize Bayesian Optimization
opt = BayesSearchCV(
    SVC(),
    param_space,
    n_iter=32,
    cv=3
)

In [8]:
# Run Bayesian Optimization

opt.fit(X_train, y_train)

print('val. score: %s' % opt.best_score_)
print('test score: %s' % opt.score(X_test, y_test))

# Get best hyperparameters
best_params = opt.best_params_
print('Best Parameters:', best_params)

val. score: 0.9692256303009179
test score: 0.9824561403508771
Best Parameters: OrderedDict([('C', 16.578161672333977), ('degree', 1), ('gamma', 0.004473306916630009), ('kernel', 'rbf')])


In [9]:
# Get best hyperparameters
best_params = opt.best_params_

# Create an SVM classifier with the best parameters
best_svc_model = SVC(**best_params)

# Fit the classifier on the training data
best_svc_model.fit(X_train, y_train)

# Predict on the test data
best_svc_pred = best_svc_model.predict(X_test)

# Evaluate the performance of the model
print('Train Accuracy with best parameters:', accuracy_score(y_train, best_svc_model.predict(X_train)))
print('Test Accuracy with best parameters:', accuracy_score(y_test, best_svc_pred))

print('\n')

print('Train Recall Score with best parameters:', recall_score(y_train, best_svc_model.predict(X_train)))
print('Test Recall Score with best parameters:', recall_score(y_test, best_svc_pred))

print('\n')

print('Train F1 Score with best parameters:', f1_score(y_train, best_svc_model.predict(X_train)))
print('Test F1 Score with best parameters:', f1_score(y_test, best_svc_pred))

Train Accuracy with best parameters: 0.9868131868131869
Test Accuracy with best parameters: 0.9824561403508771


Train Recall Score with best parameters: 0.9929824561403509
Test Recall Score with best parameters: 1.0


Train F1 Score with best parameters: 0.9895104895104895
Test F1 Score with best parameters: 0.9863013698630136


In [10]:
print (best_svc_model.get_params)

<bound method BaseEstimator.get_params of SVC(C=16.578161672333977, degree=1, gamma=0.004473306916630009)>


In [12]:
# Implementing SVM with Best Hyperparameters

# Get best hyperparameters
best_params = opt.best_params_

# Create an SVM classifier with the best parameters
best_svc_model = SVC(**best_params)

# Fit the classifier on the training data
best_svc_model.fit(X_train, y_train)

# Predict on the test data
best_svc_pred = best_svc_model.predict(X_test)

# Evaluate the performance of the model
print('Train Accuracy with best parameters:', accuracy_score(y_train, best_svc_model.predict(X_train)))
print('Test Accuracy with best parameters:', accuracy_score(y_test, best_svc_pred))

print('\n')

print('Train Recall Score with best parameters:', recall_score(y_train, best_svc_model.predict(X_train)))
print('Test Recall Score with best parameters:', recall_score(y_test, best_svc_pred))

print('\n')

print('Train F1 Score with best parameters:', f1_score(y_train, best_svc_model.predict(X_train)))
print('Test F1 Score with best parameters:', f1_score(y_test, best_svc_pred))



Train Accuracy with best parameters: 0.9868131868131869
Test Accuracy with best parameters: 0.9824561403508771


Train Recall Score with best parameters: 0.9929824561403509
Test Recall Score with best parameters: 1.0


Train F1 Score with best parameters: 0.9895104895104895
Test F1 Score with best parameters: 0.9863013698630136
