In [None]:
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-23.9.7-py3-none-any.whl (23 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.9.7 scikit-optimize-0.9.0


In [None]:
# Required imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from skopt import BayesSearchCV
from skopt.space import Real

In [None]:
# Download the red-wine Wine Quality table from the UCI repository (semicolon-separated CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
data = pd.read_csv(url, delimiter=';')

# Collapse the raw quality score into three ordinal classes.
# pd.cut uses right-closed intervals by default, so:
#   (0, 5] -> 0 (low), (5, 7] -> 1 (medium), (7, 10] -> 2 (high)
bins = [0, 5, 7, 10]
labels = [0, 1, 2]
data['quality'] = pd.cut(data['quality'], bins=bins, labels=labels)

# Features are every column except the target; the target becomes a plain integer array
X = data.drop(columns='quality')
y = data['quality'].astype(int).values

# Hold out 30% of the rows for the final evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# Sanity check: dimensions of the feature matrix as (rows, columns)
X.shape

(1599, 11)

In [None]:
# Sanity check: distinct class labels present in the target after binning
set(y)

{0, 1, 2}

In [None]:
# Standardize the features: learn the scaling statistics on the training split
# only, then apply that same fitted transform to both splits.
# NOTE: this cell overwrites X_train / X_test, so re-running it without first
# re-running the split cell would scale data that has already been scaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# PSO Fitness Function
def svm_fitness(position):
    """Score one (C, gamma) pair for the PSO search.

    Parameters
    ----------
    position : sequence of two floats
        The SVM hyperparameters in the order ``(C, gamma)``.

    Returns
    -------
    float
        Mean accuracy of an ``SVC`` over 5-fold cross-validation on the
        notebook-level ``X_train`` / ``y_train``; higher is better.
    """
    penalty, kernel_coef = position
    classifier = SVC(C=penalty, gamma=kernel_coef, random_state=42)
    fold_accuracies = cross_val_score(
        classifier, X_train, y_train, cv=5, scoring='accuracy'
    )
    return fold_accuracies.mean()

# PSO Particle class
class Particle:
    """One candidate solution of the swarm, remembering the best position it has visited."""

    def __init__(self, bounds, rng=None):
        """Place the particle uniformly at random inside ``bounds`` with zero velocity.

        Parameters
        ----------
        bounds : np.ndarray of shape (n_dims, 2)
            Column 0 holds the lower limits, column 1 the upper limits.
        rng : np.random.Generator, optional
            Source of randomness. When omitted, the global ``np.random`` state
            is used (so ``np.random.seed`` still controls the draw).
        """
        rng = np.random if rng is None else rng
        n_dims = bounds.shape[0]
        self.position = rng.uniform(low=bounds[:, 0], high=bounds[:, 1], size=n_dims)
        self.velocity = np.zeros(n_dims)
        self.best_position = self.position.copy()
        # -inf guarantees the first evaluated score becomes the personal best
        self.best_score = -np.inf

    def update(self, global_best_position, w, c1, c2, bounds, rng=None):
        """Advance the particle by one PSO step and clip it back into ``bounds``.

        Parameters
        ----------
        global_best_position : np.ndarray
            Best position found so far by any particle.
        w : float
            Weight applied to the previous velocity.
        c1 : float
            Weight of the pull towards this particle's own best position.
        c2 : float
            Weight of the pull towards ``global_best_position``.
        bounds : np.ndarray of shape (n_dims, 2)
            Lower / upper limits used to clip the new position.
        rng : np.random.Generator, optional
            Source of randomness; defaults to the global ``np.random`` state.
        """
        rng = np.random if rng is None else rng
        # One scalar random factor per attraction term, shared across all dimensions
        r1, r2 = rng.random(size=2)
        cognitive = c1 * r1 * (self.best_position - self.position)
        social = c2 * r2 * (global_best_position - self.position)
        self.velocity = w * self.velocity + cognitive + social

        # Clip the position to stay within bounds
        self.position = np.clip(self.position + self.velocity, bounds[:, 0], bounds[:, 1])

# PSO algorithm
def particle_swarm_optimization(fitness_function, bounds, num_particles=30, iterations=50, w=0.5, c1=2, c2=2, seed=None):
    """Maximize ``fitness_function`` inside a box with particle swarm optimization.

    Parameters
    ----------
    fitness_function : callable
        Takes a position array of length ``n_dims`` and returns a score; larger
        scores are treated as better.
    bounds : array-like of shape (n_dims, 2)
        ``[lower, upper]`` limits for every dimension.
    num_particles : int
        Swarm size (must be >= 1).
    iterations : int
        Number of evaluate-then-move rounds (must be >= 1).
    w, c1, c2 : float
        Velocity, personal-best and global-best weights passed to ``Particle.update``.
    seed : int or np.random.Generator, optional
        Seed for a dedicated random generator so a run can be reproduced.
        When ``None`` (default) the global ``np.random`` state is used, exactly
        as before this parameter existed.

    Returns
    -------
    tuple
        ``(best_position, best_score)`` over every evaluation performed.

    Raises
    ------
    ValueError
        If ``num_particles`` or ``iterations`` is smaller than 1; no position
        would ever be evaluated in that case.
    """
    if num_particles < 1 or iterations < 1:
        raise ValueError("num_particles and iterations must both be at least 1")

    bounds = np.asarray(bounds, dtype=float)
    rng = None if seed is None else np.random.default_rng(seed)

    particles = [Particle(bounds, rng) for _ in range(num_particles)]
    global_best_position = None
    global_best_score = -np.inf

    for iteration in range(iterations):
        for particle in particles:
            score = fitness_function(particle.position)
            if score > particle.best_score:
                particle.best_score = score
                # Store a copy so the record cannot change if the position array is later modified
                particle.best_position = particle.position.copy()

            if score > global_best_score:
                global_best_score = score
                global_best_position = particle.position.copy()

        for particle in particles:
            particle.update(global_best_position, w, c1, c2, bounds, rng)

    return global_best_position, global_best_score

In [None]:
%%time
# Define bounds for C and gamma: one [lower, upper] row per hyperparameter,
# in the order svm_fitness unpacks them (row 0 is C, row 1 is gamma).
bounds = np.array([[0.1, 100], [0.001, 1]])

# Run PSO with its default swarm settings; returns the best (C, gamma) found
# and the cross-validated accuracy svm_fitness reported for it.
best_position_pso, best_score_pso = particle_swarm_optimization(svm_fitness, bounds)

CPU times: user 7min 27s, sys: 573 ms, total: 7min 28s
Wall time: 7min 42s


In [None]:
%%time
# Bayesian Optimization
search_space = {'C': Real(0.1, 100, 'log-uniform'), 'gamma': Real(0.001, 1, 'log-uniform')}
bayes_search = BayesSearchCV(SVC(random_state=41), search_space, n_iter=30, scoring='accuracy', n_jobs=-1, cv=5)
bayes_search.fit(X_train, y_train)

# Best parameters and score from Bayesian Optimization
best_params_bayes = bayes_search.best_params_
best_score_bayes = bayes_search.best_score_

CPU times: user 27.4 s, sys: 14.9 s, total: 42.3 s
Wall time: 38.7 s


In [None]:
# Report the cross-validated optimum each search strategy found
print("Best Parameters from PSO:", best_position_pso)
print("Best Score from PSO:", best_score_pso)
print("Best Parameters from Bayesian Optimization:", best_params_bayes)
print("Best Score from Bayesian Optimization:", best_score_bayes)

# Refit an SVM on the training split with the PSO optimum and score it on the held-out split
pso_C, pso_gamma = best_position_pso
svm_pso = SVC(C=pso_C, gamma=pso_gamma, random_state=42).fit(X_train, y_train)
accuracy_pso = accuracy_score(y_test, svm_pso.predict(X_test))

# Same procedure with the hyperparameters chosen by the Bayesian search
tuned_estimator = bayes_search.best_estimator_
svm_bayes = SVC(C=tuned_estimator.C, gamma=tuned_estimator.gamma, random_state=42).fit(X_train, y_train)
accuracy_bayes = accuracy_score(y_test, svm_bayes.predict(X_test))

# Held-out accuracy for both tuned models
print("Test Accuracy with PSO parameters:", accuracy_pso)
print("Test Accuracy with Bayesian Optimization parameters:", accuracy_bayes)

Best Parameters from PSO: [2.65600356 0.44666913]
Best Score from PSO: 0.765002402306214
Best Parameters from Bayesian Optimization: OrderedDict([('C', 1.373861629213652), ('gamma', 0.1775970525437393)])
Best Score from Bayesian Optimization: 0.7632286995515696
Test Accuracy with PSO parameters: 0.7479166666666667
Test Accuracy with Bayesian Optimization parameters: 0.7458333333333333


In [None]:
%%time
# Baseline for comparison: an SVC with library-default hyperparameters, fitted
# on the same standardized training split as the tuned models.
# NOTE: "defolt" is a misspelling of "default"; the name is kept because the
# following cells reference it.
model_defolt = SVC()
model_defolt.fit(X_train, y_train)

CPU times: user 61.9 ms, sys: 0 ns, total: 61.9 ms
Wall time: 62.2 ms


In [None]:
# Mean 5-fold cross-validated accuracy of the default SVC on the training split,
# computed with the same cv / scoring settings as the tuned searches.
np.mean(cross_val_score(model_defolt, X_train, y_train, cv=5, scoring='accuracy'))

0.755209000640615

In [None]:
# Held-out test accuracy of the default SVC, for comparison with the tuned models
accuracy_score(y_test, model_defolt.predict(X_test))

0.7354166666666667