In [None]:
pip install mlxtend
pip install scikit-optimize
pip install deap
pip install pyswarms

In [None]:
import io

import matplotlib.pyplot as plot
import mlxtend
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from mlxtend.evaluate import bias_variance_decomp
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
import pandas as pd

# Read the raw dataset from CSV into `dfa`; the path literal is kept exactly
# as written in the notebook.
DATA_PATH = r'...'
dfa = pd.read_csv(DATA_PATH)

In [None]:
# Drop the SubjectID, VideoID and predefinedlabel columns, then remove rows
# that contain missing values.
# Bug fix: DataFrame.dropna() returns a new frame. The original call threw the
# result away, so rows with NaN were never actually removed from df1.
df1 = (
    dfa
    .drop(columns=['SubjectID', 'VideoID', 'predefinedlabel'])
    .dropna()
)
# Show only the first rows instead of rendering the whole frame.
df1.head()

In [None]:
# Work on a random 80% sample of df1.
# A fixed random_state makes the sample (and every result derived from it)
# reproducible across kernel restarts; 113 is the same seed the
# train/validation/test split uses.
df = df1.sample(frac=0.8, random_state=113)

In [None]:
# Count how many sampled rows carry each distinct value of the
# 'user-definedlabeln' column (the column used as the model target below).
df['user-definedlabeln'].value_counts()

**Feature preprocessing: train / validation / test split**


In [None]:
# Partition the sampled frame into train / validation / test sets.
# test_size=0.3 holds out 30% of the rows, and that hold-out is then split in
# half, so the resulting proportions are 70% / 15% / 15%.
train_df, holdout_df = train_test_split(df, test_size=0.3, random_state=113)
val_df, test_df = train_test_split(holdout_df, test_size=0.5, random_state=113)

# Output (target) column, and the remaining input (feature) column names.
oc = ['user-definedlabeln']
ic = df.columns.tolist()
ic.remove(oc[0])

# Separate features and target for each partition. The targets are selected
# with a list of column names, so they stay single-column DataFrames.
X_train, ytrain = train_df.drop(columns=oc), train_df[oc]
X_val, yval = val_df.drop(columns=oc), val_df[oc]
X_test, ytest = test_df.drop(columns=oc), test_df[oc]


**Training SVM with Default Parameters (Control Model)**

In [None]:
# Control model: an SVC with scikit-learn's default hyperparameters.
# The estimator class `SVC` comes from the notebook's import cell, so the name
# `svm` is only ever bound to the classifier instance. The original
# `from sklearn import svm; svm = svm.SVC()` reused one name for both the
# module and the model.
svm = SVC()

# np.ravel turns the single-column label frame into the 1-D vector that
# scikit-learn expects (avoids DataConversionWarning) and is a no-op if the
# labels are already a flat array.
svm.fit(X_train, np.ravel(ytrain))
print(f'The accuracy score of the model is {svm.score(X_test, np.ravel(ytest)):.5f}')


In [None]:
# Collect the control model's hyperparameters into a one-row frame, then
# display it transposed so that each parameter name becomes a row.
params = svm.get_params()
params_df = pd.DataFrame(data=params, index=[0])
params_df.transpose()

**Training SVM with Tuned Hyperparameters (Experimental Model)**

In [None]:
# Experimental model: RBF-kernel SVC with C=7 and probability estimates on.
# Bug fixes:
#   * the original cell re-ran `from sklearn import svm`, which rebound `svm`
#     from the fitted control classifier back to the sklearn module and broke
#     every later cell that uses `svm` as a model;
#   * the printed accuracy was computed from `svm` instead of `svm1`, so it
#     never reported the experimental model.
# `degree` is kept as in the original; per the scikit-learn docs it is only
# used by the 'poly' kernel.
svm1 = SVC(kernel='rbf', C=7, degree=1, gamma='scale', probability=True)

# Flatten the single-column labels to the 1-D shape scikit-learn expects.
svm1.fit(X_train, np.ravel(ytrain))
print(f'The accuracy score of the model is {svm1.score(X_test, np.ravel(ytest)):.5f}')

**Decomposing Bias and Variance**

*Converting the pandas DataFrames to NumPy arrays, because mlxtend's `bias_variance_decomp` does not support DataFrames*

In [None]:
# Convert the training features to a NumPy array.
# np.asarray yields the same array as `.values` for a DataFrame but is a no-op
# on an ndarray, so re-running this cell no longer raises AttributeError.
X_train = np.asarray(X_train)

In [None]:
# Convert the test features to a NumPy array.
# np.asarray yields the same array as `.values` for a DataFrame but is a no-op
# on an ndarray, so re-running this cell no longer raises AttributeError.
X_test = np.asarray(X_test)

In [None]:
# Convert the training labels to a flat (1-D) NumPy array.
# `.values` on the single-column frame produced shape (n, 1); estimators and
# mlxtend's bias_variance_decomp expect shape (n,). np.asarray(...).ravel()
# is also safe to re-run once `ytrain` is already an array.
ytrain = np.asarray(ytrain).ravel()

In [None]:
# Convert the test labels to a flat (1-D) NumPy array.
# `.values` on the single-column frame produced shape (n, 1); comparing 1-D
# predictions against an (n, 1) column broadcasts to an (n, n) matrix instead
# of comparing element-wise. np.asarray(...).ravel() is also safe to re-run.
ytest = np.asarray(ytest).ravel()

**Decomposing Bias Variance in Control Model**

In [None]:
# Bias-variance decomposition of the control model (`svm`) under 0-1 loss.
# The labels are flattened at the call site: bias_variance_decomp compares
# 1-D prediction vectors against y_test, and an (n, 1) label column would
# broadcast to an (n, n) comparison and distort the loss, bias and variance
# estimates. np.ravel is a no-op when the labels are already 1-D.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        svm, X_train, np.ravel(ytrain), X_test, np.ravel(ytest),
        loss='0-1_loss',
        random_seed=123)

print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)

**Decomposing Bias Variance in Experimental Model**





In [None]:
# Bias-variance decomposition of the experimental model (`svm1`) under 0-1
# loss. Note that this overwrites the avg_* values computed for the control
# model.
# The labels are flattened at the call site: bias_variance_decomp compares
# 1-D prediction vectors against y_test, and an (n, 1) label column would
# broadcast to an (n, n) comparison and distort the loss, bias and variance
# estimates. np.ravel is a no-op when the labels are already 1-D.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
        svm1, X_train, np.ravel(ytrain), X_test, np.ravel(ytest),
        loss='0-1_loss',
        random_seed=123)

print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)

**Hyperparameter Tuning Algorithms**

1. Random Search

In [None]:
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV, cross_val_score
from hyperopt import tpe, STATUS_OK, Trials, hp, fmin, STATUS_OK, space_eval

# Candidate values shared by C and gamma: three log-spaced points between
# 10**-1 and 10**1. Each range gets its own copy of the array.
log_spaced_values = np.logspace(-1, 1, num=3)

# List of C values
C_range = log_spaced_values.copy()
print(f'The list of values for C are {C_range}')
# List of gamma values
gamma_range = log_spaced_values.copy()
print(f'The list of values for gamma are {gamma_range}')
The list of values for C are [ 0.1  1.  10. ]
The list of values for gamma are [ 0.1  1.  10. ]

# Define the search space
param_grid = {
    # Regularization parameter.
    "C": C_range,
    # Kernel type
    "kernel": ['rbf', 'poly'],
    # Gamma is the kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
    "gamma": gamma_range.tolist() + ['scale', 'auto'],
}
# Every entry above is a finite list, so the whole space is a grid with
# len(C) * len(kernel) * len(gamma) distinct candidates.
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))

# Set up score
scoring = ['accuracy']
# Set up the k-fold cross-validation
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)
# Define random search.
#   * estimator: a fresh default SVC. The original passed the name `svm`,
#     which other cells rebind to the sklearn module; the search clones its
#     estimator anyway, so a new default SVC is equivalent.
#   * n_iter: capped at the grid size. With list-valued parameters sampling is
#     done without replacement, so asking for 500 draws from a smaller grid
#     only triggers a warning.
#   * random_state: fixes which candidates are drawn.
random_search = RandomizedSearchCV(estimator=SVC(),
                           param_distributions=param_grid,
                           n_iter=min(500, n_candidates),
                           scoring=scoring,
                           refit='accuracy',
                           n_jobs=-1,
                           cv=kfold,
                           random_state=0,
                           verbose=0)
# Fit random search (labels flattened to the 1-D shape scikit-learn expects)
random_result = random_search.fit(X_train, np.ravel(ytrain))
# Display the fitted search object
random_result

2. Bayesian Optimization

In [None]:
from skopt import gp_minimize

# Define the function to optimize (fitness function)
def svm_accuracy(params):
    """Objective for Bayesian optimisation: validation error of an SVC.

    Parameters
    ----------
    params : sequence of length 3
        ``[C, gamma, degree]``. ``degree`` is truncated to an int and also
        selects the kernel: 2 -> 'rbf', greater than 2 -> 'poly',
        otherwise 'linear'.

    Returns
    -------
    float
        ``1 - accuracy`` on the validation split, so that minimising the
        returned value maximises accuracy.

    Notes
    -----
    Fixes relative to the original cell:
      * the model is built with ``SVC`` (the original called ``svm(...)``,
        a name that is never bound to a callable);
      * candidates are scored on the validation split (``X_val``/``yval``)
        rather than the test split, so the test set stays untouched for the
        final evaluation and the search cannot overfit to it.
    """
    C = float(params[0])  # Regularization parameter
    gamma = float(params[1])  # Kernel coefficient for 'rbf', 'poly', and 'sigmoid'
    degree = int(params[2])  # Degree of the polynomial kernel (for 'poly' only)
    kernel = 'rbf' if degree == 2 else 'poly' if degree > 2 else 'linear'

    svm_model = SVC(C=C, kernel=kernel, gamma=gamma, degree=degree, random_state=42)
    # np.ravel / np.asarray accept both the DataFrame and the ndarray form of
    # the splits, so this works before or after the array-conversion cells.
    svm_model.fit(X_train, np.ravel(ytrain))
    y_pred = svm_model.predict(np.asarray(X_val))
    accuracy = accuracy_score(np.ravel(yval), y_pred)

    return 1.0 - accuracy  # Minimize 1 - accuracy to maximize accuracy

# Set up the hyperparameter search space for Bayesian optimization
space = [(0.1, 100.0),  # C: regularization parameter
         (0.01, 10.0),   # gamma: kernel coefficient for 'rbf', 'poly', and 'sigmoid'
         (2, 5)]         # degree: degree of the polynomial kernel (for 'poly' only)

# Perform Bayesian optimization (50 objective evaluations, fixed seed)
result = gp_minimize(func=svm_accuracy, dimensions=space, n_calls=50, random_state=42)

# Extract the best hyperparameters from the optimization results
best_C, best_gamma, best_degree = result.x
best_degree = int(best_degree)  # Ensure the degree is an integer

print("Best Hyperparameters:")
print("C =", best_C)
print("Gamma =", best_gamma)
print("Degree =", best_degree)

# Train the SVM model with the best hyperparameters and evaluate on the test set.
# Bug fix: the original called `svm(...)`, which is not a callable estimator
# class; the model is now built with `SVC`. The kernel is derived from the
# degree with the same rule the objective function uses.
best_kernel = 'rbf' if best_degree == 2 else 'poly' if best_degree > 2 else 'linear'
best_svm_model = SVC(C=best_C, kernel=best_kernel,
                     gamma=best_gamma, degree=best_degree, random_state=42)
# Labels are flattened to 1-D; np.ravel is a no-op if they already are.
best_svm_model.fit(X_train, np.ravel(ytrain))
y_pred = best_svm_model.predict(X_test)
test_accuracy = accuracy_score(np.ravel(ytest), y_pred)
print("Test Accuracy with Best Model:", test_accuracy)

3. Genetic Algorithm

In [None]:
import random
from deap import base, creator, tools, algorithms


# Create a function to evaluate the fitness of an individual (set of hyperparameters)
def evaluate_svm(individual):
    """Fitness of one GA individual: validation accuracy of the SVC it encodes.

    Parameters
    ----------
    individual : sequence of length 3
        ``[C, kernel, degree]``: regularization parameter, kernel name
        ('linear', 'poly', 'rbf' or 'sigmoid') and polynomial degree.

    Returns
    -------
    tuple of one float
        ``(accuracy,)``; DEAP expects fitness values as a tuple.

    Notes
    -----
    Fixes relative to the original cell:
      * the model is built with ``SVC`` (the original called ``svm(...)``);
      * the undefined name ``y_test`` is no longer referenced;
      * individuals are scored on the validation split (``X_val``/``yval``),
        keeping the test split for the final evaluation only;
      * an individual whose genes are not valid SVC arguments (e.g. after a
        mutation operator produced C == 0 or an unknown kernel string) gets
        the worst fitness instead of crashing the whole run.
    """
    C = individual[0]  # Regularization parameter
    kernel = individual[1]  # Kernel type ('linear', 'poly', 'rbf', 'sigmoid')
    degree = individual[2]  # Degree of the polynomial kernel (for 'poly' only)

    # SVC requires a strictly positive C, a known kernel and a non-negative
    # degree; penalise anything else with zero accuracy.
    valid_kernels = ('linear', 'poly', 'rbf', 'sigmoid')
    if kernel not in valid_kernels or not C > 0 or degree < 0:
        return (0.0,)

    svm_model = SVC(C=C, kernel=kernel, degree=int(degree), random_state=42)
    # np.ravel / np.asarray accept both the DataFrame and the ndarray form of
    # the splits.
    svm_model.fit(X_train, np.ravel(ytrain))
    y_pred = svm_model.predict(np.asarray(X_val))
    accuracy = accuracy_score(np.ravel(yval), y_pred)

    return (accuracy,)

# Genetic algorithm setup

# Seed Python's RNG so the initial population, crossover and mutation draws
# are reproducible.
random.seed(42)

# creator.create defines classes on the `creator` module; guarding the calls
# lets this cell be re-run without DEAP warning that a class is overwritten.
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Define the hyperparameter search space
# Here, we are tuning the 'C' (regularization parameter),
# 'kernel' (type of kernel), and 'degree' (for polynomial kernel) hyperparameters.
toolbox.register("attr_C", random.uniform, 0.1, 100)
toolbox.register("attr_kernel", random.choice, ['linear', 'poly', 'rbf', 'sigmoid'])
toolbox.register("attr_degree", random.randint, 2, 5)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_C, toolbox.attr_kernel, toolbox.attr_degree), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)


def mutate_hyperparameters(individual, indpb):
    """Mutate an individual by resampling genes from their own generators.

    Each of the three genes (C, kernel, degree) is independently replaced,
    with probability ``indpb``, by a fresh draw from the generator that
    created it, so a mutated gene always stays inside its search space.

    Returns the mutated individual in a one-element tuple, as DEAP mutation
    operators do.
    """
    gene_samplers = (toolbox.attr_C, toolbox.attr_kernel, toolbox.attr_degree)
    for position, sample_gene in enumerate(gene_samplers):
        if random.random() < indpb:
            individual[position] = sample_gene()
    return (individual,)


toolbox.register("evaluate", evaluate_svm)
# Two-point crossover swaps the same index range in both parents, so every
# gene stays at its own position.
toolbox.register("mate", tools.cxTwoPoint)
# Bug fix: the original used tools.mutFlipBit, which applies `not` to a gene
# and casts the result back to the gene's type. For these genes that yields
# C == 0.0, kernel == 'False' or degree in {0, 1}, i.e. invalid or
# out-of-range hyperparameters.
toolbox.register("mutate", mutate_hyperparameters, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

def main():
    """Run the genetic search, then train and test the best SVC found.

    Evolves a population of ``[C, kernel, degree]`` individuals for a fixed
    number of generations using the operators registered on ``toolbox``,
    prints the best hyperparameters, refits an SVC with them on the training
    split and prints its accuracy on the test split.

    Fixes relative to the original: the undefined names ``y_train`` and
    ``y_test`` are replaced by the notebook's ``ytrain`` and ``ytest``, and
    only offspring whose fitness was invalidated by crossover or mutation are
    re-evaluated (unchanged clones keep the fitness they already have).
    """
    pop_size = 10
    num_generations = 5
    cx_prob = 0.5
    mut_prob = 0.2

    population = toolbox.population(n=pop_size)

    print("Starting Genetic Algorithm...")
    for gen in range(num_generations):
        # varAnd clones the population and applies crossover and mutation.
        offspring = algorithms.varAnd(population, toolbox, cxpb=cx_prob, mutpb=mut_prob)

        # In the first generation nothing has a fitness yet, so every
        # offspring is evaluated; afterwards only the modified ones are.
        unevaluated = [ind for ind in offspring if not ind.fitness.valid]
        fits = toolbox.map(toolbox.evaluate, unevaluated)
        for fit, ind in zip(fits, unevaluated):
            ind.fitness.values = fit

        population = toolbox.select(offspring, k=len(population))

    best_individual = tools.selBest(population, k=1)[0]
    best_C, best_kernel, best_degree = best_individual
    print("Best Hyperparameters:")
    print("C =", best_C)
    print("Kernel =", best_kernel)
    print("Degree =", best_degree)

    best_model = SVC(C=best_C, kernel=best_kernel, degree=best_degree, random_state=42)
    # Labels are flattened to 1-D; np.ravel is a no-op if they already are.
    best_model.fit(X_train, np.ravel(ytrain))
    y_pred = best_model.predict(X_test)
    test_accuracy = accuracy_score(np.ravel(ytest), y_pred)
    print("Test Accuracy with Best Model:", test_accuracy)

# Run the genetic search when this code executes as the main module.
if __name__ == "__main__":
    main()

4. Particle Swarm Optimization

In [None]:
import pyswarms as ps

# Define the function to optimize (fitness function)
def _svm_validation_error(C, gamma, degree):
    """Fit one SVC on the training split and return 1 - validation accuracy."""
    # Kernel is encoded by the degree: 2 -> 'rbf', >2 -> 'poly', else 'linear'.
    kernel = 'rbf' if degree == 2 else 'poly' if degree > 2 else 'linear'
    model = SVC(C=C, kernel=kernel, gamma=gamma, degree=degree, random_state=42)
    # np.ravel / np.asarray accept both the DataFrame and the ndarray form of
    # the splits.
    model.fit(X_train, np.ravel(ytrain))
    y_pred = model.predict(np.asarray(X_val))
    return 1.0 - accuracy_score(np.ravel(yval), y_pred)


def svm_accuracy(params):
    """Objective for particle swarm optimisation: validation error of an SVC.

    Parameters
    ----------
    params : array-like
        Either one parameter vector ``[C, gamma, degree]`` or a swarm matrix
        of shape ``(n_particles, 3)`` with one such vector per row.
        ``degree`` is truncated to an int.

    Returns
    -------
    float or numpy.ndarray
        ``1 - accuracy`` on the validation split: a float for a single
        vector, or an array of shape ``(n_particles,)`` for a swarm.

    Notes
    -----
    Fixes relative to the original cell:
      * pyswarms calls the objective with the whole swarm and expects one
        cost per particle; the original read ``params[0]`` as C, which for a
        swarm is an entire particle row;
      * the model is built with ``SVC`` (the original called ``svm(...)``);
      * candidates are scored on the validation split rather than the test
        split, which stays reserved for the final evaluation.
    """
    positions = np.asarray(params, dtype=float)
    single_vector = positions.ndim == 1

    costs = np.array([
        _svm_validation_error(C=float(row[0]), gamma=float(row[1]), degree=int(row[2]))
        for row in np.atleast_2d(positions)
    ])

    # Minimize 1 - accuracy to maximize accuracy
    return float(costs[0]) if single_vector else costs

# Seed NumPy's global RNG, which pyswarms uses to initialise and move the
# swarm, so the search is reproducible.
np.random.seed(42)

# Set up the hyperparameter search space for PSO optimization.
# pyswarms documents `bounds` as a tuple of NumPy arrays (min, max).
lower_bound = np.array([0.1, 0.01, 2])  # Lower bounds for C, gamma, degree
upper_bound = np.array([100.0, 10.0, 5])  # Upper bounds for C, gamma, degree
bounds = (lower_bound, upper_bound)

# Set up the PSO optimizer
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
optimizer = ps.single.GlobalBestPSO(n_particles=10, dimensions=3, options=options, bounds=bounds)


def swarm_cost(swarm):
    """Evaluate `svm_accuracy` once per particle.

    pyswarms passes the whole swarm as an array of shape
    (n_particles, dimensions) and expects an array with one cost per
    particle, so each row is scored separately.
    """
    return np.array([svm_accuracy(particle) for particle in swarm])


# Perform PSO optimization
best_cost, best_params = optimizer.optimize(swarm_cost, iters=50)

# Extract the best hyperparameters from the optimization results
best_C, best_gamma, best_degree = best_params
best_degree = int(best_degree)  # Ensure the degree is an integer

print("Best Hyperparameters:")
print("C =", best_C)
print("Gamma =", best_gamma)
print("Degree =", best_degree)

# Train the SVM model with the best hyperparameters and evaluate on the test set.
# Bug fixes: the original called `svm(...)`, which is not a callable estimator
# class, and referenced the undefined names `y_train` / `y_test`; the
# notebook's label variables are `ytrain` / `ytest`.
best_kernel = 'rbf' if best_degree == 2 else 'poly' if best_degree > 2 else 'linear'
best_svm_model = SVC(C=best_C, kernel=best_kernel,
                     gamma=best_gamma, degree=best_degree, random_state=42)
# Labels are flattened to 1-D; np.ravel is a no-op if they already are.
best_svm_model.fit(X_train, np.ravel(ytrain))
y_pred = best_svm_model.predict(X_test)
test_accuracy = accuracy_score(np.ravel(ytest), y_pred)
print("Test Accuracy with Best Model:", test_accuracy)