In [1]:
!pip install pyswarms

Collecting pyswarms
  Downloading pyswarms-1.3.0-py2.py3-none-any.whl.metadata (33 kB)
Downloading pyswarms-1.3.0-py2.py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyswarms
Successfully installed pyswarms-1.3.0


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import numpy as np
import pyswarms as ps
from sklearn.model_selection import cross_val_score

In [4]:
# Read the input CSV into a DataFrame; the 'Test Results' column is used as
# the label further down.
data_path = '/content/pca_transformed_data_25.csv'
df = pd.read_csv(data_path)

In [5]:
# Stratified 70/15/15 split on the 'Test Results' label, done in two stages.
# Stage 1: hold out 30% of the rows; the remaining 70% is the training set.
df_train, df_temp = train_test_split(
    df,
    test_size=0.3,
    stratify=df['Test Results'],
    random_state=42,
)

# Stage 2: cut the 30% holdout in half -> 15% validation, 15% test.
df_val, df_test = train_test_split(
    df_temp,
    test_size=0.5,
    stratify=df_temp['Test Results'],
    random_state=42,
)

In [6]:
# Report the row counts of the full frame and of each split.
n_total, n_train, n_val, n_test = (
    len(frame) for frame in (df, df_train, df_val, df_test)
)
print(f"Original dataframe size: {n_total}")
print(f"Training set size: {n_train}")
print(f"Validation set size: {n_val}")
print(f"Testing set size: {n_test}")

Original dataframe size: 50000
Training set size: 35000
Validation set size: 7500
Testing set size: 7500


In [7]:
# Draw a fixed-seed random subset of the training split, capped at 10,000 rows
# (or the whole split if it is smaller). This subset feeds the cross-validated
# hyperparameter search below.
sample_size = min(len(df_train), 10000)
df_train_sample = df_train.sample(
    n=sample_size,
    random_state=42,
)

In [8]:
# Features for the tuning subset: every column except the label, with boolean
# values mapped to 0/1.
bool_to_int = {False: 0, True: 1}
X_train_sample = df_train_sample.drop(columns='Test Results').replace(bool_to_int)
# Target for the tuning subset: the 'Test Results' label column.
y_train_sample = df_train_sample['Test Results']

In [9]:
# PSO search box in log10 space: element 0 is log10(C), element 1 is log10(gamma).
log10_lower = np.array([-1, -4])  # C = 0.1, gamma = 1e-4
log10_upper = np.array([2, 0])    # C = 100, gamma = 1
bounds = (log10_lower, log10_upper)

In [10]:
def fitness_function(params):
    """PSO objective: cross-validated error of an RBF SVM for every particle.

    Parameters
    ----------
    params : array-like of shape (n_particles, 2)
        One row per particle; element 0 is log10(C) and element 1 is
        log10(gamma).

    Returns
    -------
    numpy.ndarray of shape (n_particles,)
        ``1 - mean 3-fold CV accuracy`` for each particle (lower is better,
        because the optimizer minimizes).

    Notes
    -----
    Reads the notebook-level ``X_train_sample`` and ``y_train_sample``.
    """
    def cv_error(particle):
        # Particles live in log10 space; map back to the actual hyperparameters.
        svm = SVC(C=10 ** particle[0], gamma=10 ** particle[1], kernel='rbf')
        # 3-fold CV for speed. The folds are independent, so n_jobs=-1 fits
        # them in parallel on all available cores without changing the scores.
        mean_accuracy = cross_val_score(
            svm,
            X_train_sample,
            y_train_sample,
            cv=3,
            scoring='accuracy',
            n_jobs=-1,
        ).mean()
        return 1 - mean_accuracy  # minimize inverted accuracy

    return np.array([cv_error(particle) for particle in params])

In [11]:
# PSO setup
# pyswarms draws the initial swarm and its per-step random factors from NumPy's
# global RNG, so seed it here to make the search repeatable across runs.
np.random.seed(42)
# c1: cognitive (personal-best) weight, c2: social (global-best) weight, w: inertia.
options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
# 15 particles searching the 2-D (log10 C, log10 gamma) box given by `bounds`.
optimizer = ps.single.GlobalBestPSO(n_particles=15, dimensions=2, options=options, bounds=bounds)

In [12]:
# Run the swarm for 20 iterations. The optimizer returns the lowest cost found
# (1 - CV accuracy) and the position (log10 C, log10 gamma) that produced it.
n_iterations = 20
best_cost, best_pos = optimizer.optimize(fitness_function, iters=n_iterations)

2025-05-25 18:41:41,879 - pyswarms.single.global_best - INFO - Optimize for 20 iters with {'c1': 0.5, 'c2': 0.3, 'w': 0.9}
pyswarms.single.global_best: 100%|██████████|20/20, best_cost=0.181
2025-05-25 19:24:16,466 - pyswarms.single.global_best - INFO - Optimization finished | best cost: 0.18090045357265183, best pos: [ 1.61813778 -1.35547008]


In [13]:
# Features for the full training split: every column except the label, with
# boolean values mapped to 0/1.
bool_to_int = {False: 0, True: 1}
X_train = df_train.drop(columns='Test Results').replace(bool_to_int)
# Target for the full training split: the 'Test Results' label column.
y_train = df_train['Test Results']

In [14]:
# Convert the best PSO position from log10 space back to the actual SVM
# hyperparameters used for the final model.
log10_C, log10_gamma = best_pos[0], best_pos[1]
final_C = 10 ** log10_C
final_gamma = 10 ** log10_gamma

In [15]:
# Fit an RBF-kernel SVM with the PSO-selected C and gamma on the full training split.
svm_params = {'C': final_C, 'gamma': final_gamma, 'kernel': 'rbf'}
final_svm = SVC(**svm_params)
final_svm.fit(X_train, y_train)

In [16]:
# Status message confirming that the fit in the previous cell completed.
done_message = "Final SVM model trained on full training data."
print(done_message)

Final SVM model trained on full training data.


In [17]:
# Build feature matrices and label vectors for the validation and test splits,
# using the same preparation as the training data (drop label, booleans -> 0/1).
label_col = 'Test Results'
bool_to_int = {False: 0, True: 1}

X_val = df_val.drop(columns=label_col).replace(bool_to_int)
y_val = df_val[label_col]

X_test = df_test.drop(columns=label_col).replace(bool_to_int)
y_test = df_test[label_col]

In [18]:
# Accuracy of the final model on the validation split.
y_pred = final_svm.predict(X_val)
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f"Validation Accuracy: {accuracy}")

Validation Accuracy: 0.8233333333333334


In [19]:
# Accuracy of the final model on the held-out test split.
y_pred = final_svm.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.8226666666666667


In [20]:
# Accuracy of the final model on the data it was fitted on, for comparison
# with the validation and test scores.
y_pred = final_svm.predict(X_train)
accuracy = accuracy_score(y_true=y_train, y_pred=y_pred)
print(f"Training Accuracy: {accuracy}")

Training Accuracy: 0.8402
