# Support Vector Machine approach

In [18]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch.nn as nn
import torch.optim as optim

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [55]:
# Read the Kryptonite-15 feature matrix and its label vector from disk.
features_path = "Datasets/kryptonite-15-X.npy"
labels_path = "Datasets/kryptonite-15-Y.npy"
X, y = np.load(features_path), np.load(labels_path)

# Report the dimensions of both arrays: features first, then labels.
for loaded_array in (X, y):
    print(f"Shape of the dataset: {loaded_array.shape}")



Shape of the dataset: (30000, 15)
Shape of the dataset: (30000,)


In [56]:
# Hold out 20% of the samples for testing. The split is stratified on the
# labels so both partitions keep the same class balance, and the seed is
# fixed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features: the scaling statistics are learned from the
# training partition only and then reused, unchanged, on the test partition.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## SVM on n=9

In [49]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

# --- Data for this section -------------------------------------------------
# This cell loads and prepares its own data instead of reusing the shared
# X_train / X_test globals. Those globals hold whichever dataset the load cell
# read last (kryptonite-15 as written), so on a fresh "Restart & Run All" this
# section would otherwise be trained and scored on the wrong dataset.
# NOTE(review): the file names assume the kryptonite-9 files follow the same
# naming scheme as the kryptonite-15 files loaded earlier — confirm they exist.
X_n9 = np.load("Datasets/kryptonite-9-X.npy")
y_n9 = np.load("Datasets/kryptonite-9-Y.npy")

# Same protocol as the shared split cell: stratified 80/20 split, then
# standardisation fitted on the training partition only.
# NOTE(review): the scaler is fitted before cross-validation, so each CV
# validation fold contributes to the scaling statistics; wrap scaler + SVC in
# a Pipeline if a strictly unbiased CV score is needed.
X_train_n9, X_test_n9, y_train_n9, y_test_n9 = train_test_split(
    X_n9, y_n9, test_size=0.2, random_state=42, stratify=y_n9
)
scaler_n9 = StandardScaler()
X_train_n9 = scaler_n9.fit_transform(X_train_n9)
X_test_n9 = scaler_n9.transform(X_test_n9)

# Define parameter grid for grid search (a single candidate, so the search
# effectively just cross-validates this one configuration)
param_grid = {
    'C': [10],
    'gamma': [1],
    'kernel': ['rbf']
}

# Perform grid search with 5-fold cross-validation
grid_search = GridSearchCV(SVC(class_weight='balanced', random_state=42), param_grid, cv=5, scoring='accuracy', verbose=2)
grid_search.fit(X_train_n9, y_train_n9)

# Print the best parameters and best cross-validation score
print("Best Parameters:", grid_search.best_params_)
print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")

# GridSearchCV refits the best configuration on the whole training set
# (refit=True is the default), so best_estimator_ is already trained and a
# second fit would only repeat that work.
best_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test_n9)

# Evaluate the best model
accuracy = accuracy_score(y_test_n9, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test_n9, y_pred))

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=   1.7s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=   1.6s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=   1.6s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=   1.5s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=   1.6s
Best Parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}
Best Cross-Validation Score: 0.9495
Test Accuracy: 0.9475
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95      1793
           1       0.95      0.95      0.95      1807

    accuracy                           0.95      3600
   macro avg       0.95      0.95      0.95      3600
weighted avg       0.95      0.95      0.95      3600



## SVM on n=12

In [52]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

# --- Data for this section -------------------------------------------------
# This cell loads and prepares its own data instead of reusing the shared
# X_train / X_test globals. Those globals hold whichever dataset the load cell
# read last (kryptonite-15 as written), so on a fresh "Restart & Run All" this
# section would otherwise be trained and scored on the wrong dataset.
# NOTE(review): the file names assume the kryptonite-12 files follow the same
# naming scheme as the kryptonite-15 files loaded earlier — confirm they exist.
X_n12 = np.load("Datasets/kryptonite-12-X.npy")
y_n12 = np.load("Datasets/kryptonite-12-Y.npy")

# Same protocol as the shared split cell: stratified 80/20 split, then
# standardisation fitted on the training partition only.
# NOTE(review): the scaler is fitted before cross-validation, so each CV
# validation fold contributes to the scaling statistics; wrap scaler + SVC in
# a Pipeline if a strictly unbiased CV score is needed.
X_train_n12, X_test_n12, y_train_n12, y_test_n12 = train_test_split(
    X_n12, y_n12, test_size=0.2, random_state=42, stratify=y_n12
)
scaler_n12 = StandardScaler()
X_train_n12 = scaler_n12.fit_transform(X_train_n12)
X_test_n12 = scaler_n12.transform(X_test_n12)

# Define parameter grid for grid search (a single candidate, so the search
# effectively just cross-validates this one configuration)
param_grid = {
    'C': [10],
    'gamma': [1],
    'kernel': ['rbf']
}

# Perform grid search with 5-fold cross-validation
grid_search = GridSearchCV(SVC(class_weight='balanced', random_state=42), param_grid, cv=5, scoring='accuracy', verbose=2)
grid_search.fit(X_train_n12, y_train_n12)

# Print the best parameters and best cross-validation score
print("Best Parameters:", grid_search.best_params_)
print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")

# GridSearchCV refits the best configuration on the whole training set
# (refit=True is the default), so best_estimator_ is already trained and a
# second fit would only repeat that work.
best_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test_n12)

# Evaluate the best model
accuracy = accuracy_score(y_test_n12, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test_n12, y_pred))

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=  17.5s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=  17.6s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=  17.4s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=  17.2s
[CV] END ..........................C=10, gamma=1, kernel=rbf; total time=  17.5s
Best Parameters: {'C': 10, 'gamma': 1, 'kernel': 'rbf'}
Best Cross-Validation Score: 0.9135
Test Accuracy: 0.9294
Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      2362
           1       0.93      0.93      0.93      2438

    accuracy                           0.93      4800
   macro avg       0.93      0.93      0.93      4800
weighted avg       0.93      0.93      0.93      4800



## SVM on n=15

In [60]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

# This section uses the shared X_train / X_test / y_train / y_test globals,
# which the load and split cells earlier in the notebook build from the
# kryptonite-15 files. Re-run those cells first if the globals may be stale.

# Define parameter grid for grid search (a single candidate, so the search
# effectively just cross-validates this one configuration)
param_grid = {
    'C': [500],
    'gamma': [1],
    'kernel': ['rbf']
}

# Perform grid search with 5-fold cross-validation
grid_search = GridSearchCV(SVC(class_weight='balanced', random_state=42), param_grid, cv=5, scoring='accuracy', verbose=2)
grid_search.fit(X_train, y_train)

# Print the best parameters and best cross-validation score
print("Best Parameters:", grid_search.best_params_)
print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")

# GridSearchCV refits the best configuration on the whole training set
# (refit=True is the default), so best_estimator_ is already trained and a
# second fit would only repeat that work.
best_model = grid_search.best_estimator_

# Predict on the test set
y_pred = best_model.predict(X_test)

# Evaluate the best model
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV] END .........................C=500, gamma=1, kernel=rbf; total time=  31.5s
[CV] END .........................C=500, gamma=1, kernel=rbf; total time=  30.8s
[CV] END .........................C=500, gamma=1, kernel=rbf; total time=  31.2s
[CV] END .........................C=500, gamma=1, kernel=rbf; total time=  31.1s
[CV] END .........................C=500, gamma=1, kernel=rbf; total time=  31.4s
Best Parameters: {'C': 500, 'gamma': 1, 'kernel': 'rbf'}
Best Cross-Validation Score: 0.5183
Test Accuracy: 0.5600
Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.55      0.55      2995
           1       0.56      0.57      0.57      3005

    accuracy                           0.56      6000
   macro avg       0.56      0.56      0.56      6000
weighted avg       0.56      0.56      0.56      6000



## SVM on n=18

## SVM on n=24

## SVM on n=30

## SVM on n=45