### Inspiration:

https://scikit-learn.org/1.5/auto_examples/classification/plot_classifier_comparison.html#sphx-glr-auto-examples-classification-plot-classifier-comparison-py

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [2]:
def load_data(feature_size: int):
    X = np.load('../Datasets/kryptonite-%s-X.npy'%(feature_size))
    y = np.load('../Datasets/kryptonite-%s-y.npy'%(feature_size))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    scaler = StandardScaler().fit(X_train)

    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, [int(y) for y in y_train], X_test, [int(y) for y in y_test]

### Kryptonite-9

In [3]:
feature_size = 9
baseline_accuracy = 0.95

In [4]:
X_train, y_train, X_test, y_test = load_data(feature_size)

In [5]:
clf = RandomForestClassifier(min_samples_leaf=10).fit(X_train, y_train)

In [6]:
neigh = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

In [7]:
svm = SVC(gamma=2).fit(X_train, y_train)

In [8]:
print("Random Forest: ", clf.score(X_test, y_test))
print("KNN: ", neigh.score(X_test, y_test))
print("SVM: ", svm.score(X_test, y_test))

Random Forest:  0.5113888888888889
KNN:  0.9591666666666666
SVM:  0.9591666666666666


### Kryptonite-12

In [9]:
feature_size = 12
baseline_accuracy = 0.925

In [10]:
X_train, y_train, X_test, y_test = load_data(feature_size)

In [11]:
clf = RandomForestClassifier(min_samples_leaf=10).fit(X_train, y_train)

In [12]:
neigh = KNeighborsClassifier(n_neighbors=3, p=7, weights="distance").fit(X_train, y_train)

In [13]:
svm = SVC(gamma=2).fit(X_train, y_train)

In [14]:
print("Random Forest: ", clf.score(X_test, y_test))
print("KNN: ", neigh.score(X_test, y_test))
print("SVM: ", svm.score(X_test, y_test))

Random Forest:  0.501875
KNN:  0.943125
SVM:  0.948125


### Kryptonite-15

In [15]:
feature_size = 15
baseline_accuracy = 0.9
X_train, y_train, X_test, y_test = load_data(feature_size)

In [16]:
clf = RandomForestClassifier(min_samples_leaf=10).fit(X_train, y_train)

In [17]:
neigh = KNeighborsClassifier(n_neighbors=3, p=7, weights="distance").fit(X_train, y_train)

In [None]:
params = {
    'C': [1, 2, 5, 10],               # Regularization parameter
    'gamma': ['scale'],     # Kernel coefficient ('auto' is redundant since 'scale' generally works better with RBF)
    'tol': [1e-3, 1e-4],              # Tolerance for stopping criteria
    'shrinking': [True, False],       # Shrinking heuristic may impact performance
    'cache_size': [200, 500],         # Kernel cache size, larger may be better for larger datasets
}

n_classifier = SVC()

grid = GridSearchCV(n_classifier, params, cv=5, n_jobs=-2, verbose=2)
grid.fit(X_train, y_train)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [74]:
print("Random Forest: ", clf.score(X_test, y_test))
print("KNN: ", neigh.score(X_test, y_test))
print("SVM: ", grid.score(X_test, y_test))

Random Forest:  0.49516666666666664
KNN:  0.6128333333333333
SVM:  0.7223333333333334


Fitting 5 folds for each of 8 candidates, totalling 40 fits


KeyboardInterrupt: 