### Inspiration:

https://scikit-learn.org/1.5/auto_examples/classification/plot_classifier_comparison.html#sphx-glr-auto-examples-classification-plot-classifier-comparison-py

In [43]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

import matplotlib.pyplot as plt

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures

In [70]:
def load_data(feature_size: int, random_state=None):
    """Load a Kryptonite dataset and turn it into train/test polynomial features.

    Reads ``../Datasets/kryptonite-<feature_size>-X.npy`` and the matching
    ``-y.npy`` file, holds out 20% of the rows for testing, standardizes the
    features with statistics from the training rows only, replaces every
    standardized value by its sign (+1.0 when >= 0, otherwise -1.0) and
    finally expands the result with degree-6 polynomial features.

    Args:
        feature_size: Number that selects which dataset files to load.
        random_state: Optional seed forwarded to ``train_test_split``. The
            default ``None`` keeps the split unseeded (different on every
            call); pass an int to get the same split on every run.

    Returns:
        A 5-tuple ``(X_train, y_train, X_test, y_test, n_columns)`` where the
        ``X_*`` entries are the expanded feature matrices, the ``y_*`` entries
        are lists of ``int`` labels and ``n_columns`` is ``X_test.shape[1]``.
    """
    X = np.load('../Datasets/kryptonite-%s-X.npy' % (feature_size))
    y = np.load('../Datasets/kryptonite-%s-y.npy' % (feature_size))

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_state
    )

    # Fit the scaler on the training rows only so that no test-set statistics
    # leak into the preprocessing.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # Keep only the sign of each standardized feature.
    X_train = np.where(X_train >= 0, np.float32(1.0), np.float32(-1.0))
    X_test = np.where(X_test >= 0, np.float32(1.0), np.float32(-1.0))

    # Fit the expansion once on the training matrix and reuse the same fitted
    # transformer for the test matrix, instead of re-fitting on test data.
    poly = PolynomialFeatures(degree=6).fit(X_train)
    X_train = poly.transform(X_train)
    X_test = poly.transform(X_test)

    train_labels = [int(label) for label in y_train]
    test_labels = [int(label) for label in y_test]

    return X_train, train_labels, X_test, test_labels, X_test.shape[1]

### Kryptonite-9

In [52]:
# presumably the accuracy to reach on this dataset; it is not read by any
# visible cell -- TODO confirm where it is meant to be used
baseline_accuracy = 0.95
# selects which kryptonite-<n> dataset files load_data() reads
feature_size = 9

In [53]:
# Bind the expanded column count to its own name: overwriting `feature_size`
# would make this cell fail on a second run, because load_data() would then be
# asked for a dataset that does not exist.
X_train, y_train, X_test, y_test, n_features = load_data(feature_size)

In [54]:
# Random forest whose leaves must each hold at least 10 training samples.
clf = RandomForestClassifier(min_samples_leaf=10)
clf = clf.fit(X_train, y_train)

In [55]:
# k-nearest-neighbours classifier voting over the 7 closest training points.
neigh = KNeighborsClassifier(n_neighbors=7)
neigh = neigh.fit(X_train, y_train)

In [56]:
# Support vector classifier with the kernel coefficient gamma fixed at 2.
svm = SVC(gamma=2)
svm = svm.fit(X_train, y_train)

In [57]:
# Report the held-out accuracy of each fitted model, one line per model.
for _label, _estimator in (
    ("Random Forest: ", clf),
    ("KNN: ", neigh),
    ("SVM: ", svm),
):
    print(_label, _estimator.score(X_test, y_test))

Random Forest:  0.9625
KNN:  0.9625
SVM:  0.9625


### Kryptonite-12

In [58]:
# presumably the accuracy to reach on this dataset; it is not read by any
# visible cell -- TODO confirm where it is meant to be used
baseline_accuracy = 0.925
# selects which kryptonite-<n> dataset files load_data() reads
feature_size = 12

In [59]:
# Bind the expanded column count to its own name: overwriting `feature_size`
# would make this cell fail on a second run, because load_data() would then be
# asked for a dataset that does not exist.
X_train, y_train, X_test, y_test, n_features = load_data(feature_size)

In [60]:
# Random forest whose leaves must each hold at least 10 training samples.
clf = RandomForestClassifier(
    min_samples_leaf=10,
).fit(X_train, y_train)

In [61]:
# 3-nearest-neighbours under the Minkowski metric with p=7; votes are weighted
# by inverse distance so closer neighbours count more.
neigh = KNeighborsClassifier(
    n_neighbors=3,
    p=7,
    weights="distance",
).fit(X_train, y_train)

In [62]:
# Support vector classifier with the kernel coefficient gamma fixed at 2.
svm = SVC(
    gamma=2,
).fit(X_train, y_train)

In [63]:
# Report the held-out accuracy of each fitted model, one line per model.
for _label, _estimator in (
    ("Random Forest: ", clf),
    ("KNN: ", neigh),
    ("SVM: ", svm),
):
    print(_label, _estimator.score(X_test, y_test))

Random Forest:  0.9627083333333334
KNN:  0.95
SVM:  0.9516666666666667


### Kryptonite-15

In [71]:
# selects which kryptonite-<n> dataset files load_data() reads
feature_size = 15
# presumably the accuracy to reach on this dataset; it is not read by any
# visible cell -- TODO confirm where it is meant to be used
baseline_accuracy = 0.9
# Keep `feature_size` as the raw dataset size and bind the expanded column
# count returned by load_data() to a separate name.
X_train, y_train, X_test, y_test, n_features = load_data(feature_size)

In [72]:
# Random forest whose leaves must each hold at least 10 training samples.
clf = RandomForestClassifier(min_samples_leaf=10)
clf = clf.fit(X_train, y_train)

In [73]:
# Held-out accuracy of the random forest fitted above.
rf_test_accuracy = clf.score(X_test, y_test)
print("Random Forest: ", rf_test_accuracy)

Random Forest:  0.9685


In [38]:
# 3-nearest-neighbours under the Minkowski metric with p=7; votes are weighted
# by inverse distance so closer neighbours count more.
neigh = KNeighborsClassifier(
    n_neighbors=3,
    p=7,
    weights="distance",
).fit(X_train, y_train)

In [41]:
# Support vector classifier with the kernel coefficient gamma fixed at 15.
svc = SVC(gamma=15)
svc = svc.fit(X_train, y_train)

In [None]:
# Earlier candidate grid, kept for reference (not used):
# params = {
#     'C': [1, 2, 5, 10],               # regularization parameter
#     'gamma': ['scale'],               # kernel coefficient
#     'tol': [1e-3, 1e-4],              # tolerance for the stopping criterion
#     'shrinking': [True, False],       # whether to use the shrinking heuristic
#     'cache_size': [200, 500],         # kernel cache size
# }

# Grid actually searched: every combination of the lists below is evaluated.
params = {
    'C': [2, 5],
    'gamma': [2, 7, 15, 20, 'scale'],
    'tol': [1e-3],
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'cache_size': [500],
}

# Base estimator whose hyper-parameters are being tuned.
n_classifier = SVC()

# 5-fold cross-validated exhaustive search, using all available cores and
# printing per-fit progress.
grid = GridSearchCV(
    n_classifier,
    params,
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

In [42]:
# Report the held-out accuracy of each fitted model, one line per model.
for _label, _estimator in (
    ("Random Forest: ", clf),
    ("KNN: ", neigh),
    ("SVM: ", svc),
):
    print(_label, _estimator.score(X_test, y_test))

Random Forest:  0.49033333333333334
KNN:  0.6305
SVM:  0.7228333333333333


### Kryptonite-18

In [None]:
# presumably the accuracy to reach on this dataset; it is not read by any
# visible cell -- TODO confirm where it is meant to be used
baseline_accuracy = 0.875
# selects which kryptonite-<n> dataset files load_data() reads
feature_size = 18

In [75]:
# Bind the expanded column count to its own name: overwriting `feature_size`
# would make this cell fail on a second run, because load_data() would then be
# asked for a dataset that does not exist.
X_train, y_train, X_test, y_test, n_features = load_data(feature_size)

In [76]:
# Random forest whose leaves must each hold at least 10 training samples.
clf = RandomForestClassifier(
    min_samples_leaf=10,
).fit(X_train, y_train)

In [77]:
# Held-out accuracy of the random forest fitted above.
rf_test_accuracy = clf.score(X_test, y_test)
print("Random Forest: ", rf_test_accuracy)

Random Forest:  0.9702777777777778
