In [1]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import pickle
import numpy as np

In [15]:
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``.

    The file is opened in binary mode and closed again before returning.
    """
    # NOTE(review): pickle.load can execute arbitrary code while
    # deserializing; only point this at files produced by a trusted pipeline.
    with open(path, "rb") as f:
        return pickle.load(f)


# Train/test kernel matrices; the cells below hand these to
# SVC(kernel='precomputed').
h0_kernel_train = _load_pickle("h0_kernel_train.pkl")
h0_kernel_test = _load_pickle("h0_kernel_test.pkl")
h1_kernel_train = _load_pickle("h1_kernel_train.pkl")
h1_kernel_test = _load_pickle("h1_kernel_test.pkl")
h0_complete_kernel_train = _load_pickle("h0_complete_kernel_train.pkl")
h0_complete_kernel_test = _load_pickle("h0_complete_kernel_test.pkl")

# Labels used for fitting (y_train) and for the classification reports (y_test).
y_train = _load_pickle("train_y.pkl")
y_test = _load_pickle("test_y.pkl")

# Not referenced by the cells visible in this notebook section; the names
# suggest per-sample stable-rank features — TODO confirm before relying on them.
train_h0sr = _load_pickle("train_h0sr.pkl")
test_h0sr = _load_pickle("test_h0sr.pkl")
train_h1sr = _load_pickle("train_h1sr.pkl")
test_h1sr = _load_pickle("test_h1sr.pkl")
train_h0sr_complete = _load_pickle("train_h0sr_complete.pkl")
test_h0sr_complete = _load_pickle("test_h0sr_complete.pkl")

# Raw samples; the baseline cell flattens each one to build a linear kernel.
X_train = _load_pickle("X_train.pkl")
X_test = _load_pickle("X_test.pkl")



In [None]:
# Largest entry of each training kernel, one value per line (H0, H1, H0-complete).
# Presumably inspected to pick the scale factors defined in the next cell.
print(
    *(
        np.max(train_kernel)
        for train_kernel in (
            h0_kernel_train,
            h1_kernel_train,
            h0_complete_kernel_train,
        )
    ),
    sep="\n",
)

112344.04904499945
302.36971941189574
120967940.45243345


In [34]:
# Divisors applied to each kernel family before fitting. Only the H0-complete
# kernel is actually rescaled (its printed maximum above is ~1.2e8); the other
# two are divided by 1, i.e. left unchanged.
scale_h0, scale_h1, scale_h0_complete = 1, 1, 100_000

# Echo the chosen divisors, one per line.
print(scale_h0, scale_h1, scale_h0_complete, sep="\n")

1
1
100000


# H0 complete (clustering) stable rank

In [35]:
# Divide the train and test H0-complete kernels by the same constant so both
# stay on one scale (the original note describes this as a valid kernel
# transformation done for numerical stability).
h0_complete_kernel_train_scaled, h0_complete_kernel_test_scaled = (
    kernel / scale_h0_complete
    for kernel in (h0_complete_kernel_train, h0_complete_kernel_test)
)

# Train an SVM directly on the precomputed train kernel; fit() returns the estimator.
svc = SVC(kernel="precomputed").fit(h0_complete_kernel_train_scaled, y_train)

# Classify the test samples from the precomputed test kernel.
y_pred = svc.predict(h0_complete_kernel_test_scaled)

# Per-class precision / recall / F1 on the test labels.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.78      0.80      0.79       200
           1       0.79      0.78      0.79       200

    accuracy                           0.79       400
   macro avg       0.79      0.79      0.79       400
weighted avg       0.79      0.79      0.79       400



# H0 stable rank

In [36]:
# Apply the H0 divisor to both kernels (scale_h0 is 1 in the cell above, so
# the values are unchanged; the scaled names are reused by the composite cell).
h0_kernel_train_scaled, h0_kernel_test_scaled = (
    h0_kernel_train / scale_h0,
    h0_kernel_test / scale_h0,
)

# SVM on the precomputed H0 train kernel.
svc = SVC(kernel="precomputed")
svc.fit(h0_kernel_train_scaled, y_train)

# Predict from the precomputed test kernel and report per-class metrics.
y_pred = svc.predict(h0_kernel_test_scaled)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.69      0.81      0.75       200
           1       0.77      0.65      0.70       200

    accuracy                           0.72       400
   macro avg       0.73      0.73      0.72       400
weighted avg       0.73      0.72      0.72       400



# H1 stable rank

In [37]:
# Apply the H1 divisor to both kernels (scale_h1 is 1 in the cell above, so
# the values are unchanged; the scaled names are reused by the composite cell).
h1_kernel_train_scaled, h1_kernel_test_scaled = (
    kernel / scale_h1 for kernel in (h1_kernel_train, h1_kernel_test)
)

# SVM on the precomputed H1 train kernel; fit() returns the fitted estimator.
svc = SVC(kernel="precomputed").fit(h1_kernel_train_scaled, y_train)

# Predict from the precomputed test kernel and report per-class metrics.
y_pred = svc.predict(h1_kernel_test_scaled)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.75      0.74      0.75       200
           1       0.75      0.75      0.75       200

    accuracy                           0.75       400
   macro avg       0.75      0.75      0.75       400
weighted avg       0.75      0.75      0.75       400



# Triple Composite Kernel

In [44]:
# Weight given to the H1 kernel in the sum; the H0-complete and H0 kernels
# enter with weight 1. NOTE(review): the source gives no rationale for 1000 —
# presumably hand-tuned to offset H1's much smaller magnitude; confirm.
h1_weight = 1000

# Composite kernel = H0-complete + h1_weight * H1 + H0, built from the scaled
# kernels produced by the three cells above. The parentheses keep the original
# left-to-right summation order.
composite_kernel_train = (
    h0_complete_kernel_train_scaled + h1_weight * h1_kernel_train_scaled
) + h0_kernel_train_scaled
composite_kernel_test = (
    h0_complete_kernel_test_scaled + h1_weight * h1_kernel_test_scaled
) + h0_kernel_test_scaled

# SVM on the precomputed composite train kernel.
svc = SVC(kernel="precomputed").fit(composite_kernel_train, y_train)

# Predict from the composite test kernel and report per-class metrics.
y_pred = svc.predict(composite_kernel_test)
print(classification_report(y_true=y_test, y_pred=y_pred))


              precision    recall  f1-score   support

           0       0.83      0.78      0.80       200
           1       0.79      0.84      0.82       200

    accuracy                           0.81       400
   macro avg       0.81      0.81      0.81       400
weighted avg       0.81      0.81      0.81       400



# Baseline

In [None]:
# Linear-kernel baseline (not tuned).
#
# Flatten every sample to a 1-D vector and stack the vectors into a float32
# matrix with one row per sample.
# NOTE(review): the original comments gave the shape as 10000x257 for BOTH
# splits, but the printed reports show 400 test samples — verify the shapes.
X_train_data, X_test_data = (
    np.asarray(
        [np.asarray(sample.flatten()) for sample in split],
        dtype=np.float32,
    )
    for split in (X_train, X_test)
)

# Gram matrices of plain dot products: train-vs-train for fitting and
# test-vs-train for prediction.
baseline_train_kernel = np.matmul(X_train_data, X_train_data.T)
baseline_test_kernel = np.matmul(X_test_data, X_train_data.T)

In [11]:
# SVM on the precomputed linear (dot-product) train kernel; fit() returns the estimator.
svc = SVC(kernel="precomputed").fit(baseline_train_kernel, y_train)

# Predict from the test-vs-train kernel and report per-class metrics.
y_pred = svc.predict(baseline_test_kernel)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.74      0.82      0.78       200
           1       0.80      0.70      0.75       200

    accuracy                           0.76       400
   macro avg       0.77      0.76      0.76       400
weighted avg       0.77      0.76      0.76       400

