In [6]:
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report
import pickle
import numpy as np

In [7]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing. Only point this at files produced by this project's own
    preprocessing, never at untrusted downloads.
    """
    with open(path, "rb") as f:
        return pickle.load(f)


# Precomputed kernel matrices; each is later fed to SVC(kernel='precomputed').
h0_kernel_train = _load_pickle("h0_kernel_train.pkl")
h0_kernel_test = _load_pickle("h0_kernel_test.pkl")
h1_kernel_train = _load_pickle("h1_kernel_train.pkl")
h1_kernel_test = _load_pickle("h1_kernel_test.pkl")
h0_complete_kernel_train = _load_pickle("h0_complete_kernel_train.pkl")
h0_complete_kernel_test = _load_pickle("h0_complete_kernel_test.pkl")

# Class labels for the train / test splits.
y_train = _load_pickle("train_y.pkl")
y_test = _load_pickle("test_y.pkl")

# Presumably the stable-rank signatures the kernels were built from — TODO confirm.
train_h0sr = _load_pickle("train_h0sr.pkl")
test_h0sr = _load_pickle("test_h0sr.pkl")
train_h1sr = _load_pickle("train_h1sr.pkl")
test_h1sr = _load_pickle("test_h1sr.pkl")
train_h0sr_complete = _load_pickle("train_h0sr_complete.pkl")
test_h0sr_complete = _load_pickle("test_h0sr_complete.pkl")

# Raw samples, used for the linear-kernel baseline.
X_train = _load_pickle("X_train.pkl")
X_test = _load_pickle("X_test.pkl")



In [8]:
# Show the largest entry of each training kernel, one value per line.
for train_kernel in (h0_kernel_train, h1_kernel_train, h0_complete_kernel_train):
    print(np.max(train_kernel))

112344.04904499945
302.36971941189574
120967940.45243345


In [9]:
# Divisors applied to each kernel before it is handed to the SVM.
# Only the H0-complete kernel is actually rescaled; the other two divide by 1.
scale_h0, scale_h1, scale_h0_complete = 1, 1, 100000

for scale in (scale_h0, scale_h1, scale_h0_complete):
    print(scale)

1
1
100000


# H0 complete (clustering) stable rank

In [10]:
# Rescale the H0-complete kernel before fitting. Dividing a kernel matrix by a
# positive constant keeps it positive semi-definite, so it remains a valid kernel.
h0_complete_kernel_test_scaled = h0_complete_kernel_test / scale_h0_complete
h0_complete_kernel_train_scaled = h0_complete_kernel_train / scale_h0_complete

# Train on the train-vs-train kernel, then classify using the test-vs-train kernel.
svc = SVC(kernel='precomputed').fit(h0_complete_kernel_train_scaled, y_train)
y_pred = svc.predict(h0_complete_kernel_test_scaled)

# Per-class precision / recall / F1 on the held-out labels.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.78      0.80      0.79       200
           1       0.79      0.78      0.79       200

    accuracy                           0.79       400
   macro avg       0.79      0.79      0.79       400
weighted avg       0.79      0.79      0.79       400



# H0 stable rank

In [11]:
# Apply the H0 divisor to both the train and the test kernel.
h0_kernel_test_scaled = h0_kernel_test / scale_h0
h0_kernel_train_scaled = h0_kernel_train / scale_h0

# Train on the train-vs-train kernel, then classify using the test-vs-train kernel.
svc = SVC(kernel='precomputed').fit(h0_kernel_train_scaled, y_train)
y_pred = svc.predict(h0_kernel_test_scaled)

# Per-class precision / recall / F1 on the held-out labels.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.69      0.81      0.75       200
           1       0.77      0.65      0.70       200

    accuracy                           0.72       400
   macro avg       0.73      0.73      0.72       400
weighted avg       0.73      0.72      0.72       400



# H1 stable rank

In [12]:
# Apply the H1 divisor to both the train and the test kernel.
h1_kernel_test_scaled = h1_kernel_test / scale_h1
h1_kernel_train_scaled = h1_kernel_train / scale_h1

# Train on the train-vs-train kernel, then classify using the test-vs-train kernel.
svc = SVC(kernel='precomputed').fit(h1_kernel_train_scaled, y_train)
y_pred = svc.predict(h1_kernel_test_scaled)

# Per-class precision / recall / F1 on the held-out labels.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.75      0.74      0.75       200
           1       0.75      0.75      0.75       200

    accuracy                           0.75       400
   macro avg       0.75      0.75      0.75       400
weighted avg       0.75      0.75      0.75       400



# Triple Composite Kernel

In [17]:
# Unweighted sum of the three scaled kernels (a sum of valid kernels is a valid kernel).
# The summation order is kept fixed so floating-point results do not change.
composite_kernel_train = (
    h0_complete_kernel_train_scaled
    + h1_kernel_train_scaled
    + h0_kernel_train_scaled
)
composite_kernel_test = (
    h0_complete_kernel_test_scaled
    + h1_kernel_test_scaled
    + h0_kernel_test_scaled
)

# Train on the composite train kernel, then classify using the composite test kernel.
svc = SVC(kernel='precomputed').fit(composite_kernel_train, y_train)
y_pred = svc.predict(composite_kernel_test)

# Per-class precision / recall / F1 on the held-out labels.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.83      0.78      0.80       200
           1       0.79      0.84      0.82       200

    accuracy                           0.81       400
   macro avg       0.81      0.81      0.81       400
weighted avg       0.81      0.81      0.81       400



# Baseline

In [14]:
# LINEAR KERNEL (NON OPTIMIZED BASELINE)
# Flatten every raw sample into a 1-D float32 vector and stack them row-wise.
# NOTE(review): the previous comments gave both arrays as 10000x257, but the
# classification reports show 400 test samples — confirm the real shapes.
X_train_data = np.asarray(
    [np.asarray(sample.flatten()) for sample in X_train],
    dtype=np.float32,
)
X_test_data = np.asarray(
    [np.asarray(sample.flatten()) for sample in X_test],
    dtype=np.float32,
)

# Gram matrices of the flattened samples: train-vs-train and test-vs-train.
baseline_train_kernel = np.matmul(X_train_data, X_train_data.T)
baseline_test_kernel = np.matmul(X_test_data, X_train_data.T)

In [15]:
# Fit SVC on the precomputed train kernel
svc = SVC(kernel='precomputed')
svc.fit(baseline_train_kernel, y_train)
# Predict using the precomputed test kernel
y_pred = svc.predict(baseline_test_kernel)
# Classification report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.74      0.82      0.78       200
           1       0.80      0.70      0.75       200

    accuracy                           0.76       400
   macro avg       0.77      0.76      0.76       400
weighted avg       0.77      0.76      0.76       400



In [11]:
# Short aliases consumed by the multi-kernel grid-search cell.
# The H1 kernel is multiplied by a fixed factor on both the train and the test side.
# NOTE(review): presumably this brings H1 (train max ~3e2) closer to the magnitude
# of the other kernels — confirm the intent behind the value.
H1_KERNEL_BOOST = 1000

K1 = h0_complete_kernel_train_scaled
K2 = H1_KERNEL_BOOST * h1_kernel_train_scaled
K3 = h0_kernel_train_scaled

K1_test = h0_complete_kernel_test_scaled
K2_test = H1_KERNEL_BOOST * h1_kernel_test_scaled
K3_test = h0_kernel_test_scaled

# Training labels; `y_test` is already in scope under its final name, so the
# former no-op self-assignment `y_test = y_test` has been dropped.
y = y_train


In [15]:
# Cross-validate (w1,w2,w3,C) for a precomputed-kernel SVM, then evaluate on test kernels.
# You must already have:
#   K1, K2, K3                 : (n_train, n_train) numpy arrays
#   K1_test, K2_test, K3_test  : (n_test, n_train) numpy arrays
#   y                          : (n_train,) labels
#   y_test                     : (n_test,) labels

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

class MultiKernelSVC(BaseEstimator, ClassifierMixin):
    """Precomputed-kernel SVC trained on a weighted sum of three kernels.

    The three training kernels are stored on the estimator. The ``X`` passed to
    ``fit`` / ``predict`` / ``decision_function`` is an array of integer sample
    indices into those kernels, not a feature matrix. Index-based splitters
    (e.g. ``GridSearchCV`` with a CV splitter) can therefore select the matching
    train/validation sub-blocks of the kernels.

    Parameters
    ----------
    K1, K2, K3 : 2-D arrays supporting ``K[np.ix_(rows, cols)]``
        Training kernels, indexed by training-sample position on both axes.
    weights : sequence of 3 floats, default=(1.0, 0.0, 0.0)
        Coefficients of K1, K2 and K3 in the combined kernel.
    C : float, default=1.0
        Regularization parameter forwarded to ``SVC``.
    normalize_weights : bool, default=True
        If true and the weights have a positive sum, divide them by that sum.
    random_state : int, default=0
        Forwarded to ``SVC``.

    Attributes
    ----------
    classes_ : ndarray
        Class labels seen during ``fit`` (copied from the inner ``SVC``).
    train_idx_ : ndarray of int
        Sample indices the model was fitted on.
    weights_ : ndarray of shape (3,)
        Weights actually used (after optional normalization).
    """

    def __init__(self, K1, K2, K3, weights=(1.0, 0.0, 0.0), C=1.0, normalize_weights=True, random_state=0):
        # Parameters are stored untouched, as scikit-learn's get_params/clone expect.
        self.K1 = K1
        self.K2 = K2
        self.K3 = K3
        self.weights = weights
        self.C = C
        self.normalize_weights = normalize_weights
        self.random_state = random_state

    def _norm_weights(self, w):
        """Return ``w`` as a float array of shape (3,), optionally scaled to sum to 1.

        Raises
        ------
        ValueError
            If ``w`` does not contain exactly three values.
        """
        w = np.asarray(w, float)
        if w.shape != (3,):
            raise ValueError(
                "weights must contain exactly 3 values, got shape %s" % (w.shape,)
            )
        if self.normalize_weights:
            s = w.sum()
            # A non-positive sum cannot be normalized meaningfully; leave as-is.
            if s > 0:
                w = w / s
        return w

    def _combine(self, rows, cols, w):
        """Weighted sum of the ``rows`` x ``cols`` sub-blocks of K1, K2 and K3."""
        return (w[0] * self.K1[np.ix_(rows, cols)]
              + w[1] * self.K2[np.ix_(rows, cols)]
              + w[2] * self.K3[np.ix_(rows, cols)])

    def _kernel_vs_train(self, X):
        """Combined kernel between the samples indexed by ``X`` and the fitted samples."""
        idx = np.asarray(X, int).ravel()
        return self._combine(idx, self.train_idx_, self.weights_)

    def fit(self, X, y):
        """Fit the inner SVC on the combined kernel restricted to the indices in ``X``.

        Parameters
        ----------
        X : array-like of int
            Sample indices into K1/K2/K3 to train on.
        y : array-like
            Labels aligned with ``X``.

        Returns
        -------
        self
        """
        idx = np.asarray(X, int).ravel()
        w = self._norm_weights(self.weights)
        K = self._combine(idx, idx, w)
        self._svc = SVC(C=self.C, kernel='precomputed', random_state=self.random_state)
        self._svc.fit(K, y)
        self.train_idx_ = idx
        self.weights_ = w
        # scikit-learn classifiers are expected to expose classes_ after fit;
        # scorers and other utilities may read it.
        self.classes_ = self._svc.classes_
        return self

    def predict(self, X):
        """Predict labels for the samples whose kernel-row indices are given in ``X``."""
        return self._svc.predict(self._kernel_vs_train(X))

    def decision_function(self, X):
        """Return SVC decision values for the samples indexed by ``X``."""
        return self._svc.decision_function(self._kernel_vs_train(X))

    def predict_from_test_kernels(self, K1_test, K2_test, K3_test):
        """Predict labels from explicit test-vs-train kernels.

        The columns of each test kernel must correspond to the samples the
        model was fitted on (``train_idx_``), in the same order. The fitted
        weights are applied to the three kernels before prediction.
        """
        w = self.weights_
        K = w[0]*K1_test + w[1]*K2_test + w[2]*K3_test
        return self._svc.predict(K)

# ---- CV setup ----
# The grid search is fed sample indices instead of features; MultiKernelSVC
# uses them to slice its stored kernel matrices.
n = len(K1)
indices = np.arange(0, n)

# Convex weight grid: every (w1, w2, w3) with entries on a STEP-spaced lattice
# in [0, 1] whose components sum to 1.
STEP = 0.25
vals = np.arange(0.0, 1.0 + 1e-12, STEP)

grid_weights = []
for w_first in vals:
    for w_second in vals:
        # The third weight is whatever remains to reach a total of 1.
        w_third = 1.0 - w_first - w_second
        # Discard pairs that already overshoot the simplex (beyond rounding noise).
        if w_third < -1e-9 or w_third > 1.0 + 1e-9:
            continue
        # Clamp tiny floating-point excursions back into [0, 1].
        w_third = float(max(0.0, min(1.0, w_third)))
        if abs((w_first + w_second + w_third) - 1.0) > 1e-9:
            continue
        grid_weights.append((float(w_first), float(w_second), float(w_third)))

# Search space: every weight triple from the grid crossed with three values of C.
param_grid = dict(weights=grid_weights, C=[0.1, 0.5, 1.0])

# Shuffled, stratified 5-fold split with a fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Template estimator; the grid search overrides `weights` and `C` per candidate.
est = MultiKernelSVC(K1=K1, K2=K2, K3=K3, normalize_weights=True, random_state=0)

gs = GridSearchCV(
    est,
    param_grid,
    scoring="accuracy",
    cv=cv,
    refit=True,    # refit the best candidate on all training indices
    n_jobs=-1,     # use all available cores
    verbose=10,    # high verbosity: logs every fold of every candidate
)

# ---- Fit & select ----
# `indices` stands in for the feature matrix; the estimator slices its kernels with it.
gs.fit(indices, y)
print("Best params:", gs.best_params_)
print("Best CV accuracy: %.4f" % gs.best_score_)

# ---- Test evaluation ----
# The refitted best model is scored on the explicit test-vs-train kernels.
best_model = gs.best_estimator_
y_test_pred = best_model.predict_from_test_kernels(K1_test, K2_test, K3_test)

test_accuracy = accuracy_score(y_test, y_test_pred)
print("\nTest accuracy: %.4f" % test_accuracy)
print("\nClassification report:\n", classification_report(y_test, y_test_pred), sep="\n")
print("Confusion matrix:", confusion_matrix(y_test, y_test_pred), sep="\n")


Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5; 1/45] START C=0.1, weights=(0.0, 0.0, 1.0).............................
[CV 2/5; 1/45] START C=0.1, weights=(0.0, 0.0, 1.0).............................
[CV 1/5; 2/45] START C=0.1, weights=(0.0, 0.25, 0.75)...........................
[CV 5/5; 1/45] START C=0.1, weights=(0.0, 0.0, 1.0).............................
[CV 3/5; 1/45] START C=0.1, weights=(0.0, 0.0, 1.0).............................
[CV 3/5; 2/45] START C=0.1, weights=(0.0, 0.25, 0.75)...........................
[CV 2/5; 2/45] START C=0.1, weights=(0.0, 0.25, 0.75)...........................
[CV 5/5; 1/45] END C=0.1, weights=(0.0, 0.0, 1.0);, score=0.704 total time=   0.5s
[CV 4/5; 1/45] START C=0.1, weights=(0.0, 0.0, 1.0).............................
[CV 1/5; 2/45] END C=0.1, weights=(0.0, 0.25, 0.75);, score=0.783 total time=   0.6s
[CV 4/5; 2/45] START C=0.1, weights=(0.0, 0.25, 0.75)...........................
[CV 5/5; 2/45] START C=0.1, weights=(0.0,