In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
    roc_curve,
    f1_score,
    precision_score,
    recall_score
)


kernel_matrix_train = np.load("kernel_matrix_train.npy")
kernel_matrix_test = np.load("kernel_matrix_test.npy")

# Data paths
TRAIN_FILE = '../Data/kernel_estimation_train.csv'
TEST_FILE = '../Data/kernel_estimation_test.csv'
VAL_FILE = '../Data/kernel_estimation_val.csv'

# Load training data
df_train = pd.read_csv(TRAIN_FILE)
# Load test data
df_test = pd.read_csv(TEST_FILE)
# Load validation data
df_val = pd.read_csv(VAL_FILE)

df_label_train = pd.read_csv(TRAIN_FILE)["ef_class"]
df_label_train = df_label_train[df_label_train.isin([2,3])]
df_label_test = pd.read_csv(TEST_FILE)["ef_class"]
df_label_test = df_label_test[df_label_test.isin([2,3])]
df_label_val = pd.read_csv(VAL_FILE)["ef_class"]
df_label_val = df_label_val[df_label_val.isin([2,3])]

df_train = df_train[df_train.columns[:-2]]
df_test = df_test[df_test.columns[:-2]]
df_val = df_val[df_val.columns[:-2]]
print(df_label_test.head())

df_label_train = (df_label_train.to_numpy() == 3).astype(int)
df_label_test = (df_label_test == 3).astype(int)
df_label_val = (df_label_val == 3).astype(int)

print(df_label_train)


0    2
1    2
2    2
3    2
4    3
Name: ef_class, dtype: int64
[0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 1 1 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 1 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0]


In [5]:
#SVC Classifier
svc = SVC(kernel='precomputed', probability=True)
svc.fit(kernel_matrix_train, df_label_train)
y_test = df_label_test
# Predict and probabilities
y_pred = svc.predict(kernel_matrix_test)
y_prob = svc.predict_proba(kernel_matrix_test)[:, 1]

# Metrics
acc = accuracy_score(y_test, y_pred)
auc = roc_auc_score(y_test, y_prob)
cm = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# ROC curve data
fpr, tpr, thresholds = roc_curve(y_test, y_prob)

print("Accuracy:", acc)
print("AUC:", auc)
print("Confusion Matrix:\n", cm)
print("F1:", f1)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.7619047619047619
AUC: 0.6625
Confusion Matrix:
 [[16  0]
 [ 5  0]]
F1: 0.0
Precision: 0.0
Recall: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [8]:
import numpy as np, pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import (accuracy_score, roc_auc_score, confusion_matrix,
                             f1_score, precision_score, recall_score, roc_curve)

# --- Load kernels (built in CSV row order) ---
K_train_full = np.load("kernel_matrix_train.npy")   # (n_train_all, n_train_all)
K_test_full  = np.load("kernel_matrix_test.npy")    # (n_test_all,  n_train_all)

# --- Load labels (keep your mapping: 2->0, 3->1) ---
TRAIN_FILE = '../Data/kernel_estimation_train.csv'
TEST_FILE  = '../Data/kernel_estimation_test.csv'
VAL_FILE   = '../Data/kernel_estimation_val.csv'

ef_train = pd.read_csv(TRAIN_FILE)["ef_class"]
ef_test  = pd.read_csv(TEST_FILE)["ef_class"]
ef_val   = pd.read_csv(VAL_FILE)["ef_class"]

mask_train = ef_train.isin([2,3]).values
mask_test  = ef_test.isin([2,3]).values
mask_val   = ef_val.isin([2,3]).values

y_train = (ef_train[mask_train].to_numpy() == 3).astype(int)
y_test  = (ef_test[mask_test].to_numpy()  == 3).astype(int)
y_val   = (ef_val[mask_val].to_numpy()   == 3).astype(int)

# --- Slice kernels with the SAME masks/order ---
K_train = K_train_full[np.ix_(mask_train, mask_train)]
K_test  = K_test_full[np.ix_(mask_test,  mask_train)]

# --- Sanity checks ---
assert K_train.shape[0] == K_train.shape[1] == y_train.shape[0]
assert K_test.shape[1] == K_train.shape[0]
assert K_test.shape[0] == y_test.shape[0]
print("Class balance (train):", np.bincount(y_train))
print("Symmetry (train):", np.allclose(K_train, K_train.T, atol=1e-8))
print("Diag≈1 before norm:", np.allclose(np.diag(K_train), 1.0, atol=1e-2))

# --- Normalize the Gram matrix to stabilize scale ---
d = np.sqrt(np.clip(np.diag(K_train), 1e-12, None))
K_train = K_train / (d[:, None] * d[None, :])
K_test  = K_test / d[None, :]  # column-normalize by train norms

# --- Tiny jitter to improve conditioning (optional) ---
K_train += 1e-10 * np.eye(K_train.shape[0])

# --- Train SVC ---
svc = SVC(kernel='precomputed', probability=True, class_weight='balanced', C=1.0, random_state=0)
svc.fit(K_train, y_train)

# --- Evaluate TRAIN too (to catch trivial collapse early) ---
p_train = svc.predict_proba(K_train)[:,1]; yhat_train = (p_train >= 0.5).astype(int)
print("TRAIN acc:", accuracy_score(y_train, yhat_train))

# --- Test ---
p_test = svc.predict_proba(K_test)[:,1]; yhat_test = (p_test >= 0.5).astype(int)

acc = accuracy_score(y_test, yhat_test)
auc = roc_auc_score(y_test, p_test)
cm  = confusion_matrix(y_test, yhat_test)
f1  = f1_score(y_test, yhat_test)
precision = precision_score(y_test, yhat_test)
recall    = recall_score(y_test, yhat_test)
fpr, tpr, thresholds = roc_curve(y_test, p_test)

print("TEST acc:", acc)
print("AUC:", auc)
print("Confusion Matrix:\n", cm)
print("F1:", f1, "Precision:", precision, "Recall:", recall)


Class balance (train): [50 19]
Symmetry (train): True
Diag≈1 before norm: True
TRAIN acc: 0.7246376811594203
TEST acc: 0.7619047619047619
AUC: 0.3375
Confusion Matrix:
 [[16  0]
 [ 5  0]]
F1: 0.0 Precision: 0.0 Recall: 0.0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
