In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
    roc_curve,
    f1_score,
    precision_score,
    recall_score
)
from qiskit.circuit.library import zz_feature_map
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from Preprocessing import *

FEATURE_DIMENSION=6
REPS=2

kernel_matrix_train = np.load("kernel_matrix_train.npy")
kernel_matrix_test = np.load("kernel_matrix_test.npy")

# Data paths
TRAIN_FILE = '../Data/kernel_estimation_train.csv'
TEST_FILE = '../Data/kernel_estimation_test.csv'
VAL_FILE = '../Data/kernel_estimation_val.csv'

# Load training data
df_train = pd.read_csv(TRAIN_FILE)
# Load test data
df_test = pd.read_csv(TEST_FILE)
# Load validation data
df_val = pd.read_csv(VAL_FILE)

df_label_train = pd.read_csv(TRAIN_FILE)["ef_class"]
df_train = df_train[df_label_train.isin([2,3])]
df_label_test = pd.read_csv(TEST_FILE)["ef_class"]
df_test = df_test[df_label_test.isin([2,3])]
df_label_val = pd.read_csv(VAL_FILE)["ef_class"]
df_val = df_val[df_label_val.isin([2,3])]

X_train, y_train, X_test, y_test, X_val, y_val = Preprocess(df_train, df_test, df_val, balance = 'smote', classes = 'multiclass')


In [33]:
#Renormalize Data to [-1,1] to better represent angles
X_train = np.tanh(X_train)
X_test = np.tanh(X_train)
X_val = np.tanh(X_val)

#Data Encoding (zz)

feature_map = zz_feature_map(feature_dimension=FEATURE_DIMENSION, reps=REPS, entanglement="linear")
quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

In [34]:
#Find the Kernel
kernel_matrix_train = quantum_kernel.evaluate(X_train)
kernel_matrix_test = quantum_kernel.evaluate(X_test, X_test)

np.save("kernel_matrix_train.npy", kernel_matrix_train)
np.save("kernel_matrix_test.npy", kernel_matrix_test)

In [44]:
#SVC Classifier
svc = SVC(kernel='precomputed', probability=True)
svc.fit(kernel_matrix_train, y_train)

# Predict and probabilities
y_pred = svc.predict(X_test.to_numpy().T)
y_prob = svc.predict_proba(kernel_matrix_test)[:, 1]

# Metrics
acc = accuracy_score(y_test.to_numpy(), y_pred.T)
auc = roc_auc_score(y_test, y_prob)
cm = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred, pos_label=3)
precision = precision_score(y_test, y_pred, pos_label=3)
recall = recall_score(y_test, y_pred, pos_label=3)

# ROC curve data
fpr, tpr, thresholds = roc_curve(y_test, y_prob, pos_label=3)

print("Accuracy:", acc)
print("AUC:", auc)
print("Confusion Matrix:\n", cm)
print("F1:", f1)
print("Precision:", precision)
print("Recall:", recall)

ValueError: Found input variables with inconsistent numbers of samples: [21, 6]

In [42]:
y_pred.shape

(6,)

In [15]:
import seaborn as sns

# Critical Success Index (CSI)
tp = cm[1,1]
fn = cm[1,0]
fp = cm[0,1]
csi = tp / (tp + fn + fp)

# --- Plot Confusion Matrix ---
plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# --- Plot ROC Curve ---
fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=3)
plt.figure(figsize=(6,5))
plt.plot(fpr, tpr, label=f'AUC = {auc:.4f}', color='blue')
plt.plot([0,1], [0,1], linestyle='--', color='gray')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

# --- Plot Metrics Bar Chart ---
metrics = {'Accuracy': acc, 'F1 Score': f1, 'CSI': csi}
plt.figure(figsize=(6,4))
plt.bar(metrics.keys(), metrics.values(), color=['skyblue', 'orange', 'green'])
plt.ylim(0,1)
plt.title('Classification Metrics')
plt.show()

NameError: name 'cm' is not defined

In [19]:
y_test

0     2
1     2
2     2
3     2
4     3
5     2
6     2
7     2
8     2
9     2
10    3
11    2
12    2
13    2
14    2
15    2
16    2
17    2
18    3
19    3
20    3
Name: ef_class, dtype: int64