#### Quantum SVM Ideal Condition Impelementation

In [1]:
# Imports
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Qiskit Imports
from qiskit.circuit.library import zz_feature_map
from qiskit import QuantumCircuit
from qiskit_aer import AerSimulator
from qiskit_ibm_runtime import QiskitRuntimeService

# Since this model is in the ideal condition - we use the statevector
from qiskit.circuit import Parameter, ParameterVector
from qiskit.circuit.library import unitary_overlap

# Import StatevectorSampler as our sampler
from qiskit.quantum_info import Statevector
from qiskit.primitives import StatevectorSampler

# For Runtime Service
from qiskit_ibm_runtime import QiskitRuntimeService

# For Qiskit QSVC methods - But incompatible with 2.2.1 so need to downgrade
# from qiskit.primitives import StatevectorSampler as Sampler
# from qiskit_machine_learning.state_fidelities import ComputeUncompute
# from qiskit_machine_learning.kernels import FidelityQuantumKernel


In [8]:
# Checking Qiskit Version ONLY !!!
import qiskit
qiskit.version.get_version_info()
import qiskit_aer
print(qiskit.__version__)
print("Aer:", qiskit_aer.__version__)

2.2.1
Aer: 0.17.2


In [14]:
# --- Import Spambase Column Names ---
spambase_columns = [
    "word_freq_make",
    "word_freq_address",
    "word_freq_all",
    "word_freq_3d",
    "word_freq_our",
    "word_freq_over",
    "word_freq_remove",
    "word_freq_internet",
    "word_freq_order",
    "word_freq_mail",
    "word_freq_receive",
    "word_freq_will",
    "word_freq_people",
    "word_freq_report",
    "word_freq_addresses",
    "word_freq_free",
    "word_freq_business",
    "word_freq_email",
    "word_freq_you",
    "word_freq_credit",
    "word_freq_your",
    "word_freq_font",
    "word_freq_000",
    "word_freq_money",
    "word_freq_hp",
    "word_freq_hpl",
    "word_freq_george",
    "word_freq_650",
    "word_freq_lab",
    "word_freq_labs",
    "word_freq_telnet",
    "word_freq_857",
    "word_freq_data",
    "word_freq_415",
    "word_freq_85",
    "word_freq_technology",
    "word_freq_1999",
    "word_freq_parts",
    "word_freq_pm",
    "word_freq_direct",
    "word_freq_cs",
    "word_freq_meeting",
    "word_freq_original",
    "word_freq_project",
    "word_freq_re",
    "word_freq_edu",
    "word_freq_table",
    "word_freq_conference",
    "char_freq_;",
    "char_freq_(",
    "char_freq_[",
    "char_freq_!",
    "char_freq_$",
    "char_freq_#",
    "capital_run_length_average",
    "capital_run_length_longest",
    "capital_run_length_total",
    # finally the target label column:
    "label"
]

# --- 1. Load the Spambase Dataset ---
file_path = r'C:\Users\User\Documents\MyProjects\FYP_ResearchProject\data\spambase\spambase.data'
df = pd.read_csv(file_path, header=None, names=spambase_columns)
df.drop_duplicates(inplace=True)


In [19]:
# 3. Separate features and target
X = df.drop('label', axis=1) # Columns axis 1, Rows axis 2 - just additional info
y = df['label']

# Now got : 
# Features - X
# Target - y

In [23]:
# Data splitting
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# test_size - 0.3 means 30% as test set
# random_state - ensures the random shuffling is the same every time the code runs
# if its random, the result will be different and other people might ended up getting different results as well
# stratify=y - nsures fairness when comparing classical SVM vs QSVM, especially if dataset is imbalanced (like more spam than non-spam emails).
# Look at the labels in y, calculate the percentage of each class (like 80% Class A and 20% Class B), and make sure the new training set and testing set both keep that exact same 80/20 ratio.

In [26]:
# Scaling and PCA

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

n_components = 4
pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

"""
StandardScaler - as mentioned before, it normalizes the feature to mean 0, std 1
PCA - reduces dimensionality to 4 principal components, 4 also because to match with the 4 qubit feature map
PCA - also why choose change into 4 components is to match 4 qubits of ZZFeatureMap
Also (Binary Classification) is just the classification between two classes. It does nothing to amount of qubits

Data leakage - information from test sets sneaks into the training process, makes model look better because it looks like it seen some information

Additional Info:
Why only scale and PCA the features x and not labels y
- X is because they are numerical
- So need better format of features (same range so they dont dominate), and reduce number of features to match the number of qubits

- Y are class identifiers
- Not features 
- If scaled they it destroys their meaning

Summary :
Scale + PCA → features (X)
Do not touch → labels (y)

""" 

'\nStandardScaler - as mentioned before, it normalizes the feature to mean 0, std 1\nPCA - reduces dimensionality to 4 principal components, 4 also because to match with the 4 qubit feature map\nPCA - also why choose change into 4 components is to match 4 qubits of ZZFeatureMap\nAlso (Binary Classification) is just the classification between two classes. It does nothing to amount of qubits\n\nData leakage - information from test sets sneaks into the training process, makes model look better because it looks like it seen some information\n\nAdditional Info:\nWhy only scale and PCA the features x and not labels y\n- X is because they are numerical\n- So need better format of features (same range so they dont dominate), and reduce number of features to match the number of qubits\n\n- Y are class identifiers\n- Not features \n- If scaled they it destroys their meaning\n\nSummary :\nScale + PCA → features (X)\nDo not touch → labels (y)\n\n'

#### Quantum Kernel Definition and Computation

In [29]:
service = QiskitRuntimeService()  # loads your saved default account
print(service.backends())          # list available backends

IBMInputValueError: 'The instance specified (fypproj) is not a valid instance name.'

In [None]:
# For this, A manually constructed Quantum Kernel Computation is done.
# Use ZZFeatureMap
# Define quantum feature map
feature_map = zz_feature_map(feature_dimension=X_train_pca.shape[1], reps=2, entanglement='linear')

In [None]:
# Evaluate the problem using state vector primitives
sampler = StatevectorSampler()

In [None]:
# Preparing empty kernel matrices
# For storing multiple outputs

# retrieves the number of samples from training and testing features
num_train = X_train_pca.shape[0]
num_test = X_test_pca.shape[0]

# makes kernel matrices to make sure they have correct dimension
matrix_train_ideal = np.zeros((num_train, num_train)) # a square matrix of size ntrain x ntrain
matrix_test_ideal = np.zeros((num_test, num_train)) # n_test x n_train

# These matrix are then be used to store the calculated overlaps ?

# np.zeros - initializing with the value 0
# np.full(()) - initializing with the value Not a Number NaN

In [None]:
# Compute the Training Kernel Matrix
# Loop will continue computing the similarity between every pair of training points


# service = QiskitRuntimeService()
# backend = service.least_busy()

print("Calculating training kernel matrix...")
start_time_kernel_train = time.time()

num_shots = 1024

# Creates (xi, xj)
for i in range(num_train):
    for j in range(i, num_train):

        # Handles diagonal entries
        if i == j:
            matrix_train_ideal[i, j] = 1.0 # The overlap of a state with itself it 1
            continue 

        # make specific circuits for two data points
        circuit_i = feature_map.assign_parameters(X_train_pca[i])
        circuit_j = feature_map.assign_parameters(X_train_pca[j])

        # make the overlap circuit : U(x_i) * U(x_j)^dagger
        overlap_circuit = unitary_overlap(circuit_i, circuit_j)
        overlap_circuit.measure_all()

        # Runs the qiskit sampler primitive
        # num shots setted to 1024
        counts = (
            sampler.run([overlap_circuit], shots=1024)
            .result()[0]
            .data.meas.get_int_counts()
        )

        # The kernel entry is the probability of measuring the all-zero state
        kernel_value = counts.get(0, 0.0) / num_shots
        matrix_train_ideal[i, j] = kernel_value
        matrix_train_ideal[j, i] = kernel_value # The matrix is symmetric

end_time_kernel_train = time.time()
print(f"Training kernel matrix calculated in {end_time_kernel_train - start_time_kernel_train:.2f} seconds.\n")

Calculating training kernel matrix...


KeyboardInterrupt: 

In [None]:
# --- Compute the Test Kernel Matrix ---
# This loop computes the similarity between every test point and every training point.
print("Calculating test kernel matrix...")
start_time_kernel_test = time.time()

for i in range(num_test):
    for j in range(num_train):
        circuit_i = feature_map.assign_parameters(X_test_pca[i])
        circuit_j = feature_map.assign_parameters(X_train_pca[j])
        
        overlap_circuit = unitary_overlap(circuit_i, circuit_j)
        overlap_circuit.measure_all()
        
        job = sampler.run(overlap_circuit, shots=1024)
        result = job.result()
        counts = result.quasi_dists[0].get_probabilities()
        
        kernel_value = counts.get(0, 0.0)
        matrix_test_ideal[i, j] = kernel_value

end_time_kernel_test = time.time()
print(f"Test kernel matrix calculated in {end_time_kernel_test - start_time_kernel_test:.2f} seconds.\n")

In [None]:
print("--- Training QSVM with Manually Computed Ideal Kernel ---")
# This part is identical to before, as scikit-learn only needs the final matrix.
start_time_qsvm_train = time.time()

qsvm_ideal = SVC(kernel='precomputed')
param_grid_qsvm = {'C': [0.1, 1, 10, 100]}
grid_qsvm_ideal = GridSearchCV(qsvm_ideal, param_grid_qsvm, cv=5, verbose=0)
grid_qsvm_ideal.fit(matrix_train_ideal, y_train)

In [None]:
best_qsvm_ideal = grid_qsvm_ideal.best_estimator_
end_time_qsvm_train = time.time()
print(f"Best parameters for Ideal QSVM: {grid_qsvm_ideal.best_params_}")
print(f"Training time for Ideal QSVM: {end_time_qsvm_train - start_time_qsvm_train:.2f} seconds\n")

In [None]:
# --- Evaluation for Ideal QSVM ---
y_train_pred_qsvm = best_qsvm_ideal.predict(matrix_train_ideal)
y_test_pred_qsvm = best_qsvm_ideal.predict(matrix_test_ideal)

train_accuracy_qsvm = accuracy_score(y_train, y_train_pred_qsvm)
test_accuracy_qsvm = accuracy_score(y_test, y_test_pred_qsvm)
gen_gap_qsvm = abs(train_accuracy_qsvm - test_accuracy_qsvm)

print("--- Ideal QSVM Evaluation (Spambase) ---")
print(f"Training Accuracy: {train_accuracy_qsvm:.4f}")
print(f"Test Accuracy:     {test_accuracy_qsvm:.4f}")
print(f"Generalization Gap: {gen_gap_qsvm:.4f}")
print("\nClassification Report (Test Set):")
print(classification_report(y_test, y_test_pred_qsvm))