#### Noisy Quantum SVM - New Try

In [31]:
import qiskit, qiskit_aer, qiskit_machine_learning
print("Qiskit:", qiskit.__version__)
print("Aer:", qiskit_aer.__version__)
print("QML:", qiskit_machine_learning.__version__)

Qiskit: 1.4.4
Aer: 0.17.2
QML: 0.8.4


In [32]:
# To ensure reproducibility of results
from qiskit_machine_learning.utils import algorithm_globals
algorithm_globals.random_seed = 12345

In [33]:
# Imports
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import chi2_contingency

In [34]:
# Qiskit and Qiskit Aer import
from qiskit_aer import AerSimulator
from qiskit_aer.noise import NoiseModel, depolarizing_error

from qiskit_aer.primitives import SamplerV2 as AerSampler
from qiskit.circuit.library import ZZFeatureMap

# Qiskit machine learning
from qiskit_machine_learning.state_fidelities import ComputeUncompute
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC

# Getting realistic noise model
from qiskit_ibm_runtime.fake_provider import FakeManilaV2
from qiskit_ibm_runtime import QiskitRuntimeService

# For transpilation
from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager


In [35]:
# Load data first
lung_cancer_column_names = ['label'] + [f'attr_{i}' for i in range(1, 57)]
file_path_lung = r'C:\Users\User\Documents\MyProjects\FYP_ResearchProject\data\lung+cancer\lung-cancer.data'

# reads the data, treating "?" as missing values
df_lung = pd.read_csv(file_path_lung, header=None, names=lung_cancer_column_names, na_values=['?'])

print(f"Original shape of Lung Cancer data: {df_lung.shape}")

Original shape of Lung Cancer data: (32, 57)


In [36]:
# Mode imputation for missing values
modes = df_lung.mode().iloc[0]
df_lung.fillna(modes, inplace=True)

# Then check if all Nan are gone
print(f"Total missing values after imputation: {df_lung.isnull().sum().sum()}\n")

Total missing values after imputation: 0



In [37]:
# Target Binarization
df_lung['label_binary'] = df_lung['label'].apply(lambda x: 0 if x == 1 else 1)

In [38]:
# Separate Features & Target and Split Data
X_lung = df_lung.drop(['label', 'label_binary'], axis=1)
y_lung_binary = df_lung['label_binary']

In [39]:
# Data splitting 
X_train_lc, X_test_lc, y_train_lc, y_test_lc = train_test_split(
    X_lung, y_lung_binary, test_size=0.3, random_state=42, stratify=y_lung_binary
)

In [40]:
# One-Hot Encoding
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
X_train_lc_encoded = pd.DataFrame(encoder.fit_transform(X_train_lc),
columns=encoder.get_feature_names_out())
X_test_lc_encoded = pd.DataFrame(encoder.transform(X_test_lc),
columns=encoder.get_feature_names_out())

In [41]:
# Feature Selection - Cramer's V
def cramers_v(x, y):
    confusion_matrix = pd.crosstab(x, y)
    chi2 = chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    phi2corr = max(0, phi2 - ((k-1)*(r-1))/(n-1))
    rcorr = r - ((r-1)**2)/(n-1)
    kcorr = k - ((k-1)**2)/(n-1)
    if min((kcorr-1), (rcorr-1)) == 0: return 0
    return np.sqrt(phi2corr / min((kcorr-1), (rcorr-1)))

cramers_scores = {col: cramers_v(X_train_lc_encoded[col], y_train_lc) for col in X_train_lc_encoded.columns}
cramers_series = pd.Series(cramers_scores).sort_values(ascending=False)

N_FEATURES_TO_SELECT = 10 
top_features = cramers_series.head(N_FEATURES_TO_SELECT).index.tolist()

X_train_lc_final = X_train_lc_encoded[top_features]
X_test_lc_final = X_test_lc_encoded[top_features]

print("--- Data Preprocessing Complete ---")
print(f"Final training data shape: {X_train_lc_final.shape}")
print(f"Final testing data shape: {X_test_lc_final.shape}\n")


--- Data Preprocessing Complete ---
Final training data shape: (22, 10)
Final testing data shape: (10, 10)



##### Setup Noise Model 

In [42]:
# Depolarizing Error
noise_model = NoiseModel()
error_prob = 0.2  # Higher single-qubit error
depol_error_1q = depolarizing_error(error_prob, 1)
depol_error_2q = depolarizing_error(error_prob, 2)  # Add 2-qubit noise
noise_model.add_all_qubit_quantum_error(depol_error_1q, ['u1', 'u2', 'u3', 'rx', 'ry', 'rz', 'id'])
noise_model.add_all_qubit_quantum_error(depol_error_2q, ['cx', 'cz'])  # For entangling gates in ZZFeatureMap

In [43]:
# Create Noise Sampler
noise_sampler = AerSampler(
    options={
        "backend_options": {
            "noise_model": noise_model
        }
    }
)

##### Quantum Kernel Implementation

In [None]:
# Using Base AerSimulator
aer_backend = AerSimulator()

# Creating a TRANSPILATION MANAGER
# This ensures all circuits are converted into the native gates (basis gates)
# the simulator and noise model use.
# optimization_level=1 is will be a good starting point for balancing speed and circuit quality.
pm = generate_preset_pass_manager(optimization_level=1, backend=aer_backend)

# setup feature map
feature_dim = N_FEATURES_TO_SELECT
fm = ZZFeatureMap(feature_dimension=feature_dim, reps=4, entanglement='linear')

# Reinstantiate the ComputeUncompute and then passing the pass_manager
fidelity = ComputeUncompute(sampler=noise_sampler, pass_manager=pm)
qkernel = FidelityQuantumKernel(fidelity=fidelity, feature_map=fm)
qsvc_noisy = QSVC(quantum_kernel=qkernel, class_weight='balanced'))

In [45]:
# PLEASE LA DAPATTTTT 
print ("Calculating training kernel matrix ...")
start_time = time.time()
qsvc_noisy.fit(X_train_lc_final, y_train_lc)
end_time = time.time()
print(f"QSVC training finished in {end_time - start_time:.2f} seconds.")

Calculating training kernel matrix ...
QSVC training finished in 151.09 seconds.


In [46]:
y_pred_noisy = qsvc_noisy.predict(X_test_lc_final)
accuracy_noisy = accuracy_score(y_test_lc, y_pred_noisy)

print("\n--- Noisy QSVC Evaluation Results ---")
print(f"Accuracy Score: {accuracy_noisy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_lc, y_pred_noisy))


--- Noisy QSVC Evaluation Results ---
Accuracy Score: 0.7000

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.70      1.00      0.82         7

    accuracy                           0.70        10
   macro avg       0.35      0.50      0.41        10
weighted avg       0.49      0.70      0.58        10



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
