In [1]:
# --- Step 1: Core Libraries and Imports ---
import pandas as pd
import numpy as np
import time

# Scikit-Learn for ML tasks
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Qiskit Libraries
from qiskit_machine_learning.utils import algorithm_globals
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_aer.noise import NoiseModel

# The correct V2-style Sampler for local, noisy simulation that handles transpilation
from qiskit_aer.primitives import Sampler

# The required V2-style Fidelity object
from qiskit_machine_learning.state_fidelities import ComputeUncompute

# The correct, modern import path for fake backends
from qiskit_ibm_runtime.fake_provider import FakeManilaV2

# Set a seed for reproducibility
algorithm_globals.random_seed = 42

print("Libraries imported successfully.")


# --- Step 2: Data Loading and Preprocessing ---
print("\n--- Loading and Preprocessing Spambase Data ---")
# [Your full data loading, cleaning, splitting, scaling, and PCA code goes here]
# For completeness, the code is included below:
spambase_columns = [
    "word_freq_make", "word_freq_address", "word_freq_all", "word_freq_3d", "word_freq_our", "word_freq_over", "word_freq_remove", "word_freq_internet", "word_freq_order", "word_freq_mail", "word_freq_receive", "word_freq_will", "word_freq_people", "word_freq_report", "word_freq_addresses", "word_freq_free", "word_freq_business", "word_freq_email", "word_freq_you", "word_freq_credit", "word_freq_your", "word_freq_font", "word_freq_000", "word_freq_money", "word_freq_hp", "word_freq_hpl", "word_freq_george", "word_freq_650", "word_freq_lab", "word_freq_labs", "word_freq_telnet", "word_freq_857", "word_freq_data", "word_freq_415", "word_freq_85", "word_freq_technology", "word_freq_1999", "word_freq_parts", "word_freq_pm", "word_freq_direct", "word_freq_cs", "word_freq_meeting", "word_freq_original", "word_freq_project", "word_freq_re", "word_freq_edu", "word_freq_table", "word_freq_conference", "char_freq_;", "char_freq_(", "char_freq_[", "char_freq_!", "char_freq_$", "char_freq_#", "capital_run_length_average", "capital_run_length_longest", "capital_run_length_total", "label"
]
file_path = r'C:\\Users\\User\\Documents\\MyProjects\\AI_Projects\\quantum-svm-generalization-study\\data\\spambase\\spambase.data'
df = pd.read_csv(file_path, header=None, names=spambase_columns)
df.drop_duplicates(inplace=True)
X = df.drop('label', axis=1)
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
n_components = 4
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
train_subset_size = 200
test_subset_size = 100
X_train_subset = X_train_pca[:train_subset_size]
y_train_subset = y_train.iloc[:train_subset_size]
X_test_subset = X_test_pca[:test_subset_size]
y_test_subset = y_test.iloc[:test_subset_size]
print(f"Data preprocessed. Using subsets of size {train_subset_size} (train) and {test_subset_size} (test).")


# --- Step 3: Configure the Realistic Backend Properties ---
print("\n--- Configuring Realistic Backend Properties from FakeManilaV2 ---")
backend_device = FakeManilaV2()
noise_model = NoiseModel.from_backend(backend_device)
coupling_map = backend_device.configuration().coupling_map
basis_gates = noise_model.basis_gates
print("Backend properties configured successfully.")


# --- Step 4: Create the Fully Configured Noisy Sampler and Kernel ---
fm = ZZFeatureMap(feature_dimension=n_components, reps=2, entanglement='linear')

print("\n--- Creating high-performance, transpiling noisy sampler ---")
noisy_sampler = Sampler(
    backend_options={'noise_model': noise_model},
    transpile_options={'coupling_map': coupling_map, 'basis_gates': basis_gates}
)
print("Sampler instance created successfully.")

# --- THE CRITICAL FIX IS HERE ---
# We must pass the `noisy_sampler` OBJECT (the instance), not the `Sampler` CLASS.
fidelity = ComputeUncompute(sampler=noisy_sampler)

# Create the kernel using the fidelity object
noisy_qkernel = FidelityQuantumKernel(feature_map=fm, fidelity=fidelity)
print("Noisy quantum kernel created successfully.")


# --- Step 5: Compute the Noisy Kernel Matrices and Evaluate ---
print("\n--- Computing Noisy Quantum Kernel Matrices (This may take a while) ---")
print("Calculating training kernel matrix...")
start_time = time.time()
matrix_train_noisy = noisy_qkernel.evaluate(x_vec=X_train_subset)
print(f"  -> Training matrix computed in {time.time() - start_time:.2f} seconds.")

print("Calculating testing kernel matrix...")
start_time = time.time()
matrix_test_noisy = noisy_qkernel.evaluate(x_vec=X_test_subset, y_vec=X_train_subset)
print(f"  -> Testing matrix computed in {time.time() - start_time:.2f} seconds.")


# --- Step 6: Train and Evaluate the Final QSVM Model ---
print("\n--- Training and Evaluating QSVM on Noisy Kernel ---")
qsvm_noisy = SVC(kernel='precomputed')
param_grid = {'C': [0.1, 1, 10, 100, 1000]}
grid_search = GridSearchCV(qsvm_noisy, param_grid, cv=5, verbose=0)
grid_search.fit(matrix_train_noisy, y_train_subset)
best_qsvm = grid_search.best_estimator_
print(f"Best hyperparameter found: C = {grid_search.best_params_['C']}")

y_test_pred = best_qsvm.predict(matrix_test_noisy)
test_accuracy = accuracy_score(y_test_subset, y_test_pred)

print("\n--- Final Performance Report (QSVM with Realistic Noise from FakeManilaV2) ---")
print(f"Test Accuracy:     {test_accuracy:.4f}")
print("\nClassification Report (Test Set):")
print(classification_report(y_test_subset, y_test_pred, zero_division=0))

Libraries imported successfully.

--- Loading and Preprocessing Spambase Data ---
Data preprocessed. Using subsets of size 200 (train) and 100 (test).

--- Configuring Realistic Backend Properties from FakeManilaV2 ---
Backend properties configured successfully.

--- Creating high-performance, transpiling noisy sampler ---
Sampler instance created successfully.


ValueError: The sampler should be an instance of BaseSampler or BaseSamplerV2, but got <class 'qiskit_aer.primitives.sampler.Sampler'>