In [8]:
# --- Step 1: Core Libraries and Imports ---
import pandas as pd
import numpy as np
import time

# Scikit-Learn for ML tasks
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Qiskit Libraries
from qiskit_machine_learning.utils import algorithm_globals
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_aer.noise import NoiseModel, ReadoutError, depolarizing_error

# For Qiskit Aer Simulation
from qiskit import QuantumCircuit, transpile
from qiskit_aer import AerSimulator
from qiskit_aer.noise import NoiseModel 
from qiskit.visualization import plot_histogram
from qiskit_aer.noise import (
    NoiseModel,
    QuantumError,
    ReadoutError,
    depolarizing_error,
    pauli_error,
    thermal_relaxation_error,
)

# Realistic Backend Simulation
from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager
from qiskit_ibm_runtime.fake_provider import FakeKyoto 
from qiskit_ibm_runtime import SamplerV2 as Sampler

# The required V2-style Fidelity object
from qiskit_machine_learning.state_fidelities import ComputeUncompute

# Set a seed for reproducibility
algorithm_globals.random_seed = 42

print("Libraries imported successfully.")


# --- Step 2: Data Loading and Preprocessing ---
print("\n--- Loading and Preprocessing Spambase Data ---")
# [This section is identical to the previous script to ensure a fair comparison]
# For completeness, the code is included below:
spambase_columns = [
    "word_freq_make", "word_freq_address", "word_freq_all", "word_freq_3d", "word_freq_our", "word_freq_over", "word_freq_remove", "word_freq_internet", "word_freq_order", "word_freq_mail", "word_freq_receive", "word_freq_will", "word_freq_people", "word_freq_report", "word_freq_addresses", "word_freq_free", "word_freq_business", "word_freq_email", "word_freq_you", "word_freq_credit", "word_freq_your", "word_freq_font", "word_freq_000", "word_freq_money", "word_freq_hp", "word_freq_hpl", "word_freq_george", "word_freq_650", "word_freq_lab", "word_freq_labs", "word_freq_telnet", "word_freq_857", "word_freq_data", "word_freq_415", "word_freq_85", "word_freq_technology", "word_freq_1999", "word_freq_parts", "word_freq_pm", "word_freq_direct", "word_freq_cs", "word_freq_meeting", "word_freq_original", "word_freq_project", "word_freq_re", "word_freq_edu", "word_freq_table", "word_freq_conference", "char_freq_;", "char_freq_(", "char_freq_[", "char_freq_!", "char_freq_$", "char_freq_#", "capital_run_length_average", "capital_run_length_longest", "capital_run_length_total", "label"
]
file_path = r'C:\\Users\\User\\Documents\\MyProjects\\FYP_ResearchProject\\data\\spambase\\spambase.data'
df = pd.read_csv(file_path, header=None, names=spambase_columns)
df.drop_duplicates(inplace=True)
X = df.drop('label', axis=1)
y = df['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
n_components = 4
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
train_subset_size = 200
test_subset_size = 100
X_train_subset = X_train_pca[:train_subset_size]
y_train_subset = y_train[:train_subset_size]
X_test_subset = X_test_pca[:test_subset_size]
y_test_subset = y_test[:test_subset_size]
print(f"Data preprocessed. Using subsets of size {train_subset_size} (train) and {test_subset_size} (test).")


# --- Step 3: Main Experiment Loop for Sensitivity Analysis ---
# Define the error rates to test (e.g., 0.1%, 0.5%, 1%)
error_rates = [0.001, 0.005, 0.01]
results = {}

# Define the feature map (remains the same for all runs)
fm = ZZFeatureMap(feature_dimension=n_components, reps=2, entanglement='linear')

# Loop through each error rate
for rate in error_rates:
    print(f"\n{'='*50}")
    print(f"--- RUNNING SIMULATION WITH BASE ERROR RATE: {rate*100:.3f}% ---")
    print(f"{'='*50}")
    
    # --- Start: Inline Custom Noise Model Creation ---
    # Define error probabilities for this specific iteration.
    # It's common for 2-qubit gates to be an order of magnitude noisier.
    p_gate1q = rate
    p_gate2q = rate * 10
    p_readout = rate

    # Create the error objects based on the current rate
    error_1q = depolarizing_error(p_gate1q, 1)
    error_2q = depolarizing_error(p_gate2q, 2)
    error_readout = ReadoutError([[1 - p_readout, p_readout], [p_readout, 1 - p_readout]])

    # Build a new NoiseModel for this iteration
    noise_model = NoiseModel()
    noise_model.add_all_qubit_quantum_error(error_1q, ['sx', 'h', 'u1', 'u2', 'u3'])
    noise_model.add_all_qubit_quantum_error(error_2q, ['cx'])
    noise_model.add_all_qubit_readout_error(error_readout)
    
    basis_gates = noise_model.basis_gates
    # --- End: Inline Custom Noise Model Creation ---

    # Define a simple coupling map for transpilation. A linear chain is a good choice for a generic model.
    coupling_map = [[i, i + 1] for i in range(n_components - 1)]
    
    # Create the fully configured, transpiling noisy sampler
    backend = AerSimulator(
        noise_model=noise_model,
        coupling_map=coupling_map,
        basis_gates=basis_gates,
    )

    noisy_sampler = Sampler(mode=backend, options=None)
    # Create the fidelity object and the kernel
    fidelity = ComputeUncompute(sampler=noisy_sampler)
    noisy_qkernel = FidelityQuantumKernel(feature_map=fm, fidelity=fidelity)
    
    # Compute the noisy kernel matrices
    print("Calculating noisy kernel matrices...")
    start_time = time.time()
    matrix_train_noisy = noisy_qkernel.evaluate(x_vec=X_train_subset)
    matrix_test_noisy = noisy_qkernel.evaluate(x_vec=X_test_subset, y_vec=X_train_subset)
    print(f"  -> Done in {time.time() - start_time:.2f}s")
    
    # Train and evaluate the SVM
    qsvm_noisy = SVC(kernel='precomputed')
    param_grid = {'C': [0.1, 1, 10, 100]}
    grid_search = GridSearchCV(qsvm_noisy, param_grid, cv=5, verbose=0)
    grid_search.fit(matrix_train_noisy, y_train_subset)
    
    best_qsvm = grid_search.best_estimator_
    y_test_pred = best_qsvm.predict(matrix_test_noisy)
    test_accuracy = accuracy_score(y_test_subset, y_test_pred)
    
    # Store results
    results[rate] = {'test_accuracy': test_accuracy}
    print(f"--- > Test Accuracy at {rate*100:.3f}% base error: {test_accuracy:.4f}")


# --- Step 4: Final Summary of Results ---
print("\n\n--- FINAL SUMMARY: QSVM Performance vs. Custom Noise Level ---")
print("-" * 40)
print(f"{'Base Error Rate (%)':<25} | {'Test Accuracy'}")
print("-" * 40)

for rate, data in results.items():
    error_percent = f"{rate * 100:.3f}"
    accuracy_str = f"{data['test_accuracy']:.4f}"
    print(f"{error_percent:<25} | {accuracy_str}")

print("-" * 40)

Libraries imported successfully.

--- Loading and Preprocessing Spambase Data ---
Data preprocessed. Using subsets of size 200 (train) and 100 (test).

--- RUNNING SIMULATION WITH BASE ERROR RATE: 0.100% ---
Calculating noisy kernel matrices...


AlgorithmError: 'Sampler job failed!'