In [None]:
import qiskit; import numpy as np; import matplotlib.pyplot as plt; import random; from itertools import product; import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit import Aer
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit_machine_learning.algorithms import QSVC
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap, PauliFeatureMap
from sklearn.datasets import load_wine
from sklearn.pipeline import make_pipeline


from qiskit import QuantumCircuit as qc

from qoop.evolution.environment_synthesis import MetadataSynthesis
from qoop.evolution.generator import by_num_rotations,by_num_rotations_and_cnot
from qoop.evolution.environment import EEnvironment
from qoop.evolution.crossover import onepoint
from qoop.evolution.mutate import bitflip_mutate_with_normalizer, bitflip_mutate_with_normalizer_testing
from qoop.evolution.divider import by_num_cnot
from qoop.evolution.threshold import synthesis_threshold
from qoop.backend.constant import operations_with_rotations
from qoop.evolution.normalizer import by_num_cnot as normalizer_by_num_cnot
from qoop.evolution import normalizer
from qoop.evolution import divider
from qoop.backend.utilities import load_circuit


In [8]:
# Module-level state shared by the preprocessing / PCA helpers in this cell.
# The two *_data dicts are rebound by preprocess_data(); the two caches are
# keyed by the requested number of features.
global_credit_data, global_wine_data = {}, {}
pca_cache_credit, pca_cache_wine = {}, {}

# Preprocessing Function
def preprocess_data(credit_csv_path=r"/germancredit_data_updated.csv", credit_sample_frac=0.5):
    """Load, split and standardise the German-credit and Wine datasets.

    The results are published through the module-level ``global_credit_data``
    and ``global_wine_data`` dicts (keys: ``X_train``, ``X_test``,
    ``y_train``, ``y_test``, ``scaler``). Both PCA caches are emptied
    afterwards, because anything fitted on the previous training data no
    longer matches the freshly prepared splits.

    Args:
        credit_csv_path: Location of the German-credit CSV (anything
            ``pandas.read_csv`` accepts). The file must contain a ``Default``
            column, which is used as the label. Change this to wherever the
            dataset lives.
        credit_sample_frac: Fraction of the credit rows to keep; passed to
            ``DataFrame.sample(frac=...)`` with ``random_state=42``.

    Returns:
        None. All results are stored in the module-level dicts.
    """
    global global_credit_data, global_wine_data

    # --- German credit -----------------------------------------------------
    credit_df = pd.read_csv(credit_csv_path)
    # Sub-sample the rows (default: 50%) with a fixed seed.
    credit_df = credit_df.sample(frac=credit_sample_frac, random_state=42)

    Xc = credit_df.drop(columns=["Default"])
    yc = credit_df["Default"]
    Xcredit_train, Xcredit_test, yc_train, yc_test = train_test_split(
        Xc, yc, test_size=0.3, random_state=42
    )

    # The scaler is fitted on the training split only and then applied to test.
    scaler_credit = StandardScaler()
    credit_prepared = {
        "X_train": scaler_credit.fit_transform(Xcredit_train),
        "X_test": scaler_credit.transform(Xcredit_test),
        "y_train": yc_train,
        "y_test": yc_test,
        "scaler": scaler_credit,
    }

    # --- Wine ----------------------------------------------------------------
    wine = load_wine()
    # Drop class 2 so the task is binary (classes 0 and 1 only).
    binary_mask = wine.target != 2
    Xw = wine.data[binary_mask]
    yw = wine.target[binary_mask]
    Xw_train, Xw_test, yw_train, yw_test = train_test_split(
        Xw, yw, test_size=0.2, random_state=42
    )

    scaler_wine = StandardScaler()
    wine_prepared = {
        "X_train": scaler_wine.fit_transform(Xw_train),
        "X_test": scaler_wine.transform(Xw_test),
        "y_train": yw_train,
        "y_test": yw_test,
        "scaler": scaler_wine,
    }

    # Publish both datasets only after both were prepared successfully, then
    # drop every cached PCA: they were fitted on the previous training data.
    global_credit_data = credit_prepared
    global_wine_data = wine_prepared
    pca_cache_credit.clear()
    pca_cache_wine.clear()

# PCA Application
def apply_pca(dataset_name, num_feature):
    """Return a dataset's train/test split, PCA-reduced to ``num_feature`` columns.

    Args:
        dataset_name: ``"credit"`` or ``"wine"``; selects which module-level
            dataset dict and PCA cache are used.
        num_feature: Number of principal components to keep. When it is
            greater than or equal to the number of columns in the scaled
            training matrix, the scaled data is returned unchanged.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.

    Raises:
        ValueError: If ``dataset_name`` is neither ``"credit"`` nor ``"wine"``.
    """
    # Select dataset and its cache (read-only access, so no `global` needed).
    if dataset_name == "credit":
        data, pca_cache = global_credit_data, pca_cache_credit
    elif dataset_name == "wine":
        data, pca_cache = global_wine_data, pca_cache_wine
    else:
        raise ValueError("Invalid dataset name. Use 'credit' or 'wine'.")

    X_train, X_test = data["X_train"], data["X_test"]

    # No reduction needed when every column would be kept anyway.
    if num_feature >= X_train.shape[1]:
        return X_train, data["y_train"], X_test, data["y_test"]

    # Reuse the PCA fitted for this size, or fit a new one. The object is
    # cached only AFTER fit() succeeded, so a failing fit can never leave an
    # unfitted PCA behind for later calls to trip over.
    pca = pca_cache.get(num_feature)
    if pca is None:
        pca = PCA(n_components=num_feature)
        pca.fit(X_train)
        pca_cache[num_feature] = pca

    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)

    return X_train_pca, data["y_train"], X_test_pca, data["y_test"]

# Specific Dataset Generators
def generate_wine(num_feature: int):
    """Wine train/test split as produced by ``apply_pca`` for ``num_feature`` features."""
    wine_split = apply_pca("wine", num_feature)
    return wine_split

def generate_credit(num_feature: int):
    """German-credit train/test split reduced to ``num_feature`` PCA components.

    Delegates to ``apply_pca("credit", ...)``, mirroring ``generate_wine``.
    This keeps ``pca_cache_credit`` (initialised once at the top of this cell)
    holding a single kind of entry -- fitted PCA objects -- instead of mixing
    them with dicts of transformed arrays, and it inherits ``apply_pca``'s
    guard that returns the scaled data unchanged when ``num_feature`` is not
    smaller than the number of available columns.

    Args:
        num_feature: Number of principal components to keep.

    Returns:
        ``(X_train, y_train, X_test, y_test)``.
    """
    return apply_pca("credit", num_feature)


# Run preprocessing once so that global_credit_data / global_wine_data are
# populated before any generate_* helper is called.
preprocess_data()


In [9]:
def train_qsvm_with_dataset(quantum_circuit, X_train, y_train, X_test, y_test):
    """Fit a QSVC whose kernel uses ``quantum_circuit`` as feature map and score it.

    The kernel is evaluated on Aer's ``statevector_simulator`` backend. The
    classifier is trained on ``(X_train, y_train)`` and the function returns
    ``accuracy_score(y_test, predictions)`` for ``X_test``.
    """
    simulator = Aer.get_backend('statevector_simulator')
    kernel = QuantumKernel(feature_map=quantum_circuit, quantum_instance=simulator)

    classifier = QSVC(quantum_kernel=kernel)
    classifier.fit(X_train, y_train)

    predictions = classifier.predict(X_test)
    return accuracy_score(y_test, predictions)

In [10]:
def train_svm(feature_num: int, generate_dataset):
    """Classical baseline: accuracy of an RBF-kernel SVC on a generated dataset.

    ``generate_dataset(feature_num)`` must return
    ``(X_train, y_train, X_test, y_test)``. A ``StandardScaler`` + ``SVC``
    pipeline is fitted on the training part, and its ``score`` on the test
    part is returned.
    """
    train_X, train_y, test_X, test_y = generate_dataset(feature_num)

    # Scaler and classifier bundled into a single estimator
    baseline = make_pipeline(StandardScaler(), SVC(kernel='rbf'))
    baseline.fit(train_X, train_y)

    return baseline.score(test_X, test_y)

# Actual test run


In [None]:
def optimize_qsvm_wine(feature_range=range(4, 14), prob_mutate_range=None,
                       num_generation=30, num_circuit=8):
    """Search for QSVM feature-map circuits on the Wine data with a genetic algorithm.

    For every feature count in ``feature_range`` one evolution is run per
    mutation probability, and the best fitness seen across those runs is kept.

    Note: the fitness of a circuit is the accuracy that
    ``train_qsvm_with_dataset`` measures on the *test* split, so the reported
    numbers are selected on the same data they are evaluated on.

    Args:
        feature_range: Feature counts to try; each value is used as the number
            of PCA features, the number of qubits and the circuit depth.
        prob_mutate_range: Mutation probabilities to try. Defaults to
            ``np.linspace(0.01, 0.2, 10)``.
        num_generation: Number of generations given to ``MetadataSynthesis``.
        num_circuit: Number of circuits given to ``MetadataSynthesis``.

    Returns:
        list: Best fitness per feature count, in ``feature_range`` order.
        The running best is also printed after every evolution.
    """
    if prob_mutate_range is None:
        prob_mutate_range = np.linspace(0.01, 0.2, 10)
    # Materialise so the probabilities can be iterated once per feature count.
    prob_mutate_range = list(prob_mutate_range)

    best_accuracies_wine = []

    for num_features in feature_range:
        best_accuracy_wine = 0  # Track the best accuracy for Wine data

        # The dataset (and therefore the fitness function) depends only on the
        # feature count, so build both once instead of once per probability.
        Xw_train, yw_train, Xw_test, yw_test = generate_wine(num_features)

        def fitness_wine(qc):
            return train_qsvm_with_dataset(qc, Xw_train, yw_train, Xw_test, yw_test)

        for prob_mutate in prob_mutate_range:
            # Random split of `num_features` rotation gates into rx / ry / rz.
            num_cnot = num_features + 1
            num_rx = random.randint(0, num_features)
            num_ry = random.randint(0, num_features - num_rx)
            num_rz = num_features - (num_rx + num_ry)

            # Setup MetadataSynthesis for GA
            env_metadata = MetadataSynthesis(
                num_qubits=num_features,
                num_cnot=num_cnot,
                num_rx=num_rx,
                num_ry=num_ry,
                num_rz=num_rz,
                depth=num_features,  # same value as the former 4 + (num_features - 4) * 1
                num_circuit=num_circuit,
                num_generation=num_generation,
                prob_mutate=prob_mutate,
            )

            # Initialize environment for Wine data
            envw = EEnvironment(
                metadata=env_metadata,
                fitness_func=fitness_wine,
                generator_func=by_num_rotations_and_cnot,
                crossover_func=onepoint(
                    divider.by_num_rotation_gate(int(env_metadata.num_qubits / 2)),
                    normalizer.by_num_rotation_gate(env_metadata.num_qubits),
                ),
                mutate_func=bitflip_mutate_with_normalizer_testing(
                    pool=operations_with_rotations,
                    normalizer_func=normalizer.by_num_rotation_gate(env_metadata.num_qubits),
                    prob_mutate=prob_mutate,
                    num_qubits=num_features,
                ),
                threshold_func=synthesis_threshold,
            )

            # Evolve and record the best accuracy for Wine
            envw.evol(verbose=False, mode="parallel")
            best_accuracy_wine = max(best_accuracy_wine, envw.best_fitness)
            print(best_accuracy_wine)

        # Append the best accuracy for the current feature size
        best_accuracies_wine.append(best_accuracy_wine)

    # One entry per feature size
    return best_accuracies_wine


# Run the Wine search; the returned list (best accuracy per feature count) is
# the last expression of the cell.
optimize_qsvm_wine()

In [None]:
def optimize_qsvm_credit(feature_range=range(4, 20), prob_mutate_range=None,
                         num_generation=20, num_circuit=8):
    """Search for QSVM feature-map circuits on the German-credit data with a genetic algorithm.

    For every feature count in ``feature_range`` one evolution is run per
    mutation probability, and the best fitness seen across those runs is kept.
    Fitness values are memoised per feature count, keyed by ``str(circuit)``,
    so a circuit that reappears (also under another mutation probability) is
    not re-trained.

    Note: the fitness of a circuit is the accuracy that
    ``train_qsvm_with_dataset`` measures on the *test* split, so the reported
    numbers are selected on the same data they are evaluated on.

    Args:
        feature_range: Feature counts to try; each value is used as the number
            of PCA features, the number of qubits and the circuit depth.
        prob_mutate_range: Mutation probabilities to try. Defaults to
            ``np.linspace(0.01, 0.2, 5)``.
        num_generation: Number of generations given to ``MetadataSynthesis``.
        num_circuit: Number of circuits given to ``MetadataSynthesis``.

    Returns:
        list: Best fitness per feature count, in ``feature_range`` order.
    """
    if prob_mutate_range is None:
        prob_mutate_range = np.linspace(0.01, 0.2, 5)
    # Materialise so the probabilities can be iterated once per feature count.
    prob_mutate_range = list(prob_mutate_range)

    best_accuracies_credit = []

    for num_features in feature_range:
        best_accuracy_credit = 0  # Track the best accuracy for GermanCredit data

        # The dataset depends only on the feature count, so it is generated
        # once per size rather than once per mutation probability.
        Xc_train, yc_train, Xc_test, yc_test = generate_credit(num_features)

        # Cached fitness depends only on the circuit and this dataset, so the
        # cache stays valid across all mutation probabilities for this size.
        fitness_cache = {}

        def fitness_credit(qc):
            qc_str = str(qc)
            if qc_str not in fitness_cache:
                fitness_cache[qc_str] = train_qsvm_with_dataset(
                    qc, Xc_train, yc_train, Xc_test, yc_test
                )
            return fitness_cache[qc_str]

        for prob_mutate in prob_mutate_range:
            # Random split of `num_features` rotation gates into rx / ry / rz.
            num_cnot = num_features + 1
            num_rx = random.randint(0, num_features)
            num_ry = random.randint(0, num_features - num_rx)
            num_rz = num_features - (num_rx + num_ry)

            # Setup MetadataSynthesis for GA
            env_metadata = MetadataSynthesis(
                num_qubits=num_features,
                num_cnot=num_cnot,
                num_rx=num_rx,
                num_ry=num_ry,
                num_rz=num_rz,
                depth=num_features,  # same value as the former 4 + (num_features - 4) * 1; not capped
                num_circuit=num_circuit,
                num_generation=num_generation,
                prob_mutate=prob_mutate,
            )

            # Initialize environment for GermanCredit data
            envc = EEnvironment(
                metadata=env_metadata,
                fitness_func=fitness_credit,
                generator_func=by_num_rotations_and_cnot,
                crossover_func=onepoint(
                    divider.by_num_rotation_gate(int(env_metadata.num_qubits / 2)),
                    normalizer.by_num_rotation_gate(env_metadata.num_qubits),
                ),
                mutate_func=bitflip_mutate_with_normalizer_testing(
                    pool=operations_with_rotations,
                    normalizer_func=normalizer.by_num_rotation_gate(env_metadata.num_qubits),
                    prob_mutate=prob_mutate,
                    num_qubits=num_features,
                ),
                threshold_func=synthesis_threshold,
            )

            envc.evol(verbose=False, mode="noparallel")
            best_accuracy_credit = max(best_accuracy_credit, envc.best_fitness)

        # Append the best accuracy for the current feature size
        best_accuracies_credit.append(best_accuracy_credit)

    # One entry per feature size
    return best_accuracies_credit

# Run the GermanCredit search; the returned list (best accuracy per feature
# count) is the last expression of the cell.
optimize_qsvm_credit()

In [None]:
# Recorded result lists: three lists of 16 accuracies each.
# NOTE(review): presumably pasted outputs of optimize_qsvm_credit() (its
# feature_range, range(4, 20), also has 16 sizes), with the trailing
# percentage being the fraction of the credit data sampled in
# preprocess_data() -- confirm before relying on these numbers.
[0.75,
 0.7333333333333333,
 0.75,
 0.7333333333333333,
 0.75,
 0.7833333333333333,
 0.7833333333333333,
 0.7833333333333333,
 0.7833333333333333,
 0.7666666666666667,
 0.7666666666666667,
 0.7666666666666667,
 0.7666666666666667,
 0.7833333333333333,
 0.7666666666666667,
 0.7666666666666667] #20%

[0.68,
 0.7,
 0.6933333333333334,
 0.7133333333333334,
 0.7133333333333334,
 0.7266666666666667,
 0.7533333333333333,
 0.72,
 0.74,
 0.74,
 0.7466666666666667,
 0.7533333333333333,
 0.7266666666666667,
 0.7266666666666667,
 0.7133333333333334,
 0.7266666666666667] #50%

[0.73,
 0.7433333333333333,
 0.76,
 0.7566666666666667,
 0.7666666666666667,
 0.7666666666666667,
 0.7733333333333333,
 0.7666666666666667,
 0.7666666666666667,
 0.7766666666666666,
 0.78,
 0.77,
 0.7633333333333333,
 0.7633333333333333,
 0.7733333333333333,
 0.7566666666666667] #100%