# Quantum Learning Procedure

In [1]:
import sys
from pathlib import Path
qml_path = (Path.cwd() / "../../QML").resolve()
sys.path.insert(0, str(qml_path))

from Qsun.Qkernels import *
from Qsun.Qgates import *
from Qsun.Qmeas import *
from Qsun.Qcircuit import *
from Qsun.Qwave import *
from Qsun.Qencodes import *
from Qsun.Qdata import *

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from test_file import *
#from src.load_datasets import *
from src.kernel_evaluation import *
from tqdm import tqdm

# Seed NumPy's legacy global RNG so any np.random draws made later in this
# kernel session are repeatable after Restart & Run All.
np.random.seed(1234)

### Registering the 9 encoding ansatzes from `Qencodes.py`

In [2]:
# Registry of the data-encoding circuits compared in this notebook.
# Each entry maps a display name to:
#   "fn"         - the encoder callable (star-imported in the first cell)
#   "has_params" - True when the encoder is called with a `params=` keyword
#   "has_layers" - True when the encoder is called with an `n_layers=` keyword
# Insertion order matters: it is the order in which encodings are listed,
# evaluated, and laid out as table columns further down.
ENCODING_REGISTER = {
    "YZ_CX": {
        "fn": YZ_CX_encode,
        "has_params": True,
        "has_layers": True,
    },
    "HighDim": {
        "fn": HighDim_encode,
        "has_params": False,
        "has_layers": False,
    },
    "HZY_CZ": {
        "fn": HZY_CZ_encode,
        "has_params": True,
        "has_layers": True,
    },
    "Chebyshev": {
        "fn": Chebyshev_encode,
        "has_params": True,
        "has_layers": True,
    },
    "ParamZFeatureMap": {
        "fn": ParamZFeatureMap_encode,
        "has_params": True,
        "has_layers": True,
    },
    "SeparableRX": {
        "fn": SeparableRXEncoding_encode,
        "has_params": False,
        "has_layers": False,
    },
    "HardwareEfficientRx": {
        "fn": HardwareEfficientEmbeddingRx_encode,
        "has_params": False,
        "has_layers": True,
    },
    "ZFeatureMap": {
        "fn": ZFeatureMap_encode,
        "has_params": False,
        "has_layers": True,
    },
    "ZZFeatureMap": {
        "fn": ZZFeatureMap_encode,
        "has_params": False,
        "has_layers": True,
    },
}

def encode_sample(sample: np.ndarray, encoding_name: str, n_layers: int = 2,
                  params: np.ndarray = None):
    """Encode one sample with the encoder registered under ``encoding_name``.

    The keyword arguments forwarded to the encoder are derived from the
    entry's ``has_params`` / ``has_layers`` flags in ``ENCODING_REGISTER``,
    so a newly registered encoding needs no extra dispatch branch here.

    Args:
        sample: Feature vector of a single data point.
        encoding_name: Key into ``ENCODING_REGISTER``.
        n_layers: Forwarded as ``n_layers=`` only when the entry sets
            ``has_layers``.
        params: Forwarded as ``params=`` only when the entry sets
            ``has_params``. ``None`` is passed through unchanged; how an
            encoder treats ``None`` is not visible from this notebook.

    Returns:
        Whatever the encoder callable returns (presumably the encoded
        quantum state - confirm against the encoder implementations).

    Raises:
        ValueError: If ``encoding_name`` is not registered.
    """
    if encoding_name not in ENCODING_REGISTER:
        raise ValueError(f"Unknown encoding: {encoding_name}")
    config = ENCODING_REGISTER[encoding_name]

    # Build the call from the registry flags instead of a per-name branch.
    # Keyword order (params, then n_layers) matches the previous explicit calls.
    encoder_kwargs = {}
    if config["has_params"]:
        encoder_kwargs["params"] = params
    if config["has_layers"]:
        encoder_kwargs["n_layers"] = n_layers
    return config["fn"](sample, **encoder_kwargs)

### Quantum Embedding Kernels

In [3]:
def kernel_matrix(X_train, X_test,
                encoding_name, n_layers=2,
                params=None, random_state=42):
    """Build the train/train and test/train kernel matrices for one encoding.

    Every row of ``X_train`` and ``X_test`` is encoded with ``encode_sample``
    and each kernel entry is ``state_product(a, b)**2`` of two encoded rows.

    Args:
        X_train: Indexable by row, with ``.shape[0]`` giving the row count.
        X_test: Same layout as ``X_train``.
        encoding_name: Passed through to ``encode_sample``.
        n_layers: Passed through to ``encode_sample``.
        params: Passed through to ``encode_sample``.
        random_state: Accepted for signature compatibility; not used here.

    Returns:
        ``(K_train, K_test)`` with shapes ``(n_train, n_train)`` and
        ``(n_test, n_train)``.

    NOTE(review): both matrices are allocated as real arrays. If
    ``state_product`` can return a complex overlap, ``**2`` is not the squared
    magnitude and the imaginary part is dropped on assignment - confirm.
    NOTE(review): when ``params`` is None, the same None reaches every
    ``encode_sample`` call; confirm parametrised encoders then use identical
    parameters for all samples.
    """
    n_train = X_train.shape[0]
    n_test = X_test.shape[0]

    train_states = [
        encode_sample(X_train[row], encoding_name, n_layers, params)
        for row in range(n_train)
    ]
    test_states = [
        encode_sample(X_test[row], encoding_name, n_layers, params)
        for row in range(n_test)
    ]

    # Fill only the upper triangle and mirror it into the lower one.
    K_train = np.zeros((n_train, n_train))
    for a, state_a in enumerate(train_states):
        for b in range(a, n_train):
            overlap_sq = state_product(state_a, train_states[b])**2
            K_train[a, b] = overlap_sq
            K_train[b, a] = overlap_sq

    K_test = np.zeros((n_test, n_train))
    for a, test_state in enumerate(test_states):
        for b, train_state in enumerate(train_states):
            K_test[a, b] = state_product(test_state, train_state)**2

    return K_train, K_test

def total_kernels(X_train, X_test,
                encoding_names=None, n_layers=2,
                random_state=42):
    """Compute kernel matrices for several encodings, skipping any that fail.

    Args:
        X_train: Training samples, forwarded to ``kernel_matrix``.
        X_test: Test samples, forwarded to ``kernel_matrix``.
        encoding_names: Encodings to evaluate; defaults to every key of
            ``ENCODING_REGISTER``.
        n_layers: Forwarded to ``kernel_matrix``.
        random_state: Forwarded to ``kernel_matrix``.

    Returns:
        Dict mapping encoding name to ``(K_train, K_test)``. An encoding whose
        kernel computation raised is reported on stdout and left out of the
        dict, so callers must not assume every requested name is present.
    """
    if encoding_names is None:
        encoding_names = list(ENCODING_REGISTER.keys())
    results = {}
    for name in encoding_names:
        try:
            K_train, K_test = kernel_matrix(
                X_train, X_test, name, n_layers,
                random_state=random_state)
            results[name] = (K_train, K_test)
        except Exception as e:
            # Deliberately best-effort: one broken encoding must not abort the
            # sweep. Name the encoding so the failure can be traced.
            print(f"  Error [{name}]: {e}")

    return results

def get_available_encodings():
    """Return the registered encoding names in registry insertion order."""
    return [name for name in ENCODING_REGISTER]

In [4]:
# Smoke test: load the datasets, pick Iris, and build the kernel matrices for
# a single encoding before running the full sweep.
# load_datasets arrives through a star import (presumably test_file - confirm).
datasets = load_datasets(data_dir="datasets", max_qubit=4)
X_train, X_test, y_train, y_test = datasets["Iris"]

print(f"\nIris dataset: Train {X_train.shape}, Test {X_test.shape}")
print(f"Available encodings: {get_available_encodings()}")

print("Sample encoding: ZFeatureMap")

# The resulting matrices are not displayed; this only checks the call succeeds.
K_train, K_test = kernel_matrix(
        X_train, X_test, "ZFeatureMap", n_layers=2)



Iris dataset: Train (80, 4), Test (20, 4)
Available encodings: ['YZ_CX', 'HighDim', 'HZY_CZ', 'Chebyshev', 'ParamZFeatureMap', 'SeparableRX', 'HardwareEfficientRx', 'ZFeatureMap', 'ZZFeatureMap']
Sample encoding: ZFeatureMap


In [5]:
# List every available dataset with its train/test shapes.
# Each value in `datasets` unpacks as (X_train, X_test, y_train, y_test).
datasets = load_datasets(data_dir="datasets", max_qubit=4)
for name, (X_tr, X_te, y_tr, y_te) in datasets.items():
    print(f"\n{name} dataset: Train {X_tr.shape}, Test {X_te.shape}")



Blobs_F2C2 dataset: Train (800, 2), Test (200, 2)

Blobs_F2C3 dataset: Train (800, 2), Test (200, 2)

Blobs_F2C4 dataset: Train (800, 2), Test (200, 2)

Blobs_F4C2 dataset: Train (800, 4), Test (200, 4)

Blobs_F4C3 dataset: Train (800, 4), Test (200, 4)

Blobs_F4C4 dataset: Train (800, 4), Test (200, 4)

Circle dataset: Train (80, 2), Test (20, 2)

Moons dataset: Train (80, 2), Test (20, 2)

Iris dataset: Train (80, 4), Test (20, 4)

Pima dataset: Train (613, 4), Test (154, 4)

Banknote dataset: Train (1097, 4), Test (275, 4)

Haberman dataset: Train (244, 3), Test (61, 3)


### Model Execution

In [6]:
def total_runs(dataset_name="Iris",
            encodings=None,
            n_layers=2,
            n_runs=10,
            test_size=0.2,
            random_state=42):
    """Evaluate every encoding on one dataset over several seeded splits.

    For run ``k`` the datasets are reloaded with seed ``random_state + k``,
    kernels are built with ``total_kernels`` and scored with
    ``evaluate_kernel``; the per-run accuracies are then averaged.

    Args:
        dataset_name: Key into the dict returned by ``load_datasets``.
        encodings: Encoding names to evaluate; defaults to
            ``get_available_encodings()``.
        n_layers: Forwarded to ``total_kernels``.
        n_runs: Number of seeded repetitions.
        test_size: Forwarded to ``load_datasets``.
        random_state: Base seed; run ``k`` uses ``random_state + k``.

    Returns:
        ``{"results": {encoding: [KernelEvaluation, ...]}}`` with one
        ``KernelEvaluation`` per model holding the mean and standard deviation
        of train and test accuracy. An encoding that never produced a kernel
        (``total_kernels`` skipped it in every run) gets NaN statistics.
    """
    if encodings is None:
        encodings = get_available_encodings()
    # Printed for every call; it used to be nested under the default branch
    # and so vanished whenever an explicit encoding list was passed.
    print(f"Dataset: {dataset_name}")

    model_names = ("SVM",)

    def _stat(reducer, scores):
        # Explicit NaN for "no successful run" instead of relying on the
        # RuntimeWarning path of np.mean / np.std on an empty list.
        return reducer(scores) if scores else float("nan")

    results_accumulator = {enc: {m: {"train": [], "test": []}
                                  for m in model_names}
                           for enc in encodings}
    for run in tqdm(range(n_runs)):
        seed = random_state + run
        datasets = load_datasets(data_dir="datasets", random_state=seed, test_size=test_size, max_qubit=4)
        X_train, X_test, y_train, y_test = datasets[dataset_name]
        kernels = total_kernels(X_train, X_test, encodings, n_layers, seed)
        for enc_name, (K_train, K_test) in kernels.items():
            for model_name in model_names:
                result = evaluate_kernel(
                    K_train, K_test, y_train, y_test, enc_name, model_name
                )
                results_accumulator[enc_name][model_name]["train"].append(result.train_accuracy)
                results_accumulator[enc_name][model_name]["test"].append(result.test_accuracy)

    all_results = {}
    for enc_name in encodings:
        enc_results = []
        for model_name in model_names:
            train_scores = results_accumulator[enc_name][model_name]["train"]
            test_scores = results_accumulator[enc_name][model_name]["test"]
            enc_results.append(KernelEvaluation(
                model_name=model_name,
                encoding_name=enc_name,
                train_accuracy=_stat(np.mean, train_scores),
                test_accuracy=_stat(np.mean, test_scores),
                train_std=_stat(np.std, train_scores),
                test_std=_stat(np.std, test_scores)
            ))
        all_results[enc_name] = enc_results

    return {"results": all_results}

In [7]:
def summary(all_results):
    """Print a per-encoding accuracy table followed by the best configuration.

    Args:
        all_results: Dict mapping encoding name to a list of result objects
            exposing ``encoding_name``, ``model_name``, ``train_accuracy``,
            ``train_std``, ``test_accuracy`` and ``test_std``.

    The best configuration is the first one with the highest test accuracy.
    Results whose test accuracy is NaN never win the comparison. When nothing
    can be ranked (empty input or only NaN scores) a placeholder line is
    printed instead of raising.
    """
    print(f"{'Encoding':<22} {'Model':<6} {'Train':<18} {'Test':<18}")
    print("-" * 75)

    # -inf rather than 0 so that a legitimate accuracy of 0.0 can still win.
    best_test_acc = float("-inf")
    best_config = None

    for results in all_results.values():
        for r in results:
            train_str = f"{r.train_accuracy:.4f} ± {r.train_std:.4f}"
            test_str = f"{r.test_accuracy:.4f} ± {r.test_std:.4f}"
            print(f"{r.encoding_name:<22} {r.model_name:<6} {train_str:<18} {test_str:<18}")
            if r.test_accuracy > best_test_acc:
                best_test_acc = r.test_accuracy
                best_config = r

    print("-" * 75)
    if best_config is None:
        print("Best: n/a (no result to rank)")
        return
    print(f"Best: {best_config.encoding_name} + {best_config.model_name} = {best_test_acc:.4f} ± {best_config.test_std:.4f}")

### Accuracy Visualization

In [8]:
def plot_accuracy(all_results):
    """Draw a bar chart of test accuracy (with std error bars) per encoding.

    Args:
        all_results: Dict mapping encoding name to a list of result objects
            exposing ``model_name``, ``test_accuracy`` and ``test_std``.
            Every ``model_name`` must be one of the models plotted here
            (currently only ``"SVM"``), otherwise a ``KeyError`` is raised.
    """
    encodings = list(all_results.keys())
    models = ["SVM"]

    test_accs = {m: [] for m in models}
    test_stds = {m: [] for m in models}

    for enc in encodings:
        for r in all_results[enc]:
            test_accs[r.model_name].append(r.test_accuracy)
            test_stds[r.model_name].append(r.test_std)

    x = np.arange(len(encodings))
    width = 0.25

    fig, ax = plt.subplots(figsize=(12, 5))

    for i, model in enumerate(models):
        ax.bar(x + i*width, test_accs[model], width,
               yerr=test_stds[model], label=model, capsize=3)

    ax.set_ylabel('Test Accuracy')
    # Centre each tick under its group of bars. Bars are centred at
    # x + i*width, so the group midpoint is x + width*(n_models - 1)/2.
    # The previous fixed offset of `width` was only right for three models
    # and shifted the labels away from the single SVM bar.
    ax.set_xticks(x + width * (len(models) - 1) / 2)
    ax.set_xticklabels(encodings, rotation=45, ha='right')
    ax.legend()
    ax.set_ylim([0, 1.05])

    plt.tight_layout()
    plt.show()


In [9]:
def create_summary_table(all_dataset_results,
                        model_name="SVM"):
    """Build a dataset-by-encoding table of formatted test accuracies.

    Args:
        all_dataset_results: Dict mapping dataset name to a dict with a
            ``"results"`` entry (encoding name -> list of result objects
            exposing ``model_name`` and ``test_accuracy``).
        model_name: Only results for this model are reported.

    Returns:
        ``pandas.DataFrame`` with a ``"Dataset"`` column followed by one column
        per key of ``ENCODING_REGISTER``. Cells hold the test accuracy as a
        4-decimal string, or ``"-"`` when the dataset has no result for that
        encoding and model.
    """
    encodings = list(ENCODING_REGISTER.keys())

    table_data = []
    for dataset_name, result_dict in all_dataset_results.items():
        row = {"Dataset": dataset_name}
        results = result_dict["results"]

        for enc_name in encodings:
            # Start from the placeholder so the cell is always filled, even
            # when the encoding is present but lacks a result for this model.
            row[enc_name] = "-"
            if enc_name in results:
                for r in results[enc_name]:
                    if r.model_name == model_name:
                        row[enc_name] = f"{r.test_accuracy:.4f}"
                        break

        table_data.append(row)

    return pd.DataFrame(table_data)

In [10]:
# Full sweep: run every registered encoding on every dataset and tabulate the
# mean SVM test accuracy. The datasets dict is loaded here only for its keys;
# total_runs reloads the data itself with a per-run seed.
datasets = load_datasets(data_dir="datasets", max_qubit=4)

# Maps dataset name -> the {"results": ...} dict returned by total_runs.
all_results = {}

for dataset_name in datasets.keys():
    print(f"Processing: {dataset_name}")
    print('-'*60)
    
    result = total_runs(
        dataset_name=dataset_name,
        n_layers=2,
        n_runs=10,
        random_state=42)
    all_results[dataset_name] = result

print("\n" + "-"*80)
print("SUMMARY TABLE (Test Accuracy - SVM)")
print("-"*80)
df_svm = create_summary_table(all_results, model_name="SVM")
print(df_svm.to_string())

Processing: Blobs_F2C2
------------------------------------------------------------
Dataset: Blobs_F2C2


100%|██████████| 10/10 [00:53<00:00,  5.36s/it]


Processing: Blobs_F2C3
------------------------------------------------------------
Dataset: Blobs_F2C3


100%|██████████| 10/10 [01:00<00:00,  6.08s/it]


Processing: Blobs_F2C4
------------------------------------------------------------
Dataset: Blobs_F2C4


100%|██████████| 10/10 [00:56<00:00,  5.69s/it]


Processing: Blobs_F4C2
------------------------------------------------------------
Dataset: Blobs_F4C2


100%|██████████| 10/10 [01:20<00:00,  8.01s/it]


Processing: Blobs_F4C3
------------------------------------------------------------
Dataset: Blobs_F4C3


100%|██████████| 10/10 [01:12<00:00,  7.29s/it]


Processing: Blobs_F4C4
------------------------------------------------------------
Dataset: Blobs_F4C4


100%|██████████| 10/10 [01:09<00:00,  6.99s/it]


Processing: Circle
------------------------------------------------------------
Dataset: Circle


100%|██████████| 10/10 [00:01<00:00,  9.23it/s]


Processing: Moons
------------------------------------------------------------
Dataset: Moons


100%|██████████| 10/10 [00:01<00:00,  9.30it/s]


Processing: Iris
------------------------------------------------------------
Dataset: Iris


100%|██████████| 10/10 [00:02<00:00,  3.98it/s]


Processing: Pima
------------------------------------------------------------
Dataset: Pima


100%|██████████| 10/10 [00:44<00:00,  4.46s/it]


Processing: Banknote
------------------------------------------------------------
Dataset: Banknote


100%|██████████| 10/10 [02:03<00:00, 12.31s/it]


Processing: Haberman
------------------------------------------------------------
Dataset: Haberman


100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


--------------------------------------------------------------------------------
SUMMARY TABLE (Test Accuracy - SVM)
--------------------------------------------------------------------------------
       Dataset   YZ_CX HighDim  HZY_CZ Chebyshev ParamZFeatureMap SeparableRX HardwareEfficientRx ZFeatureMap ZZFeatureMap
0   Blobs_F2C2  0.5045  0.9990  0.9995    0.6970           0.8315      0.9995              0.9980      0.9755       0.9800
1   Blobs_F2C3  0.3270  0.9995  1.0000    0.5105           0.7100      1.0000              0.9995      0.9995       0.9965
2   Blobs_F2C4  0.2265  0.9985  0.9985    0.4380           0.5990      0.9980              0.9985      0.9970       0.9930
3   Blobs_F4C2  0.5050  1.0000  1.0000    0.6720           0.9220      1.0000              1.0000      1.0000       0.9970
4   Blobs_F4C3  0.3395  1.0000  1.0000    0.5115           0.8340      1.0000              1.0000      1.0000       0.9975
5   Blobs_F4C4  0.2445  1.0000  1.0000    0.4155           0.74




In [11]:
def label_dataframe(all_dataset_results,
                          model_name="SVM"):
    """Pick the best-scoring encoding for each dataset.

    Args:
        all_dataset_results: Dict mapping dataset name to a dict with a
            ``"results"`` entry (encoding name -> list of result objects
            exposing ``model_name`` and ``test_accuracy``).
        model_name: Only results for this model are compared.

    Returns:
        ``pandas.DataFrame`` with columns ``"Dataset"``, ``"Best_Kernel"`` and
        ``"Test Accuracy"``. Ties go to the encoding that comes first in
        ``ENCODING_REGISTER``. Results with a NaN accuracy never win. A
        dataset with no comparable result gets ``Best_Kernel=None`` and an
        accuracy of 0.
    """
    encodings = list(ENCODING_REGISTER.keys())

    table_data = []
    for dataset_name, result_dict in all_dataset_results.items():
        results = result_dict["results"]

        # -inf rather than 0 so that a best accuracy of exactly 0.0 still
        # names the encoding that achieved it.
        best_acc = float("-inf")
        best_kernel = None

        for enc_name in encodings:
            if enc_name not in results:
                continue
            for r in results[enc_name]:
                if r.model_name == model_name:
                    if r.test_accuracy > best_acc:
                        best_acc = r.test_accuracy
                        best_kernel = enc_name
                    break

        table_data.append({
            "Dataset": dataset_name,
            "Best_Kernel": best_kernel,
            # Keep the historical 0 placeholder when nothing could be ranked.
            "Test Accuracy": best_acc if best_kernel is not None else 0
        })

    return pd.DataFrame(table_data)

# Build the per-dataset winner table and render it with the index hidden.
df_best = label_dataframe(all_results, model_name="SVM")
df_best.style.hide(axis='index')

Dataset,Best_Kernel,Test Accuracy
Blobs_F2C2,HZY_CZ,0.9995
Blobs_F2C3,HZY_CZ,1.0
Blobs_F2C4,HighDim,0.9985
Blobs_F4C2,HighDim,1.0
Blobs_F4C3,HighDim,1.0
Blobs_F4C4,HighDim,1.0
Circle,ZFeatureMap,0.99
Moons,HighDim,0.97
Iris,HighDim,1.0
Pima,ZFeatureMap,0.754545
