In [None]:
# import gdown

# url = 'https://drive.google.com/uc?id=1tHg-m1L3Du2YRCq1hqXT1A2pWAp3qYKZ'
# output = 'easy.csv'
# gdown.download(url, output, quiet=False)

: 

In [None]:
# pip install qiskit numpy scikit-learn
import itertools
import math
import numpy as np

from typing import List, Tuple, Optional
from dataclasses import dataclass

from qiskit import QuantumCircuit
from qiskit.quantum_info import Statevector, SparsePauliOp

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

import pandas as pd

# -----------------------------
# 1) Feature map (IQP-style)
# -----------------------------
def iqp_feature_map(x: np.ndarray, gamma: float = 1.0) -> QuantumCircuit:
    """
    Build an IQP-style data-embedding circuit for a single sample x in R^n.

    The circuit uses one qubit per feature and applies, in order:
    - a Hadamard on every qubit,
    - a single-qubit RZ rotation with angle gamma * x_j on qubit j,
    - a two-qubit RZZ rotation with angle gamma * x_j * x_k on every pair j < k.
    (Pairs-only IQP; captures up to quadratic interactions as in common IQP maps.)

    Args:
        x: Feature values of one sample; flattened to 1-D before use.
        gamma: Scale factor multiplied into every rotation angle.

    Returns:
        A QuantumCircuit on len(x) qubits. No final Hadamard layer is appended.
    """
    # Convert each feature to a Python float once so every gate gets a plain
    # numeric angle.
    values = [float(v) for v in np.asarray(x).ravel()]
    n = len(values)
    qc = QuantumCircuit(n)

    # H layer
    for q in range(n):
        qc.h(q)

    # Single-qubit Z rotations
    for j in range(n):
        qc.rz(gamma * values[j], j)

    # Pairwise ZZ (RZZ) rotations
    for j in range(n):
        for k in range(j + 1, n):
            angle = gamma * values[j] * values[k]
            # Pairs whose angle is exactly zero get no gate at all.
            if abs(angle) > 0:
                qc.rzz(angle, j, k)

    return qc

# -----------------------------
# 2) Enumerate H-body Pauli strings
# -----------------------------
def generate_h_body_paulis(n_qubits: int, H: int) -> List[str]:
    """
    Enumerate every length-n_qubits Pauli string with exactly H non-identity
    letters, each letter drawn from 'X', 'Y', 'Z' (e.g. 'IXYZI' for n=5, H=3).

    Strings are ordered by the positions of the non-identity letters (as
    produced by itertools.combinations), then by the letters placed on those
    positions (as produced by itertools.product over 'XYZ').
    For H == 0 the single all-identity string is returned.
    """
    identity = 'I' * n_qubits
    if H == 0:
        return [identity]

    def _place(sites, letters):
        # Start from the all-identity string and overwrite the chosen sites.
        chars = list(identity)
        for site, letter in zip(sites, letters):
            chars[site] = letter
        return ''.join(chars)

    return [
        _place(sites, letters)
        for sites in itertools.combinations(range(n_qubits), H)
        for letters in itertools.product('XYZ', repeat=H)
    ]

def sample_paulis(paulis_all: List[str], p: Optional[int], seed: int) -> List[str]:
    """
    Pick p distinct Pauli strings from paulis_all using a seeded NumPy generator.

    If p is None or at least len(paulis_all), the input list itself is
    returned (same object, original order). Otherwise the strings are returned
    in the order their indices were drawn.
    """
    generator = np.random.default_rng(seed)
    total = len(paulis_all)

    take_everything = p is None or p >= total
    if take_everything:
        return paulis_all

    # Draw without replacement so no Pauli string appears twice.
    drawn = generator.choice(total, size=p, replace=False)
    return [paulis_all[pos] for pos in drawn]

# -----------------------------
# 3) Expectation values ⟨P⟩_x for a state
# -----------------------------
def statevector_from_feature_map(x: np.ndarray, gamma: float, feature_map_fn) -> Statevector:
    """
    Return the state obtained by applying the feature-map circuit for x to
    the all-zeros computational basis state.

    feature_map_fn is called as feature_map_fn(x, gamma=gamma) and must return
    a circuit exposing num_qubits.
    """
    circuit = feature_map_fn(x, gamma=gamma)
    ground_state = Statevector.from_label('0' * circuit.num_qubits)
    return ground_state.evolve(circuit)

def expectation_pauli(psi: Statevector, pauli_str: str) -> float:
    """
    Evaluate the expectation value of the Pauli string pauli_str (with unit
    coefficient) in the state psi and return it as a Python float.
    """
    observable = SparsePauliOp.from_list([(pauli_str, 1.0)])
    raw_value = psi.expectation_value(observable)
    # real_if_close drops a negligible imaginary part before the float cast.
    return float(np.real_if_close(raw_value))

# -----------------------------
# 4) H-body LPQK feature matrix Φ (N x p)
# -----------------------------
@dataclass
class HBodyLPQK:
    """
    Kernel built from H-body Pauli expectation values.

    Each sample is embedded with feature_map_fn; its feature vector is the
    list of expectation values of the selected Pauli strings, divided by the
    square root of the number of strings. The kernel matrix is the Gram matrix
    of those feature vectors.
    """
    H: int = 2                         # number of non-identity factors per Pauli string
    p: Optional[int] = None            # number of Lego features; if None use full d_H
    gamma: float = 1.0                 # kernel bandwidth (scales embedding)
    seed: int = 0                      # seed used when sub-sampling the Pauli strings
    feature_map_fn: callable = iqp_feature_map

    # will be set after fit_features(...)
    pauli_basis_: Optional[List[str]] = None

    def fit_features(self, X: np.ndarray) -> np.ndarray:
        """
        Select and cache the Pauli subset for X, then return Φ_train.

        X is expected to be 2-D (samples x features); the number of qubits is
        taken from its second dimension.
        """
        # Coerce so that lists / DataFrames expose .shape and iterate row-wise.
        X = np.asarray(X)
        n_qubits = X.shape[1]
        all_paulis = generate_h_body_paulis(n_qubits, self.H)
        self.pauli_basis_ = sample_paulis(all_paulis, self.p, self.seed)
        return self.transform_features(X)

    def transform_features(self, X: np.ndarray) -> np.ndarray:
        """
        Compute Φ(X): each row i is [⟨P_1⟩_{x_i},...,⟨P_p⟩_{x_i}] / sqrt(p or d_H).

        Requires fit_features to have been called so the Pauli subset exists.
        """
        assert self.pauli_basis_ is not None, "Call fit_features on training data first."
        # Row-wise iteration below relies on X being an array, not a DataFrame.
        X = np.asarray(X)
        basis = self.pauli_basis_
        norm = math.sqrt(len(basis))
        phi = np.empty((len(X), len(basis)), dtype=float)
        # Simulate one sample at a time: only a single statevector is alive at
        # any moment instead of one per row of X.
        for i, x in enumerate(X):
            psi = statevector_from_feature_map(x, self.gamma, self.feature_map_fn)
            phi[i, :] = [expectation_pauli(psi, P) for P in basis]
        return phi / norm

    def kernel_train(self, X: np.ndarray) -> np.ndarray:
        """Fit the Pauli subset on X and return the (N x N) training kernel Φ Φ^T."""
        phi = self.fit_features(X)
        return phi @ phi.T

    def kernel_test(self, X_test: np.ndarray, X_train: np.ndarray) -> np.ndarray:
        """
        Return the (N_test x N_train) kernel between test and training samples.

        Both feature matrices use the Pauli subset cached by fit_features.
        The training features are recomputed (re-simulated) on every call.
        """
        phi_train = self.transform_features(X_train)
        phi_test = self.transform_features(X_test)
        return phi_test @ phi_train.T

# -----------------------------
# 5) GFQK (fidelity) kernel
# -----------------------------
@dataclass
class GFQK:
    """
    Fidelity kernel: K(x, x') = |<psi(x)|psi(x')>|^2, where psi(x) is the
    state produced by feature_map_fn for sample x.
    """
    gamma: float = 1.0                 # passed to feature_map_fn as its gamma argument
    feature_map_fn: callable = iqp_feature_map

    def _states(self, X: np.ndarray) -> List[Statevector]:
        """Simulate the embedded state of every sample in X."""
        return [statevector_from_feature_map(x, self.gamma, self.feature_map_fn) for x in X]

    def _amplitudes(self, X: np.ndarray) -> np.ndarray:
        """Stack the amplitude vectors of all samples into one complex matrix (one row per sample)."""
        rows = [state.data for state in self._states(X)]
        if not rows:
            # Keep a 2-D shape so callers can always read .shape[0].
            return np.empty((0, 0), dtype=complex)
        return np.asarray(rows, dtype=complex)

    @staticmethod
    def _fidelities(A: np.ndarray, B: np.ndarray) -> np.ndarray:
        """Return |<a_i|b_j>|^2 for every row a_i of A and row b_j of B."""
        if A.shape[0] == 0 or B.shape[0] == 0:
            # No overlaps to compute; still return the (len(A), len(B)) shape.
            return np.empty((A.shape[0], B.shape[0]), dtype=float)
        # One matrix product replaces the Python-level loop over all pairs.
        overlaps = A.conj() @ B.T
        return np.abs(overlaps) ** 2

    def kernel_train(self, X: np.ndarray) -> np.ndarray:
        """Return the (N x N) fidelity kernel of X with itself."""
        A = self._amplitudes(X)
        K = self._fidelities(A, A)
        # Average with the transpose so the result is exactly symmetric
        # regardless of floating-point summation order in the product.
        return (K + K.T) / 2.0

    def kernel_test(self, X_test: np.ndarray, X_train: np.ndarray) -> np.ndarray:
        """Return the (N_test x N_train) fidelity kernel between test and training samples."""
        return self._fidelities(self._amplitudes(X_test), self._amplitudes(X_train))

# -----------------------------
# 6) SVM wrapper (precomputed kernel)
# -----------------------------
@dataclass
class QuantumKernelSVC:
    """
    SVC trained on a precomputed kernel supplied by a kernel object.

    The kernel object must provide kernel_train(X) and
    kernel_test(X_test, X_train). When scale_X is true, features are
    standardised with a StandardScaler fitted on the training data before
    being handed to the kernel.
    """
    kernel: object
    C: float = 1.0                     # regularisation parameter forwarded to SVC
    scale_X: bool = True               # standardise features before the kernel

    # learned artifacts
    svc_: Optional[SVC] = None
    scaler_: Optional[StandardScaler] = None
    X_train_: Optional[np.ndarray] = None
    y_train_: Optional[np.ndarray] = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit the scaler (if enabled) and the SVC on (X, y); return self."""
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if self.scale_X:
            self.scaler_ = StandardScaler().fit(X)
            Xs = self.scaler_.transform(X)
        else:
            # Drop any scaler left from an earlier fit; otherwise predict()
            # would scale test data while the training data was not scaled.
            self.scaler_ = None
            Xs = X

        K_train = self.kernel.kernel_train(Xs)

        self.svc_ = SVC(C=self.C, kernel='precomputed').fit(K_train, y)
        # Keep the (possibly scaled) training inputs: predict() needs them to
        # build the test-vs-train kernel.
        self.X_train_ = Xs
        self.y_train_ = y
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict labels for X using the kernel between X and the stored training inputs."""
        assert self.svc_ is not None and self.X_train_ is not None
        X = np.asarray(X, dtype=float)
        Xs = self.scaler_.transform(X) if (self.scaler_ is not None) else X
        K_test = self.kernel.kernel_test(Xs, self.X_train_)
        return self.svc_.predict(K_test)

# -----------------------------
# 7) Example usage
# -----------------------------
if __name__ == "__main__":
    # Example run: load a labelled CSV, split it 80/20, then train and predict
    # with both kernels defined above.

    # Dummy binary dataset (replace with your own)
    # rng = np.random.default_rng(0)
    # N_train, N_test, n_features = 100, 20, 8
    # X_train = rng.normal(size=(N_train, n_features))
    # y_train = (X_train[:, 0] + 0.5 * X_train[:, 1] > 0).astype(int)
    # X_test  = rng.normal(size=(N_test,  n_features))

    # Path is relative to the current working directory.
    data = pd.read_csv("easy.csv")
    print(data.head())

    # First 80% of the rows (in file order, no shuffling) are used for training.
    num_rows = int(len(data) * 0.80)

    data_train = data[:num_rows]
    data_test = data[num_rows:]

    # The "Class" column is the label; every other column is a feature, and
    # each feature becomes one qubit in the feature map.
    X_train = data_train.drop(columns=["Class"]).to_numpy()
    y_train = data_train["Class"].to_numpy()
    X_test = data_test.drop(columns=["Class"]).to_numpy()
    # NOTE(review): y_test is not used anywhere in the visible code, so the
    # predictions below are never compared with the true labels.
    y_test = data_test["Class"].to_numpy()


    # H-body LPQK (H=2), use p=150 Lego features, bandwidth gamma=0.5
    # (if fewer than 150 two-body Pauli strings exist, all of them are used)
    kernel = HBodyLPQK(H=2, p=150, gamma=0.5, seed=42)
    clf = QuantumKernelSVC(kernel=kernel, C=2.0).fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("LPQK(H=2,p=150) preds:", y_pred[:10])

    # GFQK baseline for comparison (optional; evaluates a fidelity for every
    # pair of samples)
    gf_kernel = GFQK(gamma=0.5)
    gf_clf = QuantumKernelSVC(kernel=gf_kernel, C=2.0).fit(X_train, y_train)
    y_pred_gf = gf_clf.predict(X_test)
    print("GFQK preds:", y_pred_gf[:10])


  """
