In [None]:
# Install the notebook's dependencies.
# %pip (instead of !pip) installs into the environment of the running kernel,
# whereas !pip spawns a shell whose `pip` may belong to another interpreter.
%pip install qiskit==1.4.2
%pip install qiskit_machine_learning==0.8.2
%pip install qiskit_algorithms==0.3.0
%pip install openpyxl
%pip install XlsxWriter
%pip install pylatexenc
%pip install symengine



Collecting qiskit==1.4.2
  Downloading qiskit-1.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting rustworkx>=0.15.0 (from qiskit==1.4.2)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting dill>=0.3 (from qiskit==1.4.2)
  Downloading dill-0.4.0-py3-none-any.whl.metadata (10 kB)
Collecting stevedore>=3.0.0 (from qiskit==1.4.2)
  Downloading stevedore-5.4.1-py3-none-any.whl.metadata (2.3 kB)
Collecting symengine<0.14,>=0.11 (from qiskit==1.4.2)
  Downloading symengine-0.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting pbr>=2.0.0 (from stevedore>=3.0.0->qiskit==1.4.2)
  Downloading pbr-6.1.1-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading qiskit-1.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m67.9 MB/s[0m eta [36m0:00:00[0m
[?25hDown

Collecting qiskit_machine_learning==0.8.2
  Downloading qiskit_machine_learning-0.8.2-py3-none-any.whl.metadata (13 kB)
Downloading qiskit_machine_learning-0.8.2-py3-none-any.whl (231 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/231.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m225.3/231.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.6/231.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: qiskit_machine_learning
Successfully installed qiskit_machine_learning-0.8.2
Collecting qiskit_algorithms==0.3.0
  Downloading qiskit_algorithms-0.3.0-py3-none-any.whl.metadata (4.2 kB)
Downloading qiskit_algorithms-0.3.0-py3-none-any.whl (308 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.6/308.6 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages:

In [None]:
import warnings, time
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from qiskit.circuit.library import ZFeatureMap, ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityStatevectorKernel

# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Seeds NumPy's legacy global RNG; SEED is also passed as the PCA random_state in main().
SEED = 12345
np.random.seed(SEED)

# ====== CONFIG ======
DATA_PATH       = "Data/synthetic_fraud_dataset.csv"  # CSV read by load_financial_data()
REPETITIONS     = 30                                  # number of resampled train/test splits run by main()
TRAIN_PER_CLASS = 500                                 # training samples drawn per class in each split
TEST_PER_CLASS  = 200                                 # test samples drawn per class in each split
PCA_DIM         = 8                                   # PCA components kept; run_quantum() uses one qubit per component
C_LIST          = [0.1, 1, 10]                        # candidate C values shared by every grid search
SCALES          = np.logspace(-3, 0.3, 8)             # multipliers applied to the [0, pi]-scaled features in run_quantum()
CV_CLASSIC_K    = 3                                   # CV folds for the classical grid searches
CV_QUANT_K      = 3                                   # CV folds for the precomputed-kernel grid search


def load_financial_data(path):
    """Load the fraud CSV and return a numeric feature matrix with -1/+1 labels.

    The six categorical columns are one-hot encoded, the identifier and
    timestamp columns are dropped when present, and ``Fraud_Label`` is mapped
    from {0, 1} to {-1, +1}.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file.

    Returns
    -------
    X : numpy.ndarray of float64, shape (n_rows, n_features)
        Every column except ``Fraud_Label`` after encoding.
    y : numpy.ndarray of int, shape (n_rows,)
        Labels, -1 for an original 0 and +1 for an original 1.

    Raises
    ------
    ValueError
        If ``Fraud_Label`` contains anything other than 0 or 1, or if a
        non-numeric feature column survives the encoding step.
    """
    df = pd.read_csv(path)
    cats = [
        'Transaction_Type','Device_Type','Location','Merchant_Category',
        'Card_Type','Authentication_Method'
    ]
    # dtype=float: recent pandas versions emit boolean dummy columns by
    # default, and mixing bool with numeric columns turns the extracted array
    # into dtype=object instead of a numeric matrix.
    df = pd.get_dummies(df, columns=cats, dtype=float)
    df = df.drop(columns=['Transaction_ID','User_ID','Timestamp'], errors='ignore')

    labels = df['Fraud_Label'].map({0: -1, 1: 1})
    unmapped = labels.isna()
    if unmapped.any():
        # .map() silently yields NaN for unknown values; fail loudly instead.
        offending = df.loc[unmapped, 'Fraud_Label'].unique()[:5].tolist()
        raise ValueError(
            f"Fraud_Label must contain only 0 or 1; found unexpected values {offending}"
        )

    features = df.drop(columns='Fraud_Label')
    return features.to_numpy(dtype=float), labels.to_numpy(dtype=int)


def generate_balanced_split(X, y, train_pc, test_pc, seed):
    """Draw a class-balanced, non-overlapping train/test split.

    For each label (-1 first, then +1) ``train_pc + test_pc`` row indices are
    sampled without replacement from a generator seeded with ``seed``; the
    first ``train_pc`` of them go to the training set and the rest to the
    test set.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``, each ordered with the -1 rows
        before the +1 rows.
    """
    per_class = train_pc + test_pc
    rng = np.random.default_rng(seed)

    train_parts, test_parts = [], []
    # The label order fixes the order of the RNG draws, so it must stay -1, +1.
    for label in (-1, 1):
        candidates = np.where(y == label)[0]
        drawn = rng.choice(candidates, per_class, replace=False)
        train_parts.append(drawn[:train_pc])
        test_parts.append(drawn[train_pc:])

    train_idx = np.concatenate(train_parts)
    test_idx = np.concatenate(test_parts)
    return X[train_idx], y[train_idx], X[test_idx], y[test_idx]


def get_classical_models():
    """Return the classical baselines as ``{name: (pipeline, param_grid)}``.

    Every pipeline standardises the features and then fits a classifier under
    the step name ``"clf"``; the grids therefore address hyper-parameters with
    the ``clf__`` prefix. Entries are inserted in the order LogReg, SVM-lin,
    SVM-poly, SVM-rbf.
    """
    def standardized(estimator):
        # All candidates share the same preprocessing step.
        return Pipeline([
            ("scaler", StandardScaler()),
            ("clf",    estimator),
        ])

    catalogue = {}
    catalogue["LogReg"] = (
        standardized(LogisticRegression(max_iter=1000)),
        {"clf__C": C_LIST},
    )
    catalogue["SVM-lin"] = (
        standardized(SVC(kernel="linear")),
        {"clf__C": C_LIST},
    )
    catalogue["SVM-poly"] = (
        standardized(SVC(kernel="poly")),
        {"clf__degree": [3, 4, 5], "clf__C": C_LIST, "clf__coef0": [0, 1]},
    )
    catalogue["SVM-rbf"] = (
        standardized(SVC(kernel="rbf")),
        {"clf__C": C_LIST, "clf__gamma": np.logspace(-3, 1, 5)},
    )
    return catalogue


def run_classical(Xtr, ytr, Xts, yts):
    """Grid-search every classical baseline and score it on the test split.

    For each entry of ``get_classical_models()`` a ``GridSearchCV`` with
    ``CV_CLASSIC_K`` folds (scored by accuracy, all cores) is fitted on the
    training data, and the refitted best estimator predicts the test data.

    Parameters
    ----------
    Xtr, ytr : training features and labels.
    Xts, yts : test features and labels.

    Returns
    -------
    pandas.DataFrame
        One row per model with columns ``model``, ``accuracy``, ``f1``,
        ``best_C`` (the selected ``clf__C``, or None if the grid has none)
        and ``time_s`` (seconds spent on search plus prediction, rounded to
        two decimals). Other selected hyper-parameters are not recorded.
    """
    records = []
    for name, (pipe, grid) in get_classical_models().items():
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments the way time.time() can.
        t0 = time.perf_counter()
        gs = GridSearchCV(pipe, grid, cv=CV_CLASSIC_K, scoring="accuracy", n_jobs=-1)
        gs.fit(Xtr, ytr)
        ypred = gs.predict(Xts)
        t_el = time.perf_counter() - t0

        records.append({
            "model":     name,
            "accuracy":  accuracy_score(yts, ypred),
            "f1":        f1_score(yts, ypred),
            "best_C":    gs.best_params_.get("clf__C", None),
            "time_s":    round(t_el,2)
        })
    return pd.DataFrame(records)


def build_feature_maps(n_qubits):
    """Return the candidate feature maps as a list of ``(name, circuit)`` pairs.

    The list holds ``ZFeatureMap`` with 1 and 2 repetitions, followed by
    ``ZZFeatureMap`` with "linear" and then "full" entanglement, each with 1
    and 2 repetitions. All are built on ``n_qubits`` qubits.
    """
    reps_options = (1, 2)

    z_maps = [
        (f"Z-reps{reps}", ZFeatureMap(n_qubits, reps=reps))
        for reps in reps_options
    ]
    zz_maps = [
        (f"ZZ-{pattern}-reps{reps}",
         ZZFeatureMap(n_qubits, reps=reps, entanglement=pattern))
        for pattern in ("linear", "full")
        for reps in reps_options
    ]
    return z_maps + zz_maps


def run_quantum(Xtr, ytr, Xts, yts):
    """Evaluate quantum-kernel SVMs for every feature map and input scale.

    Features are first mapped to [0, pi] by a ``MinMaxScaler`` fitted on the
    training split. Then, for each feature map from ``build_feature_maps`` and
    each factor in ``SCALES``, the scaled features are multiplied by the
    factor, the train/train and test/train kernel matrices are evaluated with
    ``FidelityStatevectorKernel``, and an ``SVC(kernel="precomputed")`` is
    grid-searched over ``C_LIST`` with ``CV_QUANT_K`` folds.

    Parameters
    ----------
    Xtr, ytr : training features and labels; the number of feature columns
        sets the number of qubits.
    Xts, yts : test features and labels.

    Returns
    -------
    pandas.DataFrame
        One row per (feature map, scale) with columns ``feature_map``,
        ``scale`` (rounded to 5 decimals), ``accuracy``, ``f1``, ``best_C``
        and ``time_s`` (seconds for kernel evaluation, search and prediction,
        rounded to two decimals).
    """
    records = []
    scaler = MinMaxScaler((0, np.pi))
    Xtr0, Xts0 = scaler.fit_transform(Xtr), scaler.transform(Xts)
    n_qubits = Xtr.shape[1]

    for fmap_name, fmap in build_feature_maps(n_qubits):
        for scale in SCALES:
            # perf_counter() is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments the way time.time() can.
            t0 = time.perf_counter()
            Xtr_s = Xtr0 * scale
            Xts_s = Xts0 * scale

            qk = FidelityStatevectorKernel(feature_map=fmap)
            # NOTE(review): this replaces the kernel's private input
            # validation with a pass-through, presumably to skip per-call
            # checks. It depends on a private attribute of the library;
            # confirm it is still needed for the pinned version.
            qk._validate_input = lambda x,y=None: (x,y)

            # Square train/train matrix for fitting, test/train for prediction.
            Ktr = qk.evaluate(Xtr_s)
            Kts = qk.evaluate(Xts_s, Xtr_s)

            svc = GridSearchCV(
                SVC(kernel="precomputed"),
                {"C": C_LIST},
                cv=CV_QUANT_K,
                scoring="accuracy"
            )
            svc.fit(Ktr, ytr)
            ypred = svc.predict(Kts)
            t_el = time.perf_counter() - t0

            records.append({
                "feature_map": fmap_name,
                "scale":       round(scale,5),
                "accuracy":    accuracy_score(yts, ypred),
                "f1":          f1_score(yts, ypred),
                "best_C":      svc.best_params_["C"],
                "time_s":      round(t_el,2)
            })
    return pd.DataFrame(records)


def aggregate_and_print(df, group_cols, title):
    """Summarise accuracy and F1 per group, print the table and return it.

    Rows of ``df`` are grouped by ``group_cols``; for each group the mean and
    standard deviation of the ``accuracy`` and ``f1`` columns are computed.
    The result is sorted by mean accuracy, best first, printed under a header
    containing ``title``, and returned as a DataFrame with the grouping
    columns followed by ``acc_mean``, ``acc_std``, ``f1_mean``, ``f1_std``.
    """
    named_stats = {
        "acc_mean": ("accuracy", "mean"),
        "acc_std":  ("accuracy", "std"),
        "f1_mean":  ("f1", "mean"),
        "f1_std":   ("f1", "std"),
    }
    per_group = df.groupby(group_cols).agg(**named_stats)
    summary = per_group.reset_index()
    summary = summary.sort_values("acc_mean", ascending=False)

    header = f"\n=== {title} (mean ± std) ==="
    print(header)
    print(summary.to_string(index=False))
    return summary


def main():
    """Run the repeated classical-vs-quantum benchmark and export the summary.

    Loads the dataset once, then for each of ``REPETITIONS`` repetitions draws
    a balanced split (seeded with the repetition index), projects it onto
    ``PCA_DIM`` principal components fitted on the training split, and
    evaluates the classical and quantum models, printing each per-repetition
    table. Finally the per-repetition tables are aggregated, printed and
    written to two sheets of ``Resultados_Fraud_PCA8_Modular.xlsx``.
    """
    features, labels = load_financial_data(DATA_PATH)

    classical_runs, quantum_runs = [], []

    for rep in range(REPETITIONS):
        print(f"\n--- Repetition {rep+1}/{REPETITIONS} ---")
        X_train, y_train, X_test, y_test = generate_balanced_split(
            features, labels, TRAIN_PER_CLASS, TEST_PER_CLASS, seed=rep
        )

        # Reduce to PCA_DIM components; the projection is fitted on the
        # training split only and then applied to the test split.
        reducer = PCA(n_components=PCA_DIM, random_state=SEED)
        X_train_red = reducer.fit_transform(X_train)
        X_test_red = reducer.transform(X_test)

        # Classical baselines
        classical_table = run_classical(X_train_red, y_train, X_test_red, y_test)
        print("\n-- Clásicos (esta réplica) --")
        print(classical_table.to_string(index=False))
        classical_runs.append(classical_table)

        # Quantum kernels
        quantum_table = run_quantum(X_train_red, y_train, X_test_red, y_test)
        print("\n-- Cuánticos (esta réplica) --")
        print(quantum_table.to_string(index=False))
        quantum_runs.append(quantum_table)

    # Stack the per-repetition tables and summarise them.
    classical_stacked = pd.concat(classical_runs, ignore_index=True)
    quantum_stacked = pd.concat(quantum_runs, ignore_index=True)

    classical_summary = aggregate_and_print(
        classical_stacked, ["model"], "Modelos Clásicos"
    )
    quantum_summary = aggregate_and_print(
        quantum_stacked, ["feature_map","scale"], "Kernels Cuánticos"
    )

    # One sheet per summary table in a single workbook.
    sheets = (
        ("Clasicos", classical_summary),
        ("Cuanticos", quantum_summary),
    )
    with pd.ExcelWriter("Resultados_Fraud_PCA8_Modular.xlsx") as writer:
        for sheet_name, table in sheets:
            table.to_excel(writer, sheet_name=sheet_name, index=False)

    print("\nResultados_Fraud_PCA8_Modular")


# Entry point: run the full experiment only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
