In [2]:
from typing import Tuple, Optional
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.metrics import recall_score, precision_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, accuracy_score, f1_score
from sklearn.utils import resample
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel

In [None]:
data = pd.read_csv("/Users/miloszglowacki/Desktop/uam/ibm/algo/Project_9/dataset/creditcard.csv")

In [4]:
def make_sub_dataset(df, n_norm=200, n_anom=50, random_state=0):
    df0 = df[df["Class"] == 0]
    df1 = df[df["Class"] == 1]
    
    n_norm = min(n_norm, len(df0))
    n_anom = min(n_anom, len(df1))
    df1s = df1.sample(n=n_anom, random_state=random_state)
    df0s = df0.sample(n=n_norm, random_state=random_state)

    out = pd.concat([df0s, df1s], axis=0).sample(frac=1.0, random_state=random_state).reset_index(drop=True)
    return out

def prepare_data(
    data: pd.DataFrame,
    test_size=0.3,
    pca_components=30,
    quantum=False,
    random_state=0,
) -> Tuple[np.ndarray, pd.Series, np.ndarray, pd.Series, MinMaxScaler, PCA, Optional[MinMaxScaler]]:
    """
    Wspólny pipeline pod OneClassSVM (quantum=False) i QSVC (quantum=True).

    - Zawsze: robi train_test_split(stratify=y)
    - quantum=False: X_train zawiera TYLKO Class==0 (One-Class), X_test zawiera 0+1
    - quantum=True: X_train zawiera 0+1 (supervised), plus fm_scaler do [0, pi]
    """
    X = data.drop(columns=["Class"])
    y = data["Class"]

    # jeden wspólny split dla obu modeli
    X_train_df, X_test_df, y_train_full, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    if quantum:
        # supervised: zostaw obie klasy w treningu
        X_train_used = X_train_df
        y_train_used = y_train_full
    else:
        # one-class: trenuj tylko na normalnych
        mask_norm = (y_train_full == 0)
        X_train_used = X_train_df.loc[mask_norm]
        y_train_used = y_train_full.loc[mask_norm]  # będzie samymi zerami (ok)

    # skalowanie + PCA fitowane tylko na treningu (bez leakage)
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_used)
    X_test_scaled = scaler.transform(X_test_df)

    pca = PCA(n_components=pca_components, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    fm_scaler = None
    if quantum:
        # dopasowanie pod feature map: [0, pi]
        fm_scaler = MinMaxScaler(feature_range=(0, np.pi))
        X_train_final = fm_scaler.fit_transform(X_train_pca)
        X_test_final = fm_scaler.transform(X_test_pca)
    else:
        X_train_final = X_train_pca
        X_test_final = X_test_pca

    return X_train_final, y_train_used, X_test_final, y_test, scaler, pca, fm_scaler

In [5]:
def eval_binary(y_true, y_pred, name="model"):
    # metryki liczone dla klasy pozytywnej = 1 (anomalia)
    p = precision_score(y_true, y_pred, pos_label=1, zero_division=0)
    r = recall_score(y_true, y_pred, pos_label=1, zero_division=0)
    f1 = f1_score(y_true, y_pred, pos_label=1, zero_division=0)
    acc = accuracy_score(y_true, y_pred)

    print(f"\n[{name}] accuracy={acc:.4f}  precision(1)={p:.4f}  recall(1)={r:.4f}  f1(1)={f1:.4f}")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4, zero_division=0))


def train_classical_model(X_train, y_train, X_test, y_test):
    clf_svm = OneClassSVM(kernel="rbf", degree=3, gamma=0.1, nu=0.01)
    clf_svm.fit(X_train)

    # OneClassSVM: 1=inlier (normal), -1=outlier (anomalia)
    y_predict = clf_svm.predict(X_test)
    y_pred = pd.Series(y_predict).replace({1: 0, -1: 1}).to_numpy()

    eval_binary(y_test, y_pred, name="OneClassSVM")


def train_qsvc(X_train, y_train, X_test, y_test, n_qubits=5, reps=2, entanglement="full"):
    feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    K_train = quantum_kernel.evaluate(x_vec=X_train)
    model = SVC(kernel="precomputed")
    model.fit(K_train, y_train)

    K_test = quantum_kernel.evaluate(x_vec=X_test, y_vec=X_train)
    y_pred = model.predict(K_test)

    eval_binary(y_test, y_pred, name="QSVC (quantum kernel)")
    return model

In [6]:
data = make_sub_dataset(data, n_norm=200, n_anom=50)

X_train, y_train, X_test, y_test, scaler, pca, _ = prepare_data(data=data, test_size=0.3, pca_components=3, quantum=False)
train_classical_model(X_train, y_train, X_test, y_test)

X_train, y_train, X_test, y_test, scaler, pca, fm_scaler = prepare_data(data=data, test_size=0.3, pca_components=3, quantum=True)
train_qsvc(X_train, y_train, X_test, y_test, n_qubits=3, reps=2, entanglement="linear")


[OneClassSVM] accuracy=0.8133  precision(1)=1.0000  recall(1)=0.0667  f1(1)=0.1250
[[60  0]
 [14  1]]
              precision    recall  f1-score   support

           0     0.8108    1.0000    0.8955        60
           1     1.0000    0.0667    0.1250        15

    accuracy                         0.8133        75
   macro avg     0.9054    0.5333    0.5103        75
weighted avg     0.8486    0.8133    0.7414        75



  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)



[QSVC (quantum kernel)] accuracy=0.9600  precision(1)=1.0000  recall(1)=0.8000  f1(1)=0.8889
[[60  0]
 [ 3 12]]
              precision    recall  f1-score   support

           0     0.9524    1.0000    0.9756        60
           1     1.0000    0.8000    0.8889        15

    accuracy                         0.9600        75
   macro avg     0.9762    0.9000    0.9322        75
weighted avg     0.9619    0.9600    0.9583        75



0,1,2
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",1.0
,"kernel  kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf' Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``. For an intuitive visualization of different kernel types see :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.",'precomputed'
,"degree  degree: int, default=3 Degree of the polynomial kernel function ('poly'). Must be non-negative. Ignored by all other kernels.",3
,"gamma  gamma: {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses  1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features - if float, must be non-negative. .. versionchanged:: 0.22  The default value of ``gamma`` changed from 'auto' to 'scale'.",'scale'
,"coef0  coef0: float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.",0.0
,"shrinking  shrinking: bool, default=True Whether to use the shrinking heuristic. See the :ref:`User Guide `.",True
,"probability  probability: bool, default=False Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `.",False
,"tol  tol: float, default=1e-3 Tolerance for stopping criterion.",0.001
,"cache_size  cache_size: float, default=200 Specify the size of the kernel cache (in MB).",200
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",


In [7]:
# ...existing code...
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    average_precision_score,
)

# --- modele: helpery bez printów (żeby dało się zbierać wyniki) ---

def fit_predict_oneclass_svm(
    X_train: np.ndarray,
    X_test: np.ndarray,
    *,
    kernel: str = "rbf",
    gamma: float = 0.1,
    nu: float = 0.01,
    degree: int = 3,
) -> Tuple[np.ndarray, np.ndarray]:
    """Zwraca: y_pred (0/1), score (większy = bardziej anomalia)."""
    model = OneClassSVM(kernel=kernel, gamma=gamma, nu=nu, degree=degree)
    model.fit(X_train)

    # predict: 1=inlier, -1=outlier -> mapujemy na 0/1
    y_oc = model.predict(X_test)
    y_pred = np.where(y_oc == 1, 0, 1)

    # decision_function: większe = bardziej normalne, więc odwracamy znak
    score = -model.decision_function(X_test)
    return y_pred, score


def fit_predict_qsvc(
    X_train: np.ndarray,
    y_train: pd.Series,
    X_test: np.ndarray,
    *,
    n_qubits: int,
    reps: int = 2,
    entanglement: str = "full",
) -> Tuple[np.ndarray, np.ndarray]:
    """Zwraca: y_pred (0/1), score (większy ~ bardziej klasa 1)."""
    feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    K_train = quantum_kernel.evaluate(x_vec=X_train)
    model = SVC(kernel="precomputed")
    model.fit(K_train, y_train)

    K_test = quantum_kernel.evaluate(x_vec=X_test, y_vec=X_train)
    y_pred = model.predict(K_test)

    # dla SVC binary: decision_function działa i zwykle >0 oznacza klasę "1"
    score = model.decision_function(K_test)
    return y_pred, score


def compute_metrics(y_true: pd.Series, y_pred: np.ndarray, score: Optional[np.ndarray] = None) -> Dict[str, float]:
    """Metryki dla klasy pozytywnej = 1 (anomalia)."""
    out = {
        "accuracy": float(accuracy_score(y_true, y_pred)),
        "precision_1": float(precision_score(y_true, y_pred, pos_label=1, zero_division=0)),
        "recall_1": float(recall_score(y_true, y_pred, pos_label=1, zero_division=0)),
        "f1_1": float(f1_score(y_true, y_pred, pos_label=1, zero_division=0)),
    }
    if score is not None:
        # AUPRC (Average Precision) jest często lepsze niż ROC-AUC przy dużym niezbalansowaniu
        out["auprc"] = float(average_precision_score(y_true, score))
    return out


def collect_results_grid(
    df_full: pd.DataFrame,
    *,
    n_norm_list: List[int],
    n_anom_list: List[int],
    pca_components_list: List[int],
    test_size: float = 0.3,
    random_state: int = 0,
    qsvc_reps: int = 2,
    qsvc_entanglement: str = "linear",
    oneclass_params: Optional[Dict] = None,
) -> pd.DataFrame:
    """
    Dla każdej kombinacji (n_norm, n_anom, pca_components):
    - tworzy subset: make_sub_dataset(df_full, n_norm, n_anom)
    - przygotowuje dane prepare_data(... quantum=False) dla OneClass (train na 0)
    - przygotowuje dane prepare_data(... quantum=True)  dla QSVC (train na 0+1)
    - zbiera metryki na tym samym subsecie (ten sam random_state/test_size -> ten sam split)
    """
    if oneclass_params is None:
        oneclass_params = dict(kernel="rbf", gamma=0.1, nu=0.01, degree=3)

    rows = []

    for n_norm in n_norm_list:
        for n_anom in n_anom_list:
            data_sub = make_sub_dataset(df_full, n_norm=n_norm, n_anom=n_anom, random_state=random_state)

            # szybka kontrola, czy mamy obie klasy
            vc = data_sub["Class"].value_counts()
            if vc.get(0, 0) < 2 or vc.get(1, 0) < 2:
                continue

            for pca_components in pca_components_list:
                # --- OneClassSVM ---
                t0 = time.perf_counter()
                Xtr_c, ytr_c, Xte_c, yte_c, *_ = prepare_data(
                    data=data_sub,
                    test_size=test_size,
                    pca_components=pca_components,
                    quantum=False,
                    random_state=random_state,
                )
                y_pred_c, score_c = fit_predict_oneclass_svm(Xtr_c, Xte_c, **oneclass_params)
                m_c = compute_metrics(yte_c, y_pred_c, score=score_c)
                dt_c = time.perf_counter() - t0

                rows.append({
                    "model": "OneClassSVM",
                    "n_norm": n_norm,
                    "n_anom": n_anom,
                    "n_total": n_norm + n_anom,
                    "pca_components": pca_components,
                    "runtime_s": dt_c,
                    **m_c,
                })

                # --- QSVC ---
                # UWAGA: QSVC wymaga, by wymiar cech == n_qubits -> ustawiamy n_qubits=pca_components
                t1 = time.perf_counter()
                Xtr_q, ytr_q, Xte_q, yte_q, *_ = prepare_data(
                    data=data_sub,
                    test_size=test_size,
                    pca_components=pca_components,
                    quantum=True,
                    random_state=random_state,
                )

                # zabezpieczenie: czasem split może dać 1 klasę w treningu (przy bardzo małych danych)
                if len(pd.Series(ytr_q).unique()) < 2:
                    continue

                try:
                    y_pred_q, score_q = fit_predict_qsvc(
                        Xtr_q, ytr_q, Xte_q,
                        n_qubits=pca_components,
                        reps=qsvc_reps,
                        entanglement=qsvc_entanglement,
                    )
                    m_q = compute_metrics(yte_q, y_pred_q, score=score_q)
                    dt_q = time.perf_counter() - t1

                    rows.append({
                        "model": "QSVC",
                        "n_norm": n_norm,
                        "n_anom": n_anom,
                        "n_total": n_norm + n_anom,
                        "pca_components": pca_components,
                        "runtime_s": dt_q,
                        **m_q,
                    })
                except Exception as e:
                    rows.append({
                        "model": "QSVC",
                        "n_norm": n_norm,
                        "n_anom": n_anom,
                        "n_total": n_norm + n_anom,
                        "pca_components": pca_components,
                        "runtime_s": np.nan,
                        "error": str(e),
                    })

    return pd.DataFrame(rows)


def plot_metric_grid(
    results: pd.DataFrame,
    *,
    metric: str = "recall_1",
    models: Tuple[str, str] = ("OneClassSVM", "QSVC"),
    title: Optional[str] = None,
):
    """
    Wykres: oś X = n_total, osobne linie dla pca_components, osobne panele dla modeli.
    """
    if metric not in results.columns:
        raise ValueError(f"Brak kolumny metric='{metric}' w results. Dostępne: {list(results.columns)}")

    fig, axes = plt.subplots(1, len(models), figsize=(14, 4), sharey=True)
    if len(models) == 1:
        axes = [axes]

    for ax, model_name in zip(axes, models):
        dfm = results[results["model"] == model_name].copy()
        dfm = dfm.dropna(subset=[metric, "n_total", "pca_components"])

        for p in sorted(dfm["pca_components"].unique()):
            d = dfm[dfm["pca_components"] == p].sort_values("n_total")
            ax.plot(d["n_total"], d[metric], marker="o", label=f"PCA={p}")

        ax.set_title(model_name)
        ax.set_xlabel("Liczba próbek (n_total)")
        ax.grid(True, alpha=0.3)
        ax.legend()

    axes[0].set_ylabel(metric)
    if title:
        fig.suptitle(title)
    plt.tight_layout()
    plt.show()
# ...existing code...

In [None]:
data_full = pd.read_csv("/Users/miloszglowacki/Desktop/uam/ibm/algo/Project_9/dataset/creditcard.csv")

results = collect_results_grid(
    data_full,
    n_norm_list=[100, 200, 300, 400, 500, 600],
    n_anom_list=[20, 50],
    pca_components_list=[2, 3, 5],
    test_size=0.3,
    random_state=0,
    qsvc_reps=2,
    qsvc_entanglement="linear",
)

display(results.sort_values(["model", "pca_components", "n_total"]))
plot_metric_grid(results, metric="recall_1", title="Recall(1) vs liczba danych i PCA")
plot_metric_grid(results, metric="auprc", title="AUPRC vs liczba danych i PCA (lepsze przy niezbalansowaniu)")

  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
  feature_map = ZZFeatureMap(feature_dimension=n_q

In [3]:
print("DATA COLUMNS:", data.columns)
print("\nDATA SIZE:", data.shape)
print("\nFIRST 5 ROWS:", data.head)
print(data["Class"].value_counts())

DATA COLUMNS: Index(['Time', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
       'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20',
       'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'Amount',
       'Class'],
      dtype='object')

DATA SIZE: (200, 31)

FIRST 5 ROWS: <bound method NDFrame.head of             Time        V1        V2        V3        V4        V5        V6  \
266543  162374.0  1.915075 -0.263068 -1.765741  0.332474  0.033409 -0.754076   
102875   68408.0 -0.328174  0.719165  1.111028 -0.434435 -0.085913 -0.888919   
198910  132709.0  2.062016  0.017803 -1.041502  0.409546 -0.064367 -1.196602   
91346    63424.0 -0.531320  0.667302  1.858070 -0.530679  0.303545  0.077263   
5489      5528.0 -1.062678  0.977074  1.922641  0.026997  0.449646 -0.288029   
...          ...       ...       ...       ...       ...       ...       ...   
247071  153446.0 -1.103430  1.121015  1.295806 -0.616834  0.100726  0.536032   
176968  122985

In [None]:
def prepare_data(
    data: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    X_test = data
    X_train = data[data["Class"] != 1]

    y_test = X_test["Class"]
    y_train = X_train["Class"]

    X_test = X_test.drop(columns=["Class"])
    X_train = X_train.drop(columns=["Class"])

    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_transformed_train = scaler.transform(X_train)
    X_transformed_test = scaler.transform(X_test)

    pca = PCA(n_components=30)
    pca.fit(X_transformed_train)
    X_transormed_train = pca.transform(X_transformed_train)
    X_transformed_test = pca.transform(X_transformed_test)

    return (X_transormed_train, y_train, X_transformed_test, y_test, scaler, pca)


def prepare_data_supervised(data: pd.DataFrame, test_size=0.3, random_state=0, pca_components=5):
    X = data.drop(columns=["Class"])
    y = data["Class"]

    X_train_df, X_test_df, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_df)
    X_test_scaled = scaler.transform(X_test_df)

    pca = PCA(n_components=pca_components, random_state=random_state)
    X_train_pca = pca.fit_transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)

    # dopasowanie pod feature map: [0, pi]
    fm_scaler = MinMaxScaler(feature_range=(0, np.pi))
    X_train_final = fm_scaler.fit_transform(X_train_pca)
    X_test_final = fm_scaler.transform(X_test_pca)

    return X_train_final, y_train, X_test_final, y_test, scaler, pca, fm_scaler

In [5]:
def eval_binary(y_true, y_pred, name="model"):
    # metryki liczone dla klasy pozytywnej = 1 (anomalia)
    p = precision_score(y_true, y_pred, pos_label=1, zero_division=0)
    r = recall_score(y_true, y_pred, pos_label=1, zero_division=0)
    f1 = f1_score(y_true, y_pred, pos_label=1, zero_division=0)
    acc = accuracy_score(y_true, y_pred)

    print(f"\n[{name}] accuracy={acc:.4f}  precision(1)={p:.4f}  recall(1)={r:.4f}  f1(1)={f1:.4f}")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4, zero_division=0))


def train_classical_model(X_train, y_train, X_test, y_test):
    clf_svm = OneClassSVM(kernel="rbf", degree=3, gamma=0.1, nu=0.01)
    clf_svm.fit(X_train)

    # OneClassSVM: 1=inlier (normal), -1=outlier (anomalia)
    y_predict = clf_svm.predict(X_test)
    y_pred = pd.Series(y_predict).replace({1: 0, -1: 1}).to_numpy()

    eval_binary(y_test, y_pred, name="OneClassSVM")


def train_qsvc(X_train, y_train, X_test, y_test, n_qubits=5, reps=2, entanglement="full"):
    feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)
    quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    K_train = quantum_kernel.evaluate(x_vec=X_train)
    model = SVC(kernel="precomputed")
    model.fit(K_train, y_train)

    K_test = quantum_kernel.evaluate(x_vec=X_test, y_vec=X_train)
    y_pred = model.predict(K_test)

    eval_binary(y_test, y_pred, name="QSVC (quantum kernel)")
    return model

In [6]:
(X_train, y_train, X_test, y_test, _, _) = prepare_data(data)

print("TRAIN", X_train[:5])
print("Y TRAIN", y_train[:5])

print("TEST", X_test[:5])
print("Y TEST", len(y_test[y_test[:] == 1]))

train_classical_model(X_train, y_train, X_test, y_test)

TRAIN [[ 4.62640659e-01 -1.78552509e-01  1.53398396e-01  1.91905576e-01
   1.14524432e-01 -2.28885409e-01  1.94929162e-01 -4.50150017e-02
  -1.99579883e-01  9.28729950e-03  2.61412919e-01  2.42209439e-01
  -1.55531247e-01  8.17148144e-02 -3.68155701e-02 -7.33500010e-03
   2.38743727e-02  9.87869835e-02  1.09019084e-01  1.11290941e-02
  -2.10935428e-02  4.24538570e-02  1.73823210e-02 -6.16913797e-02
   5.29200009e-03  5.43937478e-03  5.06209405e-03 -3.94683602e-03
   1.20982405e-03 -1.02421152e-04]
 [-1.26115452e-01  1.93318791e-01 -7.27653214e-02  2.83918706e-01
  -1.81431060e-01 -7.26536000e-03  1.41405373e-01  4.09487685e-02
  -1.34734856e-01 -1.06933115e-01  2.38537191e-01 -2.11342934e-01
   1.65118131e-03 -1.81319034e-01 -1.31995573e-01 -2.13443300e-01
  -1.70524330e-01 -1.11624615e-01  8.40591652e-04  8.19683120e-02
  -7.38152554e-02  2.12223676e-02 -4.66683105e-02 -9.35273927e-02
   6.63427068e-02 -2.77331621e-02  8.83615684e-04 -2.12285322e-02
  -2.77758250e-04  5.33674354e-04]


In [15]:
data_full = pd.read_csv("/Users/miloszglowacki/Desktop/uam/ibm/algo/Project_9/dataset/creditcard.csv")
data_qsvc = make_qsvc_dataset(data_full, n_norm=200, n_anom=50, random_state=0)
print(data_qsvc["Class"].value_counts())

n_qubits = 2

X_train, y_train, X_test, y_test, scaler, pca, fm_scaler = prepare_data_supervised(
    data_qsvc,
    test_size=0.7,
    random_state=0,
    pca_components=n_qubits,
)

qsvc_model = train_qsvc(
    X_train, y_train,
    X_test, y_test,
    n_qubits=n_qubits,
    reps=2,
    entanglement="linear",
)

Class
0    200
1     50
Name: count, dtype: int64


  feature_map = ZZFeatureMap(feature_dimension=n_qubits, reps=reps, entanglement=entanglement)



[QSVC (quantum kernel)] accuracy=0.9371  precision(1)=1.0000  recall(1)=0.6857  f1(1)=0.8136
[[140   0]
 [ 11  24]]
              precision    recall  f1-score   support

           0     0.9272    1.0000    0.9622       140
           1     1.0000    0.6857    0.8136        35

    accuracy                         0.9371       175
   macro avg     0.9636    0.8429    0.8879       175
weighted avg     0.9417    0.9371    0.9325       175

