In [2]:
import numpy as np
import pandas as pd

# UCI data fetcher
import ucimlrepo
from ucimlrepo import fetch_ucirepo

# Scikit-Learn & Imbalanced-Learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as imbPipeline

# Metrics
from sklearn.metrics import confusion_matrix, classification_report, recall_score

# PennyLane
import pennylane as qml
from pennylane import numpy as pnp  # PennyLane-compatible NumPy

In [4]:
##############################################################################
#                           DATA FETCHING & PREPROCESSING
##############################################################################

# 1) Fetch Statlog (German Credit) dataset
statlog_german_credit_data = fetch_ucirepo(id=144)

# X: Features, y: Targets (1=Good, 2=Bad)
X = statlog_german_credit_data.data.features
y = statlog_german_credit_data.data.targets  # 1=Good, 2=Bad

# 2) Split features into categorical vs. numerical
categorical_features = X.select_dtypes(include=["object"]).columns
numerical_features   = X.select_dtypes(exclude=["object"]).columns

# 3) One-hot encode categorical columns
#    Remove 'sparse' or 'sparse_output' to support older scikit-learn versions.
#    We'll convert the output to a dense array via `.toarray()`
onehot = OneHotEncoder(handle_unknown="ignore")
X_cat_sparse = onehot.fit_transform(X[categorical_features])
X_cat = X_cat_sparse.toarray()  # explicitly convert to NumPy array

# 4) Standard scale numerical columns
scaler = StandardScaler()
X_num = scaler.fit_transform(X[numerical_features])

# 5) Combine numerical + categorical features
X_processed = np.hstack([X_num, X_cat])

# 6) Dimensionality Reduction via PCA -> 4 components
pca = PCA(n_components=4)
X_reduced = pca.fit_transform(X_processed)

# 7) Train/test split (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X_reduced, y, test_size=0.2, random_state=42, stratify=y
)

# 8) Class imbalance handling (SMOTE + undersample)
smote = SMOTE(random_state=42, sampling_strategy=0.8)  # oversample minority to ~80%
rus   = RandomUnderSampler(random_state=42, sampling_strategy=0.9)  # undersample majority to ~90%
resampler = imbPipeline([("smote", smote), ("rus", rus)])
X_train_res, y_train_res = resampler.fit_resample(X_train, y_train)

In [6]:
##############################################################################
#                           PENNYLANE QNODE SETUP
##############################################################################

NUM_QUBITS = 4
dev = qml.device("default.qubit", wires=NUM_QUBITS)

@qml.qnode(dev, interface="autograd")
def circuit(emb_params, res_params, x):
    """
    Parametric circuit that:
      1) Applies an initial Hadamard to each qubit.
      2) Embeds classical features x
      3) Applies reservoir layers (res_params)
      4) Returns <Z> on the last qubit.
    """
    # 1) Initial Hadamard
    for w in range(NUM_QUBITS):
        qml.Hadamard(wires=w)

    # 2) Embedding
    for i in range(NUM_QUBITS):
        angle = x[i] if i < len(x) else 0.0
        qml.RY(angle, wires=i)

    # 3) Reservoir layers
    L = len(res_params)
    for layer in range(L):
        for i in range(NUM_QUBITS):
            qml.CNOT(wires=[i, (i + 1) % NUM_QUBITS])
        for i in range(NUM_QUBITS):
            rz_angle, rx_angle = res_params[layer][i]
            qml.RZ(rz_angle, wires=i)
            qml.RX(rx_angle, wires=i)

    # 4) Measure <Z> on the last qubit
    return qml.expval(qml.PauliZ(NUM_QUBITS - 1))

def model(emb_params, res_params, x):
    """
    Convert the circuit's <Z> to a probability for class=2: p_2 = 0.5*(1 - <Z>).
    """
    raw_out = circuit(emb_params, res_params, x)  # in [-1,1]
    return 0.5 * (1.0 - raw_out)


In [7]:
##############################################################################
#                 BINARY CROSS ENTROPY + COST FUNCTION
##############################################################################

def binary_cross_entropy(y_true, y_pred):
    """
    BCE for one sample: map {1->0, 2->1} => BCE.
    """
    y_bin = 1 if y_true == 2 else 0
    eps = 1e-8
    y_clamped = pnp.clip(y_pred, eps, 1 - eps)
    return -(y_bin * pnp.log(y_clamped) + (1 - y_bin) * pnp.log(1 - y_clamped))

def total_cost(emb_params, res_params, X_batch, y_batch):
    """
    Mean BCE over a batch of samples.
    """
    loss = 0.0
    for x, y_ in zip(X_batch, y_batch):
        y_p = model(emb_params, res_params, x)
        loss += binary_cross_entropy(y_, y_p)
    return loss / len(X_batch)


In [8]:
##############################################################################
#                   COST WRAPPER FOR OLDER PENNYLANE
##############################################################################
def cost_fn(params, X_batch, y_batch):
    """
    Single-argument cost function. 'params' is a tuple: (emb_params, res_params).
    """
    emb, res = params
    return total_cost(emb, res, X_batch, y_batch)

In [9]:
##############################################################################
#                       PARAM INITIALIZATION
##############################################################################

L = 2  # number of reservoir layers
emb_params = pnp.zeros(NUM_QUBITS, requires_grad=True)
res_params = 0.01 * pnp.random.randn(L, NUM_QUBITS, 2, requires_grad=True)

# We'll store them in a single tuple to handle older PennyLane versions
params = (emb_params, res_params)

In [10]:
##############################################################################
#                          TRAINING LOOP
##############################################################################

X_train_pnp = pnp.array(X_train_res, requires_grad=False)
y_train_pnp = pnp.array(y_train_res, requires_grad=False)

epochs = 30
batch_size = 16
lr = 0.1

num_batches = int(np.ceil(len(X_train_pnp) / batch_size))

for epoch in range(epochs):
    # Shuffle
    idx = np.random.permutation(len(X_train_pnp))
    X_train_pnp = X_train_pnp[idx]
    y_train_pnp = y_train_pnp[idx]

    avg_loss = 0.0
    for b in range(num_batches):
        start = b * batch_size
        end   = start + batch_size
        X_batch = X_train_pnp[start:end]
        y_batch = y_train_pnp[start:end]

        # 1) grad_fn w.r.t. single param tuple
        grad_fn = qml.grad(lambda p: cost_fn(p, X_batch, y_batch), argnum=0)
        
        # 2) Compute gradient => (grad_emb, grad_res)
        grads = grad_fn(params)

        # 3) Unpack params, do gradient descent
        emb_new = params[0] - lr * grads[0]
        res_new = params[1] - lr * grads[1]
        params  = (emb_new, res_new)

        # 4) Accumulate cost
        loss_val = cost_fn(params, X_batch, y_batch)
        avg_loss += loss_val

    avg_loss /= num_batches
    if (epoch + 1) % 5 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch+1}/{epochs}, Loss = {avg_loss:0.6f}")

Epoch 5/30, Loss = 0.697570
Epoch 10/30, Loss = 0.691660
Epoch 15/30, Loss = 0.682525
Epoch 20/30, Loss = 0.667749
Epoch 25/30, Loss = 0.663805
Epoch 30/30, Loss = 0.663487


In [11]:
##############################################################################
#                         EVALUATE ON TEST SET
##############################################################################

def predict_class(params, x, threshold=0.3):
    """
    If p_2 >= threshold => class=2 else class=1
    """
    emb, res = params
    p_2 = model(emb, res, x)
    return 2 if p_2 >= threshold else 1

emb_final, res_final = params
y_pred = [predict_class(params, x) for x in X_test]

print("\n--- Evaluation with threshold=0.5 ---")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

minority_recall = recall_score(y_test, y_pred, pos_label=2)
print(f"Minority Class (label=2) Recall: {minority_recall:.3f}")



--- Evaluation with threshold=0.5 ---
Confusion Matrix:
[[ 11 129]
 [  2  58]]

Classification Report:
              precision    recall  f1-score   support

           1       0.85      0.08      0.14       140
           2       0.31      0.97      0.47        60

    accuracy                           0.34       200
   macro avg       0.58      0.52      0.31       200
weighted avg       0.69      0.34      0.24       200

Minority Class (label=2) Recall: 0.967
