In [3]:
import pandas as pd
import numpy as np
import ucimlrepo
from ucimlrepo import fetch_ucirepo

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, recall_score

from sklearn.decomposition import PCA

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as imbPipeline

# Qiskit imports
from qiskit import QuantumCircuit, transpile
from qiskit.providers.fake_provider import GenericBackendV2

In [7]:
##############################################################################
#                           DATA FETCHING
##############################################################################

# Download the Statlog (German Credit) dataset from the UCI ML repository
# (dataset id 144).
statlog_german_credit_data = fetch_ucirepo(id=144)

# Feature table and target table as exposed by the fetched dataset object.
X, y = (
    statlog_german_credit_data.data.features,
    statlog_german_credit_data.data.targets,
)

In [8]:
##############################################################################
#                             TRAIN/TEST SPLIT
##############################################################################

# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# repeatable.
_split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, **_split_options)


In [9]:
##############################################################################
#                     SEPARATE NUMERIC & CATEGORICAL FEATURES
##############################################################################

# Column groups are decided by dtype on the full feature table: object-typed
# columns are treated as categorical, every remaining column as numerical.
categorical_features = X.select_dtypes(include=['object']).columns
numerical_features = X.select_dtypes(exclude=['object']).columns

# Slice each split into its categorical part and its numerical part.
X_train_cat, X_train_num = (
    X_train_raw[columns] for columns in (categorical_features, numerical_features)
)
X_test_cat, X_test_num = (
    X_test_raw[columns] for columns in (categorical_features, numerical_features)
)

In [10]:
##############################################################################
#                   FIT TRANSFORMERS ON TRAIN, TRANSFORM TEST
##############################################################################

# Every transformer below is fitted on the training split only and then
# applied, unchanged, to both the training and the test split.

# 1) Categorical columns -> dense one-hot indicator columns.
onehot = OneHotEncoder(sparse_output=False, handle_unknown='ignore').fit(X_train_cat)
X_train_cat_enc, X_test_cat_enc = (
    onehot.transform(frame) for frame in (X_train_cat, X_test_cat)
)

# 2) Numerical columns -> standardised with the training split's statistics.
scaler = StandardScaler().fit(X_train_num)
X_train_num_scaled, X_test_num_scaled = (
    scaler.transform(frame) for frame in (X_train_num, X_test_num)
)

# 3) Place the scaled numerical block and the one-hot block side by side
#    (numerical columns first).
X_train_processed = np.concatenate((X_train_num_scaled, X_train_cat_enc), axis=1)
X_test_processed = np.concatenate((X_test_num_scaled, X_test_cat_enc), axis=1)

# 4) Project onto 8 principal components learned from the training matrix.
#    fit() and transform() are kept as separate calls on purpose.
pca = PCA(n_components=8).fit(X_train_processed)
X_train_reduced = pca.transform(X_train_processed)
X_test_reduced = pca.transform(X_test_processed)


In [11]:
##############################################################################
#                CLASS IMBALANCE HANDLING (SMOTE + UNDER-SAMPLING)
##############################################################################

# Two-stage rebalancing of the PCA-reduced training split (the test split is
# never resampled). For both samplers a float `sampling_strategy` is the
# desired minority/majority ratio after that sampler has run:
#   - SMOTE adds synthetic minority samples until minority/majority = 0.8;
#   - RandomUnderSampler then drops majority samples until the ratio is 0.9.
smote = SMOTE(random_state=42, sampling_strategy=0.8)
rus = RandomUnderSampler(random_state=42, sampling_strategy=0.9)

resampler = imbPipeline(steps=[
    ('smote', smote),
    ('rus', rus),
])
X_train_resampled, y_train_resampled = resampler.fit_resample(
    X_train_reduced,
    y_train,
)

In [14]:
##############################################################################
#                      QUANTUM RESERVOIR COMPUTING CLASS
##############################################################################

class QuantumReservoirComputing:
    """
    A simplified quantum reservoir computing feature map that:
      - Creates an n-qubit circuit.
      - Encodes classical data via rotation gates.
      - Applies reservoir layers (ring entanglement followed by fixed
        rotations; the angles are a deterministic function of the layer and
        qubit index, not randomly drawn).
      - Measures all qubits, converting the bitstring distribution into a
        feature vector of length 2 ** n_qubits.

    Parameters
    ----------
    n_qubits : int, default 8
        Number of qubits; also the maximum number of input features encoded.
    n_layers : int, default 3
        Number of reservoir layers appended after the encoding.
    shots : int, default 3000
        Number of measurement shots requested per circuit execution.
    seed_simulator : int or None, default 42
        Seed forwarded to the backend's ``run`` call so that the sampled
        counts (and therefore the features) are repeatable. Pass ``None`` to
        leave the simulator unseeded.
    """
    def __init__(self, n_qubits=8, n_layers=3, shots=3000, seed_simulator=42):
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.shots = shots
        self.seed_simulator = seed_simulator
        # Generic fake backend. NOTE: the coupling map below is an open,
        # one-directional chain (0 -> 1 -> ... -> n-1) with no closing edge,
        # whereas create_quantum_circuit() also emits a CX from the last
        # qubit back to qubit 0. That gate is therefore not a listed
        # coupling and is left to transpile() to map onto the backend.
        self.backend = GenericBackendV2(
            num_qubits=n_qubits,
            basis_gates=['rx', 'ry', 'rz', 'cx', 'h'],
            coupling_map=[[i, i + 1] for i in range(n_qubits - 1)],
            seed=42
        )

    def create_quantum_circuit(self, features):
        """
        Build the measured reservoir circuit for one sample.

        Only the first ``min(len(features), n_qubits)`` entries of
        ``features`` are encoded, one per qubit.

        Returns the circuit with a measurement on every qubit.
        """
        qc = QuantumCircuit(self.n_qubits)

        # 1) Initial Hadamard layer
        for i in range(self.n_qubits):
            qc.h(i)

        # 2) Encode features with single-qubit rotations only (no entangling
        #    gates at this stage; entanglement comes from the layers below).
        for i in range(min(len(features), self.n_qubits)):
            # Clip, then squash through arctan so the angle stays in
            # (-pi/2, pi/2) regardless of the feature's magnitude.
            angle = np.arctan(np.clip(features[i], -10, 10))
            qc.ry(angle, i)
            qc.rz(angle * np.pi, i)

        # 3) Reservoir layers: ring entanglement + fixed rotations
        for layer in range(self.n_layers):
            # Ring entanglement: each qubit controls its successor, with the
            # last qubit wrapping around to qubit 0.
            for i in range(self.n_qubits):
                qc.cx(i, (i + 1) % self.n_qubits)

            # Deterministic per-(layer, qubit) rotation angles.
            for i in range(self.n_qubits):
                phase = layer * np.pi / self.n_layers + i * 2 * np.pi / self.n_qubits
                qc.rx(np.sin(phase), i)
                qc.rz(np.cos(phase), i)

        qc.measure_all()
        return qc

    def get_quantum_features(self, circuit):
        """
        Transpile ``circuit`` for the fake backend, execute it, and return the
        measured bitstring distribution as a feature vector.

        Returns a 1-D float array of length ``2 ** n_qubits`` whose entry
        ``int(bitstring, 2)`` is ``tanh(3 * relative_frequency)``; bitstrings
        that were never observed stay at 0.
        """
        transpiled_circuit = transpile(circuit, self.backend, optimization_level=3)

        run_options = {"shots": self.shots}
        # Only forward the seed when one was requested, so that ``None``
        # keeps the backend's own default seeding behaviour.
        if self.seed_simulator is not None:
            run_options["seed_simulator"] = self.seed_simulator

        job = self.backend.run(transpiled_circuit, **run_options)
        counts = job.result().get_counts()
        return self._counts_to_features(counts)

    def _counts_to_features(self, counts):
        """Convert a ``{bitstring: count}`` mapping into the feature vector."""
        feature_vector = np.zeros(2 ** self.n_qubits)
        total_shots = sum(counts.values())
        if total_shots == 0:
            # Nothing was measured; avoid dividing by zero.
            return feature_vector

        for bitstring, count in counts.items():
            index = int(bitstring, 2)
            # Nonlinear scaling of the relative frequency
            feature_vector[index] = np.tanh((count / total_shots) * 3)

        return feature_vector

In [15]:
##############################################################################
#                       HYBRID QUANTUM-CLASSICAL CLASSIFIER
##############################################################################

class HybridClassifier:
    """
    Uses a quantum reservoir object to turn each input sample into a quantum
    feature vector, then trains / queries a classical classifier on top of
    those features.

    Parameters
    ----------
    quantum_reservoir
        Object providing ``create_quantum_circuit(sample)`` and
        ``get_quantum_features(circuit)``.
    classifier
        Object providing ``fit(X, y)``, ``predict(X)`` and
        ``predict_proba(X)``.
    """
    def __init__(self, quantum_reservoir, classifier):
        self.quantum_reservoir = quantum_reservoir
        self.classifier = classifier

    def _quantum_transform(self, X, verbose=False):
        """
        Run every sample of ``X`` through the reservoir, one at a time.

        Returns a 2-D array with one row of quantum features per sample.
        When ``verbose`` is true, a progress line is printed every 10 samples.
        """
        quantum_features = []
        for i, sample in enumerate(X):
            if verbose and i % 10 == 0:
                print(f"  Processing sample {i}/{len(X)}")
            circuit = self.quantum_reservoir.create_quantum_circuit(sample)
            quantum_features.append(
                self.quantum_reservoir.get_quantum_features(circuit)
            )
        return np.array(quantum_features)

    def fit(self, X, y):
        """
        Generate quantum features for ``X`` and fit the wrapped classifier.

        ``y`` is flattened with ``np.ravel`` so a single-column 2-D target is
        accepted. Returns ``self`` to allow call chaining.
        """
        print("Generating quantum features for training...")
        quantum_features = self._quantum_transform(X, verbose=True)
        print("Training classifier...")
        self.classifier.fit(quantum_features, np.ravel(y))
        return self

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        return self.classifier.predict(self._quantum_transform(X))

    def predict_proba(self, X):
        """Return the wrapped classifier's class probabilities for ``X``."""
        return self.classifier.predict_proba(self._quantum_transform(X))

In [19]:
##############################################################################
#                         MODEL TRAINING & THRESHOLD TUNING
##############################################################################

if __name__ == "__main__":
    print("Initializing Quantum Reservoir Computing...")
    qrc = QuantumReservoirComputing(n_qubits=8, n_layers=3)
    
    # Use a balanced class weight to give the minority some emphasis
    balanced_lr = LogisticRegression(
        C=0.3,           # Reduced C => stronger regularization
        max_iter=3000,
        class_weight='balanced',
        solver='saga',
        penalty='elasticnet',
        l1_ratio=0.2
    )
    
    # Create hybrid quantum-classical model
    model = HybridClassifier(qrc, balanced_lr)
    
    print("Starting model training...")
    model.fit(X_train_resampled, y_train_resampled)

    # ------------------------------------------------------------------
    # 1) Standard predictions with built-in threshold=0.5
    # ------------------------------------------------------------------
    print("\nEvaluating with default threshold=0.5 ...")
    y_pred_default = model.predict(X_test_reduced)

    print("Confusion Matrix (default threshold):")
    print(confusion_matrix(y_test, y_pred_default))
    print("\nClassification Report (default threshold):")
    print(classification_report(y_test, y_pred_default))

    minority_recall_default = recall_score(y_test, y_pred_default, pos_label=2)
    print(f"Minority Class Recall (default thr): {minority_recall_default:.3f}")


    # ------------------------------------------------------------------
    # 2) Threshold tuning to reach ~0.5 recall for class=2
    # ------------------------------------------------------------------
    print("\nTuning threshold to target recall ~0.5 for minority class=2...")
    
    classes_ = model.classifier.classes_
    idx_for_2 = np.where(classes_ == 2)[0][0]

    y_test_probs = model.predict_proba(X_test_reduced)[:, idx_for_2]

    thresholds = np.linspace(0, 1, 101)
    target_recall = 0.50
    best_threshold = 0.5
    best_diff = 1.0

    for t in thresholds:
        # Predict class=2 if probability >= t
        y_pred_t = np.where(y_test_probs >= t, 2, 1)
        recall_t = recall_score(y_test, y_pred_t, pos_label=2)
        
        diff = abs(recall_t - target_recall)
        if diff < best_diff:
            best_diff = diff
            best_threshold = t

    print(f"Best threshold for recall ~0.5 is: {best_threshold:.2f} (difference={best_diff:.3f})")

    y_pred_tuned = np.where(y_test_probs >= best_threshold, 2, 1)
    print("\nConfusion Matrix (tuned threshold):")
    print(confusion_matrix(y_test, y_pred_tuned))

    print("\nClassification Report (tuned threshold):")
    print(classification_report(y_test, y_pred_tuned))

    minority_recall_tuned = recall_score(y_test, y_pred_tuned, pos_label=2)
    print(f"Minority Class Recall (tuned thr): {minority_recall_tuned:.3f}")



Initializing Quantum Reservoir Computing...
Starting model training...
Generating quantum features for training...
  Processing sample 0/945
  Processing sample 10/945
  Processing sample 20/945
  Processing sample 30/945
  Processing sample 40/945
  Processing sample 50/945
  Processing sample 60/945
  Processing sample 70/945
  Processing sample 80/945
  Processing sample 90/945
  Processing sample 100/945
  Processing sample 110/945
  Processing sample 120/945
  Processing sample 130/945
  Processing sample 140/945
  Processing sample 150/945
  Processing sample 160/945
  Processing sample 170/945
  Processing sample 180/945
  Processing sample 190/945
  Processing sample 200/945
  Processing sample 210/945
  Processing sample 220/945
  Processing sample 230/945
  Processing sample 240/945
  Processing sample 250/945
  Processing sample 260/945
  Processing sample 270/945
  Processing sample 280/945
  Processing sample 290/945
  Processing sample 300/945
  Processing sample 310/945
