In [36]:
import pandas as pd
import numpy as np
import pennylane as qml
import tensorflow as tf
import keras_tuner as kt
import random
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout
from tensorflow.keras import losses
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, balanced_accuracy_score, average_precision_score,
    matthews_corrcoef, cohen_kappa_score, brier_score_loss, roc_auc_score
)
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
#Load dataset
df=pd.read_csv("Indian Liver Patient Dataset (ILPD).csv")

df_cleaned = df.copy()

#Categorical variable
df_cleaned['Gender'] = df_cleaned['Gender'].map({'Male': 0, 'Female': 1})

#Binary output
df_cleaned['Sickness'] = df_cleaned['Sickness'].replace(2, 0)

#NAN values
df_cleaned['A/G'] = df_cleaned['A/G'].fillna(df_cleaned['A/G'].mean())

In [38]:
# Filter positive and negative cases
pos = df_cleaned[df_cleaned['Sickness'] == 1]
neg = df_cleaned[df_cleaned['Sickness'] == 0]

# Determine number of test samples
n_test = min(len(pos), len(neg)) // 2

# Sample test data
test_pos = pos.sample(n=n_test, random_state=42)
test_neg = neg.sample(n=n_test, random_state=42)

# Combine into balanced test set
test = pd.concat([test_pos, test_neg]).reset_index(drop=True)

# Indices to remove
drop_idx = [3, 31, 35, 89, 104, 106, 114, 115, 116, 124, 130, 132, 135, 139, 143, 150, 151, 157, 161]
print(test.loc[drop_idx])
test = test.drop(drop_idx, errors='ignore').reset_index(drop=True)

# Remaining data for training
train = df_cleaned.drop(test.index).reset_index(drop=True)

# Remove duplicates
test = test.drop_duplicates().reset_index(drop=True)
train = train.drop_duplicates().reset_index(drop=True)

# Separate features and labels
X_train, y_train = train.drop(columns=['Sickness']), train['Sickness']
X_test, y_test = test.drop(columns=['Sickness']), test['Sickness']

# Print sizes and class distributions
print(f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print(y_train.value_counts())
print(y_test.value_counts())

     Age  Gender   TB   DB  Alkphos  Sgpt  Sgot   TP  ALB   A/G  Sickness
3     18       0  1.8  0.7      178    35    36  6.8  3.6  1.10         1
31    17       0  0.9  0.2      224    36    45  6.9  4.2  1.55         1
35    24       0  1.0  0.2      189    52    31  8.0  4.8  1.50         1
89    60       0  2.2  1.0      271    45    52  6.1  2.9  0.90         0
104   60       0  0.8  0.2      215    24    17  6.3  3.0  0.90         0
106   38       1  2.6  1.2      410    59    57  5.6  3.0  0.80         0
114   35       0  2.0  1.1      226    33   135  6.0  2.7  0.80         0
115   11       0  0.7  0.1      592    26    29  7.1  4.2  1.40         0
116   65       0  0.7  0.2      265    30    28  5.2  1.8  0.52         0
124   36       0  5.3  2.3      145    32    92  5.1  2.6  1.00         0
130   48       0  0.7  0.2      208    15    30  4.6  2.1  0.80         0
132   65       0  1.4  0.6      260    28    24  5.2  2.2  0.70         0
135   62       0  0.6  0.1      160   

In [39]:
#Quantum layer
n_qubits = 2
n_layers = 4
dev = qml.device("default.qubit", wires=n_qubits, seed=42)

@qml.qnode(dev)
def qnode(inputs, weights):
    # Encode inputs into qubits
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    # Trainable quantum layers
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    # Return expectation values
    return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

# Parameter shapes
weight_shapes = {"weights": (n_layers, n_qubits)}

In [81]:
#Early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=10,          # Stop if no improvement after 10 epochs
    restore_best_weights=True  # Restore best weights
)

In [130]:
# Compute original class weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)

# Smoothing factor
alpha = 0.7

# Apply formula to smooth the weights
class_weight_dict = {i: 1 + alpha * (weight - 1) for i, weight in enumerate(class_weights)}


In [138]:
# Define Stratified K-Fold
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

# Focal loss for binary classification
loss = losses.BinaryFocalCrossentropy(gamma=1.0, alpha=0.3)

for train_index, val_index in kf.split(X_train, y_train):
    # Split data
    X_train_cv, X_val_cv = X_train.iloc[train_index], X_train.iloc[val_index]
    y_train_cv, y_val_cv = y_train.iloc[train_index], y_train.iloc[val_index]

    # Scale features
    scaler = StandardScaler()
    X_train_cv = scaler.fit_transform(X_train_cv)
    X_val_cv = scaler.transform(X_val_cv)

    # Hybrid quantum-classical model
    model_cv = Sequential([
        InputLayer(input_shape=(X_train_cv.shape[1],)),
        Dense(256, activation="relu", kernel_initializer="glorot_uniform"),
        Dropout(0.3),
        Dense(128, activation="relu", kernel_initializer="glorot_uniform"),
        Dropout(0.3),
        Dense(n_qubits),
        qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=n_qubits),
        Dense(1, activation="sigmoid")
    ])

    # Compile model
    model_cv.compile(loss=loss, optimizer=Adam(learning_rate=0.001), metrics=["accuracy"])

    # Train model
    history_cv = model_cv.fit(
        X_train_cv, y_train_cv,
        batch_size=32, epochs=100,
        validation_data=(X_val_cv, y_val_cv),
        callbacks=[early_stopping],
        class_weight=class_weight_dict,
        verbose=1
    )

    # Predictions
    y_val_pred_prob = model_cv.predict(X_val_cv)
    y_val_pred = (y_val_pred_prob > 0.5).astype(int)

    # Compute metrics
    acc = accuracy_score(y_val_cv, y_val_pred)
    prec = precision_score(y_val_cv, y_val_pred, zero_division=0)
    rec = recall_score(y_val_cv, y_val_pred, zero_division=0)
    f1 = f1_score(y_val_cv, y_val_pred, zero_division=0)
    auc = roc_auc_score(y_val_cv, y_val_pred_prob)
    tn, fp, fn, tp = confusion_matrix(y_val_cv, y_val_pred).ravel()
    spe = tn / (tn + fp)

    cv_scores.append([acc, prec, rec, f1, auc, spe])

# Convert to array and compute mean & std
cv_scores = np.array(cv_scores)
metric_names = ["Accuracy", "Precision", "Recall", "F1-score", "AUC", "Specificity"]

# Print metrics
for i, name in enumerate(metric_names):
    print(f"{name}: Mean={cv_scores[:, i].mean():.4f}, Std={cv_scores[:, i].std():.4f}")


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


In [26]:
#----------------------------Metrics------------------------------------------------

In [139]:
#To all training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

In [140]:
# Hybrid quantum-classical model
model_final = Sequential([
    InputLayer(input_shape=(X_train_scaled.shape[1],)),
    Dense(256, activation="relu", kernel_initializer="glorot_uniform"),
    Dropout(0.3),
    Dense(128, activation="relu", kernel_initializer="glorot_uniform"),
    Dropout(0.3),
    Dense(n_qubits),
    qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=n_qubits),
    Dense(1, activation="sigmoid")
])

# Compile model
model_final.compile(loss=loss, optimizer=Adam(learning_rate=0.001), metrics=["accuracy"])

# Train model again with all the train data
history_final = model_final.fit(
    X_train_scaled, y_train,
    batch_size=32, epochs=100,
    validation_split = 0.25,
    callbacks=[early_stopping],
    class_weight=class_weight_dict,
    verbose=1
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100


In [148]:
model_final.summary()

Model: "sequential_89"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_411 (Dense)           (None, 256)               2816      
                                                                 
 dropout_233 (Dropout)       (None, 256)               0         
                                                                 
 dense_412 (Dense)           (None, 128)               32896     
                                                                 
 dropout_234 (Dropout)       (None, 128)               0         
                                                                 
 dense_413 (Dense)           (None, 2)                 258       
                                                                 
 keras_layer_89 (KerasLayer  (None, 2)                 8         
 )                                                               
                                                     

In [141]:
# Predicctions
y_pred_prob = model_final.predict(X_test_scaled)  
y_pred = (y_pred_prob > 0.45).astype("int")  

# Metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)  
recall = recall_score(y_test, y_pred)   # Sensitivity
f1 = f1_score(y_test, y_pred)

# Specificity from confusion matrix
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
specificity = tn / (tn + fp)

balanced_acc = balanced_accuracy_score(y_test, y_pred)
pr_auc = average_precision_score(y_test, y_pred_prob)
mcc = matthews_corrcoef(y_test, y_pred)
kappa = cohen_kappa_score(y_test, y_pred)
brier = brier_score_loss(y_test, y_pred_prob)
roc_auc = roc_auc_score(y_test, y_pred_prob)

# Print results
print("=== Metrics ===")
print(f'Accuracy:           {accuracy:.4f}')
print(f'Precision:          {precision:.4f}')
print(f'Recall (Sensitivity): {recall:.4f}')
print(f'Specificity:        {specificity:.4f}')
print(f'F1-score:           {f1:.4f}')
print(f'Balanced Accuracy:  {balanced_acc:.4f}')
print(f'ROC AUC:            {roc_auc:.4f}')
print(f'PR AUC:             {pr_auc:.4f}')
print(f'MCC:                {mcc:.4f}')
print(f'Cohen\'s Kappa:      {kappa:.4f}')
print(f'Brier Score:        {brier:.4f}')

=== Resultados del Modelo ===
Accuracy:           0.8425
Precision:          0.8065
Recall (Sensitivity): 0.9375
Specificity:        0.7273
F1-score:           0.8671
Balanced Accuracy:  0.8324
ROC AUC:            0.9148
PR AUC:             0.9280
MCC:                0.6880
Cohen's Kappa:      0.6764
Brier Score:        0.1541
