In [3]:
import pandas as pd
import numpy as np
import pennylane as qml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import warnings

In [4]:
warnings.simplefilter("ignore", category=UserWarning)

# Data Preprocessing
data = pd.read_csv('Diabetes.csv')
data['Sex'] = data['Sex'].map({'female': 0, 'male': 1})
label_encoder = LabelEncoder()
data['Prediction'] = label_encoder.fit_transform(data['Prediction'])

In [5]:
# Feature Selection
feature_columns = data.columns.difference(['ID No', 'Name', 'Prediction']).tolist()


In [6]:
# Initialize tracking for results
accuracy_results = []
column_importance = {col: 0 for col in feature_columns}


In [10]:
# Define Quantum Circuit 2: Enhanced Entanglement with Multi-Controlled Gates
def evaluate_model(selected_columns):
    X = data[selected_columns].values
    y = data['Prediction'].values
    
    y_encoded = (y == 'DM').astype(int)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    n_qubits = len(selected_columns)
    dev = qml.device('default.qubit', wires=n_qubits)

    params = np.random.normal(0, np.pi, size=n_qubits * 3)

    @qml.qnode(dev)
    def quantum_circuit(params, x):
        for i in range(n_qubits):
            qml.RX(x[i], wires=i)
            qml.RZ(params[i], wires=i)
        
        for i in range(n_qubits):
            for j in range(i + 1, n_qubits):
                # Ensure unique target wire that is not in `i` or `j`
                target = (j + 1) % n_qubits
                if target != i and target != j:
                    qml.Toffoli(wires=[i, j, target])
        
        for i in range(n_qubits):
            qml.RX(params[n_qubits + i], wires=i)
            qml.RZ(params[2 * n_qubits + i], wires=i)
        
        return qml.expval(qml.PauliZ(0))


    y_pred = np.array([quantum_circuit(params, X_train[i]) for i in range(len(X_train))])
    y_pred = (y_pred > 0).astype(int)
    
    accuracy = np.mean(y_pred == y_train)
    return accuracy


In [11]:
# Evaluate Model
max_columns = len(feature_columns)
iterations = 5
sample = 1


In [15]:
for num_columns in range(1, max_columns + 1):
    for _ in range(iterations):
        if sample < 41:
            selected_columns = np.random.choice(feature_columns, size=num_columns, replace=False)
            
            
            accuracy = evaluate_model(selected_columns)
            
            if accuracy > 0.6 and len(selected_columns) > 2:
                # Print selected columns
                print(f"Sample {sample} - Selected Columns: {selected_columns}")
            
                print(f"Sample {sample} - Circuit 2")
                print(f"\tNumber of columns: {len(selected_columns)}")
                print(f"\tAccuracy: {accuracy:.2%}")
                accuracy_results.append((selected_columns, accuracy))
                sample += 1

Sample 26 - Selected Columns: ['D.Bilirubin' 'Apolipoprotein-B' 'TSH3']
Sample 26 - Circuit 2
	Number of columns: 3
	Accuracy: 97.50%
Sample 27 - Selected Columns: ['Insulin' 'GTT 2 Hr' 'MGV']
Sample 27 - Circuit 2
	Number of columns: 3
	Accuracy: 89.17%
Sample 28 - Selected Columns: ['HDL' 'T.Bilirubin' 'P']
Sample 28 - Circuit 2
	Number of columns: 3
	Accuracy: 83.33%
Sample 29 - Selected Columns: ['P' 'TGL' 'Alb/Crea Ratio']
Sample 29 - Circuit 2
	Number of columns: 3
	Accuracy: 90.83%
Sample 30 - Selected Columns: ['Albumin' 'T.Bilirubin' 'GTT 2 Hr']
Sample 30 - Circuit 2
	Number of columns: 3
	Accuracy: 95.00%
Sample 31 - Selected Columns: ['HDL' 'TGL' 'Chol/HDL.Ratio' 'Albumin']
Sample 31 - Circuit 2
	Number of columns: 4
	Accuracy: 75.00%
Sample 32 - Selected Columns: ['Hemoglobin' 'SOD' 'LDL' 'Age']
Sample 32 - Circuit 2
	Number of columns: 4
	Accuracy: 60.83%
Sample 33 - Selected Columns: ['Sex' 'LPO' 'GTT 1/2 Hr' 'D.Bilirubin' 'Microalbumin']
Sample 33 - Circuit 2
	Number of 