## **Diabetes Prediction**

### **Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pennylane as qml
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import seaborn as sns
import warnings

# Silence every UserWarning for the rest of the session to keep cell output
# readable. NOTE(review): this also hides UserWarnings that may matter.
warnings.simplefilter("ignore", category=UserWarning)

### **Data Preprocessing**

In [None]:
# Load the raw table; the path is relative to the notebook's working directory.
data = pd.read_csv('../Diabetes.csv')

# Replace the textual 'Sex' values with integer codes. Any value other than
# the two keys below becomes NaN, because Series.map does not pass unknown
# values through.
sex_codes = {'female': 0, 'male': 1}
data['Sex'] = data['Sex'].map(sex_codes)

# Integer-encode the target column. The fitted encoder stays available as
# ``label_encoder`` so codes can be mapped back to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(data['Prediction'])
data['Prediction'] = label_encoder.transform(data['Prediction'])

In [None]:
# Preview the first rows of the frame after the 'Sex' and 'Prediction' encoding.
data.head()

In [None]:
# Summary statistics of the frame's columns, as reported by pandas.
data.describe()

### **Feature Selection, Model Definition and Evaluation**

In [None]:
# Every column except the three listed here is a candidate model feature.
non_feature_columns = ['ID No', 'Name', 'Prediction']
feature_columns = list(data.columns.difference(non_feature_columns))

In [None]:
# (selected_columns, accuracy) pairs collected by the sampling loop.
accuracy_results = []
# Running per-column total of the accuracies of every recorded subset that
# contained the column; starts at zero for each candidate feature.
column_importance = dict.fromkeys(feature_columns, 0)

In [None]:
def evaluate_model(selected_columns):
    """Score a randomly parameterised quantum circuit on a feature subset.

    The module-level ``data`` frame supplies the inputs: the chosen columns
    are the features and ``data['Prediction']`` is the target. Rows are split
    80/20 (``random_state=42``), standardised with statistics fitted on the
    training part only, and each test row is run through a circuit with one
    qubit per selected column. A positive PauliZ expectation value on wire 0
    is read as class 1, anything else as class 0.

    NOTE: the circuit parameters are drawn at random on every call and are
    never optimised, so the returned accuracy varies from call to call.

    Args:
        selected_columns: Column names of ``data`` to use as features.

    Returns:
        Fraction of test rows whose predicted class equals the binary label
        (1 for 'DM', 0 for everything else).

    Raises:
        ValueError: If the labels are already integer-encoded and the
            module-level ``label_encoder`` has never seen the label 'DM'.
    """
    X = data[selected_columns].values
    y = data['Prediction'].values

    # The preprocessing cell integer-encodes 'Prediction' with the
    # module-level ``label_encoder``. Comparing those integer codes with the
    # string 'DM' never matches, which labelled every row 0. When the labels
    # are numeric, compare against the code the encoder assigned to 'DM';
    # when they are still strings, compare against 'DM' directly.
    if y.dtype.kind in 'OUS':
        positive_label = 'DM'
    else:
        positive_label = label_encoder.transform(['DM'])[0]
    y_encoded = (y == positive_label).astype(int)

    # Only the test labels are needed: nothing is trained on y_train.
    X_train, X_test, _, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training rows only so that no test-set statistics
    # leak into the standardisation.
    scaler = StandardScaler().fit(X_train)
    X_test = scaler.transform(X_test)

    n_qubits = len(selected_columns)
    dev = qml.device('default.qubit', wires=n_qubits)

    # Three rotation angles per qubit: one RZ after the data encoding and an
    # RX/RZ pair after the entangling layer.
    params = np.random.normal(0, np.pi, size=n_qubits * 3)

    @qml.qnode(dev)
    def quantum_circuit(params, x):
        # Data encoding: one feature per qubit as an RX angle, then an RZ.
        for i in range(n_qubits):
            qml.RX(x[i], wires=i)
            qml.RZ(params[i], wires=i)

        # Entangling layer: for each qubit pair (i, j) apply a Toffoli onto
        # the wire after j (wrapping around), unless that wire is one of the
        # two controls. With fewer than three qubits no gate is applied.
        for i in range(n_qubits):
            for j in range(i + 1, n_qubits):
                target = (j + 1) % n_qubits
                if target != i and target != j:
                    qml.Toffoli(wires=[i, j, target])

        # Final RX/RZ rotation on every qubit.
        for i in range(n_qubits):
            qml.RX(params[n_qubits + i], wires=i)
            qml.RZ(params[2 * n_qubits + i], wires=i)

        return qml.expval(qml.PauliZ(0))

    expectations = np.array([quantum_circuit(params, row) for row in X_test])
    y_pred = (expectations > 0).astype(int)

    return accuracy_score(y_test, y_pred)

In [None]:
# Largest subset size to try: all candidate features at once.
max_columns = len(feature_columns)
# Number of random subsets drawn for each subset size.
iterations = 5

In [None]:
# Randomly sample feature subsets of increasing size, evaluate each one, and
# record those that beat 50% accuracy until 20 samples have been collected.
#
# Only subsets with more than two columns are ever recorded, so sizes 1 and 2
# are skipped outright instead of being simulated and then discarded. The
# loops also stop as soon as the sample cap is reached.
max_samples = 20
min_columns = 3

sample = 1  # 1-based index of the next sample to record
for num_columns in range(min_columns, max_columns + 1):
    if sample > max_samples:
        break
    for _ in range(iterations):
        if sample > max_samples:
            break

        selected_columns = np.random.choice(feature_columns, size=num_columns, replace=False)
        accuracy = evaluate_model(selected_columns)

        if accuracy <= 0.5:
            continue

        print(f"Sample {sample}")
        print(f"\tNumber of columns: {len(selected_columns)}")
        print(f"\tAccuracy: {accuracy:.2%}")
        accuracy_results.append((selected_columns, accuracy))
        sample += 1

        # Credit every column of a recorded subset with the subset's accuracy.
        for col in selected_columns:
            column_importance[col] += accuracy

### **Results and Analysis**

#### Feature sets by accuracy

In [None]:
# Turn the recorded (features, accuracy) pairs into a table, add the size of
# each feature subset, and list the subsets from best to worst accuracy.
accuracy_results = pd.DataFrame(accuracy_results, columns=["Features", "Accuracy"])
feature_counts = accuracy_results["Features"].apply(len)
sorted_results = (
    accuracy_results
    .assign(**{"No. Features": feature_counts})
    .sort_values(by="Accuracy", ascending=False)
)

print(sorted_results)

#### Visualization of Feature sets by Accuracy

In [None]:
# Bar chart of the ten most accurate feature subsets.
#
# Re-wrapping keeps this cell runnable whether ``accuracy_results`` is still
# the raw list of pairs or already a DataFrame.
accuracy_results = pd.DataFrame(accuracy_results, columns=["Features", "Accuracy"])
sorted_results = accuracy_results.sort_values(by=["Accuracy"], ascending=False)

# Turn each subset into one readable label. The join is applied to every row
# and the top ten are selected explicitly afterwards: slicing the joined
# Series before assigning it back filled the remaining rows with NaN through
# index alignment.
sorted_results["Features"] = sorted_results["Features"].apply(", ".join)
top_results = sorted_results.head(10)

plt.figure(figsize=(12, 6))
sns.barplot(x="Features", y="Accuracy", data=top_results, palette="viridis", hue="Features")
plt.ylabel('Accuracy')
plt.xlabel('Feature Set')
plt.title('Model Accuracy by Features')
plt.ylim(0, 1)
# The subset labels are too long for the axis, so the ticks are hidden and
# the legend below the plot identifies the bars instead.
plt.xticks([])
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend(title='Legend', loc='upper right', bbox_to_anchor=(1, -0.1))
plt.show()