## **Diabetes Prediction**

### **Importing Libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pennylane as qml
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
import seaborn as sns
import warnings

warnings.simplefilter("ignore", category=UserWarning)

### **Data Preprocessing**

In [2]:
data = pd.read_csv('Diabetes.csv')
data['Sex'] = data['Sex'].map({'female': 0, 'male': 1})
label_encoder = LabelEncoder()
data['Prediction'] = label_encoder.fit_transform(data['Prediction'])

In [3]:
data.head()

Unnamed: 0,S.No,Age,Sex,Fasting,Post Prandial,GTT 1/2 Hr,GTT 1 Hr,GTT 1-1/2 Hr,GTT 2 Hr,HbA1C,...,D.Bilirubin,Hemoglobin,TSH3,UricAcid,Homosystiene,hs-CRP,SOD,LPO,Apo B/Apo,Prediction
0,1,38,0,87,78.826087,146.0,129.0,112.0,85.0,5.8,...,0.1,10.9,1.31,3.0,13.21,0.651765,1.554,0,0,1
1,2,33,0,119,95.0,89.0,85.0,5.5,111.0,6.9,...,1.5,2.7,15.85,4.158503,13.543922,0.651765,1.554,0,0,1
2,3,45,0,87,0.0,105.0,110.0,80.0,78.0,5.7,...,0.1,14.1,2.74,3.0,11.51,0.0,0.0,0,0,1
3,4,44,0,89,96.0,179.688406,203.391304,196.184783,177.112676,6.2,...,0.2,9.0,2.71,3.2,7.45,0.651765,1.554,0,0,1
4,5,44,1,97,78.826087,215.0,202.0,127.0,77.0,5.4,...,0.17,14.9,5.52,3.8,13.543922,0.651765,1.554,0,0,1


In [4]:
data.describe()

Unnamed: 0,S.No,Age,Sex,Fasting,Post Prandial,GTT 1/2 Hr,GTT 1 Hr,GTT 1-1/2 Hr,GTT 2 Hr,HbA1C,...,D.Bilirubin,Hemoglobin,TSH3,UricAcid,Homosystiene,hs-CRP,SOD,LPO,Apo B/Apo,Prediction
count,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,...,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0
mean,75.5,46.74,0.413333,115.9,78.826087,179.688406,203.391304,196.184783,177.112676,6.764,...,0.180267,13.301333,3.205102,4.158503,13.543922,0.651765,1.554,0.0,0.0,1.0
std,43.445368,9.787974,0.494081,40.588085,46.037285,57.00969,77.48041,87.141771,89.054323,1.595349,...,0.154272,2.362344,3.636707,1.282502,7.318496,0.485002,1.844877,0.0,0.0,0.819232
min,1.0,28.0,0.0,80.0,0.0,0.0,0.0,0.0,77.0,4.8,...,0.1,2.7,0.19,2.6,0.0,0.0,0.0,0.0,0.0,0.0
25%,38.25,40.25,0.0,92.0,78.826087,146.0,143.5,126.25,102.25,5.7,...,0.1,12.1,1.5625,3.2,13.543922,0.651765,1.554,0.0,0.0,0.0
50%,75.5,46.0,0.0,100.5,78.826087,179.688406,203.391304,195.0,154.5,6.15,...,0.1,13.4,2.29,3.9,13.543922,0.651765,1.554,0.0,0.0,1.0
75%,112.75,53.0,1.0,122.0,78.826087,204.75,244.0,251.5,223.0,7.3,...,0.2,14.7,3.205102,4.8,13.543922,0.651765,1.554,0.0,0.0,2.0
max,150.0,73.0,1.0,347.0,390.0,440.0,530.0,577.0,591.0,13.6,...,1.5,18.3,27.86,13.0,50.0,4.6,23.31,0.0,0.0,2.0


### **Feature Selection, Model Definition and Evaluation**

In [5]:
feature_columns = data.columns.difference(['ID No', 'Name', 'Prediction']).tolist()

In [6]:
accuracy_results = []
column_importance = {col: 0 for col in feature_columns}

In [7]:
def evaluate_model(selected_columns):
    X = data[selected_columns].values
    y = data['Prediction'].values
    
    y_encoded = (y == 'DM').astype(int)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    n_qubits = len(selected_columns)
    dev = qml.device('default.qubit', wires=n_qubits)

    params = np.random.normal(0, np.pi, size=n_qubits * 3)

    @qml.qnode(dev)
    def quantum_circuit(params, x):
        for i in range(n_qubits):
            qml.RX(x[i], wires=i)
            qml.RZ(params[i], wires=i)
        
        for i in range(n_qubits):
            for j in range(i + 1, n_qubits):
                target = (j + 1) % n_qubits
                if target != i and target != j:
                    qml.Toffoli(wires=[i, j, target])
        
        for i in range(n_qubits):
            qml.RX(params[n_qubits + i], wires=i)
            qml.RZ(params[2 * n_qubits + i], wires=i)
        
        return qml.expval(qml.PauliZ(0))


    y_pred = np.array([quantum_circuit(params, X_test[i]) for i in range(len(X_test))])
    y_pred = (y_pred > 0).astype(int)
    
    accuracy = np.mean(y_pred == y_test)
    return accuracy

In [8]:
max_columns = len(feature_columns)
iterations = 5

## Hybrid Randomizer so randomize the selected columns for a given length of feature set

### How It Works:

os.urandom:
Generates cryptographically secure random bytes.

hashlib.sha256:
Hashes the random bytes to produce a pseudo-random number.

Modulo Operation:
Maps the hash value into a valid index range for the choices list.

Unique Selection:
Ensures no duplicates by using a set to track selected indices.

In [10]:
import os
import hashlib

def cryptographic_randomizer(choices, num_select):
    """
    A cryptographic randomizer for selecting elements from a list.

    Args:
        choices (list): The list of elements to choose from.
        num_select (int): The number of elements to select.

    Returns:
        list: A list of selected elements.
    """
    if num_select > len(choices):
        raise ValueError("num_select cannot be greater than the number of choices available.")

    # Generate cryptographic random bytes and convert to indices
    chosen_indices = set()
    while len(chosen_indices) < num_select:
        # Use os.urandom to generate random bytes
        random_bytes = os.urandom(16)
        # Hash the bytes and take a slice to ensure it's within the range
        hash_value = int(hashlib.sha256(random_bytes).hexdigest(), 16)
        index = hash_value % len(choices)
        chosen_indices.add(index)

    # Map indices to actual elements
    selected = [choices[i] for i in chosen_indices]
    return selected

# Update your loop to use this randomizer
sample = 1
for num_columns in range(1, max_columns + 1):
    for _ in range(iterations):
        if sample < 41:
            # Use the cryptographic randomizer here
            selected_columns = cryptographic_randomizer(feature_columns, num_columns)

            accuracy = evaluate_model(selected_columns)

            if accuracy > 0.5 and len(selected_columns) > 2:
                print(f"Sample {sample}")
                print(f"\tNumber of columns: {len(selected_columns)}")
                print(f"\tAccuracy: {accuracy:.2%}")
                accuracy_results.append((selected_columns, accuracy))
                sample += 1

                for col in selected_columns:
                    column_importance[col] += accuracy


Sample 1
	Number of columns: 3
	Accuracy: 83.33%
Sample 2
	Number of columns: 3
	Accuracy: 100.00%
Sample 3
	Number of columns: 4
	Accuracy: 53.33%
Sample 4
	Number of columns: 5
	Accuracy: 96.67%
Sample 5
	Number of columns: 5
	Accuracy: 93.33%
Sample 6
	Number of columns: 5
	Accuracy: 100.00%
Sample 7
	Number of columns: 6
	Accuracy: 100.00%
Sample 8
	Number of columns: 6
	Accuracy: 80.00%
Sample 9
	Number of columns: 7
	Accuracy: 83.33%
Sample 10
	Number of columns: 7
	Accuracy: 93.33%
Sample 11
	Number of columns: 8
	Accuracy: 90.00%
Sample 12
	Number of columns: 8
	Accuracy: 100.00%
Sample 13
	Number of columns: 9
	Accuracy: 96.67%
Sample 14
	Number of columns: 9
	Accuracy: 86.67%
Sample 15
	Number of columns: 10
	Accuracy: 96.67%
Sample 16
	Number of columns: 10
	Accuracy: 83.33%
Sample 17
	Number of columns: 10
	Accuracy: 90.00%
Sample 18
	Number of columns: 11
	Accuracy: 86.67%
Sample 19
	Number of columns: 11
	Accuracy: 93.33%
Sample 20
	Number of columns: 11
	Accuracy: 93.33%

### ChaCha20 for generating pseudorandom byte stream

Overview of ChaCha20:

ChaCha20 generates a pseudorandom byte stream that can be used as a cryptographic randomizer. It operates on:

- A 256-bit key (32 bytes)
- A 96-bit nonce (12 bytes)
- A counter (used for ensuring randomness and avoiding reuse)

The algorithm involves:
Initializing a 512-bit state matrix.
Applying 20 rounds of a "quarter round" function.
Producing output blocks as a pseudorandom stream.


How It Works
1. The ChaCha20 randomizer (chacha20_randomizer) generates a sequence of random values using the chacha20_block function.
2. These values are used to select feature indices from feature_columns.
3. The selected feature indices are appended to selected_columns until the desired number of features (num_columns) is reached.
4. This randomization ensures cryptographically secure and unbiased selection of columns.


In [12]:
import struct
import numpy as np

def quarter_round(x, a, b, c, d):
    """
    Perform one quarter round of the ChaCha20 algorithm.
    """
    x[a] = (x[a] + x[b]) & 0xffffffff
    x[d] ^= x[a]
    x[d] = (x[d] << 16) | (x[d] >> 16)

    x[c] = (x[c] + x[d]) & 0xffffffff
    x[b] ^= x[c]
    x[b] = (x[b] << 12) | (x[b] >> 20)

    x[a] = (x[a] + x[b]) & 0xffffffff
    x[d] ^= x[a]
    x[d] = (x[d] << 8) | (x[d] >> 24)

    x[c] = (x[c] + x[d]) & 0xffffffff
    x[b] ^= x[c]
    x[b] = (x[b] << 7) | (x[b] >> 25)

def chacha20_block(key, counter, nonce):
    """
    Generate a ChaCha20 block.
    """
    # Initialize state matrix
    constants = (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574)
    key_words = struct.unpack('<8I', key)
    counter_nonce = (counter,) + struct.unpack('<3I', nonce)

    state = list(constants) + list(key_words) + list(counter_nonce)
    working_state = state[:]

    # Perform 20 rounds of the quarter-round function
    for _ in range(10):  # 10 iterations of double rounds
        # Column rounds
        quarter_round(working_state, 0, 4, 8, 12)
        quarter_round(working_state, 1, 5, 9, 13)
        quarter_round(working_state, 2, 6, 10, 14)
        quarter_round(working_state, 3, 7, 11, 15)
        # Diagonal rounds
        quarter_round(working_state, 0, 5, 10, 15)
        quarter_round(working_state, 1, 6, 11, 12)
        quarter_round(working_state, 2, 7, 8, 13)
        quarter_round(working_state, 3, 4, 9, 14)

    # Add the original state to the working state
    output = [(working_state[i] + state[i]) & 0xffffffff for i in range(16)]

    # Convert to bytes
    return struct.pack('<16I', *output)

def chacha20_randomizer(features, num_columns, key, nonce):
    """
    Randomly select columns using the ChaCha20 algorithm.
    """
    selected_columns = []
    counter = 0

    while len(selected_columns) < num_columns:
        # Generate a random block
        random_block = chacha20_block(key, counter, nonce)
        counter += 1

        # Use random values to select columns
        for i in range(0, len(random_block), 4):
            random_index = struct.unpack('<I', random_block[i:i + 4])[0] % len(features)
            if features[random_index] not in selected_columns:
                selected_columns.append(features[random_index])
                if len(selected_columns) == num_columns:
                    break

    return selected_columns

# Generate a cryptographic key and nonce
import os
key = os.urandom(32)  # 256-bit key
nonce = os.urandom(12)  # 96-bit nonce

# Replace cryptographic_randomizer with the ChaCha20 randomizer
sample = 1
for num_columns in range(1, max_columns + 1):
    for _ in range(iterations):
        if sample < 41:
            # Use the ChaCha20 randomizer here
            selected_columns = chacha20_randomizer(feature_columns, num_columns, key, nonce)

            accuracy = evaluate_model(selected_columns)

            if accuracy > 0.5 and len(selected_columns) > 2:
                print(f"Sample {sample}")
                print(f"\tNumber of columns: {len(selected_columns)}")
                print(f"\tAccuracy: {accuracy:.2%}")
                accuracy_results.append((selected_columns, accuracy))
                sample += 1

                for col in selected_columns:
                    column_importance[col] += accuracy

Sample 1
	Number of columns: 3
	Accuracy: 86.67%
Sample 2
	Number of columns: 3
	Accuracy: 60.00%
Sample 3
	Number of columns: 3
	Accuracy: 90.00%
Sample 4
	Number of columns: 3
	Accuracy: 90.00%
Sample 5
	Number of columns: 4
	Accuracy: 90.00%
Sample 6
	Number of columns: 4
	Accuracy: 90.00%
Sample 7
	Number of columns: 5
	Accuracy: 86.67%
Sample 8
	Number of columns: 6
	Accuracy: 86.67%
Sample 9
	Number of columns: 6
	Accuracy: 63.33%
Sample 10
	Number of columns: 7
	Accuracy: 73.33%
Sample 11
	Number of columns: 7
	Accuracy: 60.00%
Sample 12
	Number of columns: 7
	Accuracy: 90.00%
Sample 13
	Number of columns: 8
	Accuracy: 86.67%
Sample 14
	Number of columns: 9
	Accuracy: 83.33%
Sample 15
	Number of columns: 10
	Accuracy: 76.67%
Sample 16
	Number of columns: 10
	Accuracy: 86.67%
Sample 17
	Number of columns: 10
	Accuracy: 90.00%
Sample 18
	Number of columns: 11
	Accuracy: 86.67%
Sample 19
	Number of columns: 11
	Accuracy: 83.33%
Sample 20
	Number of columns: 12
	Accuracy: 66.67%
Sam