In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import random as rn

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin

import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import AdamOptimizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, auc


np.random.seed(42)
rn.seed(42)

In [2]:
aids = pd.read_csv('data/AIDS_Classification.csv')

In [3]:
aids.drop(columns={'z30', 'treat','str2', 'cd420','cd820'}, inplace=True)

In [4]:
class DataFrameTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, transformer, original_columns):
        self.transformer = transformer
        self.original_columns = original_columns
    
    def fit(self, X, y=None):
        self.transformer.fit(X, y)
        return self
    
    def transform(self, X, y=None):
        X_transformed = self.transformer.transform(X)
        if not isinstance(X_transformed, pd.DataFrame):
            X_transformed = pd.DataFrame(X_transformed, columns=self.get_feature_names_out())
        
        original_data = X[self.original_columns]
        
        X_final = pd.concat([original_data.reset_index(drop=True), X_transformed.reset_index(drop=True)], axis=1)
        return X_final
    
    def get_feature_names_out(self):
        return self.transformer.get_feature_names_out()

In [None]:
cols_to_scale = ['time', 'age', 'wtkg', 'karnof', 'preanti', 'cd40', 'cd80']
cols_to_one_hot = ['trt', 'strat']
orginal_columns = [col for col in aids.columns if col not in (cols_to_scale + cols_to_one_hot)]

preprocessor = ColumnTransformer(transformers = [
    ('numerical', StandardScaler(), cols_to_scale),
    ('one_hot', OneHotEncoder(drop='first'), cols_to_one_hot)
])

pipe = Pipeline(steps = [
                ('preprocessor', DataFrameTransformer(preprocessor, orginal_columns)),
])

aids_t = pipe.fit_transform(aids)

aids_t.to_csv('data/aids_t.csv')

In [6]:
X = aids_t.drop(columns=['infected'])
y = aids_t['infected']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [None]:
n_qubits = 2  
dev = qml.device("default.qubit", wires=n_qubits)


In [8]:
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

In [9]:
# Redukcja wymiaru wejściowego
def preprocess(X):
    reduced = np.tanh(np.mean(X, axis=1)).reshape(-1, 1)
    return reduced

X_train_reduced = preprocess(X_train)

In [10]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import AdamOptimizer

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

def quantum_layer_one(weights):
    for i in range(n_qubits):
        qml.Rot(weights[i, 0], weights[i, 1], weights[i, 2], wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    qml.CNOT(wires=[n_qubits - 1, 0])

@qml.qnode(dev)
def qnn_one(inputs, weights):
    for i in range(n_qubits):
        qml.RX(inputs[i % len(inputs)], wires=i)
    quantum_layer_one(weights)
    return qml.expval(qml.PauliZ(0))

def cost_one(weights, X, y):
    predictions = [qnn_one(X[i], weights) for i in range(len(X))]
    return np.mean((np.array(predictions) - np.array(y)) ** 2)

models = {
    "QNN1": {
        "model": qnn_one,
        "cost": cost_one,
        "neurons": n_qubits,
        "layers": 1,
        "epochs": 100,
        "weights_init": np.random.randn(n_qubits, 3, requires_grad=True)
    }
}

In [11]:
def quantum_layer_two(weights):
    for i in range(n_qubits):
        qml.Rot(weights[i, 0], weights[i, 1], weights[i, 2], wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    qml.CNOT(wires=[n_qubits - 1, 0])
    for i in range(n_qubits):
        qml.RY(weights[i, 0], wires=i)
    for i in range(n_qubits - 1):
        qml.CNOT(wires=[i, i + 1])
    qml.CNOT(wires=[n_qubits - 1, 0])

@qml.qnode(dev)
def qnn_two(inputs, weights):
    for i in range(n_qubits):
        qml.RX(inputs[i % len(inputs)], wires=i)
    quantum_layer_two(weights)
    return qml.expval(qml.PauliZ(0))

def cost_two(weights, X, y):
    predictions = [qnn_two(X[i], weights) for i in range(len(X))]
    return np.mean((np.array(predictions) - np.array(y)) ** 2)

models["QNN2"] = {
    "model": qnn_two,
    "cost": cost_two,
    "neurons": n_qubits,
    "layers": 2,
    "epochs": 200,
    "weights_init": np.random.randn(n_qubits, 3, requires_grad=True)
}


In [12]:
def quantum_layer_three(weights):
    for l in range(3):  # Trzy warstwy
        for i in range(n_qubits):
            qml.Rot(weights[l, i, 0], weights[l, i, 1], weights[l, i, 2], wires=i)
        for i in range(n_qubits - 1):
            qml.CNOT(wires=[i, i + 1])
        qml.CNOT(wires=[n_qubits - 1, 0])

@qml.qnode(dev)
def qnn_three(inputs, weights):
    for i in range(n_qubits):
        qml.RX(inputs[i % len(inputs)], wires=i)
    quantum_layer_three(weights)
    return qml.expval(qml.PauliZ(0))

def cost_three(weights, X, y):
    predictions = [qnn_three(X[i], weights) for i in range(len(X))]
    return np.mean((np.array(predictions) - np.array(y)) ** 2)

models["QNN3"] = {
    "model": qnn_three,
    "cost": cost_three,
    "neurons": n_qubits,
    "layers": 3,
    "epochs": 300,
    "weights_init": np.random.randn(3, n_qubits, 3, requires_grad=True)
}


In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
results = []

for name, info in models.items():
    weights = info['weights_init']
    opt = AdamOptimizer(stepsize=0.1)
    for step in range(info['epochs']):
        weights, cost_val = opt.step_and_cost(lambda w: info['cost'](w, X_train_reduced, y_train), weights)
    
    y_pred_prob = [info['model'](X_test[i], weights) for i in range(len(X_test))]
    auc = roc_auc_score(y_test, y_pred_prob)
    gini = 2 * auc - 1
    y_pred = (np.array(y_pred_prob) > 0.5).astype(int)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    error_rate = 1 - accuracy
    
    results.append({
        'Model': name,
        'Neurony': info['neurons'],
        'Warstwy ukryte': info['layers'],
        'Epoki': info['epochs'],
        'AUC': auc,
        'Gini': gini,
        'Accuracy': accuracy,
        'F1-score': f1,
        'Error Rate': error_rate
    })

# Tworzenie DataFrame z wynikami
import pandas as pd
results = pd.DataFrame(results)

  Model  Neurony  Warstwy ukryte  Epoki       AUC      Gini  Accuracy  \
0  QNN1        2               1    100  0.461216 -0.077569  0.757009   
1  QNN2        2               2    200  0.491330 -0.017341  0.757009   
2  QNN3        2               3    300  0.492735 -0.014530  0.757009   

   F1-score  Error Rate  
0       0.0    0.242991  
1       0.0    0.242991  
2       0.0    0.242991  


In [None]:
results.head()

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
results = []

info = models["QNN1"]
name = "QNN1"
weights = info['weights_init']
opt = AdamOptimizer(stepsize=0.1)
for step in range(info['epochs']):
    weights, cost_val = opt.step_and_cost(lambda w: info['cost'](w, X_train_reduced, y_train), weights)
# y_pred_prob = Parallel(n_jobs=-1)(delayed(info['model'])(X_test[i], weights) for i in range(len(X_test)))
y_pred_prob = [info['model'](X_test[i], weights) for i in range(len(X_test))]
auc = roc_auc_score(y_test, y_pred_prob)
gini = 2 * auc - 1
y_pred = (np.array(y_pred_prob) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
error_rate = 1 - accuracy

results.append({
    'Model': name,
    'Neurony': info['neurons'],
    'Warstwy ukryte': info['layers'],
    'Epoki': info['epochs'],
    'AUC': auc,
    'Gini': gini,
    'Accuracy': accuracy,
    'F1-score': f1,
    'Error Rate': error_rate
})

# Tworzenie DataFrame z wynikami
import pandas as pd
df = pd.DataFrame(results)
print(df)

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
results = []

info = models["QNN2"]
name = "QNN2"
weights = info['weights_init']
opt = AdamOptimizer(stepsize=0.1)
for step in range(info['epochs']):
    weights, cost_val = opt.step_and_cost(lambda w: info['cost'](w, X_train_reduced, y_train), weights)
# y_pred_prob = Parallel(n_jobs=-1)(delayed(info['model'])(X_test[i], weights) for i in range(len(X_test)))
y_pred_prob = [info['model'](X_test[i], weights) for i in range(len(X_test))]
auc = roc_auc_score(y_test, y_pred_prob)
gini = 2 * auc - 1
y_pred = (np.array(y_pred_prob) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
error_rate = 1 - accuracy

results.append({
    'Model': name,
    'Neurony': info['neurons'],
    'Warstwy ukryte': info['layers'],
    'Epoki': info['epochs'],
    'AUC': auc,
    'Gini': gini,
    'Accuracy': accuracy,
    'F1-score': f1,
    'Error Rate': error_rate
})

# Tworzenie DataFrame z wynikami
import pandas as pd
df = pd.DataFrame(results)
print(df)

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
results = []

info = models["QNN3"]
name = "QNN3"
weights = info['weights_init']
opt = AdamOptimizer(stepsize=0.1)
for step in range(info['epochs']):
    weights, cost_val = opt.step_and_cost(lambda w: info['cost'](w, X_train_reduced, y_train), weights)
# y_pred_prob = Parallel(n_jobs=-1)(delayed(info['model'])(X_test[i], weights) for i in range(len(X_test)))
y_pred_prob = [info['model'](X_test[i], weights) for i in range(len(X_test))]
auc = roc_auc_score(y_test, y_pred_prob)
gini = 2 * auc - 1
y_pred = (np.array(y_pred_prob) > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
error_rate = 1 - accuracy

results.append({
    'Model': name,
    'Neurony': info['neurons'],
    'Warstwy ukryte': info['layers'],
    'Epoki': info['epochs'],
    'AUC': auc,
    'Gini': gini,
    'Accuracy': accuracy,
    'F1-score': f1,
    'Error Rate': error_rate
})

# Tworzenie DataFrame z wynikami
import pandas as pd
df = pd.DataFrame(results)
print(df)

In [14]:
df.head()

Unnamed: 0,Model,Neurony,Warstwy ukryte,Epoki,AUC,Gini,Accuracy,F1-score,Error Rate
0,QNN1,2,1,100,0.461216,-0.077569,0.757009,0.0,0.242991
1,QNN2,2,2,200,0.49133,-0.017341,0.757009,0.0,0.242991
2,QNN3,2,3,300,0.492735,-0.01453,0.757009,0.0,0.242991
