## Applying the NN structure used in the NQE method

In [None]:
import os
from os import listdir
import pandas as pd
import numpy as np
import tqdm

from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
import matplotlib.pyplot as plt
from collections import Counter

import torch
import torch.nn as nn
import torch.optim as optim

import data

### Load dataset

In [None]:
# --- Dataset selection (forwarded to data.data_load_and_process) ---
target = "ALDH1"
sampling = "1_6"
feature_reduction = False
classes = [0, 1]

# --- Experiment labels ---
# NOTE: `pretrained` is rebound to the loaded checkpoint dictionary in the
# cell that builds the pretrained MLP body.
quantum_embed = "ZZ"
n_qubits = 8
kernel = "RBF"
pretrained = True

# --- Optimisation settings read by model_train ---
loss_function = "BCE"  # one of: "MSE", "BCE", "Linear"
n_epochs = 1_000
batch_size = 256
learning_rate = 1e-6

In [None]:
# Load the train/test split selected by the configuration values.
X_train, X_test, Y_train, Y_test = data.data_load_and_process(
    dataset='protein',
    target=target,
    sampling=sampling,
    feature_reduction=feature_reduction,
    classes=classes,
)

In [None]:
# Report the shapes of the four arrays returned by the loader.
print(
    f"X_train: {X_train.shape} / X_test: {X_test.shape} "
    f"/Y_train: {Y_train.shape} /Y_test: {Y_test.shape}"
)

In [None]:
# Label frequencies of the training split, then of the test split.
print(*(Counter(labels) for labels in (Y_train, Y_test)))

In [None]:
# Class-imbalance weight for the BCE loss: ratio of negative to positive
# training labels (assumes labels are 0/1, as selected by `classes`).
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float)

num_pos = Y_train_tensor.sum()
num_neg = len(Y_train_tensor) - num_pos

# With no positive labels the ratio below would be inf (or nan), which would
# silently poison every loss value computed with it.
if num_pos.item() == 0:
    raise ValueError("Y_train contains no positive samples; cannot compute pos_weight")

# Keep pos_weight as a 1-element float tensor; reshape the ratio directly
# instead of round-tripping it through a Python list.
pos_weight = (num_neg / num_pos).to(torch.float).reshape(1)

print("num_pos:", num_pos.item(), "/ num_neg:", num_neg.item())
print("pos_weight:", pos_weight)

### Apply NN structure used in the NQE method

In [None]:
# Absolute path of the saved parameter file, written one path segment per
# line; adjacent string literals are concatenated by the parser.
parameter_file_dir = (
    '/Users/jungguchoi/Library/Mobile Documents/com~apple~CloudDocs/'
    '1_Post_doc(Cleveland_clinic:2024.10~2025.09)/'
    '1_Research_project/'
    '3_quantum_embedding_comparison_sequence(2024.09 ~ XXXX.XX)/'
    '2_exp/'
    '60_Dr_Park_Meeting_and_comments_SEP1725/'
    '2_new_classical_counterparts/'
    '15_ALDH1_NN_RBF_1_6_ratio/'
    'MLP1_LIT-PCBA_ALDH1_1_6_sampling_MLP_ZZ_8_qubits(RBF).pt'
)

1. MLP structure without pretraining

In [None]:
class NN(nn.Module):
    """Five-layer fully connected network with ReLU hidden activations.

    Layer widths are ``in_features -> 128 -> 64 -> 32 -> 16 -> 1``.

    Args:
        in_features: Width of the input vectors. Defaults to 39, the value
            that used to be hard-coded.
        output_logits: When False (default) the final layer output is passed
            through a sigmoid, as before. When True the raw final-layer
            output is returned instead; that is the form expected by
            ``nn.BCEWithLogitsLoss`` and by the evaluation step of
            ``model_train``, both of which apply a sigmoid themselves.
    """

    def __init__(self, in_features=39, output_logits=False):
        super().__init__()
        self.output_logits = output_logits
        self.layer1 = nn.Linear(in_features, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 32)
        self.layer4 = nn.Linear(32, 16)
        self.layer5 = nn.Linear(16, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        # Not used by forward(); kept so the module's attributes are unchanged.
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Map a ``(..., in_features)`` tensor to a ``(..., 1)`` tensor."""
        for hidden in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = self.relu(hidden(x))
        x = self.layer5(x)
        if self.output_logits:
            return x
        return self.sigmoid(x)

# Default configuration: 39 inputs, sigmoid-activated output.
model_NN = NN()

2. MLP structure with pretraining

In [None]:
# Read the checkpoint at `parameter_file_dir` onto the CPU.
# NOTE: this rebinds `pretrained`, which the configuration cell set to True.
pretrained = torch.load(parameter_file_dir, map_location="cpu", weights_only=True)

# Linear stack 39 -> 1024 -> 512 -> 256 -> 128 -> 64 -> 32 -> 16 with a ReLU
# between consecutive Linear layers and no activation after the last one.
_body_widths = (39, 1024, 512, 256, 128, 64, 32, 16)
_body_layers = []
for _fan_in, _fan_out in zip(_body_widths, _body_widths[1:]):
    _body_layers += [nn.Linear(_fan_in, _fan_out), nn.ReLU()]
pretrained_MLP_body = nn.Sequential(*_body_layers[:-1])  # drop the trailing ReLU

# The Linear layers sit at the even positions 0, 2, ..., 12 of the Sequential;
# copy each one's weight and bias from the matching 'linear_relu_stack.<idx>.*'
# checkpoint entry.
pretrained_MLP_body.load_state_dict({
    f'{_idx}.{_kind}': pretrained[f'linear_relu_stack.{_idx}.{_kind}']
    for _idx in range(0, 13, 2)
    for _kind in ('weight', 'bias')
})

pretrained_MLP_body.eval()

class ExtendedNN(nn.Module):
    """A feature-extractor body followed by a single ``Linear(16, 1)`` head.

    The head has no output activation, so ``forward`` returns raw scores.

    Args:
        pretrained_body: Module used as the feature extractor. Its output
            must have 16 features to match the head.
    """

    def __init__(self, pretrained_body):
        super().__init__()
        # Use the module that was passed in; previously the argument was
        # ignored and the global `pretrained_MLP_body` was captured instead.
        self.feature_extractor = pretrained_body
        # Kept as a Sequential so parameter names stay `classifier.0.*`.
        self.classifier = nn.Sequential(
            nn.Linear(16, 1),
        )

    def forward(self, x):
        """Run ``x`` through the body, then through the linear head."""
        features = self.feature_extractor(x)
        return self.classifier(features)

# Model used by the training cell: the loaded MLP body plus a fresh linear head.
pretrained_model_NN = ExtendedNN(pretrained_body=pretrained_MLP_body)

3. Define the loss function

In [None]:
def Linear_Loss(predictions, labels):
    """Return the mean of ``0.5 * (1 - l * p)`` over all samples.

    Labels are first remapped with ``l = 2 * label - 1`` (so 0/1 labels become
    -1/+1). The computation is vectorised instead of looping element by
    element in Python.

    Args:
        predictions: Tensor of model outputs; flattened to 1-D before use.
        labels: Tensor of labels with the same number of elements.

    Returns:
        A 0-d tensor holding the mean loss (differentiable w.r.t.
        ``predictions``).

    Raises:
        ValueError: If the two inputs hold a different number of elements.
            The previous loop silently truncated to the shorter input while
            still dividing by ``len(labels)``.
    """
    flat_predictions = torch.as_tensor(predictions).reshape(-1)
    signed_labels = (2 * torch.as_tensor(labels) - 1).reshape(-1)
    if flat_predictions.numel() != signed_labels.numel():
        raise ValueError(
            f"predictions has {flat_predictions.numel()} elements but labels has "
            f"{signed_labels.numel()}"
        )
    return (0.5 * (1 - signed_labels * flat_predictions)).mean()

# Candidate criteria; model_train picks one according to `loss_function`.
loss_fn_MSE = nn.MSELoss()
# BCE computed on raw logits, with the positive class weighted by `pos_weight`.
loss_fn_BCE = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

In [None]:
def model_train(model, X_train, y_train, X_val, y_val):
    """Train ``model`` with mini-batch Adam, then score it once on the validation data.

    Hyperparameters are read from module-level globals: ``loss_function``
    ('MSE', 'BCE' or 'Linear'), ``learning_rate``, ``batch_size`` and
    ``n_epochs``. Batches are consecutive slices of the training tensors
    (no shuffling).

    Args:
        model: The ``nn.Module`` to optimise; it is updated in place.
        X_train: Training inputs (tensor, sliced along the first dimension).
        y_train: Training targets, sliced in step with ``X_train``.
        X_val: Validation inputs.
        y_val: Validation targets; left unmodified.

    Returns:
        ``(acc, loss_history)`` where ``loss_history`` holds one float per
        epoch (the average of that epoch's per-batch losses) and ``acc`` is
        ``accuracy_score`` for the 'Linear' loss or
        ``balanced_accuracy_score`` otherwise.

    Raises:
        ValueError: If ``loss_function`` is not one of the supported names.
    """
    if loss_function == 'MSE':
        loss_fn = loss_fn_MSE
    elif loss_function == 'BCE':
        loss_fn = loss_fn_BCE
    elif loss_function == 'Linear':
        loss_fn = Linear_Loss
    else:
        # Previously an unknown name fell through and failed later with an
        # unbound-variable error.
        raise ValueError(
            f"Unknown loss_function {loss_function!r}; expected 'MSE', 'BCE' or 'Linear'"
        )

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    batch_start = range(0, len(X_train), batch_size)
    # Divisor for the epoch average; max() keeps an empty training set from
    # dividing by zero.
    n_batches = max(len(batch_start), 1)

    loss_history = []
    for epoch in range(n_epochs):
        model.train()
        epoch_loss = 0.0
        with tqdm.tqdm(batch_start, unit="batch", mininterval=0, disable=True) as bar:
            bar.set_description(f"Epoch {epoch}")
            for start in bar:
                # take a batch
                X_batch = X_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]
                # forward pass
                y_pred = model(X_batch)
                loss = loss_fn(y_pred.view(-1), y_batch)
                # backward pass and weight update
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

        # Average over the number of batches (the sum of per-batch losses was
        # previously divided by the batch size, which is not a mean).
        mean_loss = epoch_loss / n_batches
        loss_history.append(mean_loss)
        if epoch % 200 == 0:
            print("Epoch:", epoch, "/ Loss:", mean_loss)

    # Evaluate once, after the final epoch, without tracking gradients.
    model.eval()
    with torch.no_grad():
        y_pred = model(X_val).view(-1)

    if loss_function == 'Linear':
        # Sign of the raw output decides the class in {-1, +1}.
        predicted_values = torch.where(y_pred < 0, -1, 1)
        print("Prediction:", Counter(predicted_values.tolist()))
        # Remap 0 -> -1 on a copy so the caller's tensor is not modified.
        signed_targets = torch.as_tensor(y_val).clone()
        signed_targets[signed_targets == 0] = -1
        # NOTE(review): this branch reports plain accuracy while the other
        # reports balanced accuracy; confirm that the difference is intended.
        acc = accuracy_score(signed_targets.numpy(), predicted_values.numpy())
    else:
        # Model outputs are treated as logits: sigmoid, then threshold at 0.5.
        probs = torch.sigmoid(y_pred)
        predicted_values = (probs >= 0.5).int().numpy()
        print("Prediction:", Counter(predicted_values.flatten()))
        acc = balanced_accuracy_score(y_val, predicted_values)

    return acc, loss_history

In [None]:
import copy

# Repeat the experiment five times. Each repetition trains its own copy of
# `pretrained_model_NN`; previously the very same instance was passed to every
# call, so each "run" continued training from where the previous one stopped
# and the statistics computed over runs were not over independent repetitions.
loss_full = []
acc_full = []
for no in range(5):
    run_model = copy.deepcopy(pretrained_model_NN)
    # Re-draw the randomly initialised head so repetitions are not exact
    # duplicates of each other; the copied body keeps its loaded weights.
    for module in run_model.classifier.modules():
        if hasattr(module, 'reset_parameters'):
            module.reset_parameters()

    acc, loss_history = model_train(model=run_model,
                                    X_train=torch.Tensor(X_train),
                                    y_train=torch.Tensor(Y_train),
                                    X_val=torch.Tensor(X_test),
                                    y_val=torch.Tensor(Y_test))

    print("No:", str(no), "/ Accuracy:", acc)
    print("---------------------------------")
    loss_full.append(loss_history)
    acc_full.append(acc)

In [None]:
# Mean and standard deviation of the accuracy over the recorded runs.
acc_full_mean, acc_full_std = (
    stat(np.array(acc_full)) for stat in (np.mean, np.std)
)
print(acc_full_mean, acc_full_std)

In [None]:
# Number of recorded loss histories (one is appended per training run).
print(len(loss_full))
# Bare expression as the last statement of the cell; presumably kept so the
# notebook displays the full list as the cell output.
loss_full

In [None]:
# Element-wise mean and standard deviation across runs (axis 0 indexes runs).
loss_full_mean = np.mean(np.array(loss_full), axis=0)
loss_full_std = np.std(np.array(loss_full), axis=0)
# One length per line, mean curve first.
print(len(loss_full_mean), len(loss_full_std), sep="\n")

In [None]:
# Output directory for the loss-history file, written one path segment per
# line; the trailing slash is required because the file name is appended by
# plain string concatenation.
save_dir = (
    '/Users/jungguchoi/Library/Mobile Documents/com~apple~CloudDocs/'
    '1_Post_doc(Cleveland_clinic:2024.10~2025.09)/'
    '1_Research_project/'
    '3_quantum_embedding_comparison_sequence(2024.09 ~ XXXX.XX)/'
    '2_exp/'
    '60_Dr_Park_Meeting_and_comments_SEP1725/'
    '2_new_classical_counterparts/'
    '15_ALDH1_NN_RBF_1_6_ratio/'
)

In [None]:
# Write every recorded loss history to a text file whose name encodes the
# training settings.
history_path = save_dir + (
    f'Loss_histories_and_weights({n_epochs}iter_{batch_size}batch_'
    f'{learning_rate}lr_{loss_function}).txt'
)

# The context manager closes the file even if a write fails, and iterating
# over `loss_full` avoids assuming that exactly five runs were recorded.
with open(history_path, 'w') as f:
    for i, history in enumerate(loss_full):
        f.write(f'Loss History {i + 1}:\n{history}\n')

In [None]:
import seaborn as sns

# Plot the mean loss curve across runs with a +/- one standard deviation band.
plt.rcParams['figure.figsize'] = [10, 5]
clrs = sns.color_palette("husl", 3)
steps = range(len(loss_full_mean))

# The seaborn style context only affects axes created while it is active, so
# the figure is created inside it (previously the axes already existed when
# the context was entered and "darkgrid" had no effect).
with sns.axes_style("darkgrid"):
    fig, ax = plt.subplots()

ax.plot(steps, loss_full_mean, label="MLP+single layer model", c=clrs[1])
ax.fill_between(
    steps,
    loss_full_mean - loss_full_std,
    loss_full_mean + loss_full_std,
    alpha=0.3,
    facecolor=clrs[1],
)

ax.set_xlabel("Iteration")
ax.set_ylabel("Loss")
ax.set_title("MLP + single layer Loss History")
ax.legend()