In [6]:
import joblib
import random
import pandas as pd
import os
import optuna 
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
from torchmetrics import R2Score
import numpy as np
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score

In [7]:
! pip install pennylane==0.27.0

Collecting pennylane==0.27.0
  Downloading PennyLane-0.27.0-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hCollecting autoray>=0.3.1
  Downloading autoray-0.5.3-py3-none-any.whl (39 kB)
Collecting pennylane-lightning>=0.27
  Downloading PennyLane_Lightning-0.28.0-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.2/227.2 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting autograd
  Downloading autograd-1.5-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.9/48.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting retworkx
  Downloading retworkx-0.12.1-py3-none-any.whl (10 kB)
Collecting semantic-version>=2.7
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)
Collecting rustworkx==0.12.1
  Downloading rustworkx-0.12.1-cp37-cp37m-manylinux_2_17_x86_64.

In [8]:
import pennylane as qml

In [9]:
device = "cuda" if torch.cuda.is_available else "cpu"

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

In [10]:
#configuring the quantum node 
@qml.qnode(dev,interface="torch")
def quantum_circuit(params,x):
    # Encoding the input data into quantum states
    qml.RX(x[0], wires=0)
    qml.RY(x[1], wires=1)
    qml.CNOT(wires= [0,1])
    return qml.expval(qml.PauliZ(0)),qml.expval(qml.PauliZ(1)) 

In [18]:
#Defining the NN
class NN(torch.nn.Module):
    def __init__(self,in_feature):
        super().__init__()
        self.in_feature = in_feature
        self.hid1 = torch.nn.Linear(in_feature, 128)
        self.dropout = nn.Dropout(0.25)
#         self.batchnorm1 = nn.BatchNorm1d(128)
        self.hid2 = torch.nn.Linear(128,64)
        self.hid3 = torch.nn.Linear(64,32)
        self.output = torch.nn.Linear(32,1)
        
        torch.nn.init.xavier_uniform_(self.hid1.weight)
        torch.nn.init.zeros_(self.hid1.bias)
        torch.nn.init.xavier_uniform_(self.hid2.weight)
        torch.nn.init.zeros_(self.hid2.bias)
        torch.nn.init.xavier_uniform_(self.hid3.weight)
        torch.nn.init.zeros_(self.hid3.bias)
        torch.nn.init.xavier_uniform_(self.output.weight)
        torch.nn.init.zeros_(self.output.bias)
    
    def forward(self,x):
        z = torch.relu(self.hid1(x))
#         z = self.batchnorm1(z)
        z = self.dropout(z)
        z = torch.relu(self.hid2(z))
        z = self.dropout(z)
        z = torch.relu(self.hid3(z))
        z = self.output(z)
        return z

In [19]:
# Defining the QNN
class QuantNN(torch.nn.Module):
    def __init__(self,quantum_circuit, class_NN):
        super().__init__()
        self.quantum_circuit = quantum_circuit
        self.class_nn = class_NN
        
    def forward(self,x):
        quant_out = self.quantum_circuit(x)
        class_in = torch.tensor(quant_out,device=device)
        class_out = self.class_nn(class_in.float())
        # Debugging
        print(f"quant_in: {x.shape}")
        print(f"quant_out: {quant_out.shape}")
        print(f"class_in: {class_in.shape}")
        print(f"class_out: {class_out.shape}")
        return class_out


In [20]:
#preparing the dataset class for the input
class CustDataset(Dataset):
    
    def __init__(self,df):
        self.labels = df[TAR_COL].to_numpy(dtype=np.float64)
        self.features = df.drop([TAR_COL],axis=1).to_numpy(dtype=np.float64)
        
    def classes(self):
        return self.labels
    def __len__(self):
        return len(self.labels)
    def get_batch_labels(self,idx):
        return np.array(self.labels[idx])
    
    def get_batch_features(self,idx):
        return np.array(self.features[idx])
    
    def __getitem__(self,idx):
        batch_features = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)
        
        return batch_features, batch_y   

In [21]:
#Redirecting the QNN

def build_model(in_features):
    NeuN = NN(in_features)
    QuantNeuN = qml.qnn.TorchLayer(quantum_circuit,NeuN, device)
    return QuantNeuN

In [22]:
FOLD_DICT = joblib.load("../input/perov-fold-data/fold_data_export.z")
USE_DF = pd.read_csv("../input/perov-scaled-data/scaled_trainable.csv")
TAR_COL = "JV_default_PCE_numeric"
EPOCHS = 3
K_FOLD = 2
IN_FEATURES = 103 

In [23]:
def train_eval(params, model,fold,trial):
    
    #loading data 
    train_index = FOLD_DICT[fold]["train"]
    test_index = FOLD_DICT[fold]["test"]
    train = CustDataset(USE_DF.iloc[train_index,:])
    val = CustDataset(USE_DF.iloc[test_index,:])
    train_loader = DataLoader(train,batch_size = 32, shuffle=False)
    val_loader = DataLoader(val,batch_size= 32, shuffle=False)
    
    cuda_bool = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_bool else "cpu")
    
    criterion = nn.MSELoss()
    optimizer = getattr(optim, params["optimizer"])(model.parameters(),lr=params["learning_rate"])
    
    #creating custom early stopping
    patience = 2
    best_val_loss = float("inf")
    counter = 0
    
    if cuda_bool:
        model = model.cuda()
        criterion = criterion.cuda()
        
    
    
    for epoch_num in range(EPOCHS):
        
        loss_list = []
        #training loop 
        for train_x, train_y in train_loader:
            
            train_y = train_y.to(device)
            train_y = train_y.reshape((train_y.shape[0],1))
            train_x = train_x.to(device)
            
            output = model(train_x.float())
            
            batch_loss = criterion(output, train_y.float())
            
            optimizer.zero_grad()
            
            model.zero_grad()
            batch_loss.backward()
            optimizer.step()       
        
        model.eval()
        with torch.no_grad():
            
            loss = 0.0
            #validation loop 
            for val_x,val_y in val_loader:
                
                val_x = val_x.to(device)
                val_y = val_y.reshape((val_y.shape[0],1))
                val_y = val_y.to(device)
                
                output = model(val_x.float())
                
                batch_loss = criterion(output, val_y.float())
                loss += batch_loss.item()
                loss_list.append(batch_loss.item())
            print(f"Validating:[{epoch_num+1}/{EPOCHS}] LOSS: {batch_loss.item()}]")
        if loss < best_val_loss:
            best_val_loss = loss
            counter = 0
        else: 
            counter += 1
            if counter >= patience: 
                print(f"[==] Early Stopping at {loss}")
                break
        
    main_loss = np.mean(loss_list)
    return main_loss

In [24]:
#defing optuna objective function
def objective(trial):
    params = {
        "learning_rate": trial.suggest_float("learning_rate",1e-5, 1e-1),
        "optimizer" : trial.suggest_categorical("optimizer",["Adam", "RMSprop", "SGD"])
    }
    
    fold = TRIAL_FOLD
    model = build_model(IN_FEATURES)
    main_loss = train_eval(params, model,fold, trial)
    
    return main_loss 

In [25]:
# Definging best hyper parameteres
NUM_TRIALS = 10
TRIAL_FOLD = random.choice([x for x in range(K_FOLD)])
study = optuna.create_study(direction="minimize",
                            sampler=optuna.samplers.TPESampler(),
                            pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=NUM_TRIALS)

[32m[I 2023-01-10 11:38:49,636][0m A new study created in memory with name: no-name-8009cdc2-9096-4d88-9af1-825c78873bb6[0m
[33m[W 2023-01-10 11:38:49,642][0m Trial 0 failed because of the following error: AttributeError("'NN' object has no attribute 'items'")[0m
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_23/1312651667.py", line 9, in objective
    model = build_model(IN_FEATURES)
  File "/tmp/ipykernel_23/1557917407.py", line 5, in build_model
    QuantNeuN = qml.qnn.TorchLayer(quantum_circuit,NeuN, device)
  File "/opt/conda/lib/python3.7/site-packages/pennylane/qnn/torch.py", line 257, in __init__
    for weight, size in weight_shapes.items()
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1186, in __getattr__
    type(self).__name__, name))
AttributeError: 'NN' object has no attribute 'items'


AttributeError: 'NN' object has no attribute 'items'

In [84]:
#CAPTURING BEST HYPERPARAMETERS
best_param = study.best_params
best_param

{'learning_rate': 0.09392347823813403, 'optimizer': 'Adam'}

In [164]:
#Creating train function for all folds
def main_train(best_param, model,fold):
    
    #loading data 
    train_index = FOLD_DICT[fold]["train"]
    test_index = FOLD_DICT[fold]["test"]
    train = CustDataset(USE_DF.iloc[train_index,:])
    val = CustDataset(USE_DF.iloc[test_index,:])
    
    cuda_bool = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_bool else "cpu")
    
    train_x = torch.tensor(train.features, device=device)
    train_y = torch.tensor(train.labels, device=device)
    val_x = torch.tensor(val.features, device=device)
    val_y = torch.tensor(val.labels , device=device)
    
    criterion = nn.MSELoss()
    optimizer = getattr(optim,best_param["optimizer"])(model.parameters(),lr=best_param["learning_rate"])
    
    
    #creating custom early stopping
    patience = 3
    best_val_loss = float("inf")
    counter = 0 
    
    
    if cuda_bool:
        model = model.cuda()
        criterion = criterion.cuda()
        
    
    
    for epoch_num in range(EPOCHS):
        
        mean_sq_list = []
        r_mean_sq_list = []
        mean_abs_list = []
        r2_list = []
        
            
        optimizer.zero_grad()

        train_y = train_y.to(device)
        train_y = train_y.reshape((train_y.shape[0],1))
        train_x = train_x.to(device)

#             print(train_x.shape)
        print(train_x.shape)
        output = model(train_x.float())
        print(output.shape)
#             print(output.shape)

        batch_loss = criterion(output, train_y.float())
        print(batch_loss)

        model.zero_grad()
        batch_loss.backward()
        optimizer.step()


          
        
        model.eval()
        with torch.no_grad():
            
            #validation loop 
            loss = 0.0
            for val_x,val_y in val_loader:
                
                val_x = val_x.to(device)
                val_y = val_y.reshape((val_y.shape[0],1))
                val_y = val_y.to(device)
                
                print(val_x.shape)
                output = model(val.float())
#                     output_vals.append(output)
                print(len(output_vals))
                print(output.shape)
                batch_loss = criterion(output, val_y.float())
                loss += batch_loss.item()
                mean_abs_error = nn.L1Loss()(output, val_y)
                r_mean_sq_error = torch.sqrt(batch_loss)
                r2_score = R2Score().to(device)(output.reshape((output.shape[0],1)), val_y)
                mean_sq_list.append(batch_loss.item())
                mean_abs_list.append(mean_abs_error.item())
                r2_list.append(r2_score.item())
                r_mean_sq_list.append(r_mean_sq_error.item())
            print(f"Validating:[{epoch_num+1}/{EPOCHS}] LOSS: {batch_loss.item()}]")
        if loss < best_val_loss:
            best_val_loss = loss
            counter = 0
        else: 
            counter += 1
            if counter >= patience: 
                print(f"[==] Early Stopping at {loss}")
                break
        
    sq_mean_loss = np.mean(mean_sq_list)
    r_sq_mean_loss = np.mean(r_mean_sq_list)
    abs_mean_loss = np.mean(mean_abs_list)
    r2_mean_loss = np.mean(r2_list)
    
    error = {"mse_error" : sq_mean_loss ,
    "mae_error" : abs_mean_loss,
    "rmse_error" : r_sq_mean_loss,
    "r2_score" :  r2_mean_loss}
    return error,model

In [165]:
def save_cv_model(i,model_name,model,optim,losses,output_path="./"):

    ''' This function saves cross validation model in the corresponding directory ( if the path does not exist it creates the path for it'''


    if os.path.exists(os.path.join(output_path,f"{i}_{model_name}_{optim}")):
        torch.save(model, os.path.join(output_path,f"{i}_{model_name}_{optim}/{i}_model.z"))
        with open(os.path.join(output_path,f"{i}_{model_name}_{optim}/losses_{fold}.txt"),"w+") as file:file.write(f" mse_loss :: {str(losses)}")
    else:
        os.mkdir(os.path.join(output_path,f"{i}_{model_name}_{optim}"))
        torch.save(model, os.path.join(output_path,f"{i}_{model_name}_{optim}/{i}_model.z"))
        with open(os.path.join(output_path,f"{i}_{model_name}_{optim}/losses_{fold}.txt"),"w+") as file:file.write(f" mse_loss :: {str(losses)}")

In [166]:
model_name = "QNN"
optim_name = best_param["optimizer"]
for fold in [x for x in range(K_FOLD)]:
    dum_model = build_model(IN_FEATURES)
    print(f"Training for fold [{fold+1}/{K_FOLD}] started ")
    error,model = main_train(best_param,dum_model,fold)
    print(f"Saving data for fold [{fold+1}/{K_FOLD}]")
    save_cv_model(fold,model_name,model,optim_name,error,output_path="./")

Training for fold [1/2] started 
torch.Size([44479, 103])
quant_in: torch.Size([44479, 103])
quant_out: torch.Size([103])
class_in: torch.Size([103])
class_out: torch.Size([1])
torch.Size([1])
tensor(489.7986, device='cuda:0', grad_fn=<MseLossBackward0>)
torch.Size([44479, 103])
quant_in: torch.Size([44479, 103])
quant_out: torch.Size([103])
class_in: torch.Size([103])
class_out: torch.Size([1])
torch.Size([1])
tensor(15927.7061, device='cuda:0', grad_fn=<MseLossBackward0>)
torch.Size([44479, 103])
quant_in: torch.Size([44479, 103])
quant_out: torch.Size([103])
class_in: torch.Size([103])
class_out: torch.Size([1])
torch.Size([1])
tensor(7536.1436, device='cuda:0', grad_fn=<MseLossBackward0>)


  # Remove the CWD from sys.path while we load stuff.
  return F.mse_loss(input, target, reduction=self.reduction)


TypeError: cannot unpack non-iterable NoneType object