# LSTM/QK-VQE
Overview
- class molecule_data: prepare the molecule dataset and pre-process it
- class VQE: build the VQE
- class VQEOptimizer: run the VQE optimization
- class LSTM: build the model
- class ModelTrain: train and evaluate the model

Need to Debug
1. Test with different molecules (standard VQE)
2. Optuna might run for 2-3 hours
3. Early convergence in training: check dataset size and torch.optim.lr_scheduler.ReduceLROnPlateau
4. FIM approximation
5. Validation set
6. Gaussian encoding

In [1]:
## My module
import molecule_data
import VQE
import QKLSTM
import Model
## basis
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
## random
import random
import math
from typing import List, Callable, Tuple
## ML
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from scipy.optimize import minimize
## QML
import h5py
import pennylane as qml
from pennylane import qchem
## access file 
import os 

# Fix the random seeds so repeated runs of the notebook use the same seed value (42)
# for PyTorch, NumPy and PennyLane's math layer.
torch.manual_seed(42)
np.random.seed(42)
# NOTE(review): confirm that `qml.math.random.seed` is available in the installed
# PennyLane version; it is not part of the commonly documented API.
qml.math.random.seed(42)

## Run

In [None]:
def vqe_params(molecule_data):
    """Return the number of excitation parameters for one molecule.

    The electron count is taken as the sum of the entries of
    ``molecule_data.hf_state`` and the orbital count as its length.  The
    single and double excitations generated from those two numbers are
    mapped to wires, and the total number of resulting wire groups
    (singles + doubles) is returned.

    Args:
        molecule_data: object exposing an ``hf_state`` sequence.
            NOTE: inside this function the name refers to the argument, not
            to the imported ``molecule_data`` module.

    Returns:
        int: number of single-excitation plus double-excitation entries.
    """
    hf_state = molecule_data.hf_state
    n_electrons = sum(hf_state)
    n_orbitals = len(hf_state)

    singles, doubles = qchem.excitations(n_electrons, n_orbitals)
    single_wires, double_wires = qml.qchem.excitations_to_wires(singles, doubles)
    return len(single_wires) + len(double_wires)

In [4]:
# --- Hyperparameters ---
# Notebook-wide settings; the cells below read these names directly.
## LSTM
### Optimizer for model training: Adam (per the original note; it is not selected in this cell)
# Passed to Model.LSTM as `model_type`.
# NOTE(review): the original comment listed "LSTM or OK"; presumably "QK" was meant — confirm against Model.LSTM.
model_type = "QK" # "LSTM" or "QK"
# Passed to Model.LSTM as `mapping_type`.
mapping_type = "Linear" # Linear or DNN
# Passed to Model.LSTM as `input_feature_dim`.
fixed_param_dim = 30
# Learning rates handed to Model.ModelTrain (`lr_lstm`, `lr_mapping`).
lr_lstm = 0.001
lr_mapping = 0.001
# Number of epochs handed to trainer.train.
epochs = 5
#steps_per_epoch = 10
# Convergence tolerance handed to trainer.train.
conv_tol_lstm = 1e-5
# Passed to Model.LSTM as `loss_function_type`; also embedded in the saved file names.
loss_type = "observed improvement" # weighted or observed improvement
# `num_rnn_iteration` used when constructing Model.ModelTrain.
steps_recurrent_loop_train = 5 # for train (steps_per_epoch)
# `num_rnn_iteration` used by trainer.evaluate.
steps_recurrent_loop_test = 10 # for test

## VQE
# Settings forwarded to VQEOptimizer.run_optimization in the evaluation cell.
lr_vqe = 0.01
max_iter_vqe = 500
conv_tol_vqe = 1e-6
vqe_optimizer = "ADAM"

# Data set
# List of (molecule name, [bond length]) pairs handed to
# molecule_data.molecule_data.load_data in the dataset cell. 66 entries in total.
molecule = [
        # H2 scan: 0.50-0.66 and 0.82-2.10 in steps of 0.04.
        # NOTE(review): 0.70-0.78 are absent — presumably left out on purpose; confirm.
        ("H2", [0.5]),
        ("H2", [0.54]),
        ("H2", [0.58]),
        ("H2", [0.62]),
        ("H2", [0.66]),
        ("H2", [0.82]),
        ("H2", [0.86]),
        ("H2", [0.9]),
        ("H2", [0.94]),
        ("H2", [0.98]),
        ("H2", [1.02]),
        ("H2", [1.06]),
        ("H2", [1.1]),
        ("H2", [1.14]),
        ("H2", [1.18]),
        ("H2", [1.22]),
        ("H2", [1.26]),
        ("H2", [1.3]),
        ("H2", [1.34]),
        ("H2", [1.38]),
        ("H2", [1.42]),
        ("H2", [1.46]),
        ("H2", [1.5]),
        ("H2", [1.54]),
        ("H2", [1.58]),
        ("H2", [1.62]),
        ("H2", [1.66]),
        ("H2", [1.7]),
        ("H2", [1.74]),
        ("H2", [1.78]),
        ("H2", [1.82]),
        ("H2", [1.86]),
        ("H2", [1.9]),
        ("H2", [1.94]),
        ("H2", [1.98]),
        ("H2", [2.02]),
        ("H2", [2.06]),
        ("H2", [2.1]),
        # H3+ scan: 0.50-1.10 and 1.26-1.54 in steps of 0.04.
        # NOTE(review): 1.14-1.22 are absent — presumably left out on purpose; confirm.
        ("H3+", [0.5]),
        ("H3+", [0.54]),
        ("H3+", [0.58]),
        ("H3+", [0.62]),
        ("H3+", [0.66]),
        ("H3+", [0.7]),
        ("H3+", [0.74]),
        ("H3+", [0.78]),
        ("H3+", [0.82]),
        ("H3+", [0.86]),
        ("H3+", [0.9]),
        ("H3+", [0.94]),
        ("H3+", [0.98]),
        ("H3+", [1.02]),
        ("H3+", [1.06]),
        ("H3+", [1.1]),
        ("H3+", [1.26]),
        ("H3+", [1.3]),
        ("H3+", [1.34]),
        ("H3+", [1.38]),
        ("H3+", [1.42]),
        ("H3+", [1.46]),
        ("H3+", [1.5]),
        ("H3+", [1.54]),
        # NOTE(review): duplicate of the first H3+ entry (0.5) — confirm whether this is intentional.
        ("H3+", [0.5]),
        # Entries without an explicit bond length.
        # NOTE(review): presumably load_data falls back to a default geometry for None — confirm.
        ("H2", None),
        ("H3+", None),
        ("H4", None)
        ]

# Split index handed to molecule_data.molecule_data.data_params.
# NOTE(review): with the 66-entry `molecule` list above, presumably the first 65 entries
# become the training set and the remainder the test set — confirm in data_params.
train_split_index = 65
# Save File Path
# Folder passed to load_data as `folder_path`.
Data_save_path = "./datasets2/"
# Prefix of the saved model state-dict file (".pth" and run tags are appended in the training cell).
Model_save_path = "model_test2_params"
# Prefix of the per-molecule result CSV files written in the evaluation cell.
Results_save_path = "model_test2_result"


In [None]:
# --- Prepare dataset ---
# Load the molecules listed in `molecule` (STO-3G basis) using `Data_save_path`
# as the data folder.  Per the original author's note this fetches the data
# from the PennyLane datasets when no local copy is available.
# TODO: the case where the data already exists locally was left open by the author.
dataset = molecule_data.molecule_data.load_data(
    molecule,
    basis="STO-3G",
    folder_path=Data_save_path,
)

# Derive the size limits used to build the model and split the data into
# training and test sets at `train_split_index`.
(
    max_qubits,
    max_s_params,
    max_d_params,
    max_params,
    train_set,
    test_set,
) = molecule_data.molecule_data.data_params(
    df=dataset,
    train_split_index=train_split_index,
)

print(f"Train set{train_set}")
print(f"Test set{test_set}")


--- Starting Load Data ---

--- Complete Load Data ---
                                                   0
0  <Dataset = molname: H2, basis: STO-3G, bondlen...
Train set[<Dataset = molname: H2, basis: STO-3G, bondlength: 0.742, attributes: ['molname', 'basis', ...]>]
Test set[]


In [None]:
# --- Define model and train ---
# Build the model from the hyperparameters and from the size limits computed
# in the dataset cell.
beta_model = Model.LSTM(
    model_type=model_type,
    mapping_type=mapping_type,
    input_feature_dim=fixed_param_dim,
    max_total_params=max_params,
    max_qubits=max_qubits,
    loss_function_type=loss_type,
    max_s_params=max_s_params,
    max_d_params=max_d_params,
)

# Report the architecture and the trainable-parameter counts of its two parts.
print("--- Model Summary ---")
print(beta_model)
lstm_params = sum(weight.numel() for weight in beta_model.lstm.parameters())
print(f"  {model_type} Parameters: {lstm_params}")
mapping_params = sum(weight.numel() for weight in beta_model.mapping.parameters())
print(f"  Mapping Parameters: {mapping_params}")

# The trainer receives separate learning rates for the recurrent part and the
# mapping part, plus the number of recurrent iterations used during training.
trainer = Model.ModelTrain(
    beta_model,
    lr_lstm=lr_lstm,
    lr_mapping=lr_mapping,
    num_rnn_iteration=steps_recurrent_loop_train,
)

print(f"\n--- Training {model_type} Model ---")
trainer.train(
    train_set,
    epochs=epochs,
    conv_tol_lstm=conv_tol_lstm,
)

# Persist the trained weights; the file name carries the model and loss type.
torch.save(
    beta_model.state_dict(),
    f"{Model_save_path}_{model_type}_{loss_type}.pth",
)
print("Model saved successfully!")

In [None]:
# --- Model testing / evaluation ---
# For each test molecule:
#   1. roll the trained model out for `steps_recurrent_loop_test` iterations,
#   2. run a VQE optimisation starting from the model's last predicted parameters,
#   3. run baseline VQE optimisations from uniform-random and all-zero parameters,
#   4. save every energy trace to a CSV file and plot the traces and their
#      error with respect to the molecule's FCI energy.
print(f"\n--- Evaluating {model_type} Model ---")

# Height of the red reference line on the error plot.
# NOTE(review): 0.0016 Ha is presumably "chemical accuracy" (~1 kcal/mol) — confirm.
CHEMICAL_ACCURACY = 0.0016


def _run_vqe(mol, initial_params, vqe_flag):
    """Run one VQE optimisation for `mol` with the notebook-wide VQE settings.

    Args:
        mol: test-set entry forwarded to ``VQE.VQE``.
        initial_params: starting parameters for the optimiser.
        vqe_flag: third positional argument of ``VQE.VQE``.  The notebook uses
            ``True`` for the model-initialised run and ``False`` for the
            baselines; its exact meaning is defined in the VQE module.

    Returns:
        The tuple returned by ``VQEOptimizer.run_optimization``, unpacked by
        the callers as (conv_iter, final_params, final_energy,
        params_history, energy_history).
    """
    runner = VQE.VQEOptimizer(VQE.VQE(mol, max_qubits, vqe_flag))
    return runner.run_optimization(
        initial_params=initial_params,
        optimizer=vqe_optimizer,
        max_iter=max_iter_vqe,
        learning_rate=lr_vqe,
        conv_tol=conv_tol_vqe,
    )


def _steps(trace):
    """Return the 1-based x-axis values for an energy trace."""
    return range(1, len(trace) + 1)


# The loop variable is deliberately not named `molecule_data`: that name is the
# imported module, and rebinding it here would break a later re-run of the
# dataset cell (molecule_data.molecule_data.load_data would no longer resolve).
for mol in test_set:
    mol_header = f"the test molecule:{mol.molname}, bondlength: {mol.bondlength}"

    # --- Model rollout ---
    predicted_params_list, predicted_energies_list = trainer.evaluate(
        molecule_data=mol,
        num_rnn_iteration=steps_recurrent_loop_test,
    )
    print(mol_header)
    print(f"{model_type} predicted energies:{predicted_energies_list}")

    # --- VQE started from the model's last prediction ---
    print("\n--- VQE optimization after model ---")
    print(mol_header)
    (
        conv_iter_lstm,
        final_params_lstm,
        final_energy_lstm,
        params_history_lstm,
        energy_history_lstm,
    ) = _run_vqe(mol, predicted_params_list[-1], True)

    # NOTE(review): this value is computed but not used below; the baselines
    # are initialised with `max_params` entries.  If VQE.VQE(..., False)
    # expects only the molecule's own parameter count, `vqe_params_test`
    # should be used for the tensor sizes instead — confirm.
    vqe_params_test = vqe_params(mol)

    # --- Baseline: standard VQE from uniform-random parameters ---
    print("\n--- Standard VQE, random params ---")
    print(mol_header)
    params_rand = torch.rand(max_params, dtype=torch.float32)
    (
        conv_iter_rand,
        final_params_rand,
        final_energy_rand,
        params_history_rand,
        energy_history_rand,
    ) = _run_vqe(mol, params_rand, False)

    # --- Baseline: standard VQE from all-zero parameters ---
    print("\n--- Standard VQE, zero params ---")
    print(mol_header)
    params_zeros = torch.zeros(max_params, dtype=torch.float32)
    (
        conv_iter_zeros,
        final_params_zeros,
        final_energy_zeros,
        params_history_zeros,
        energy_history_zeros,
    ) = _run_vqe(mol, params_zeros, False)

    # Disabled baseline (all parameters = pi).  To re-enable, uncomment the
    # lines below together with the 'VQE Pi' CSV column and plot line.
    # print("\n--- Standard VQE, pi params ---")
    # print(mol_header)
    # params_pi = torch.ones(max_params, dtype=torch.float32) * math.pi
    # (conv_iter_pi, final_params_pi, final_energy_pi,
    #  params_history_pi, energy_history_pi) = _run_vqe(mol, params_pi, False)

    # --- Save the energy traces ---
    # Wrapping each trace in a Series lets pandas pad traces of different
    # lengths with NaN when they are combined into one frame.
    molecule_test_result = {
        # 'molecule': mol.molname,
        # 'bondlength': mol.bondlength,
        'LSTM-VQE': pd.Series(predicted_energies_list),
        'VQE after LSTM': pd.Series(energy_history_lstm),
        'VQE Random': pd.Series(energy_history_rand),
        'VQE Zeros': pd.Series(energy_history_zeros),
        # 'VQE Pi': pd.Series(energy_history_pi),
    }
    df_result = pd.DataFrame(molecule_test_result)
    df_result.to_csv(
        f"{Results_save_path}_{model_type}_{loss_type}_{mol.molname}_{mol.bondlength}.csv",
        index=False,
    )
    print("\n--- Saving Complete ---")

    # --- Plot: energies versus optimisation step ---
    print("Result of VQE Optimization")
    E_fci = mol.fci_energy
    plt.plot(_steps(predicted_energies_list), predicted_energies_list,
             marker='o', label=f'{model_type}-VQE', ls="dashed")
    plt.plot(_steps(energy_history_lstm), energy_history_lstm,
             marker='o', label=f'VQE after {model_type}', ls="dashed")
    plt.plot(_steps(energy_history_rand), energy_history_rand,
             marker='o', label='VQE, Random', ls="dashed")
    plt.plot(_steps(energy_history_zeros), energy_history_zeros,
             marker='o', label='VQE, Zeros', ls="dashed")
    # plt.plot(_steps(energy_history_pi), energy_history_pi,
    #          marker='o', label='VQE, Pi', ls="dashed")
    # Red reference line at the FCI energy, spanning the random-init trace.
    plt.plot(_steps(energy_history_rand),
             np.full(len(energy_history_rand), E_fci), color="red")
    plt.xlabel("Optimization step")
    plt.ylabel("Energy (Hartree)")
    plt.title(f"VQE Optimization Comparison, Test = {mol.molname}, {mol.bondlength}")
    plt.legend()
    plt.grid(True)
    plt.show()

    # --- Plot: error with respect to the FCI energy ---
    # The random-init baseline is not shown here, as in the original notebook.
    print("Result of Erros")
    plt.plot(_steps(predicted_energies_list),
             [item - E_fci for item in predicted_energies_list],
             marker='o', label=f'{model_type}-VQE', ls="dashed")
    plt.plot(_steps(energy_history_lstm),
             [item - E_fci for item in energy_history_lstm],
             marker='o', label=f'VQE after {model_type}', ls="dashed")
    plt.plot(_steps(energy_history_zeros),
             [item - E_fci for item in energy_history_zeros],
             marker='o', label='VQE, Zeros', ls="dashed")
    # Red reference line at the accuracy threshold.
    plt.plot(_steps(energy_history_lstm),
             np.full(len(energy_history_lstm), CHEMICAL_ACCURACY), color="red")
    plt.xlabel("Optimization step")
    plt.ylabel("Energy Error (Hartree)")
    plt.title(f"VQE Optimization Error Comparison, Test = {mol.molname}, {mol.bondlength}")
    plt.legend()
    plt.grid(True)
    plt.show()

print("\n--- Simulation Complete ---")