### **Requirements**

In [7]:
!pip install qiskit_nature
!pip install pyscf
!pip install rdkit
!pip install qiskit_aer
!pip install openfermion openfermionpyscf
!pip install scikit-learn
!pip install xgboost

Collecting qiskit_nature
  Downloading qiskit_nature-0.7.2-py3-none-any.whl.metadata (8.0 kB)
Collecting qiskit>=0.44 (from qiskit_nature)
  Downloading qiskit-2.2.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (12 kB)
Collecting qiskit-algorithms>=0.2.1 (from qiskit_nature)
  Downloading qiskit_algorithms-0.4.0-py3-none-any.whl.metadata (4.7 kB)
Collecting rustworkx>=0.12 (from qiskit_nature)
  Downloading rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting stevedore>=3.0.0 (from qiskit>=0.44->qiskit_nature)
  Downloading stevedore-5.5.0-py3-none-any.whl.metadata (2.2 kB)
Downloading qiskit_nature-0.7.2-py3-none-any.whl (2.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading qiskit-2.2.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m 

### **Data Preparation**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [59]:
url = 'https://raw.githubusercontent.com/TastelessCandyzzz/Quantum_Chem_Hackathon/main/bace.csv'

In [60]:
import pandas as pd
df = pd.read_csv(url,usecols=["mol","Class","MW"])
print(df.head())

                                                 mol  Class         MW
0  O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c...      1  431.56979
1  Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(...      1  657.81073
2  S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...      1  591.74091
3  S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c...      1  591.67828
4  S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...      1  629.71283


In [61]:
df=df.loc[df["MW"]<320]
del df["MW"]
df.reset_index(drop=True, inplace=True)
df.shape

(81, 2)

In [62]:
from rdkit import Chem
from rdkit.Chem import AllChem
import pandas as pd
import numpy as np

def smiles_to_fingerprint(smiles, radius=2, n_bits=1024):
    mol = Chem.MolFromSmiles(smiles)
    if mol:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        return np.array(fp)
    else:
        return np.zeros((n_bits,))

# Apply to all molecules
df['features'] = df['mol'].apply(smiles_to_fingerprint)



In [63]:
X = np.stack(df["features"])  # Convert feature column into a NumPy matrix
y = df['Class']

## **Calculating Qubit Hamiltonian, VQE ground-state energy, Hartree–Fock energy, Correlation energy, HOMO LUMO Gap and Dipole Moment**

In [8]:
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, rdmolops, Descriptors

# --- PySCF Imports ---
from pyscf import gto, scf

# --- Qiskit Core Imports ---
from qiskit import transpile
from qiskit_aer import AerSimulator
from qiskit.circuit.library import efficient_su2
from qiskit_algorithms.minimum_eigensolvers import VQE
from qiskit_algorithms.optimizers import COBYLA
from qiskit.primitives import BackendEstimatorV2

# --- Qiskit Nature Imports ---
from qiskit_nature.units import DistanceUnit
from qiskit_nature.second_q.drivers import PySCFDriver
from qiskit_nature.second_q.mappers import JordanWignerMapper
from qiskit_nature.second_q.transformers import ActiveSpaceTransformer, FreezeCoreTransformer

# ============================================================
# Helper: Generate 3D geometry from an RDKit Mol object
# ============================================================
def mol_to_geometry(mol: Chem.Mol):
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    AllChem.UFFOptimizeMolecule(mol)
    atoms = mol.GetAtoms()
    conf = mol.GetConformer()
    return "; ".join(
        f"{atom.GetSymbol()} "
        f"{conf.GetAtomPosition(atom.GetIdx()).x:.4f} "
        f"{conf.GetAtomPosition(atom.GetIdx()).y:.4f} "
        f"{conf.GetAtomPosition(atom.GetIdx()).z:.4f}"
        for atom in atoms
    )

# ============================================================
# Step 1: Convert geometry → qubit Hamiltonian
# ============================================================
def molecule_to_qubit_hamiltonian(
    geometry,
    basis="sto3g",
    charge=0,
    spin_multiplicity=1,
    active_electrons=2,
    active_orbitals=2,
):
    driver = PySCFDriver(
        atom=geometry, basis=basis, charge=charge,
        spin=(spin_multiplicity - 1), unit=DistanceUnit.ANGSTROM
    )
    es_problem = driver.run()
    freezer = FreezeCoreTransformer(freeze_core=True)
    transformer = ActiveSpaceTransformer(
        num_electrons=active_electrons, num_spatial_orbitals=active_orbitals
    )
    es_problem = freezer.transform(es_problem)
    es_problem = transformer.transform(es_problem)
    mapper = JordanWignerMapper()
    second_q_hamiltonian = es_problem.second_q_ops()[0]
    return {"qubit_operator": mapper.map(second_q_hamiltonian), "es_problem": es_problem}

# ============================================================
# Step 2: Main function to compute all descriptors
# ============================================================
def compute_quantum_descriptors(
    smiles: str,
    basis: str = "sto3g",
    active_electrons: int or tuple = 2,
    active_orbitals: int = 2,
):
    mol_rdkit = Chem.MolFromSmiles(smiles)
    if mol_rdkit is None:
        raise ValueError("Invalid SMILES string provided.")

    charge = rdmolops.GetFormalCharge(mol_rdkit)
    num_radical_electrons = Descriptors.NumRadicalElectrons(mol_rdkit)
    spin_multiplicity = num_radical_electrons + 1

    print(f"\n[•] Processing molecule: {smiles}")
    print(f"[i] Auto-detected Charge: {charge}, Auto-detected Spin: {spin_multiplicity}")

    geometry = mol_to_geometry(mol_rdkit)
    out = molecule_to_qubit_hamiltonian(
        geometry, basis, charge, spin_multiplicity, active_electrons, active_orbitals
    )
    qubit_op, es_problem = out["qubit_operator"], out["es_problem"]
    hf_energy = es_problem.reference_energy
    print(f"[+] Hartree–Fock energy: {hf_energy:.6f} Hartree")

    backend_sim = AerSimulator()
    estimator = BackendEstimatorV2(backend=backend_sim)
    ansatz = efficient_su2(qubit_op.num_qubits, reps=2, entanglement="linear")
    ansatz = transpile(ansatz, backend=backend_sim)
    optimizer = COBYLA(maxiter=200)
    vqe_solver = VQE(estimator, ansatz, optimizer)
    vqe_result = vqe_solver.compute_minimum_eigenvalue(qubit_op)
    vqe_energy = vqe_result.eigenvalue.real
    print(f"[+] VQE ground-state energy: {vqe_energy:.6f} Hartree")

    # --- PySCF calculations for additional properties ---
    mol_pyscf = gto.Mole(
        atom=geometry, basis=basis, charge=charge, spin=spin_multiplicity - 1
    ).build()
    mf = scf.RHF(mol_pyscf).run() if spin_multiplicity == 1 else scf.UHF(mol_pyscf).run()

    # Using the robust, low-level dipole calculation
    dm = mf.make_rdm1()
    dip_ints = mf.mol.intor('int1e_r', comp=3)
    elec_dip = -np.einsum('xij,ji->x', dip_ints, dm)
    nuc_dip = np.einsum('i,ix->x', mf.mol.atom_charges(), mf.mol.atom_coords())
    dipole_vector = elec_dip + nuc_dip
    dipole_moment = np.linalg.norm(dipole_vector)

    # --- Property Extraction ---
    if es_problem.orbital_energies_b is None:
        num_occupied = es_problem.num_alpha
        homo_energy = es_problem.orbital_energies[num_occupied - 1]
        lumo_energy = es_problem.orbital_energies[num_occupied]
    else:
        alpha_homo = es_problem.orbital_energies[es_problem.num_alpha - 1]
        beta_homo = es_problem.orbital_energies_b[es_problem.num_beta - 1]
        homo_energy = max(alpha_homo, beta_homo)
        lumo_energy = es_problem.orbital_energies[es_problem.num_alpha]

    # --- Collect all descriptors ---
    return {
        "smiles": smiles,
        "charge": charge,
        "spin_multiplicity": spin_multiplicity,
        "hf_energy": hf_energy,
        "vqe_energy": vqe_energy,
        "correlation_energy": hf_energy - vqe_energy,
        "n_qubits": qubit_op.num_qubits,
        "homo_energy": homo_energy,
        "lumo_energy": lumo_energy,
        "homo_lumo_gap": lumo_energy - homo_energy,
        "dipole_moment": dipole_moment,
    }

# ============================================================
# Example Run
# ============================================================
if __name__ == "__main__":
    hydroxide_results = compute_quantum_descriptors(smiles="[OH-]")
    print("\n--- Final Descriptors for [OH-] ---")
    for key, value in hydroxide_results.items():
        print(f"{key:<20}: {value:.6f}" if isinstance(value, float) else f"{key:<20}: {value}")
    print("-----------------------------------")


[•] Processing molecule: [OH-]
[i] Auto-detected Charge: -1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -74.060416 Hartree
[+] VQE ground-state energy: -0.299934 Hartree
converged SCF energy = -74.0604157013592

--- Final Descriptors for [OH-] ---
smiles              : [OH-]
charge              : -1
spin_multiplicity   : 1
hf_energy           : -74.060416
vqe_energy          : -0.299934
correlation_energy  : -73.760481
n_qubits            : 4
homo_energy         : 0.247616
lumo_energy         : 1.222037
homo_lumo_gap       : 0.974422
dipole_moment       : 0.804211
-----------------------------------


In [12]:
df.to_csv('/content/drive/MyDrive/BACE_.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import time # Used for the dummy function
from google.colab import files

output_csv_path = '/content/drive/MyDrive/BACE_with_quantum_features.csv'
input_csv_path = '/content/drive/MyDrive/BACE_.csv'
save_interval = 8

# List of new columns we will be adding
quantum_cols = [
    "hf_energy", "vqe_energy", "correlation_energy", "n_qubits",
    "homo_energy", "lumo_energy", "homo_lumo_gap", "dipole_moment"
]

# Check if an output file already exists to resume from it
if os.path.exists(output_csv_path):
    print(f"Resuming from existing file: '{output_csv_path}'")
    # When reading, set index_col=0 to prevent creating a new 'Unnamed: 0' column
    df = pd.read_csv(output_csv_path, index_col=0)
else:
    print(f"Starting a new run. Loading data from '{input_csv_path}'")
    df = pd.read_csv(input_csv_path)
    for col in quantum_cols:
        df[col] = np.nan

# We iterate through the DataFrame's index
for i in tqdm(df.index, desc="Computing quantum descriptors"):
    # Check if the calculation for this row has already been done
    # We only need to check one column; if it's not NaN, we assume the row is complete.
    if pd.notna(df.loc[i, 'hf_energy']):
        continue # Skip to the next molecule

    # Get the SMILES string for the current molecule
    smiles = df.loc[i, 'mol']

    try:
        descriptors = compute_quantum_descriptors(smiles, active_electrons=2, active_orbitals=2)

        if descriptors:
            # If successful, update all columns for the current index 'i'
            for key, value in descriptors.items():
                df.loc[i, key] = value
        else:
            df.loc[i, 'hf_energy'] = 'Failed'

    except Exception as e:
        print(f"[!] Error processing molecule {i} ({smiles}): {e}")
        # Mark with an 'Error' message to avoid re-calculating this row next time
        df.loc[i, 'hf_energy'] = 'Error'


    if (i + 1) % save_interval == 0 or (i + 1) == len(df):
        print(f"\n Processed up to molecule {i + 1}. Saving progress to '{output_csv_path}'...")
        df.to_csv('/content/drive/MyDrive/BACE_with_quantum_features.csv', index=False)

print("\n All molecules processed. Final data saved successfully!")
print(df.head())

Starting a new run. Loading data from '/content/drive/MyDrive/BACE_.csv'


Computing quantum descriptors:   0%|          | 0/81 [00:00<?, ?it/s]


[•] Processing molecule: O=C1N(C)C(N[C@@]1(CC1CCCCC1)CCC1CCCCC1)=N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -965.086237 Hartree
[+] VQE ground-state energy: -0.861831 Hartree
converged SCF energy = -965.086236582539


Computing quantum descriptors:   1%|          | 1/81 [03:12<4:16:19, 192.24s/it]


[•] Processing molecule: O=C(NC(=[NH2+])N)Cn1c(ccc1-c1ccccc1)-c1ccccc1
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1010.330855 Hartree
[+] VQE ground-state energy: -0.752921 Hartree
converged SCF energy = -1010.33085493198


Computing quantum descriptors:   2%|▏         | 2/81 [05:52<3:48:13, 173.33s/it]


[•] Processing molecule: Clc1cc(ccc1)-c1cc(ccc1)[C@@]1(OCCC(=[NH+]1)N)C
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1282.140915 Hartree
[+] VQE ground-state energy: -0.901309 Hartree
converged SCF energy = -1282.14091529092


Computing quantum descriptors:   4%|▎         | 3/81 [08:01<3:19:15, 153.27s/it]


[•] Processing molecule: [nH]1c2cc(ccc2cc1)-c1cc(ccc1)CNc1cccnc1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -973.453334 Hartree
[+] VQE ground-state energy: -0.614258 Hartree
converged SCF energy = -973.453334146325


Computing quantum descriptors:   5%|▍         | 4/81 [10:39<3:18:50, 154.94s/it]


[•] Processing molecule: O=C1N(C)C(N[C@](C1)(C)[C@@H]1C[C@H]1c1ccccc1)=N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -807.135855 Hartree
[+] VQE ground-state energy: -0.734406 Hartree
converged SCF energy = -807.135854772484


Computing quantum descriptors:   6%|▌         | 5/81 [12:25<2:53:51, 137.26s/it]


[•] Processing molecule: O(C)c1cc(ccc1)-c1cc(ccc1)CCc1nc(N)ccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -941.029948 Hartree
[+] VQE ground-state energy: -0.659942 Hartree
converged SCF energy = -941.029948278698


Computing quantum descriptors:   7%|▋         | 6/81 [14:51<2:55:19, 140.27s/it]


[•] Processing molecule: [NH+]=1C(C=2N(CCCN=2)C=1N)(c1ccccc1)c1ccccc1
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -899.179812 Hartree
[+] VQE ground-state energy: -1.005617 Hartree
converged SCF energy = -899.17981238663


Computing quantum descriptors:   9%|▊         | 7/81 [17:20<2:56:42, 143.28s/it]


[•] Processing molecule: O(C)c1cc(cnc1)-c1cc(ccc1)CNc1cccnc1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -972.490604 Hartree
[+] VQE ground-state energy: -0.690912 Hartree
converged SCF energy = -972.490604139864


Computing quantum descriptors:  10%|▉         | 8/81 [19:43<2:54:15, 143.22s/it]


 Processed up to molecule 8. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: S1CC[C@@](N=C1N)(C)c1ccc(OC)cc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1032.651231 Hartree
[+] VQE ground-state energy: -0.740321 Hartree
converged SCF energy = -1032.65123097288


Computing quantum descriptors:  11%|█         | 9/81 [21:07<2:29:30, 124.59s/it]


[•] Processing molecule: O=C1N(C)C(=N[C@@](C1)(CCc1ccccc1)C)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -769.778914 Hartree
[+] VQE ground-state energy: -0.825293 Hartree
converged SCF energy = -769.778914009146


Computing quantum descriptors:  12%|█▏        | 10/81 [22:51<2:19:52, 118.20s/it]


[•] Processing molecule: [nH]1c2cc(ccc2cc1)CCc1nc(N)ccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -730.981417 Hartree
[+] VQE ground-state energy: -0.660779 Hartree
converged SCF energy = -730.981417308447


Computing quantum descriptors:  14%|█▎        | 11/81 [24:23<2:08:45, 110.36s/it]


[•] Processing molecule: O1[C@@H]2COCC[C@@]2(N=C1N)c1cc(ccc1)-c1cncnc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -970.431369 Hartree
[+] VQE ground-state energy: -0.742288 Hartree
converged SCF energy = -970.431369060725


Computing quantum descriptors:  15%|█▍        | 12/81 [26:40<2:15:59, 118.25s/it]


[•] Processing molecule: n1cccc(NCc2cc(ccc2)-c2cccnc2)c1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -860.078266 Hartree
[+] VQE ground-state energy: -0.695180 Hartree
converged SCF energy = -860.07826649921


Computing quantum descriptors:  16%|█▌        | 13/81 [28:39<2:14:29, 118.66s/it]


[•] Processing molecule: O1CCC(OC(=O)[C@@H]2[NH2+]C[C@]3(C2)c2c(NC3=O)cccc2)CC1
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1050.866341 Hartree
[+] VQE ground-state energy: -0.986757 Hartree
converged SCF energy = -1050.8663406756


Computing quantum descriptors:  17%|█▋        | 14/81 [31:18<2:26:00, 130.76s/it]


[•] Processing molecule: O=C1NC(=NC(=C1)CCc1cc2[nH]ccc2cc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -820.539650 Hartree
[+] VQE ground-state energy: -0.670611 Hartree
converged SCF energy = -820.539649698647


Computing quantum descriptors:  19%|█▊        | 15/81 [32:54<2:12:12, 120.19s/it]


[•] Processing molecule: Clc1cc(ccc1OCCCC)CSC(=[NH2+])N
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1488.275850 Hartree
[+] VQE ground-state energy: -0.998829 Hartree
converged SCF energy = -1488.2758498872


Computing quantum descriptors:  20%|█▉        | 16/81 [34:32<2:02:59, 113.54s/it]


 Processed up to molecule 16. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: n1cccc(NCc2ccccc2)c1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -617.579529 Hartree
[+] VQE ground-state energy: -0.423995 Hartree
converged SCF energy = -617.579529070738


Computing quantum descriptors:  21%|██        | 17/81 [35:42<1:47:10, 100.48s/it]


[•] Processing molecule: Clc1cc(ccc1)CN1C(=O)[C@](NC1=N)(CC(C)C)C
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1262.360912 Hartree
[+] VQE ground-state energy: -0.720959 Hartree
converged SCF energy = -1262.36091176769


Computing quantum descriptors:  22%|██▏       | 18/81 [37:49<1:53:44, 108.33s/it]


[•] Processing molecule: Fc1ccc(cc1)CC1CC[NH2+]CC1
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -610.426318 Hartree
[+] VQE ground-state energy: -0.822923 Hartree
converged SCF energy = -610.426318365623


Computing quantum descriptors:  23%|██▎       | 19/81 [38:55<1:38:55, 95.74s/it] 


[•] Processing molecule: O=C1N(C)C(=NC(=C1)[C@H]1C[C@H]1c1ccccc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -767.355830 Hartree
[+] VQE ground-state energy: -0.681128 Hartree
converged SCF energy = -767.355830266903


Computing quantum descriptors:  25%|██▍       | 20/81 [40:31<1:37:20, 95.75s/it]


[•] Processing molecule: Clc1cc(ccc1)CN1C(=O)C(NC1=N)(C)C
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1146.628766 Hartree
[+] VQE ground-state energy: -0.815308 Hartree
converged SCF energy = -1146.62876646443


Computing quantum descriptors:  26%|██▌       | 21/81 [42:02<1:34:28, 94.48s/it]


[•] Processing molecule: Clc1cc2nc(n(c2cc1)CCCO)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1070.658830 Hartree
[+] VQE ground-state energy: -0.688604 Hartree
converged SCF energy = -1070.65883047614


Computing quantum descriptors:  27%|██▋       | 22/81 [43:17<1:27:07, 88.61s/it]


[•] Processing molecule: n1c2c(nc(N)c1N1CCCC1)cccc2
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -671.957588 Hartree
[+] VQE ground-state energy: -0.681515 Hartree
converged SCF energy = -671.957587989834


Computing quantum descriptors:  28%|██▊       | 23/81 [44:33<1:22:04, 84.91s/it]


[•] Processing molecule: Clc1cc(ccc1)CCc1cccnc1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1055.860401 Hartree
[+] VQE ground-state energy: -0.769904 Hartree
converged SCF energy = -1055.86040104492


Computing quantum descriptors:  30%|██▉       | 24/81 [45:53<1:19:05, 83.26s/it]


 Processed up to molecule 24. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: n1ccc2c(cccc2)c1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -448.735587 Hartree
[+] VQE ground-state energy: -0.650692 Hartree
converged SCF energy = -448.735587258011


Computing quantum descriptors:  31%|███       | 25/81 [46:51<1:10:44, 75.80s/it]


[•] Processing molecule: n1c2c(ccc1N)cccc2
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -448.738880 Hartree
[+] VQE ground-state energy: -0.682959 Hartree
converged SCF energy = -448.73887983714


Computing quantum descriptors:  32%|███▏      | 26/81 [47:49<1:04:30, 70.37s/it]


[•] Processing molecule: O1CC[NH+](CC1)CC(O)COc1cc(ccc1C)C
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -850.209466 Hartree
[+] VQE ground-state energy: -0.917630 Hartree
converged SCF energy = -850.209465864134


Computing quantum descriptors:  33%|███▎      | 27/81 [49:52<1:17:40, 86.31s/it]


[•] Processing molecule: O=C1NC(=NC(=C1)CCC)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -503.250795 Hartree
[+] VQE ground-state energy: -0.552351 Hartree
converged SCF energy = -503.250795403019


Computing quantum descriptors:  35%|███▍      | 28/81 [50:54<1:09:46, 78.98s/it]


[•] Processing molecule: O=C1N(C)C(=[NH2+])NC1(c1ccccc1)c1ccccc1
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -842.692646 Hartree
[+] VQE ground-state energy: -0.952831 Hartree
converged SCF energy = -842.692646245713


Computing quantum descriptors:  36%|███▌      | 29/81 [52:51<1:18:07, 90.15s/it]


[•] Processing molecule: n1c2c(cc(cc2)-c2ccccc2C#CC(C)(C)C)ccc1N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -904.546185 Hartree
[+] VQE ground-state energy: -0.625993 Hartree
converged SCF energy = -904.546184819942


Computing quantum descriptors:  37%|███▋      | 30/81 [55:25<1:33:05, 109.52s/it]


[•] Processing molecule: O=C(NCC1CCCCC1)CCc1cc2c(nc1N)cccc2
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -960.339065 Hartree
[+] VQE ground-state energy: -0.579492 Hartree
converged SCF energy = -960.339065120484


Computing quantum descriptors:  38%|███▊      | 31/81 [58:15<1:46:12, 127.45s/it]


[•] Processing molecule: O=C(N(C)C1CCCCC1)CCc1cc2c(nc1N)cccc2
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -960.332844 Hartree
[+] VQE ground-state energy: -0.491296 Hartree
converged SCF energy = -960.332843940019


Computing quantum descriptors:  40%|███▉      | 32/81 [1:01:12<1:56:25, 142.56s/it]


 Processed up to molecule 32. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: s1cc(cc1)-c1cc2c(nc(N)cc2)cc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -992.682617 Hartree
[+] VQE ground-state energy: -0.567255 Hartree
converged SCF energy = -992.682617032222


Computing quantum descriptors:  41%|████      | 33/81 [1:02:28<1:38:00, 122.50s/it]


[•] Processing molecule: Oc1ccc(cc1C1CCCCC1)CC[NH3+]
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -663.941094 Hartree
[+] VQE ground-state energy: -1.011905 Hartree
converged SCF energy = -663.941093743385


Computing quantum descriptors:  42%|████▏     | 34/81 [1:04:01<1:29:07, 113.77s/it]


[•] Processing molecule: Oc1ccc(cc1CC)CC[NH3+]
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -510.782341 Hartree
[+] VQE ground-state energy: -0.965789 Hartree
converged SCF energy = -510.782340905577


Computing quantum descriptors:  43%|████▎     | 35/81 [1:05:09<1:16:40, 100.01s/it]


[•] Processing molecule: Oc1ccc(cc1)CC([NH3+])C
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -472.208420 Hartree
[+] VQE ground-state energy: -0.999641 Hartree
converged SCF energy = -472.208419995855


Computing quantum descriptors:  44%|████▍     | 36/81 [1:06:12<1:06:29, 88.66s/it] 


[•] Processing molecule: Oc1ccc(cc1)CC[NH3+]
[i] Auto-detected Charge: 1, Auto-detected Spin: 1
[+] Hartree–Fock energy: -433.621560 Hartree
[+] VQE ground-state energy: -1.000321 Hartree
converged SCF energy = -433.62156022659


Computing quantum descriptors:  46%|████▌     | 37/81 [1:07:12<58:53, 80.31s/it]  


[•] Processing molecule: S1CCC(=NC1(C)c1cc(ccc1)-c1cc(OC)ccc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1259.402007 Hartree
[+] VQE ground-state energy: -0.685414 Hartree
converged SCF energy = -1259.40200713342


Computing quantum descriptors:  47%|████▋     | 38/81 [1:09:39<1:11:51, 100.26s/it]


[•] Processing molecule: O=C1N(C)C(=NC1(C1CCCCC1)c1ccccc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -845.791470 Hartree
[+] VQE ground-state energy: -0.792295 Hartree
converged SCF energy = -845.79147041015


Computing quantum descriptors:  48%|████▊     | 39/81 [1:11:49<1:16:20, 109.06s/it]


[•] Processing molecule: S1CCC(=NC1(C)c1cc(ccc1)-c1cncnc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1178.482068 Hartree
[+] VQE ground-state energy: -0.763488 Hartree
converged SCF energy = -1178.48206795889


Computing quantum descriptors:  49%|████▉     | 40/81 [1:13:50<1:16:57, 112.61s/it]


 Processed up to molecule 40. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: O=C1N(C)C(=NC1(C1CCCCC1)C1CCCCC1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -849.369386 Hartree
[+] VQE ground-state energy: -0.769033 Hartree
converged SCF energy = -849.369386260021


Computing quantum descriptors:  51%|█████     | 41/81 [1:16:17<1:22:05, 123.14s/it]


[•] Processing molecule: O1CC2(N=C1N)CC(Cc1c2cc(cc1)-c1cncnc1)(C)C
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -973.756431 Hartree
[+] VQE ground-state energy: -0.699321 Hartree
converged SCF energy = -973.75643127918


Computing quantum descriptors:  52%|█████▏    | 42/81 [1:18:59<1:27:27, 134.56s/it]


[•] Processing molecule: S1CC(N=C(N)C1)(C)c1cc(ccc1)-c1cc(OC)ccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1259.403176 Hartree
[+] VQE ground-state energy: -0.623370 Hartree
converged SCF energy = -1259.40317618785


Computing quantum descriptors:  53%|█████▎    | 43/81 [1:21:26<1:27:40, 138.44s/it]


[•] Processing molecule: S1CCC(N=C1N)(C)c1cc(ccc1)-c1cc(OC)ccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1259.405411 Hartree
[+] VQE ground-state energy: -0.675613 Hartree
converged SCF energy = -1259.40541146157


Computing quantum descriptors:  54%|█████▍    | 44/81 [1:23:55<1:27:19, 141.61s/it]


[•] Processing molecule: Clc1ccc(cc1)C1(N=C(N)N(C)C1=O)c1ccccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1296.222647 Hartree
[+] VQE ground-state energy: -0.783386 Hartree
converged SCF energy = -1296.22264673429


Computing quantum descriptors:  56%|█████▌    | 45/81 [1:26:06<1:22:58, 138.30s/it]


[•] Processing molecule: Clc1cc(ccc1)C1(N=C(N)N(C)C1=O)c1ccccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -1296.222228 Hartree
[+] VQE ground-state energy: -0.775257 Hartree
converged SCF energy = -1296.22222837249


Computing quantum descriptors:  57%|█████▋    | 46/81 [1:28:12<1:18:38, 134.82s/it]


[•] Processing molecule: O=C1N(C)C(=NC1(c1ccccc1)c1ccncc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -857.961834 Hartree
[+] VQE ground-state energy: -0.782078 Hartree
converged SCF energy = -857.961833839651


Computing quantum descriptors:  58%|█████▊    | 47/81 [1:30:02<1:12:10, 127.38s/it]


[•] Processing molecule: O(C)c1ccc(cc1)C1(N=C(N)N(C)C1=O)c1ccccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -954.630348 Hartree
[+] VQE ground-state energy: -0.580364 Hartree
converged SCF energy = -954.630348037932


Computing quantum descriptors:  59%|█████▉    | 48/81 [1:32:20<1:11:48, 130.56s/it]


 Processed up to molecule 48. Saving progress to '/content/drive/MyDrive/BACE_with_quantum_features.csv'...

[•] Processing molecule: O1CC(N=C(N)C1)(C)c1cc(ccc1)-c1cc(OC)ccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -940.066574 Hartree
[+] VQE ground-state energy: -0.500651 Hartree
converged SCF energy = -940.066573599079


Computing quantum descriptors:  60%|██████    | 49/81 [1:34:44<1:11:45, 134.53s/it]


[•] Processing molecule: O=C1N(C)C(=NC1(c1ccccc1)c1ccccc1)N
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -842.218371 Hartree
[+] VQE ground-state energy: -0.692902 Hartree
converged SCF energy = -842.218370880246


Computing quantum descriptors:  62%|██████▏   | 50/81 [1:36:36<1:06:01, 127.80s/it]


[•] Processing molecule: O(C)c1cc(ccc1)C1(N=C(N)N(C)C1=O)c1ccccc1
[i] Auto-detected Charge: 0, Auto-detected Spin: 1
[+] Hartree–Fock energy: -954.629590 Hartree
[+] VQE ground-state energy: -0.765461 Hartree


## **Training ML Model Using Classically Obtained Features Only**

In [64]:
import pandas as pd
import numpy as np

d = pd.read_csv("/content/drive/MyDrive/BACE_with_quantum_features.csv")
print(d.head())

                                               mol  Class           features  \
0        O=C1N(C)C(N[C@@]1(CC1CCCCC1)CCC1CCCCC1)=N      0  [0 0 1 ... 0 0 0]   
1    O=C(NC(=[NH2+])N)Cn1c(ccc1-c1ccccc1)-c1ccccc1      0  [0 0 0 ... 0 0 0]   
2   Clc1cc(ccc1)-c1cc(ccc1)[C@@]1(OCCC(=[NH+]1)N)C      0  [0 0 0 ... 0 0 0]   
3         [nH]1c2cc(ccc2cc1)-c1cc(ccc1)CNc1cccnc1N      0  [0 0 0 ... 0 0 0]   
4  O=C1N(C)C(N[C@](C1)(C)[C@@H]1C[C@H]1c1ccccc1)=N      0  [0 0 0 ... 0 0 0]   

     hf_energy  vqe_energy  correlation_energy  n_qubits  homo_energy  \
0  -965.086237   -0.861831         -964.224406       4.0    -0.245345   
1 -1010.330855   -0.752921        -1009.577934       4.0    -0.283762   
2 -1282.140915   -0.901309        -1281.239606       4.0    -0.348090   
3  -973.453334   -0.614258         -972.839076       4.0    -0.197059   
4  -807.135855   -0.734406         -806.401449       4.0    -0.242222   

   lumo_energy  homo_lumo_gap  dipole_moment  \
0     0.281297       0.526642   

In [65]:
d["feature"]=df["features"]
d["feature"]

Unnamed: 0,feature
0,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...
76,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
77,"[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
78,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
79,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ..."


In [66]:
d.dropna(inplace=True)
d.reset_index(drop=True, inplace=True)

In [67]:
import numpy as np

X = np.stack(d["feature"])  # Convert feature column into a NumPy matrix
y = d['Class']
X.dtype

dtype('int64')

In [72]:
y.value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,39
1,9


In [68]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.95)
pca.fit(X)
X_pca = pca.transform(X)
print("X_train shape after PCA:", X_pca.shape)

X_train shape after PCA: (48, 35)


In [74]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=1)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (38, 35)
X_test shape: (10, 35)
y_train shape: (38,)
y_test shape: (10,)


In [75]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Initialize and train the XGBoost Classifier
xgb_model_classical = xgb.XGBClassifier(objective='binary:logistic', random_state=34, scale_pos_weight=8)
xgb_model_classical.fit(X_train, y_train)

print("XGBoost model (classical features only) trained successfully!")

# Make predictions on the test set
y_pred_xgb_classical = xgb_model_classical.predict(X_test)

# Evaluate the XGBoost model
accuracy_xgb_classical = accuracy_score(y_test, y_pred_xgb_classical)
f1_xgb_classical = f1_score(y_test, y_pred_xgb_classical)
precision_xgb_classical = precision_score(y_test, y_pred_xgb_classical)
recall_xgb_classical = recall_score(y_test, y_pred_xgb_classical)

# Print the evaluation metrics for XGBoost (classical features only)
print("\nXGBoost Model (Classical Features Only) Evaluation:")
print(f"Accuracy: {accuracy_xgb_classical:.4f}")
print(f"F1 Score: {f1_xgb_classical:.4f}")
print(f"Precision: {precision_xgb_classical:.4f}")
print(f"Recall: {recall_xgb_classical:.4f}")

XGBoost model (classical features only) trained successfully!

XGBoost Model (Classical Features Only) Evaluation:
Accuracy: 0.9000
F1 Score: 0.8000
Precision: 1.0000
Recall: 0.6667


## **Training ML Model Using the Quantum Features Also**

In [79]:
X_=np.array(pd.DataFrame(d,columns=["hf_energy","vqe_energy","homo_energy","lumo_energy","dipole_moment"]))
X_Q=np.hstack((X_pca,X_))

from sklearn.model_selection import train_test_split
X_train_Q, X_test_Q, y_train_Q, y_test_Q = train_test_split(X_Q, y, test_size=0.2, random_state=2)

print("X_train shape:", X_train_Q.shape)
print("X_test shape:", X_test_Q.shape)

X_train shape: (38, 40)
X_test shape: (10, 40)


In [84]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Initialize and train the XGBoost Classifier
xgb_model = xgb.XGBClassifier(objective='binary:logistic', random_state=4, scale_pos_weight=8)
xgb_model.fit(X_train_Q, y_train_Q)

print("XGBoost model trained successfully!")

# Make predictions on the test set
y_pred_xgb = xgb_model.predict(X_test_Q)

# Evaluate the XGBoost model
accuracy_xgb = accuracy_score(y_test_Q, y_pred_xgb)
f1_xgb = f1_score(y_test_Q, y_pred_xgb)
precision_xgb = precision_score(y_test_Q, y_pred_xgb)
recall_xgb = recall_score(y_test_Q, y_pred_xgb)

# Print the evaluation metrics for XGBoost
print("\nXGBoost Model Evaluation:")
print(f"Accuracy: {accuracy_xgb:.4f}")
print(f"F1 Score: {f1_xgb:.4f}")
print(f"Precision: {precision_xgb:.4f}")
print(f"Recall: {recall_xgb:.4f}")

XGBoost model trained successfully!

XGBoost Model Evaluation:
Accuracy: 0.9000
F1 Score: 0.6667
Precision: 0.5000
Recall: 1.0000
