In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

In [2]:
import pennylane as qml
from pennylane import numpy as np

In [18]:
import torch
import torch.nn as nn

## SMILES to QMSE-Compatible Molecular Data
To convert SMILES strings into the detailed molecular representations required for Quantum Molecular Structure Encoding (QMSE), we follow the step-by-step approach below, implemented in Python with RDKit:

1. Input Canonical SMILES
2. Generate 3D Conformers for accurate geometry
3. Extract bond orders and stereochemistry
4. Calculate Isomerism Parameters `epsilon_D` and `epsilon_T`
5. Construct Hybrid Coulomb-Adjacency Matrix

In [3]:
# Load the QM9 table from 'qm9.csv' (path is relative to the working directory).
df = pd.read_csv('qm9.csv')
# Preview the first rows; the cells below read the 'smiles' column.
df.head()

Unnamed: 0,mol_id,smiles,A,B,C,mu,alpha,homo,lumo,gap,...,zpve,u0,u298,h298,g298,cv,u0_atom,u298_atom,h298_atom,g298_atom
0,gdb_1,C,157.7118,157.70997,157.70699,0.0,13.21,-0.3877,0.1171,0.5048,...,0.044749,-40.47893,-40.476062,-40.475117,-40.498597,6.469,-395.999595,-398.64329,-401.014647,-372.471772
1,gdb_2,N,293.60975,293.54111,191.39397,1.6256,9.46,-0.257,0.0829,0.3399,...,0.034358,-56.525887,-56.523026,-56.522082,-56.544961,6.316,-276.861363,-278.620271,-280.399259,-259.338802
2,gdb_3,O,799.58812,437.90386,282.94545,1.8511,6.31,-0.2928,0.0687,0.3615,...,0.021375,-76.404702,-76.401867,-76.400922,-76.422349,6.002,-213.087624,-213.974294,-215.159658,-201.407171
3,gdb_4,C#C,0.0,35.610036,35.610036,0.0,16.28,-0.2845,0.0506,0.3351,...,0.026841,-77.308427,-77.305527,-77.304583,-77.327429,8.574,-385.501997,-387.237686,-389.016047,-365.800724
4,gdb_5,C#N,0.0,44.593883,44.593883,2.8937,12.99,-0.3604,0.0191,0.3796,...,0.016601,-93.411888,-93.40937,-93.408425,-93.431246,6.278,-301.820534,-302.906752,-304.091489,-288.720028


In [16]:
def QMSEMatrix(smiles):
    """Build the hybrid Coulomb-adjacency (QMSE) matrix for a SMILES string.

    The molecule is parsed, hydrogens are made explicit, stereochemistry is
    (re)assigned, and a 3D conformer is embedded and MMFF-relaxed. The matrix
    itself is filled from atomic numbers, bond orders and stereo flags only:

    * diagonal:     ``M[i, i] = 0.5 * epsilon_T(i) * Z_i ** 3``
    * bonded pairs: ``M[i, j] = M[j, i] = epsilon_D(i, j) * Z_i * Z_j / order``
    * all other entries stay 0

    ``epsilon_D`` is -1 for double bonds labelled Z and +1 for every other
    bond. ``epsilon_T`` is -1 for chiral centres labelled S and +1 for every
    other atom (R, unassigned, or not a chiral centre).

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A tuple ``(M, bond_orders_and_stereo)``. ``M`` is a square array with
        one row/column per atom (explicit hydrogens included), created with
        ``np.zeros``. ``bond_orders_and_stereo`` is a list with one
        ``(begin_idx, end_idx, bond_order, epsilon_D)`` tuple per bond.

    Raises:
        ValueError: if RDKit cannot parse ``smiles``.
    """
    mol = Chem.MolFromSmiles(smiles, sanitize=True)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"RDKit could not parse SMILES: {smiles!r}")
    mol = Chem.AddHs(mol)
    Chem.AssignStereochemistry(mol, cleanIt=True, force=True)

    # EmbedMolecule returns -1 when no conformer could be generated; calling
    # the optimiser in that case would raise. The matrix below does not read
    # the geometry, so a failed embedding is tolerated instead of aborting.
    if AllChem.EmbedMolecule(mol, randomSeed=67) != -1:
        AllChem.MMFFOptimizeMolecule(mol)

    bond_orders_and_stereo = []
    for bond in mol.GetBonds():
        is_z_double = (
            bond.GetBondType() == Chem.BondType.DOUBLE
            and bond.GetStereo() == Chem.BondStereo.STEREOZ
        )
        bond_orders_and_stereo.append((
            bond.GetBeginAtomIdx(),
            bond.GetEndAtomIdx(),
            bond.GetBondTypeAsDouble(),
            -1 if is_z_double else 1,
        ))

    # FindMolChiralCenters yields (atom_idx, label) pairs whose label is the
    # string 'R', 'S' or '?' (unassigned), so the comparison is against 'S'.
    epsilon_T = {
        atom_idx: (-1 if label == 'S' else 1)
        for atom_idx, label in Chem.FindMolChiralCenters(mol, includeUnassigned=True)
    }

    Z = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
    num_atoms = len(Z)
    M = np.zeros((num_atoms, num_atoms))

    for i, z in enumerate(Z):
        M[i, i] = 0.5 * epsilon_T.get(i, 1) * (z ** 3)

    for i, j, order, epsilon_D in bond_orders_and_stereo:
        M[i, j] = M[j, i] = (epsilon_D * Z[i] * Z[j]) / order

    return M, bond_orders_and_stereo

In [5]:
# Sanity check on the molecule at positional row 16 of the table.
# QMSEMatrix returns a (matrix, bond list) tuple, which is what gets displayed.
QMSEMatrix(df.iloc[16]['smiles'])

tensor([[108. ,  36. ,  48. ,   6. ,   6. ,   0. ,   0. ],
        [ 36. , 108. ,  48. ,   0. ,   0. ,   6. ,   6. ],
        [ 48. ,  48. , 256. ,   0. ,   0. ,   0. ,   0. ],
        [  6. ,   0. ,   0. ,   0.5,   0. ,   0. ,   0. ],
        [  6. ,   0. ,   0. ,   0. ,   0.5,   0. ,   0. ],
        [  0. ,   6. ,   0. ,   0. ,   0. ,   0.5,   0. ],
        [  0. ,   6. ,   0. ,   0. ,   0. ,   0. ,   0.5]], requires_grad=True)

## Quantum Circuit Encoding of the QMSE

In [27]:
def FixedQMSECircuit(
    matrix,
    edge_list,
    params,
    n_qubits: int,
    n_layers: int
):
    """Create a PennyLane QNode that encodes a QMSE matrix and applies trainable layers.

    Gate sequence, in order:
      1. ``RY(matrix[q, q])`` on every qubit ``q``.
      2. ``IsingXX(matrix[a, b])`` on every pair ``a < b`` whose entry is non-zero.
      3. For each layer: ``IsingXX(params[layer, k])`` on the k-th edge, then
         ``RY(params[layer, len(edge_list) + q])`` on every qubit ``q``.

    Measurements, in order: ``<Z_q>`` and ``<X_q>`` interleaved per qubit,
    followed by ``<Z_a Z_b>`` for every edge.

    Args:
        matrix: 2D object indexed as ``matrix[i, j]``; entries are passed
            unchanged as rotation angles.
        edge_list: sequence of 4-tuples ``(i, j, _, _)``; only the two wire
            indices are used.
        params: object indexed as ``params[layer, column]`` with at least
            ``len(edge_list) + n_qubits`` columns per layer.
        n_qubits: number of wires on the device.
        n_layers: number of trainable layers.

    Returns:
        A zero-argument QNode (torch interface, ``default.qubit`` device) that
        returns the list of expectation values described above.
    """
    device = qml.device('default.qubit', wires=n_qubits)

    @qml.qnode(device, interface='torch')
    def applyCircuit():
        qubits = range(n_qubits)
        # Only the bond end points matter here; order and stereo are dropped.
        bonds = [(a, b) for a, b, _, _ in edge_list]
        rot_offset = len(bonds)

        # Data encoding: diagonal entries as local rotations ...
        for q in qubits:
            qml.RY(matrix[q, q], wires=q)
        # ... and non-zero upper-triangle entries as two-qubit couplings.
        for a in qubits:
            for b in qubits[a + 1:]:
                if matrix[a, b] != 0:
                    qml.IsingXX(matrix[a, b], wires=[a, b])

        # Trainable layers: one entangler per bond, then one rotation per qubit.
        for layer in range(n_layers):
            for k, (a, b) in enumerate(bonds):
                qml.IsingXX(params[layer, k], wires=[a, b])
            for q in qubits:
                qml.RY(params[layer, rot_offset + q], wires=q)

        # Readout: per-qubit Z and X, followed by ZZ correlators on each bond.
        local_terms = [
            qml.expval(pauli(q))
            for q in qubits
            for pauli in (qml.PauliZ, qml.PauliX)
        ]
        bond_terms = [
            qml.expval(qml.PauliZ(a) @ qml.PauliZ(b))
            for a, b in bonds
        ]
        return local_terms + bond_terms

    return applyCircuit

In [30]:
# Build the QMSE matrix and bond list for the molecule at positional row 16.
mysteryMatrix, mysteryEL = QMSEMatrix(df.iloc[16]['smiles'])
n_qubits = len(mysteryMatrix)  # one qubit per matrix row
n_layers = 4 # based on lit, 3-5 provides the best results
# Each layer takes one IsingXX angle per bond plus one RY angle per qubit.
num_params = len(mysteryEL) + n_qubits
# Draw the initial weights from a locally seeded generator so that
# Restart & Run All reproduces the same circuit outputs without touching
# torch's global RNG state.
mysteryWeight = torch.randn(
    n_layers,
    num_params,
    generator=torch.Generator().manual_seed(67),
    requires_grad=True,
)
mysteryCircuit = FixedQMSECircuit(
    mysteryMatrix,
    mysteryEL,
    mysteryWeight,
    n_qubits,
    n_layers
)

In [31]:
# Execute the QNode; it takes no arguments because the matrix, bond list and
# weights were bound when the circuit was constructed.
mysteryExpectations = mysteryCircuit()
# Order of values: <Z>, <X> for each qubit in turn, then <Z Z> for each bond.
mysteryExpectations



[tensor(-0.4490, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.1280, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.1378, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.2788, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.1877, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.6768, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.4836, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.1546, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.0048, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.5303, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.9683, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.0949, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.7436, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.2600, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(-0.1342, dtype=torch.float64, grad_fn=<DotBackward0>),
 tensor(0.1207, dtype=torch.float64, grad_fn=<DotBackward0>),
 