In [2]:
from rdkit import Chem
from torch_geometric.data import Data
import torch
from library.functions_to_abstract_data import extract_qm9_data
from torch_geometric.datasets import QM9

In [3]:
def smiles_to_graph(smiles, y_value):
    """Convert a SMILES string into a PyTorch Geometric ``Data`` graph.

    Each atom of the parsed molecule becomes a node whose only feature is
    its atomic number. Each bond contributes two directed edges (i -> j and
    j -> i), so message passing treats the graph as undirected.

    Args:
        smiles: SMILES string, parsed with ``Chem.MolFromSmiles``.
        y_value: Scalar target value stored on the graph as ``y``.

    Returns:
        A ``Data`` object with
        ``x`` of shape ``(num_atoms, 1)`` and dtype long,
        ``edge_index`` of shape ``(2, 2 * num_bonds)`` and dtype long,
        ``y`` of shape ``(1,)`` and dtype float.

    Raises:
        ValueError: If the SMILES string cannot be parsed.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; fail here
        # with a clear message instead of an AttributeError further down.
        raise ValueError(f"Could not parse SMILES string: {smiles!r}")

    # NOTE(review): with default settings MolFromSmiles appears to drop
    # explicit hydrogens, so nodes are presumably heavy atoms only - confirm
    # this is the intended featurisation.

    # Node features (atomic number), one row per atom.
    x = torch.tensor(
        [atom.GetAtomicNum() for atom in mol.GetAtoms()],
        dtype=torch.long,
    ).unsqueeze(1)

    # Edges (bond connections), stored in both directions.
    pairs = []
    for bond in mol.GetBonds():
        i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        pairs.append((i, j))
        pairs.append((j, i))  # undirected

    # reshape(-1, 2) keeps the (num_edges, 2) layout even when there are no
    # bonds, so the transpose yields shape (2, 0) rather than a 1-D tensor.
    edge_index = (
        torch.tensor(pairs, dtype=torch.long).reshape(-1, 2).t().contiguous()
    )
    y = torch.tensor([y_value], dtype=torch.float)

    return Data(x=x, edge_index=edge_index, y=y)

In [4]:
# --- Load data ---
# Open the QM9 dataset rooted at ../data/QM9 and pass it through the
# project helper to get a table with (at least) "smiles" and "gap" columns.
dataset_qm9 = QM9(root="../data/QM9")
df_qm9 = extract_qm9_data(dataset_qm9)

# Columns used by the cells below: SMILES strings and the gap target.
smiles, gaps = df_qm9["smiles"], df_qm9["gap"]

# Graph conversion is currently disabled:
# dataset = [smiles_to_graph(smi, gap) for smi, gap in zip(smiles, gaps)]

# Save dataset (TODO: not implemented yet)

In [5]:
# Exploratory: list every name exposed by the rdkit.Chem module.
from rdkit import Chem
print(dir(Chem))


['ADJUST_IGNOREALL', 'ADJUST_IGNORECHAINS', 'ADJUST_IGNOREDUMMIES', 'ADJUST_IGNOREMAPPED', 'ADJUST_IGNORENONDUMMIES', 'ADJUST_IGNORENONE', 'ADJUST_IGNORERINGS', 'ALLOW_CHARGE_SEPARATION', 'ALLOW_INCOMPLETE_OCTETS', 'AROMATICITY_CUSTOM', 'AROMATICITY_DEFAULT', 'AROMATICITY_MDL', 'AROMATICITY_MMFF94', 'AROMATICITY_RDKIT', 'AROMATICITY_SIMPLE', 'AddHs', 'AddHsParameters', 'AddMetadataToPNGFile', 'AddMetadataToPNGString', 'AddMolSubstanceGroup', 'AddRecursiveQuery', 'AddStereoAnnotations', 'AddWavyBondsForStereoAny', 'AdjustQueryParameters', 'AdjustQueryProperties', 'AdjustQueryPropertiesWithGenericGroups', 'AdjustQueryWhichFlags', 'AllProps', 'AromaticityModel', 'AssignAtomChiralTagsFromMolParity', 'AssignAtomChiralTagsFromStructure', 'AssignCIPLabels', 'AssignChiralTypesFromBondDirs', 'AssignRadicals', 'AssignStereochemistry', 'AssignStereochemistryFrom3D', 'Atom', 'AtomFromSmarts', 'AtomFromSmiles', 'AtomHasConjugatedBond', 'AtomKekulizeException', 'AtomMonomerInfo', 'AtomMonomerType', 

In [8]:
from rdkit.Chem import rdDistGeom

# Pick a single entry from the SMILES column to inspect.
smile = smiles[100]
print(f'SMILES: {smile}')

# GetMoleculeBoundsMatrix is RDKit's distance-geometry *bounds* matrix; the
# recorded output is asymmetric, which fits bounds rather than one distance
# per atom pair. NOTE(review): the printed label says "Distance matrix" -
# confirm which triangle holds the upper and which the lower bounds.
mol = Chem.MolFromSmiles(smile)
dist_mat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
print(f'Distance matrix: {dist_mat}')

# Overwrite exact zeros (the diagonal in the recorded output) with 1,
# modifying the matrix in place, then show the result.
zero_mask = dist_mat == 0.
dist_mat[zero_mask] = 1
print(dist_mat)

SMILES: [H]O[C@@]1([H])C([H])([H])[C@@]1([H])O[H]
Distance matrix: [[0.         1.40384485 2.50681417 2.50681417 3.70995353]
 [1.38384485 0.         1.554      1.554      2.50681417]
 [2.42681417 1.474      0.         1.554      2.50681417]
 [2.42681417 1.474      1.474      0.         1.40384485]
 [2.73604273 2.42681417 2.42681417 1.38384485 0.        ]]
[[1.         1.40384485 2.50681417 2.50681417 3.70995353]
 [1.38384485 1.         1.554      1.554      2.50681417]
 [2.42681417 1.474      1.         1.554      2.50681417]
 [2.42681417 1.474      1.474      1.         1.40384485]
 [2.73604273 2.42681417 2.42681417 1.38384485 1.        ]]
