### In this file, we will reconstruct our data into graphs

Instructions: You should probably have another separate notebook that creates the graph version of the dataset. Again, you should save the data; for this, make sure to use the save_graphs and load_graphs functions of DGL.

Important Libraries:
ase
Structure/Geometry of a molecule

In [2]:
import pandas as pd
import torch
from rdkit import Chem
from dgllife.utils import smiles_to_bigraph
from dgl.data.utils import save_graphs, load_graphs

Using backend: pytorch


In [3]:
def featurize_atoms(mol):
    """Compute an 11-dimensional feature vector for every atom of ``mol``.

    Columns, in order: atomic number, degree, total degree, explicit
    valence, implicit valence, total number of hydrogens, formal charge,
    number of radical electrons, aromaticity flag, ring-membership flag,
    and atomic mass scaled by 0.01.

    Args:
        mol: Molecule object exposing ``GetAtoms()``; each atom must provide
            the RDKit-style accessors called below.

    Returns:
        A dict with the single key ``'atom_feats'`` mapping to a float32
        tensor of shape ``(num_atoms, 11)``.
    """
    per_atom_rows = [
        [
            atom.GetAtomicNum(),
            atom.GetDegree(),
            atom.GetTotalDegree(),
            atom.GetExplicitValence(),
            atom.GetImplicitValence(),
            atom.GetTotalNumHs(),
            atom.GetFormalCharge(),
            atom.GetNumRadicalElectrons(),
            atom.GetIsAromatic(),
            atom.IsInRing(),
            # Scale the mass down so it is closer in magnitude to the other columns.
            atom.GetMass() * 0.01,
        ]
        for atom in mol.GetAtoms()
    ]
    # The reshape keeps the (0, 11) shape for a molecule without atoms.
    atom_matrix = torch.tensor(per_atom_rows).reshape(-1, 11)
    return {'atom_feats': atom_matrix.float()}

In [4]:
def featurize_bonds(mol):
    """Compute a 5-dimensional feature vector for every directed edge of ``mol``.

    Columns, in order: bond-type index (0 = single, 1 = double, 2 = triple,
    3 = aromatic), conjugation flag, ring-membership flag, stereo
    configuration, and bond direction (the last two as the integer value
    of the corresponding RDKit enum).

    Every bond contributes two identical, consecutive rows, because the
    bidirected graph built by ``smiles_to_bigraph`` stores each bond as a
    pair of directed edges (u -> v, v -> u).

    Args:
        mol: Molecule object exposing ``GetBonds()``; each bond must provide
            the RDKit-style accessors called below.

    Returns:
        A dict with the single key ``'bond_feats'`` mapping to an int64
        tensor of shape ``(2 * num_bonds, 5)``.

    Raises:
        ValueError: If a bond has a type other than single, double, triple
            or aromatic.
    """
    bond_types = [Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
                  Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC]
    feats = []
    for bond in mol.GetBonds():
        bond_type = bond.GetBondType()
        try:
            btype = bond_types.index(bond_type)
        except ValueError:
            raise ValueError(f"Unsupported bond type: {bond_type}") from None
        # Assemble the complete feature row of this bond first. Appending
        # each feature twice to a flat list and reshaping to 5 columns
        # would mix different features within a row.
        row = [
            btype,
            int(bond.GetIsConjugated()),
            int(bond.IsInRing()),
            int(bond.GetStereo()),
            int(bond.GetBondDir()),
        ]
        # One row per direction of the bond.
        feats.append(row)
        feats.append(row)
    # An explicit dtype keeps bond-less molecules int64 like all others;
    # the reshape keeps the (0, 5) shape for them.
    return {'bond_feats': torch.tensor(feats, dtype=torch.int64).reshape(-1, 5)}

In [7]:
# Build one featurized, bidirected DGL graph per SMILES string; hydrogens are
# kept as explicit nodes.
graphs = [
    smiles_to_bigraph(
        smiles,
        node_featurizer=featurize_atoms,
        edge_featurizer=featurize_bonds,
        explicit_hydrogens=True,
    )
    for smiles in ("C", "N", "O")
]

In [8]:
# Show the node IDs and the node feature dict of the first graph (SMILES "C").
graphs[0].nodes(), graphs[0].ndata

(tensor([0, 1, 2, 3, 4], dtype=torch.int32),
 {'atom_feats': tensor([[1.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0101],
         [6.0000, 4.0000, 4.0000, 4.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.1201],
         [1.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0101],
         [1.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0101],
         [1.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0101]])})

In [22]:
# Show the (source, destination) edge tensors and the edge feature dict of
# the first graph (SMILES "C").
graphs[0].edges(), graphs[0].edata

((tensor([1, 2, 1, 3, 1, 4, 1, 0], dtype=torch.int32),
  tensor([2, 1, 3, 1, 4, 1, 0, 1], dtype=torch.int32)),
 {'bond_feats': tensor([[0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0]])})

In [23]:
# Write the whole list of graphs, including node/edge features, to ./graphs.bin.
save_graphs("./graphs.bin", graphs)

In [24]:
# Read the graphs back; the result is a (list of graphs, label dict) tuple.
# The label dict is empty here because no labels were passed when saving.
load_graphs("./graphs.bin")

([Graph(num_nodes=5, num_edges=8,
        ndata_schemes={'atom_feats': Scheme(shape=(11,), dtype=torch.float32)}
        edata_schemes={'bond_feats': Scheme(shape=(5,), dtype=torch.int64)}),
  Graph(num_nodes=4, num_edges=6,
        ndata_schemes={'atom_feats': Scheme(shape=(11,), dtype=torch.float32)}
        edata_schemes={'bond_feats': Scheme(shape=(5,), dtype=torch.int64)}),
  Graph(num_nodes=3, num_edges=4,
        ndata_schemes={'atom_feats': Scheme(shape=(11,), dtype=torch.float32)}
        edata_schemes={'bond_feats': Scheme(shape=(5,), dtype=torch.int64)})],
 {})