In [1]:
import pandas as pd

# Read the full polarizability table, keep only the rows whose `chain_size`
# equals 0, and write that subset to disk (without the index column) so the
# dataset-loading cell can read it back.
polarizability_all = pd.read_csv('../polygraphpy/data/polarizability_data.csv')
is_chain_size_zero = polarizability_all['chain_size'] == 0
df = polarizability_all[is_chain_size_zero]
df.to_csv('filterd_polarizability.csv', index=False)

In [2]:
import torch
from torchdrug import data, models, tasks, core
from torchdrug.layers import distribution
from torchdrug.core import Registry as R
from torch import nn, optim

@R.register("datasets.CustomMolecule")
class CustomMoleculeDataset(data.MoleculeDataset):
    """Molecule dataset registered with torchdrug's registry as ``datasets.CustomMolecule``.

    The class adds no behavior of its own: construction is delegated unchanged
    to ``data.MoleculeDataset``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to ``data.MoleculeDataset``."""
        # NOTE(review): this pass-through looks redundant, but torchdrug may rely on
        # the subclass defining its own __init__ (e.g. for config recording) —
        # confirm before removing or changing the signature.
        super().__init__(*args, **kwargs)

# Instantiate the dataset with no arguments, then populate it from the filtered
# CSV: molecules are read from the 'smiles' column and the single target is
# 'static_polarizability'.
dataset = CustomMoleculeDataset()
dataset.load_csv(
    'filterd_polarizability.csv',
    smiles_field='smiles',
    target_fields=['static_polarizability'],
    kekulize=True,
    atom_feature='symbol',
)



In [3]:
# --- GraphAF setup: shared encoder, priors, flows, task, optimizer, engine ---

# Relational GCN encoder; it is passed to both the node flow and the edge flow.
model = models.RGCN(
    input_dim=dataset.num_atom_type,
    num_relation=dataset.num_bond_type,
    hidden_dims=[128, 128, 128],
    batch_norm=True,
)

num_atom_type = dataset.num_atom_type
# The edge prior has one more dimension than the dataset has bond types.
# NOTE(review): presumably the extra class stands for "no bond", as in the
# torchdrug GraphAF tutorial — confirm.
num_bond_type = dataset.num_bond_type + 1

# Zero-mean, unit-scale independent Gaussian priors, one dimension per class.
node_prior = distribution.IndependentGaussian(
    torch.zeros(num_atom_type), torch.ones(num_atom_type)
)
edge_prior = distribution.IndependentGaussian(
    torch.zeros(num_bond_type), torch.ones(num_bond_type)
)

# Two 12-layer flows built on the same encoder: one for nodes, one for edges.
node_flow = models.GraphAF(model, node_prior, num_layer=12)
edge_flow = models.GraphAF(model, edge_prior, use_edge=True, num_layer=12)

# NOTE(review): max_node=38 and max_edge_unroll=12 are fixed values; verify
# that they suit the molecules in the filtered CSV.
task = tasks.AutoregressiveGeneration(
    node_flow,
    edge_flow,
    max_node=38,
    max_edge_unroll=12,
    criterion='nll',
)

optimizer = optim.Adam(task.parameters(), lr=1e-3)

# Train on GPU 0 when CUDA is available; otherwise pass gpus=None so the engine
# runs on CPU instead of failing on machines without a GPU.
gpus = [0] if torch.cuda.is_available() else None
# `dataset` is the training set; the two None positionals are presumably the
# validation and test sets (none are used here) — confirm against core.Engine.
solver = core.Engine(task, dataset, None, None, optimizer, gpus=gpus, batch_size=32)

16:22:35   Preprocess training set
16:22:35   {'batch_size': 32,
 'class': 'core.Engine',
 'gpus': [0],
 'gradient_interval': 1,
 'log_interval': 100,
 'logger': 'logging',
 'num_worker': 0,
 'optimizer': {'amsgrad': False,
               'betas': (0.9, 0.999),
               'capturable': False,
               'class': 'optim.Adam',
               'differentiable': False,
               'eps': 1e-08,
               'foreach': None,
               'fused': None,
               'lr': 0.001,
               'maximize': False,
               'weight_decay': 0},
 'scheduler': None,
 'task': {'agent_update_interval': 5,
          'baseline_momentum': 0.9,
          'class': 'tasks.AutoregressiveGeneration',
          'criterion': 'nll',
          'edge_model': {'class': 'models.GraphAF',
                         'dequantization_noise': 0.9,
                         'model': {'activation': 'relu',
                                   'batch_norm': True,
                                   'class

In [None]:
# Run training for a fixed number of epochs, then save through the solver.
num_epoch = 10
checkpoint_path = 'graphaf_model.pkl'

solver.train(num_epoch=num_epoch)
solver.save(checkpoint_path)

16:22:37   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
16:22:37   Epoch 0 begin




16:22:38   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
16:22:38   edge log likelihood: -24.3833
16:22:38   edge mask / graph: 175.812
16:22:38   node log likelihood: -3007.38
16:22:38   node mask / graph: 21.0938




16:22:51   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
16:22:51   edge log likelihood: -3.77118
16:22:51   edge mask / graph: 182.625
16:22:51   node log likelihood: -15.2756
16:22:51   node mask / graph: 21.7188
16:23:03   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
16:23:03   edge log likelihood: -2.6299
16:23:03   edge mask / graph: 204.375
16:23:03   node log likelihood: -14.8928
16:23:03   node mask / graph: 23.5312
