### In this file, we will define our model and parameters

Instructions: 

Finally, you should have at least one notebook where you define and train your model.

In [3]:
!pip install nbimporter



In [1]:
import pandas as pd
import numpy as np
import torch
from dgl.data import DGLDataset
from dgl.data.utils import save_graphs, load_graphs

import nbimporter
import graphize_data as graphize

Using backend: pytorch


### Model

In [2]:
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F

from dgl.nn.pytorch import NNConv
from dgl.nn.pytorch import GatedGraphConv

#### Ronan make your model here

In [3]:
# Model

#### RJ model

In [4]:
# Things to experiment with: SumPooling layer? Kind of like a conv, can sum up neighbors and find important structural properties of molecule
# from dgl.nn import SumPooling

# Also a good read here (https://arxiv.org/pdf/1710.10370.pdf) on Topology Adaptive Graph Convolutional layer might be useful.
# From my takeaways, we can use this "topology" to better track/weight the important parts of the structure
# (maybe the center of the molecule is important, or specific edge portions of the molecule) TAG can find this

# SAGEConv

# Ultimately, the NNConv layer, as used in the MPNN model, is the best one for this situation.
# We have come to this conclusion after trying various other convolution modules
# and finally reading up on this paper which relates closely to what we are doing (https://arxiv.org/pdf/1704.01212.pdf)
# (Quantum Chemistry computations)

# Note they used GatedGraphConv first and that worked okay but not as well as NNConv... we could look to backtrack to GGC
class Electron_MPNN(nn.Module):
    """Node encoder: projection -> NNConv -> GRU -> GatedGraphConv -> GRU.

    Parameters
    ----------
    node_in_feats : int
        Size of each input node feature vector.
    edge_in_feats : int
        Size of each input edge feature vector. It is also used as the
        number of edge types of the gated graph convolution.
    node_out_feats : int, optional
        Width of the hidden and output node representation. Default: 128.
    edge_hidden_feats : int, optional
        Hidden width of the edge network handed to NNConv. Default: 256.
    """

    def __init__(self, node_in_feats, edge_in_feats, node_out_feats=128,
                 edge_hidden_feats=256):
        super(Electron_MPNN, self).__init__()

        # Project the raw node features to the hidden width.
        self.fc1 = nn.Sequential(
            nn.Linear(node_in_feats, node_out_feats),
            nn.ReLU()
        )

        # Edge network: turns every edge feature vector into
        # node_out_feats * node_out_feats values used by NNConv.
        edge_func = nn.Sequential(
            nn.Linear(edge_in_feats, edge_hidden_feats),
            nn.ReLU(),
            nn.Linear(edge_hidden_feats, node_out_feats * node_out_feats)
        )

        self.gnn1 = NNConv(
            in_feats=node_out_feats,
            out_feats=node_out_feats,
            edge_func=edge_func,
            aggregator_type='sum'
        )

        # The gated convolution is configured with one edge type per edge
        # feature dimension.
        self.gnn2 = GatedGraphConv(
            in_feats=node_out_feats,
            out_feats=node_out_feats,
            n_steps=2,
            n_etypes=edge_in_feats
        )

        # A single GRU is shared by both update steps (as in MPNN) to
        # counter vanishing gradients across the convolution layers.
        self.gru = nn.GRU(node_out_feats, node_out_feats)

    def forward(self, g, node_feats, edge_feats):
        """Compute updated node representations.

        Parameters
        ----------
        g : graph object accepted by the DGL convolution layers
        node_feats : float tensor of shape (V, node_in_feats)
        edge_feats : float tensor of shape (E, edge_in_feats)

        Returns
        -------
        Tensor of shape (V, node_out_feats).
        """
        node_feats = self.fc1(node_feats)                # (V, node_out_feats)

        # Initial GRU hidden state; nn.GRU expects a leading layer dimension.
        hidden_feats = node_feats.unsqueeze(0)           # (1, V, node_out_feats)

        # Conv 1: edge-conditioned convolution on the full edge feature vectors.
        node_feats = self.gnn1(g, node_feats, edge_feats)

        # GRU requires 3 dimensions: (seq_len=1, V, node_out_feats).
        node_feats, hidden_feats = self.gru(node_feats.unsqueeze(0), hidden_feats)
        node_feats = node_feats.squeeze(0)

        # Conv 2: GatedGraphConv expects ONE integer type id per edge
        # (shape (E,)), not the (E, edge_in_feats) float matrix.
        # NOTE(review): argmax assumes each bond feature row is a one-hot
        # style encoding of the bond type -- confirm against the featurizer.
        edge_types = edge_feats.argmax(dim=1)
        node_feats = self.gnn2(g, node_feats, edge_types)

        # Second GRU update, reusing the shared GRU and carried hidden state.
        node_feats, hidden_feats = self.gru(node_feats.unsqueeze(0), hidden_feats)
        node_feats = node_feats.squeeze(0)

        return node_feats

In [5]:
from dgl.nn.pytorch import Set2Set
class MPNNPredictor(nn.Module):
    """Graph-level predictor: Electron_MPNN encoder -> Set2Set readout -> MLP.

    Parameters
    ----------
    node_in_feats : int
        Size of each input node feature vector.
    edge_in_feats : int
        Size of each input edge feature vector.
    node_out_feats : int, optional
        Width of the node representation produced by the encoder. Default: 128.
    edge_hidden_feats : int, optional
        Hidden width of the encoder's edge network. Default: 256.
    n_tasks : int, optional
        Number of outputs of the final linear layer. Default: 5.
    num_step_set2set : int, optional
        Number of Set2Set iterations. Default: 6.
    num_layer_set2set : int, optional
        Number of recurrent layers inside Set2Set. Default: 3.
    """

    def __init__(self,
                 node_in_feats,
                 edge_in_feats,
                 node_out_feats=128,
                 edge_hidden_feats=256,
                 n_tasks=5,
                 num_step_set2set=6,
                 num_layer_set2set=3
                ):
        super(MPNNPredictor, self).__init__()

        self.gnn = Electron_MPNN(node_in_feats=node_in_feats,
                                 node_out_feats=node_out_feats,
                                 edge_in_feats=edge_in_feats,
                                 edge_hidden_feats=edge_hidden_feats
                                )

        self.readout = Set2Set(input_dim=node_out_feats,
                               n_iters=num_step_set2set,
                               n_layers=num_layer_set2set)

        # The head consumes 2 * node_out_feats values per graph from the readout.
        self.predict = nn.Sequential(
            nn.Linear(2 * node_out_feats, node_out_feats),
            nn.ReLU(),
            nn.Linear(node_out_feats, n_tasks)
        )

    def forward(self, g, node_feats=None, edge_feats=None):
        """Predict a single scalar for ``g``.

        Parameters
        ----------
        g : graph or str
            A graph, or a SMILES string that is converted to a graph first
            (used for evaluation).
        node_feats, edge_feats : tensor, optional
            Node / edge features. When omitted they are read from
            ``g.ndata['atom_feats']`` and ``g.edata['bond_feats']``. They are
            always taken from the graph when ``g`` is a SMILES string.

        Returns
        -------
        0-dim tensor: the mean over every value produced by the prediction head.
        """
        # If trying to make a prediction based on smiles object (for evaluation), convert to graph
        if isinstance(g, str):
            g = graphize.smiles_to_bigraph(g,
                                       node_featurizer = graphize.featurize_atoms,
                                       edge_featurizer = graphize.featurize_bonds,
                                       explicit_hydrogens = True
                                 )
            # Features must come from the freshly built graph.
            node_feats = None
            edge_feats = None

        # Fall back to the features stored on the graph so the None defaults
        # are usable for graph inputs too (previously they crashed).
        if node_feats is None:
            node_feats = g.ndata['atom_feats'].float()
        if edge_feats is None:
            edge_feats = g.edata['bond_feats'].float()

        node_feats = self.gnn(g, node_feats, edge_feats)

        graph_feats = self.readout(g, node_feats)

        # Collapse all head outputs into one scalar.
        return self.predict(graph_feats).mean()

#### Load the graphs to get dimensions for our model

In [6]:
# Read the serialized graphs from disk; the second return value is not needed here.
GRAPH_FILE = "./DataGraphs/data_F_graph.bin"
graphs, _ = load_graphs(GRAPH_FILE)

#### Construct the model

In [7]:
# Every graph shares the same feature widths, so the first one is representative
reference_graph = graphs[0]
node_dim = reference_graph.ndata['atom_feats'].size(1)
edge_dim = reference_graph.edata['bond_feats'].size(1)
print("Dimensions:", node_dim, "(node),", edge_dim, "(edge)")

Dimensions: 11 (node), 5 (edge)


In [46]:
# Full predictor (encoder + readout + head); the bare encoder alternative is
# Electron_MPNN(node_dim, edge_dim).
model = MPNNPredictor(node_in_feats=node_dim, edge_in_feats=edge_dim)

In [8]:
# One graph used as a smoke-test input, with its features cast to float32
sample_data = graphs[3]
nodes = sample_data.ndata['atom_feats'].to(torch.float32)
edges = sample_data.edata['bond_feats'].to(torch.float32)

In [72]:
# Smoke test: forward pass of the predictor on the sample graph
model(sample_data, node_feats=nodes, edge_feats=edges)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (26x11 and 6x64)

In [20]:
# Things to experiment with: SumPooling layer? Kind of like a conv, can sum up neighbors and find important structural properties of molecule
# from dgl.nn import SumPooling

# Also a good read here (https://arxiv.org/pdf/1710.10370.pdf) on Topology Adaptive Graph Convolutional layer might be useful.
# From my takeaways, we can use this "topology" to better track/weight the important parts of the structure
# (maybe the center of the molecule is important, or specific edge portions of the molecule) TAG can find this

# SAGEConv

# Ultimately, the NNConv layer, as used in the MPNN model, is the best one for this situation.
# We have come to this conclusion after trying various other convolution modules
# and finally reading up on this paper which relates closely to what we are doing (https://arxiv.org/pdf/1704.01212.pdf)
# (Quantum Chemistry computations)

# Note they used GatedGraphConv first and that worked okay but not as well as NNConv... we could look to backtrack to GGC
class Electron_MPNN_old(nn.Module):
    """Earlier single-layer model: linear projection followed by one NNConv.

    The forward pass returns a one-element list holding the mean of all
    output node features. The GRU step of the MPNN recipe is disabled here.
    """

    def __init__(self, node_in_feats, edge_in_feats, node_out_feats=1,
                 edge_hidden_feats=128):
        super().__init__()

        # Input projection followed by a ReLU
        projection = nn.Linear(node_in_feats, node_out_feats)
        self.fc1 = nn.Sequential(projection, nn.ReLU())

        # Edge network handed to NNConv; it emits node_out_feats^2 values per edge
        edge_network = nn.Sequential(
            nn.Linear(edge_in_feats, edge_hidden_feats),
            nn.ReLU(),
            nn.Linear(edge_hidden_feats, node_out_feats ** 2),
        )

        self.gnn_layer = NNConv(
            in_feats=node_out_feats,
            out_feats=node_out_feats,
            edge_func=edge_network,
            aggregator_type='sum',
        )

    def forward(self, g, node_feats=None, edge_feats=None):
        """Return ``[mean of the convolved node features]`` for a graph or SMILES string."""
        # A SMILES string (evaluation path) is turned into a graph first and
        # its own atom/bond features replace whatever was passed in.
        given_smiles = isinstance(g, str)
        if given_smiles:
            g = graphize.smiles_to_bigraph(
                g,
                node_featurizer=graphize.featurize_atoms,
                edge_featurizer=graphize.featurize_bonds,
                explicit_hydrogens=True,
            )
            edge_feats = g.edata['bond_feats'].float()
            node_feats = g.ndata['atom_feats'].float()

        projected = self.fc1(node_feats)            # (V, node_out_feats)

        # Would be the initial hidden state of the (currently disabled) GRU step
        hidden_feats = projected.unsqueeze(0)       # (1, V, node_out_feats)

        convolved = self.gnn_layer(g, projected, edge_feats)

        return [convolved.mean()]

In [21]:
model2 = Electron_MPNN_old(node_dim, edge_dim)
# map_location="cpu" keeps the load working on a kernel without CUDA even if
# the checkpoint was written from a GPU.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load("SavedModels/11.5k_electron_custom.pth", map_location="cpu")
model2.load_state_dict(checkpoint["state_dict"])

<All keys matched successfully>

In [22]:
# Sanity check: forward pass of the reloaded old model on the sample graph
model2(sample_data, node_feats=nodes, edge_feats=edges)

[tensor(-2862.0132, grad_fn=<MeanBackward0>)]

In [23]:
from score import test

In [24]:
# Run the scoring routine imported from score.py on the reloaded old model.
# NOTE(review): "Electron" presumably selects the target/dataset being scored -- confirm in score.py.
test(model2, "Electron")

0 / 1291
20 / 1291
40 / 1291
60 / 1291
80 / 1291
100 / 1291
120 / 1291
140 / 1291
160 / 1291
180 / 1291
200 / 1291
220 / 1291
240 / 1291
260 / 1291
280 / 1291
300 / 1291
320 / 1291
340 / 1291
360 / 1291
380 / 1291
400 / 1291
420 / 1291
440 / 1291
460 / 1291
480 / 1291
500 / 1291
520 / 1291
540 / 1291
560 / 1291
580 / 1291
600 / 1291
620 / 1291
640 / 1291
660 / 1291
680 / 1291
700 / 1291
720 / 1291
740 / 1291
760 / 1291
780 / 1291
800 / 1291
820 / 1291
840 / 1291
860 / 1291
880 / 1291
900 / 1291
920 / 1291
940 / 1291
960 / 1291
980 / 1291
1000 / 1291
1020 / 1291
1040 / 1291
1060 / 1291
1080 / 1291
1100 / 1291
1120 / 1291
1140 / 1291
1160 / 1291
1180 / 1291
1200 / 1291
1220 / 1291
1240 / 1291
1260 / 1291
1280 / 1291


18397.111409629724

In [25]:
# Report whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

False