### In this file, we will conduct all of our tests

In [1]:
!pip install nbimporter



In [1]:
import pandas as pd
import numpy as np
import torch
from dgl.dataloading.pytorch import GraphDataLoader
from tqdm.notebook import tqdm
import nbimporter
import dataset as ds
import model as mfile
from score import test

Using backend: pytorch


In [2]:
train_dataset = ds.SyntheticDataset()
batch_size = 1

# We want batch size to be 1 because do not want batched graphs (as this is not the correct structure of our individual molecules)
train_dataloader = GraphDataLoader(train_dataset, batch_size = batch_size, shuffle = False)

In [3]:
from sklearn.metrics import mean_absolute_error as MAE

def train(model, epochs, file_name='electron_mpnn.pth', output=False, debug_batch_interval=5):
    optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
    
    best_mae = None
    model.train()
    for epoch in tqdm(range(epochs), position=0, desc="Epochs"):
        
        running, ct = 0, 0
        print('Epoch', epoch+1)
        for batch_idx, (graph, label) in tqdm(enumerate(train_dataloader), position=1, desc="Batches", total=len(train_dataloader) * batch_size):
            optimizer.zero_grad()

            bf = graph.edata['bond_feats'].float()
            af = graph.ndata['atom_feats'].float()
            y_pred = model(graph, af, bf)
            print(y_pred)
            
#             if y_pred.item() == 0:
#                 print(batch_idx, "pred = 0")
#             if y_pred.item() == 0 and len(DEBUG_PREDS) > 5 and sum(DEBUG_PREDS[-5:-1]) == 0:
#                 print("Cut")
#                 return
            
            # The 23.06 is the same value used in score.py (conversion to kcal/mol)
            # L1 is MAE, L2 is MSE
            loss = torch.nn.functional.l1_loss(y_pred.reshape(1), label) * 23.06 # ((y_pred.reshape(1,-1) - batch_y)**2).sum()
            running += loss.item()
            ct += 1
            loss.backward()
            optimizer.step()
            
            # Every debug_batch_interval iterations, print the data we've churned through (iterations * data per batch)
            if output and batch_idx % (len(train_dataloader) // debug_batch_interval) == 0:                
                print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.0f}'.format(
                          epoch, batch_idx, len(train_dataloader) * batch_size,
                          100. * batch_idx / len(train_dataloader), loss.item()))
                
        this_loss = running / ct
        if output:
            print("\nAverage Loss:", round(running / ct * 100) / 100.0,"\n")
        else:
            print("Epoch", epoch+1, "Average Loss:", round(this_loss * 100) / 100.0)
            
        # Save our model
        if not best_mae:
            best_mae = this_loss
            checkpoint = {'state_dict': model.state_dict(),'optimizer': optimizer.state_dict()}
            torch.save(checkpoint, file_name)
        if this_loss < best_mae:
            best_mae = this_loss
            torch.save(checkpoint, file_name)

#### Create and Train Model

In [4]:
# All graphs in the list have the same scheme size, so pull the dimensions from the first
node_dim = train_dataset[0][0].ndata['atom_feats'].shape[1]
edge_dim = train_dataset[0][0].edata['bond_feats'].shape[1]
print("Dimensions:", node_dim, "(node),", edge_dim, "(edge)")

Dimensions: 11 (node), 5 (edge)


In [6]:
model = mfile.Electron_MPNN(node_dim, edge_dim, out_dim=1)

# Attempt to load model if electron_mpnn.pth exists (check with os)

#### Our Model

Basic Description: \
Our model follows a similar architecture as the MPNN model. It consists of a two linear layers (one at the front, one at the end), a convolution layer, and a GRU layer.

- **fc1**: This linear + relu is our first "line of attack," looking for connectings between our data before we lose information on individual atoms via convolution
- **gnn_layer**: This layer uses convolution involving two hidden layers to try and grab information about neighbors in an efficient manner
- **gru**: To be completely honest, I am not entirely sure I understand GRUs. However, it has appeared effective in the MPNN model and thus we will be experimenting/researching with it to better understand and determine its value for our model
- **fc2**: This fully-connected layer serves as our final decision maker, projecting back into 1 dimension (granted there is only 1 dimension at this point anyways) and trying to making sense of the previously convoluted data

In [7]:
print(model)

Electron_MPNN(
  (fc1): Sequential(
    (0): Linear(in_features=11, out_features=1, bias=True)
    (1): ReLU()
  )
  (gnn_layer): NNConv(
    (edge_func): Sequential(
      (0): Linear(in_features=5, out_features=128, bias=True)
      (1): ReLU()
      (2): Linear(in_features=128, out_features=1, bias=True)
    )
  )
  (gru): GRU(1, 1)
  (fc2): Sequential(
    (0): Linear(in_features=1, out_features=1, bias=True)
  )
)


In [6]:
train(model, 1, output=True)

Epochs:   0%|          | 0/1 [00:00<?, ?it/s]

Epoch 1


Batches:   0%|          | 0/1210 [00:00<?, ?it/s]

tensor(-2925.4392, grad_fn=<MeanBackward0>)
tensor(-2925.4592, grad_fn=<MeanBackward0>)
tensor(-2925.4790, grad_fn=<MeanBackward0>)
tensor(-2925.4990, grad_fn=<MeanBackward0>)
tensor(-2925.5076, grad_fn=<MeanBackward0>)
tensor(-2925.5085, grad_fn=<MeanBackward0>)
tensor(-2925.5056, grad_fn=<MeanBackward0>)
tensor(-2925.5068, grad_fn=<MeanBackward0>)
tensor(-2925.5117, grad_fn=<MeanBackward0>)
tensor(-2925.5188, grad_fn=<MeanBackward0>)
tensor(-2925.5212, grad_fn=<MeanBackward0>)
tensor(-2925.5210, grad_fn=<MeanBackward0>)
tensor(-2925.5181, grad_fn=<MeanBackward0>)
tensor(-2925.5127, grad_fn=<MeanBackward0>)
tensor(-2925.5056, grad_fn=<MeanBackward0>)
tensor(-2925.4963, grad_fn=<MeanBackward0>)
tensor(-2925.4863, grad_fn=<MeanBackward0>)
tensor(-2925.4744, grad_fn=<MeanBackward0>)
tensor(-2925.4624, grad_fn=<MeanBackward0>)
tensor(-2925.4485, grad_fn=<MeanBackward0>)
tensor(-2925.4343, grad_fn=<MeanBackward0>)
tensor(-2925.4197, grad_fn=<MeanBackward0>)
tensor(-2925.4045, grad_fn=<Mean

KeyboardInterrupt: 

In [None]:
# Min loss = 89,224,320.50206168
# Min loss = 81,948,332

# Epoch: 24 [0/1210 (0%)]	Loss: 10789
# Epoch: 24 [242/1210 (20%)]	Loss: 28988
# Epoch: 24 [484/1210 (40%)]	Loss: 13231
# Epoch: 24 [726/1210 (60%)]	Loss: 13318
# Epoch: 24 [968/1210 (80%)]	Loss: 735

# Average Loss: 12541.95

### Load best model

In [5]:
best_model = mfile.Electron_MPNN(node_dim, edge_dim, out_dim=1)
best_model.load_state_dict(torch.load("electron_mpnn_v1_ReLU.pth")["state_dict"])

<All keys matched successfully>

In [43]:
best_model.fc2.weight

Parameter containing:
tensor([[1458.2877]], requires_grad=True)