### In this file, we will conduct all of our tests

In [10]:
import pandas as pd
import numpy as np
import torch
from dgl.dataloading.pytorch import GraphDataLoader
from tqdm.notebook import tqdm
import nbimporter
import Data_Structs as ds
import model as mfile
from score import test

In [2]:
# sample code from https://docs.dgl.ai/tutorials/blitz/3_message_passing.html#sphx-glr-tutorials-blitz-3-message-passing-py

In [3]:
def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    all_logits = []
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    for e in range(200):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that we should only compute the losses of the nodes in the training set,
        # i.e. with train_mask 1.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        all_logits.append(logits.detach())

        if e % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))



In [11]:
train_dataset = ds.SyntheticDataset()
train_dataloader = GraphDataLoader(train_dataset, batch_size = 16, shuffle = True)

In [17]:
def train(model, epochs, debug_rate=100):
    optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
    
    mse, running = 0, 0
    for epoch in tqdm(range(epochs)):
        
        count = 0
        for batch_x, batch_y in train_dataloader:
            optimizer.zero_grad()
            
            bf = batch_x.edata['bond_feats'].float()
            af = batch_x.ndata['atom_feats'].float()
            y_pred = model(batch_x, af, bf)
            
            mse = ((y_pred.reshape(1,-1) - batch_y)**2).sum()
            running += mse.item()
            mse.backward()
            optimizer.step()
            
            
            if count % debug_rate == 0:
                print('Sample', count, 'Loss:', mse.item())
#                 printf('Sample %d/%d').format()
            count += 1
        
        return
        print('Epoch', epoch, 'Loss:', mse.item())

#### Create and Train Model

In [18]:
# All graphs in the list have the same scheme size, so pull the dimensions from the first
node_dim = train_dataset[0][0].ndata['atom_feats'].shape[1]
edge_dim = train_dataset[0][0].edata['bond_feats'].shape[1]
print("Dimensions:", node_dim, "(node),", edge_dim, "(edge)")

Dimensions: 11 (node), 5 (edge)


In [19]:
model = mfile.MPNN_Rec(node_dim, edge_dim, out_dim=1)

In [20]:
train(model, 10)

  0%|          | 0/10 [00:00<?, ?it/s]

Sample 0 Loss: tensor(1.7992e+08, dtype=torch.float64, grad_fn=<SumBackward0>)
