In [19]:
import torch.utils.data
from dgl.data import tu
import dgl

from core.dataloader.constants import TRAIN_RATIO, TEST_RATIO
from core.utils import read_params
from core.model import Model
import os

In [12]:
def make_data_loader(batch_size, dataset_name='AIDS', cuda=False):
    """
    Create train/val/test dataloaders
    :param batch_size: batch size (applies for train/test/val)
    :param dataset_name: dataset name, to take from TU dortmund dataset
                         (https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets)
    :param cuda: if cuda is available
    :return: train_dataloader, val_dataloader, test_dataloader
    """

    # 1. create train/val/test datasets
    dataset = tu.LegacyTUDataset(name=dataset_name)
    #print(dataset)
    preprocess(dataset, cuda)

    train_size = int(TRAIN_RATIO * len(dataset))
    print(train_size)
    test_size = int(TEST_RATIO * len(dataset))
    print(test_size)
    val_size = int(len(dataset) - train_size - test_size)
    dataset_train, dataset_val, dataset_test = torch.utils.data.random_split(
        dataset, (train_size, val_size, test_size))

    # 2. create train/val/test dataloader
    train_dataloader = torch.utils.data.DataLoader(dataset_train,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   collate_fn=collate
                                                   )

    val_dataloader = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 collate_fn=collate
                                                 )

    test_dataloader = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  collate_fn=collate
                                                  )

    #return train_dataloader, val_dataloader, test_dataloader
    return dataset_train

In [13]:
def collate(data):
    """
    Collate function
    """
    graphs, labels = map(list, zip(*data))
    batched = dgl.batch(graphs)
    labels = torch.LongTensor(labels)
    return batched, labels


def preprocess(dataset, cuda):
    """
    Preprocess graphs by casting into FloatTensor and setting to cuda if available
    :param dataset: (LegacyTUDataset)
    :param cuda: (bool) if cuda is available
    :return:
    """
    for g, _ in dataset:
        for key_g, val_g in g.ndata.items():
            processed = g.ndata.pop(key_g)
            processed = processed.type('torch.FloatTensor')
            if cuda:
                processed = processed.cuda()
            g.ndata[key_g] = processed
        for key_g, val_g in g.edata.items():
            processed = g.edata.pop(key_g)
            processed = processed.type('torch.FloatTensor')
            if cuda:
                processed = processed.cuda()
            g.edata[key_g] = processed



In [5]:
dataloader, val_dataloader, test_dataloader = make_data_loader(batch_size=16)

1200
400


In [14]:
data_train = make_data_loader(batch_size=16)

1200
400


In [16]:
data_train.dataset.

<dgl.data.tu.LegacyTUDataset at 0x7f9c864c66d0>

In [6]:
train_count = 0;
val_count = 0;
test_count = 0;


for iter, (graphs, labels) in enumerate(dataloader):
    train_count = train_count+1
    print(labels)
    print(graphs)

for iter, (graphs, labels) in enumerate(val_dataloader):
    val_count = val_count+1
    #print(graphs)
    
for iter, (graphs, labels) in enumerate(test_dataloader):
    test_count = test_count+1
    #print(graphs)

tensor([0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
Graph(num_nodes=182, num_edges=358,
      ndata_schemes={'feat': Scheme(shape=(4,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.float32)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.float32)})
tensor([1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1])
Graph(num_nodes=261, num_edges=546,
      ndata_schemes={'feat': Scheme(shape=(4,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.float32)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.float32)})
tensor([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1])
Graph(num_nodes=266, num_edges=558,
      ndata_schemes={'feat': Scheme(shape=(4,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.float32)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.float32)})
tensor([0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1])
Graph(num_nodes=227, num_edges=460,
      ndata_schemes={'feat': Scheme(shape=(4,), dtype=torch.float32), '_ID':

In [7]:
print(train_count)
print(val_count)
print(test_count)

75
25
25


In [8]:
# for iter in enumerate(dataloader):
#     print(iter)

# Explore dataset

In [8]:
dataset = tu.LegacyTUDataset(name='Letter-low')

In [11]:
dataset.graph_lists

[Graph(num_nodes=5, num_edges=6,
       ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
       edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}),
 Graph(num_nodes=4, num_edges=6,
       ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
       edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}),
 Graph(num_nodes=4, num_edges=6,
       ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
       edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}),
 Graph(num_nodes=5, num_edges=8,
       ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
       edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}),
 Graph(num_nodes=4, num_edges=4,
       ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=to

In [9]:
for g, _ in dataset:
    print(g)
    print (_)

Graph(num_nodes=5, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(0)
Graph(num_nodes=4, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(1)
Graph(num_nodes=4, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(2)
Graph(num_nodes=5, num_edges=8,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(3)
Graph(num_nodes=4, num_edges=4,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Sch

tensor(2)
Graph(num_nodes=5, num_edges=8,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(14)
Graph(num_nodes=5, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(0)
Graph(num_nodes=5, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(6)
Graph(num_nodes=3, num_edges=4,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(2)
Graph(num_nodes=7, num_edges=10,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64)

tensor(11)
Graph(num_nodes=2, num_edges=2,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(12)
Graph(num_nodes=5, num_edges=6,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(6)
Graph(num_nodes=3, num_edges=4,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(2)
Graph(num_nodes=6, num_edges=8,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64), '_ID': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
tensor(0)
Graph(num_nodes=5, num_edges=8,
      ndata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float64)

# Run main GNN

In [20]:
#!/usr/bin/env python3
"""
Run model script.
"""
import torch
import argparse
import numpy as np

from core.dataloader.dataloader import make_data_loader
from core.utils import read_params
from core.model import Model


def main():
    
    SAVE_DIR = 'models'
    MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'mlp-cgnn_v1.pt')
    
    if not os.path.isdir(f'{SAVE_DIR}'):
        os.makedirs(f'{SAVE_DIR}')
        
        
    best_valid_loss = float('inf')
    
    # add all arguments required here:
    
    lr = 1e-3
    weight_decay = 5e-4
    n_epochs = 100
    batch_size = 16
    eval_every = 50
    config_fpath = '../gnn-min-example-master/core/config/config_file_binary.json'

    config_params = read_params(config_fpath)

#     if args.gpu < 0:
#         cuda = False
#     else:
#         cuda = True
#         torch.cuda.set_device(args.gpu)
    
    cuda = False

    print('*** Create data loader ***')
    dataloader, val_dataloader, test_dataloader = make_data_loader(
        batch_size,
        dataset_name='Letter-low',
        cuda=cuda
    )

    print('*** Create model ***')
    model = Model(config=config_params, verbose=True, cuda=cuda)
    if cuda:
        model.cuda()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    # loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Start training
    print('*** Start training ***')
    step = 0
    model.train()
    losses = []
    for epoch in range(n_epochs):
        for iter, (graphs, labels) in enumerate(dataloader):

            # forward pass
            logits = model(graphs)

            # compute loss
            loss = loss_fcn(logits, labels)
            losses.append(loss.item())

            # backpropagate
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # testing
            step += 1
            if step % eval_every == 0:
                val_loss, val_acc = test(val_dataloader, model, loss_fcn)
                print(
                    "Step {:05d} | Train loss {:.4f} | Over {} | Val loss {:.4f} |"
                    "Val acc {:.4f}".format(
                        step,
                        np.mean(losses),
                        len(losses),
                        val_loss,
                        val_acc,
                    ))
                model.train()
    
    model.load_state_dict(torch.load(MODEL_SAVE_PATH))
    print('*** Start Testing ***')
    test_loss, test_acc = test(test_dataloader, model, loss_fcn)
    print("Test loss {:.4f} | Test acc {:.4f}".format(test_loss, test_acc))


def test(data_loader, model, loss_fcn):
    """
    Testing
    :param data_loader: (data.Dataloader)
    :param model: (Model)
    :param loss_fcn: (torch.nn loss)
    :return: loss, accuracy
    """
    model.eval()
    losses = []
    accuracies = []
    for iter, (graphs, labels) in enumerate(data_loader):

        logits = model(graphs)

        loss = loss_fcn(logits, labels)
        losses.append(loss.item())

        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        accuracies.append(correct.item() * 1.0 / len(labels))

    return np.mean(losses), np.mean(accuracies)

In [21]:
if __name__ == '__main__':

    main()

*** Create data loader ***
*** Create model ***
Creating GNN layers...
Creating new GNN layer:
MLP layer 0 has params Sequential(
  (fc): Linear(in_features=2, out_features=2, bias=True)
  (bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
MLP layer 1 has params Sequential(
  (fc): Linear(in_features=2, out_features=2, bias=True)
  (bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
)
Creating new GNN layer:
MLP layer 0 has params Sequential(
  (fc): Linear(in_features=2, out_features=2, bias=True)
  (bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
MLP layer 1 has params Sequential(
  (fc): Linear(in_features=2, out_features=2, bias=True)
  (bn): BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
)
Creating new GNN layer:
MLP layer 0 has params Sequential(
  (fc): Linear(in_features=2, out_features=2, bias=True)
  (bn):

RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at /opt/conda/conda-bld/pytorch_1570910687650/work/aten/src/THNN/generic/ClassNLLCriterion.c:97