In [1]:
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm

class DotDict(dict):
    """Dictionary whose entries are also reachable as attributes.

    The instance's attribute namespace is aliased to the mapping itself, so
    `d.key` and `d['key']` always read and write the same storage.
    """

    def __init__(self, **kwds):
        # Fill the mapping from the keyword arguments first ...
        super().__init__(**kwds)
        # ... then make attribute access go through the very same dict.
        self.__dict__ = self
        
# """
#     AUTORELOAD IPYTHON EXTENSION FOR RELOADING IMPORTED MODULES
# """

def in_ipynb():
    """Tell whether the code is running under IPython/Jupyter.

    Returns True when the name `get_ipython` resolves and the `config`
    attribute of its result can be read; returns False when the name is
    undefined (NameError).
    """
    try:
        get_ipython().config
    except NameError:
        return False
    return True
    
notebook_mode = in_ipynb()
print(notebook_mode)

if notebook_mode == True:
    %load_ext autoreload
    %autoreload 2
    

"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molecules_graphs_regression.load_net import gnn_model # import all GNNS
from data.data import LoadData # import dataset

"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """Export the CUDA device environment variables and pick a torch device.

    Parameters
    ----------
    use_gpu : bool
        Whether a CUDA device should be used when one is available.
    gpu_id : int
        Written (as a string) to the CUDA_VISIBLE_DEVICES environment variable.

    Returns
    -------
    torch.device
        "cuda" when CUDA is available and `use_gpu` is truthy, else "cpu".
        Note that the "cuda not available" message is also printed when CUDA
        exists but `use_gpu` is falsy.
    """
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    # Fall back to the CPU unless CUDA is both present and requested.
    if not (torch.cuda.is_available() and use_gpu):
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:',torch.cuda.get_device_name(0))
    return torch.device("cuda")


# Device selection: CPU by default.
# For the default GPU instead, set: use_gpu = True; gpu_id = 0
use_gpu = False  # do not request CUDA
gpu_id = -1      # forwarded to gpu_setup() by main()
device = None    # module-level placeholder


Using backend: pytorch


True


In [2]:
# Load the dataset once; later cells read `dataset` and `trainset` as globals.
DATASET_NAME = 'ZINC_reformatted'
print("[I] Loading data (notebook) ...")
dataset = LoadData(DATASET_NAME)

# Keep a handle on each split.
trainset = dataset.train
valset = dataset.val
testset = dataset.test
print("[I] Finished loading.")

[I] Loading data (notebook) ...
[I] Loading dataset ZINC_reformatted...
train, test, val sizes : 10000 1000 1000
[I] Finished loading.
[I] Data load time: 13.1682s
[I] Finished loading.


In [3]:
def compare_models(MODEL_NAME, dataset, params, net_params,seed):
    """Evaluate a saved checkpoint of MODEL_NAME on the dataset's test split.

    Parameters
    ----------
    MODEL_NAME : str
        Architecture name handed to `gnn_model`; also part of the checkpoint
        file name.
    dataset :
        Object providing `.test`, `.collate` and `_add_self_loops()`.
    params : dict
        Reads 'seed' (RNG seeding) and 'batch_size' (test loader).
    net_params : dict
        Reads 'device' and, for GCN, 'self_loop'; forwarded to `gnn_model`.
    seed :
        Selects the checkpoint `models/<MODEL_NAME>_<seed>_state_dict.pt`.

    Returns
    -------
    The test MAE returned by `evaluate_network` (it is also printed, after
    the seed).
    """
    from train.train_molecules_graph_regression import evaluate_network_sparse as evaluate_network

    if MODEL_NAME in ['GCN']:
        if net_params['self_loop']:
            print("[!] Adding graph self-loops for GCN/GAT models (central node trick).")
            # NOTE(review): this modifies the dataset object passed in, which
            # callers may share across calls — confirm repeated calls are safe.
            dataset._add_self_loops()

    device = net_params['device']

    # evaluate_network takes an epoch argument; a fixed value is used here.
    epoch = 1

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    model_r = gnn_model(MODEL_NAME, net_params)
    model_r = model_r.to(device)

    # Build the path with os.path.join so it resolves on every platform
    # (a hard-coded backslash only works on Windows).
    checkpoint_path = os.path.join('models', MODEL_NAME + '_' + str(seed) + '_state_dict.pt')
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    model_r.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu')))
    model_r.eval()

    # Keep every test sample: no shuffling, no dropped final batch.
    test_loader_r = DataLoader(dataset.test, batch_size=params['batch_size'], shuffle=False, drop_last=False, collate_fn=dataset.collate)
    _, test_mae_r = evaluate_network(model_r, device, test_loader_r, epoch)
    print(seed)
    print(test_mae_r)
    return test_mae_r
#         dataset_r = LoadData('ZINC_reduced_reformatted')
        
#         model_r = gnn_model(MODEL_NAME, net_params)
#         model_r = model_r.to(device)
#         model_r.load_state_dict(torch.load('models\\' + MODEL_NAME + '_original_' + str(seed) + '_state_dict.pt', map_location=torch.device('cpu')))
#         model_r.eval()
        
#         test_loader_r = DataLoader(dataset_r.test, batch_size=params['batch_size'], shuffle=False, drop_last=drop_last, collate_fn=dataset.collate)
#         _, test_mae_r = evaluate_network(model_r, device, test_loader_r, epoch)

"""
    VIEWING MODEL CONFIG AND PARAMS
"""
def view_model_param(MODEL_NAME, net_params):
    """Instantiate MODEL_NAME via `gnn_model` and report its parameter count.

    Prints a short header and the total, then returns the total number of
    elements over all tensors yielded by `model.parameters()`.
    """
    network = gnn_model(MODEL_NAME, net_params)
    print("MODEL DETAILS:\n")
    # Element count of each parameter tensor, accumulated over the model.
    total_param = sum(
        np.prod(list(tensor.data.size())) for tensor in network.parameters()
    )
    print('MODEL/Total parameters:', MODEL_NAME, total_param)
    return total_param

In [4]:
# Hyperparameters that differ between the architectures main() supports.
_MODEL_HPARAMS = {
    'GCN': {'batch_size': 5, 'init_lr': 5e-5, 'hidden_dim': 145, 'n_mlp_GIN': -1},
    'GIN': {'batch_size': 50, 'init_lr': 5e-4, 'hidden_dim': 110, 'n_mlp_GIN': 2},
    'GraphSage': {'batch_size': 50, 'init_lr': 5e-5, 'hidden_dim': 108, 'n_mlp_GIN': -1},
}


def main(MODEL_NAME, seed):
    """Assemble the configuration for MODEL_NAME and evaluate its checkpoint.

    Builds `net_params` and `params`, selects the device with `gpu_setup`,
    prints the parameter count via `view_model_param`, and hands everything
    to `compare_models`.

    Relies on notebook globals defined in earlier cells: `trainset`,
    `dataset`, `use_gpu` and `gpu_id`.

    Parameters
    ----------
    MODEL_NAME : str
        One of 'GCN', 'GIN' or 'GraphSage'.
    seed :
        Stored in params['seed'] and forwarded to `compare_models`.

    Raises
    ------
    ValueError
        If MODEL_NAME is not a supported architecture (previously this
        surfaced later as an UnboundLocalError on `hidden_dim`).
    """
    if MODEL_NAME not in _MODEL_HPARAMS:
        raise ValueError(
            "Unsupported MODEL_NAME {!r}; expected one of {}".format(
                MODEL_NAME, sorted(_MODEL_HPARAMS)
            )
        )
    hparams = _MODEL_HPARAMS[MODEL_NAME]
    batch_size = hparams['batch_size']
    hidden_dim = hparams['hidden_dim']

    # Placeholder value for settings that belong to architectures not run here.
    unused = -1
    pool_ratio = unused

    net_params = {}
    if MODEL_NAME == 'GCN':
        net_params['edim'] = 1

    net_params['num_atom_type'] = 28
    net_params['num_bond_type'] = 4
    net_params['residual'] = True
    net_params['hidden_dim'] = hidden_dim
    net_params['out_dim'] = hidden_dim
    net_params['in_dim'] = 28
    net_params['n_heads'] = unused
    net_params['L'] = 4  # min L should be 2
    # NOTE(review): the per-model settings originally declared readout='mean'
    # in an unused local, while 'sum' is what is actually passed to the
    # network — confirm which one the checkpoints were trained with.
    net_params['readout'] = "sum"
    net_params['layer_norm'] = True
    net_params['batch_norm'] = True
    net_params['in_feat_dropout'] = 0.0
    net_params['dropout'] = 0.0
    net_params['edge_feat'] = False
    net_params['self_loop'] = False
    net_params['dgl_builtin'] = True

    # for MLPNet
    net_params['gated'] = False

    # specific for MoNet
    net_params['pseudo_dim_MoNet'] = unused
    net_params['kernel'] = unused

    # specific for GIN
    net_params['n_mlp_GIN'] = hparams['n_mlp_GIN']
    net_params['learn_eps_GIN'] = True
    net_params['neighbor_aggr_GIN'] = 'sum'

    # specific for graphsage
    net_params['sage_aggregator'] = 'mean'

    # specific for diffpoolnet
    net_params['data_mode'] = 'default'
    net_params['gnn_per_block'] = unused
    net_params['embedding_dim'] = unused
    net_params['pool_ratio'] = pool_ratio
    net_params['linkpred'] = True
    net_params['num_pool'] = 1
    net_params['cat'] = False
    net_params['batch_size'] = batch_size

    # specific for RingGNN
    net_params['radius'] = 2
    num_nodes = [trainset[i][0].number_of_nodes() for i in range(len(trainset))]
    net_params['avg_node_num'] = int(np.ceil(np.mean(num_nodes)))

    # specific for 3WLGNN
    net_params['depth_of_mlp'] = 2

    # calculate assignment dimension: pool_ratio * largest graph's maximum
    # number of nodes  in the dataset
    max_num_node = max(num_nodes)
    net_params['assign_dim'] = int(max_num_node * pool_ratio) * batch_size

    # specific for pos_enc_dim
    net_params['pos_enc'] = True
    net_params['pos_enc_dim'] = 8

    # run / optimisation parameters
    params = {
        'seed': seed,
        'epochs': 1000,
        'batch_size': batch_size,
        'init_lr': hparams['init_lr'],
        'lr_reduce_factor': 0.5,
        'lr_schedule_patience': 25,
        'min_lr': 1e-6,
        'weight_decay': 0,
        'print_epoch_interval': 5,
        'max_time': 12,
    }

    # device
    device = gpu_setup(use_gpu, gpu_id)
    net_params['device'] = device
    net_params['gpu_id'] = gpu_id

    # ZINC: take the vocabulary sizes from the loaded dataset
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    # Count parameters once, with the final net_params.
    net_params['total_param'] = view_model_param(MODEL_NAME, net_params)
    compare_models(MODEL_NAME, dataset, params, net_params, seed)

In [5]:
# models = ['GCN', 'GCN', 'GCN', 'GCN', 'GCN', 'GraphSage', 'GraphSage', 'GraphSage', 'GraphSage', 'GraphSage', 'GIN', 'GIN', 'GIN', 'GIN', 'GIN']
# seeds = [40, 41, 42, 43, 44, 40, 41, 42, 43, 44, 40, 41, 42, 43, 44]

# Evaluate the GraphSage checkpoint saved for seed 40.
# NOTE(review): the recorded run of this cell raised a RuntimeError in
# load_state_dict: the checkpoint holds "layers.N.sageconv.bias" keys while
# the model expects "layers.N.sageconv.fc_self.bias" / "fc_neigh.bias".
# Presumably the checkpoint was saved with a different DGL version than the
# one installed — TODO confirm and re-save or remap the keys.
main('GraphSage', 40)

MODEL DETAILS:

MODEL/Total parameters: GraphSage 105571
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GraphSage 105571


RuntimeError: Error(s) in loading state_dict for GraphSageNet:
	Missing key(s) in state_dict: "layers.0.sageconv.fc_self.bias", "layers.0.sageconv.fc_neigh.bias", "layers.1.sageconv.fc_self.bias", "layers.1.sageconv.fc_neigh.bias", "layers.2.sageconv.fc_self.bias", "layers.2.sageconv.fc_neigh.bias", "layers.3.sageconv.fc_self.bias", "layers.3.sageconv.fc_neigh.bias". 
	Unexpected key(s) in state_dict: "layers.0.sageconv.bias", "layers.1.sageconv.bias", "layers.2.sageconv.bias", "layers.3.sageconv.bias". 

In [7]:
# Evaluate the GCN checkpoint of every seed, in ascending order.
for gcn_seed in (40, 41, 42, 43, 44):
    main('GCN', gcn_seed)

MODEL DETAILS:

MODEL/Total parameters: GCN 103222
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
40
1.4515155777335167
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
41
1.1920709024369716
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
42
2.1275756004452706
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
43
1.278443181067705
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
cuda not available
MODEL DETAILS:

MODEL/Total parameters: GCN 103222
44
2.3681471332907678
