In [1]:
import numpy as np

In [2]:
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm

Using backend: pytorch


In [3]:
# Show the current working directory: the relative paths used by the
# following cells ('configs/...', 'out/...') are resolved against it.
os.getcwd()

'/users/eleves-a/2020/abdellah.el-mrini/graphtransformer'

In [4]:
from nets.SBMs_node_classification.graph_transformer_net import GraphTransformerNet
from train.train_SBMs_node_classification import evaluate_network
import json

In [5]:
from data.data import LoadData 
from nets.SBMs_node_classification.load_net import gnn_model 
from tensorboardX import SummaryWriter

In [6]:
# Path (relative to the working directory) of the JSON experiment configuration.
config_file = 'configs/SBMs_GraphTransformer_LapPE_CLUSTER_500k_sparse_graph_LN.json'

# Parse the configuration into a dict; later cells read its 'net_params',
# 'gpu', 'params', 'dataset', 'model' and 'out_dir' entries.
with open(config_file, 'r') as f:
    config = json.load(f)

In [7]:
# Network hyper-parameters as stored in the configuration file.
net_params = config['net_params']
# Hard-coded here instead of being derived from the loaded dataset.
# NOTE(review): presumably the node-feature vocabulary size and the number of
# classes of the SBM_CLUSTER dataset — confirm against the dataset.
net_params['in_dim']= 7
net_params['n_classes'] = 6

In [8]:
def gpu_setup(use_gpu, gpu_id):
    """Select the torch device to run on and report the choice on stdout.

    Args:
        use_gpu: Truthy to request a GPU; falsy to force the CPU.
        gpu_id: Identifier written (as a string) to ``CUDA_VISIBLE_DEVICES``.

    Returns:
        ``torch.device("cuda")`` when a GPU was requested and CUDA is
        available, otherwise ``torch.device("cpu")``.
    """
    # NOTE(review): these variables presumably only take effect when they are
    # set before CUDA is first initialised in this process — confirm.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    # Distinguish "GPU not requested" from "CUDA missing": previously both
    # cases printed 'cuda not available', which was wrong for the former.
    if not use_gpu:
        print('GPU use disabled in config, running on CPU')
        return torch.device("cpu")

    if not torch.cuda.is_available():
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:', torch.cuda.get_device_name(0))
    return torch.device("cuda")

In [9]:
# Pick the device according to the 'gpu' section of the configuration
# ('use' flag and device 'id').
device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

cuda available with GPU: Quadro P2200


In [10]:
# Store the device in net_params; eval_pipeline reads it back from there.
net_params['device'] = device

In [11]:
# Instantiate the network directly from net_params (weights are loaded in a later cell).
model = GraphTransformerNet(net_params)

In [12]:
# Quick sanity check: parameters() yields a generator, so only its repr is displayed.
model.parameters()

<generator object Module.parameters at 0x7f2a9e57e1d0>

In [13]:
# Restore the saved weights. map_location='cpu' lets a checkpoint that was
# written from a GPU be read on a machine without CUDA; load_state_dict then
# copies the values into the model's existing parameters.
# NOTE: torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(torch.load('out/ModelsParams/epoch_82.pkl', map_location='cpu'))

<All keys matched successfully>

In [14]:
# Put the module in evaluation mode (torch.nn.Module.eval); the call returns
# the module itself, whose structure is echoed as the cell output.
model.eval()

GraphTransformerNet(
  (embedding_lap_pos_enc): Linear(in_features=10, out_features=80, bias=True)
  (embedding_h): Embedding(7, 80)
  (in_feat_dropout): Dropout(p=0.0, inplace=False)
  (layers): ModuleList(
    (0): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (1): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (2): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (3): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (4): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (5): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (6): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (7): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)
    (8): GraphTransformerLayer(in_channels=80, out_channels=80, heads=8, residual=True)


In [15]:
def view_model_param(MODEL_NAME, net_params):
    """Build the model named ``MODEL_NAME`` and report how many parameters it has.

    Args:
        MODEL_NAME: Model identifier forwarded to ``gnn_model``.
        net_params: Network hyper-parameters forwarded to ``gnn_model``.

    Returns:
        The total number of scalar entries over ``model.parameters()`` as an int.
    """
    model = gnn_model(MODEL_NAME, net_params)
    print("MODEL DETAILS:\n")
    # numel() counts a 0-dim parameter as 1, whereas np.prod([]) is the float
    # 1.0 and silently turned the whole total into a float.
    total_param = sum(param.numel() for param in model.parameters())
    print('MODEL/Total parameters:', MODEL_NAME, total_param)
    return total_param


In [16]:
def eval_pipeline(MODEL_NAME, dataset, params, net_params, dirs,
                  checkpoint_path='out/ModelsParams/epoch_82.pkl'):
    """Evaluate saved model weights on the train, validation and test splits.

    The function prepares ``dataset`` (optional positional encodings and
    full-graph conversion), builds the network with ``gnn_model``, loads the
    weights stored at ``checkpoint_path`` and then, for ``params['epochs']``
    passes, calls ``evaluate_network`` on each split. No optimisation step is
    performed in this function. Console messages that mention "training" are
    kept unchanged.

    Args:
        MODEL_NAME: Model identifier forwarded to ``gnn_model`` and written to
            the report files.
        dataset: Dataset object exposing ``name``, ``train``, ``val``, ``test``,
            ``collate`` and the ``_add_laplacian_positional_encodings`` /
            ``_add_wl_positional_encodings`` / ``_make_full_graph`` helpers.
        params: Dict read for ``seed``, ``batch_size``, ``epochs`` and
            ``max_time`` (in hours).
        net_params: Dict read for ``lap_pos_enc``, ``pos_enc_dim``,
            ``wl_pos_enc``, ``full_graph``, ``device``, ``n_classes`` and
            ``total_param``.
        dirs: 4-tuple ``(root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file)``; the last two are path stems to which
            ``'.txt'`` is appended.
        checkpoint_path: File containing the state dict to evaluate. Defaults
            to the path that used to be hard-coded.

    Returns:
        None. Results are printed, logged through ``SummaryWriter`` and written
        to ``write_file_name + '.txt'``.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['lap_pos_enc']:
        st = time.time()
        print("[!] Adding Laplacian positional encoding.")
        dataset._add_laplacian_positional_encodings(net_params['pos_enc_dim'])
        print('Time LapPE:',time.time()-st)

    if net_params['wl_pos_enc']:
        st = time.time()
        print("[!] Adding WL positional encoding.")
        dataset._add_wl_positional_encodings()
        print('Time WL PE:',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Converting the given graphs to full graphs..")
        dataset._make_full_graph()
        print('Time taken to convert to full graphs:',time.time()-st)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write network and optimization hyper-parameters in folder config/.
    # The folder is created first so that open() cannot fail on a fresh out_dir.
    config_dir = os.path.dirname(write_config_file)
    if config_dir:
        os.makedirs(config_dir, exist_ok=True)
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n""".format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))
    print("Number of Classes: ", net_params['n_classes'])

    model = gnn_model(MODEL_NAME, net_params)
    model = model.to(device)

    # map_location makes a checkpoint saved from another device loadable here.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # import train and evaluate functions
    from train.train_SBMs_node_classification import evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # Defined up front so the final report below still works when the loop
    # body never runs (params['epochs'] == 0 or an immediate Ctrl + C).
    epoch = 0

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                # evaluate_network returns a (loss, accuracy) pair for the given loader.
                epoch_train_loss, epoch_train_acc = evaluate_network(model, device, train_loader, epoch)
                epoch_val_loss, epoch_val_acc = evaluate_network(model, device, val_loader, epoch)
                _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                writer.add_scalar('test/_acc', epoch_test_acc, epoch)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the two most recent checkpoints; the epoch number
                # is parsed back from the '..._<epoch>.pkl' file name.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                # Stop training after params['max_time'] hours
                if time.time()-start0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    _, test_acc = evaluate_network(model, device, test_loader, epoch)
    _, train_acc = evaluate_network(model, device, train_loader, epoch)
    print("Test Accuracy: {:.4f}".format(test_acc))
    print("Train Accuracy: {:.4f}".format(train_acc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder. Only the parent folder is
    # created: write_file_name is the stem of a file, not a directory, so
    # calling makedirs on it left a stray empty directory next to the report.
    results_dir = os.path.dirname(write_file_name)
    if results_dir:
        os.makedirs(results_dir, exist_ok=True)
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST ACCURACY: {:.4f}\nTRAIN ACCURACY: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_acc, train_acc, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))

        




In [17]:
class DotDict(dict):
    """Dictionary whose entries can also be read and written as attributes.

    Only keyword arguments are accepted by the constructor.
    """

    def __init__(self, **kwds):
        # Populate the mapping, then alias the attribute namespace to the
        # mapping itself so that ``d.key`` and ``d['key']`` share one storage.
        super().__init__(**kwds)
        self.__dict__ = self

In [18]:
# Run settings from the configuration file.
params = config['params']
# Mirror the batch size into the network parameters as well.
net_params["batch_size"] = params['batch_size']
# Dataset identifier from the configuration; also reused in the output path names.
DATASET_NAME = config['dataset']
# LoadData is imported from data.data; its progress messages appear in the cell output.
dataset = LoadData(DATASET_NAME)

[I] Loading dataset SBM_CLUSTER...
train, test, val sizes : 1 1000 1
[I] Finished loading.
[I] Data load time: 0.5012s


In [19]:
out_dir = config['out_dir']
MODEL_NAME =  config['model']
# Build the run identifier once: calling time.strftime separately for every
# path could give the four paths different timestamps if the clock ticks
# between calls.
run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
root_log_dir = out_dir + 'logs/' + run_tag
root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
write_file_name = out_dir + 'results/result_' + run_tag
write_config_file = out_dir + 'configs/config_' + run_tag
# Both folders receive a text file from eval_pipeline, so create them before
# it is called (previously only 'results' was created at this point).
for sub_dir in ('results', 'configs'):
    os.makedirs(out_dir + sub_dir, exist_ok=True)
# Order matters: eval_pipeline unpacks this tuple positionally.
dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file


In [20]:
# Override the configured epoch count: eval_pipeline only evaluates already
# loaded weights, so one pass over the splits is requested.
params["epochs"] = 1

In [21]:
# Inspect the training split object (only its repr is displayed).
dataset.train

<data.SBMs.load_SBMsDataSetDGL at 0x7f2a9e504e50>

In [22]:
# view_model_param builds a separate model instance just to count parameters;
# the total is stored in net_params because eval_pipeline writes
# net_params['total_param'] into its report files.
net_params['total_param'] = view_model_param(MODEL_NAME, net_params)

MODEL DETAILS:

MODEL/Total parameters: GraphTransformer 524026


In [23]:
# Run the evaluation: prints accuracies and timings, logs scalars through
# SummaryWriter and writes the config/result text files at the paths in `dirs`.
eval_pipeline(MODEL_NAME, dataset, params, net_params, dirs)

[!] Adding Laplacian positional encoding.



	DGLGraph.adjacency_matrix(transpose, scipy_fmt="csr").

Epoch 0:   0%|          | 0/1 [00:00<?, ?it/s]

Time LapPE: 3.369917392730713
Training Graphs:  1
Validation Graphs:  1
Test Graphs:  1000
Number of Classes:  6


	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /opt/conda/conda-bld/pytorch_1595629427478/work/torch/csrc/utils/python_arg_parser.cpp:766.)
  label_count = label_count[label_count.nonzero()].squeeze()
Epoch 0: 100%|██████████| 1/1 [00:07<00:00,  7.70s/it]


Test Accuracy: 19.3180
Train Accuracy: 19.4812
Convergence Time (Epochs): 0.0000
TOTAL TIME TAKEN: 18.2441s
AVG TIME PER EPOCH: 7.6739s


In [24]:
# Ensure the output folders exist. exist_ok=True makes the cell safe to re-run
# and removes the gap between the existence check and the creation.
for sub_dir in ('results', 'configs'):
    os.makedirs(out_dir + sub_dir, exist_ok=True)
    

In [25]:
# Re-check the working directory against which the relative output paths are resolved.
os.getcwd()

'/users/eleves-a/2020/abdellah.el-mrini/graphtransformer'

In [26]:
# Display the config-file path stem; eval_pipeline appends '.txt' to it when writing.
write_config_file

'out/SBMs_sparse_LapPE_LN/configs/config_GraphTransformer_SBM_CLUSTER_GPU0_13h22m24s_on_Mar_07_2022'