## EvolveGCN代码复现

* [源仓库地址](https://github.com/IBM/EvolveGCN)

**注意：**

由于使用 Jupyter Notebook 来运行代码，源仓库中的部分 Python 文件做了相应改动。

###  导入python文件和相关库

In [1]:
import utils as u
import torch
import torch.distributed as dist
import numpy as np
import time
import random

#datasets
import bitcoin_dl as bc
import elliptic_temporal_dl as ell_temp
import uc_irv_mess_dl as ucim
import auto_syst_dl as aus
import sbm_dl as sbm
import reddit_dl as rdt


#taskers
import link_pred_tasker as lpt
import edge_cls_tasker as ect
import node_cls_tasker as nct

#models
import models as mls
import egcn_h
import egcn_o


import splitter as sp
import Cross_Entropy as ce

import trainer as tr

import logger

### 全局参数设置

In [2]:
parser = u.create_parser() # create the argument parser

In [3]:
parser

ArgumentParser(prog='ipykernel_launcher.py', usage=None, description=None, formatter_class=<class 'argparse.RawTextHelpFormatter'>, conflict_handler='error', add_help=True)

In [4]:
# Parse the run configuration with the project helper; the following cells override individual fields on `args`.
args = u.parse_args(parser)

<_io.TextIOWrapper name='experiments/parameters_example.yaml' mode='r' encoding='UTF-8'>


In [5]:
args.data_loading_params = {'batch_size': 1, 'num_workers': 0}  # convenient for debugging on a single machine

In [6]:
args.model = 'egcn_o'  # model type to use -- default: egcn-O; alternative: egcn-H

In [7]:
args.num_epochs = 2  # limit this notebook run to 2 epochs

### 参数可视化制表

In [8]:
from texttable import Texttable

def tab_printer(args):
    """Print every attribute of ``args`` as a two-column text table.

    Rows are ordered by attribute name. Each name is displayed with
    underscores replaced by spaces and passed through ``str.capitalize``.
    """
    params = vars(args)
    table = Texttable()
    header = [["Parameter", "Value"]]
    body = [
        [name.replace("_", " ").capitalize(), params[name]]
        for name in sorted(params.keys())
    ]
    table.add_rows(header + body)
    print(table.draw())

In [9]:
tab_printer(args)

+------------------------+-----------------------------------------------------+
|       Parameter        |                        Value                        |
| Adj mat time window    | 1                                                   |
+------------------------+-----------------------------------------------------+
| Class weights          | [0.1, 0.9]                                          |
+------------------------+-----------------------------------------------------+
| Comments               | ['comments']                                        |
+------------------------+-----------------------------------------------------+
| Data                   | sbm50                                               |
+------------------------+-----------------------------------------------------+
| Data loading params    | {'batch_size': 1, 'num_workers': 0}                 |
+------------------------+-----------------------------------------------------+
| Dev proportion         | 0

### 计算设备设置（CPU / CUDA）

In [10]:
# Use CUDA only when it is both available on this machine and requested in the configuration.
args.use_cuda = (torch.cuda.is_available() and args.use_cuda)
args.device = 'cuda' if args.use_cuda else 'cpu'
print ("use CUDA:", args.use_cuda, "- device:", args.device)

use CUDA: False - device: cpu


### 判断是否处于分布式环境

In [11]:
# Try to join an MPI process group; if that fails for any reason, fall back to
# a single-process setup (rank 0 out of 1).
try:
    dist.init_process_group(backend='mpi') #, world_size=4
    rank = dist.get_rank()
    wsize = dist.get_world_size()
    print('Hello from process {} (out of {})'.format(rank, wsize))
    if args.use_cuda:
        torch.cuda.set_device(rank)  # are we sure of the rank+1????
        print('using the device {}'.format(torch.cuda.current_device()))
except Exception:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt
    # and SystemExit still propagate (e.g. when the user interrupts the kernel).
    rank = 0
    wsize = 1
    print(('MPI backend not preset. Set process rank to {} (out of {})'.format(rank,
                                                                                wsize)))

MPI backend not preset. Set process rank to 0 (out of 1)


这里指定的分布式后端为“mpi”，但当前环境中 MPI 后端不可用，进程组初始化失败，因此代码回退到单进程模式：将进程 rank 设置为 0，world_size 设置为 1。

### 设置随机数种子

In [12]:
# Choose the random seed. A missing seed may be a real None or the string
# 'None' (any casing); both fall back to a rank-dependent default. The check
# uses `or`: with `and`, a string 'None' would be passed to np.random.seed()
# and raise.
if args.seed is None or str(args.seed).lower() == 'none':
    seed = 123+rank #int(time.time())+rank
else:
    seed = args.seed  #+rank; use the configured seed
print(('Running with seed: {}'.format(seed)))

# Seed every random number generator used by the notebook.
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Record the effective seed and the process layout on args for later cells.
args.seed=seed
args.rank=rank
args.wsize=wsize
print('seed:', args.seed, '; rank:',args.rank, '; wsize:',args.wsize)

Running with seed: 1234
seed: 1234 ; rank: 0 ; wsize: 1


### 设置网络参数

In [13]:
def random_param_value(param, param_min, param_max, type='int'):
    """Return ``param`` unchanged, or draw a random value when it is unset.

    A parameter counts as unset when its string form equals 'none'
    (case-insensitive), which covers both ``None`` and strings like 'None'.

    Args:
        param: the configured value, or None / 'None' to request a random one.
        param_min: lower bound of the sampling range (inclusive).
        param_max: upper bound of the sampling range (inclusive for 'int').
        type: sampling mode -- 'int' draws an integer with ``random.randrange``,
            'logscale' picks one of 100 log-spaced points between the bounds,
            anything else draws a float with ``random.uniform``.

    Returns:
        ``param`` itself when it is set, otherwise the sampled value.
    """
    # str(None) is 'None', so this single test covers None as well as "none" strings.
    if str(param).lower() != 'none':
        return param

    if type == 'int':
        return random.randrange(param_min, param_max+1)
    if type == 'logscale':
        interval = np.logspace(np.log10(param_min), np.log10(param_max), num=100)
        return np.random.choice(interval, 1)[0]
    return random.uniform(param_min, param_max)

In [14]:
def build_random_hyper_params(args):
    """Resolve the model choice and fill in every unset hyper-parameter.

    Mutates ``args`` in place and returns it:
      * a group name in ``args.model`` ('all', 'all_nogcn', ...) is replaced by
        the concrete model at index ``args.rank`` of that group;
      * ``learning_rate``, ``num_hist_steps`` and the sizes stored in
        ``args.gcn_parameters`` are passed through ``random_param_value``, which
        keeps configured values and samples the unset ones.

    Args:
        args: configuration object providing ``model``, ``rank``,
            ``learning_rate*``, ``num_hist_steps*`` and the ``gcn_parameters`` dict.

    Returns:
        The same ``args`` object, updated.
    """
    def _flag_is_true(value):
        # A "same as layer 1" flag may be a real boolean or a string such as
        # 'True' / 'false'. Strings are compared textually: a plain truthiness
        # test would treat 'false' as true, and calling .lower() on a boolean
        # False would raise AttributeError.
        if isinstance(value, str):
            return value.lower() == 'true'
        return bool(value)

    # Model selection: each group name maps to a list indexed by process rank.
    model_groups = {
        'all': ['gcn', 'egcn_o', 'egcn_h', 'gruA', 'gruB','egcn','lstmA', 'lstmB'],
        'all_nogcn': ['egcn_o', 'egcn_h', 'gruA', 'gruB','egcn','lstmA', 'lstmB'],
        'all_noegcn3': ['gcn', 'egcn_h', 'gruA', 'gruB','egcn','lstmA', 'lstmB'],
        'all_nogruA': ['gcn', 'egcn_o', 'egcn_h', 'gruB','egcn','lstmA', 'lstmB'],
        'saveembs': ['gcn', 'gcn', 'skipgcn', 'skipgcn'],
    }
    if args.model in model_groups:
        args.model = model_groups[args.model][args.rank]

    # Learning rate is sampled on a log scale when unset.
    args.learning_rate = random_param_value(args.learning_rate, args.learning_rate_min, args.learning_rate_max, type='logscale')
    # args.adj_mat_time_window = random_param_value(args.adj_mat_time_window, args.adj_mat_time_window_min, args.adj_mat_time_window_max, type='int')

    # The plain GCN uses no history steps; every other model gets num_hist_steps resolved.
    if args.model == 'gcn':
        args.num_hist_steps = 0
    else:
        args.num_hist_steps = random_param_value(args.num_hist_steps, args.num_hist_steps_min, args.num_hist_steps_max, type='int')

    gcn_params = args.gcn_parameters

    # Feature sizes of the GCN layers.
    gcn_params['feats_per_node'] = random_param_value(gcn_params['feats_per_node'], gcn_params['feats_per_node_min'], gcn_params['feats_per_node_max'], type='int')
    gcn_params['layer_1_feats'] = random_param_value(gcn_params['layer_1_feats'], gcn_params['layer_1_feats_min'], gcn_params['layer_1_feats_max'], type='int')
    if _flag_is_true(gcn_params['layer_2_feats_same_as_l1']):
        gcn_params['layer_2_feats'] = gcn_params['layer_1_feats']
    else:
        # Layer 2 is sampled from the layer-1 range (same bounds as the original code).
        gcn_params['layer_2_feats'] = random_param_value(gcn_params['layer_2_feats'], gcn_params['layer_1_feats_min'], gcn_params['layer_1_feats_max'], type='int')

    # Feature sizes of the LSTM layers.
    gcn_params['lstm_l1_feats'] = random_param_value(gcn_params['lstm_l1_feats'], gcn_params['lstm_l1_feats_min'], gcn_params['lstm_l1_feats_max'], type='int')
    if _flag_is_true(gcn_params['lstm_l2_feats_same_as_l1']):
        gcn_params['lstm_l2_feats'] = gcn_params['lstm_l1_feats']
    else:
        # LSTM layer 2 is sampled from the LSTM layer-1 range (same bounds as the original code).
        gcn_params['lstm_l2_feats'] = random_param_value(gcn_params['lstm_l2_feats'], gcn_params['lstm_l1_feats_min'], gcn_params['lstm_l1_feats_max'], type='int')

    # Feature size of the classifier.
    gcn_params['cls_feats'] = random_param_value(gcn_params['cls_feats'], gcn_params['cls_feats_min'], gcn_params['cls_feats_max'], type='int')

    return args

In [15]:
# Assign the requested random hyper parameters and store the result back in args
args = build_random_hyper_params(args)  # set the network parameters

In [16]:
tab_printer(args)

+------------------------+-----------------------------------------------------+
|       Parameter        |                        Value                        |
| Adj mat time window    | 1                                                   |
+------------------------+-----------------------------------------------------+
| Class weights          | [0.1, 0.9]                                          |
+------------------------+-----------------------------------------------------+
| Comments               | ['comments']                                        |
+------------------------+-----------------------------------------------------+
| Data                   | sbm50                                               |
+------------------------+-----------------------------------------------------+
| Data loading params    | {'batch_size': 1, 'num_workers': 0}                 |
+------------------------+-----------------------------------------------------+
| Dev proportion         | 0

### 创建数据集

In [17]:
def build_dataset(args):  # dataset used in this notebook: 'sbm50'
    """Create the dataset object selected by ``args.data``.

    Supported values:
      * 'bitcoinotc' / 'bitcoinalpha' -- copies the matching ``*_args`` entry to
        ``args.bitcoin_args`` and returns ``bc.bitcoin_dataset(args)``;
      * any name starting with 'sbm' -- for 'sbm20' / 'sbm50' copies the matching
        ``*_args`` entry to ``args.sbm_args`` (other 'sbm*' names leave
        ``args.sbm_args`` as the caller set it) and returns
        ``sbm.sbm_dataset(args)``.

    The upstream loaders for aml_sim, elliptic, elliptic_temporal, uc_irv_mess,
    dbg, colored_graph, autonomous_syst and reddit are disabled in this notebook.

    Args:
        args: configuration object; ``args.data`` names the dataset.

    Returns:
        The dataset object built by the selected loader.

    Raises:
        NotImplementedError: if ``args.data`` is none of the supported names.
    """
    if args.data == 'bitcoinotc' or args.data == 'bitcoinalpha':
        if args.data == 'bitcoinotc':
            args.bitcoin_args = args.bitcoinotc_args
        elif args.data == 'bitcoinalpha':
            args.bitcoin_args = args.bitcoinalpha_args
        return bc.bitcoin_dataset(args)
    elif args.data.startswith('sbm'):
        if args.data == 'sbm20':
            args.sbm_args = args.sbm20_args
        elif args.data == 'sbm50':
            args.sbm_args = args.sbm50_args  # file location settings
            print(args.sbm_args)
        return sbm.sbm_dataset(args)  # read the data
    else:
        # Name the rejected dataset and the supported ones; the previous
        # message ('only arxiv has been implemented') did not match this code.
        raise NotImplementedError(
            "dataset '{}' is not supported here; only bitcoinotc, bitcoinalpha "
            "and sbm* have been implemented".format(args.data))


In [18]:
import warnings
warnings.filterwarnings("ignore")  # NOTE(review): suppresses all warnings from here on

# build the dataset
dataset = build_dataset(args)  # construct the dataset object

{'folder': './data/', 'edges_file': 'sbm_50t_1000n_adj.csv', 'aggr_time': 1, 'feats_per_node': 3}
max_time: tensor(49)
min_time: tensor(0)
num_nodes: 1000
num_non_existing: -3870863


In [19]:
dataset.edges

{'idx': tensor([[  0,   2,   0],
         [  0,   3,   0],
         [  0,   8,   0],
         ...,
         [999, 959,  49],
         [999, 970,  49],
         [999, 991,  49]]),
 'vals': tensor([1, 1, 1,  ..., 1, 1, 1])}

### 构建任务类型

In [20]:
def build_tasker(args,dataset):
    """Create the tasker object that matches ``args.task``.

    Supported tasks are 'link_pred' (link prediction), 'edge_cls', 'node_cls'
    and 'static_node_cls'; any other value raises NotImplementedError.
    """
    task = args.task
    if task == 'link_pred':
        return lpt.Link_Pred_Tasker(args,dataset)
    if task == 'edge_cls':
        return ect.Edge_Cls_Tasker(args,dataset)
    if task == 'node_cls':
        return nct.Node_Cls_Tasker(args,dataset)
    if task == 'static_node_cls':
        return nct.Static_Node_Cls_Tasker(args,dataset)
    raise NotImplementedError('still need to implement the other tasks')

In [21]:
# build the tasker
tasker = build_tasker(args, dataset)  # tasker for the link_pred prediction task

### 训练，测试，验证集划分

In [22]:
# build the splitter
splitter = sp.splitter(args,tasker)  # training, test and validation sets

Dataset splits sizes:  train 29 dev 5 test 10


In [23]:
splitter.train.__len__()

29

### 构建模型

In [24]:
def build_gcn(args,tasker):
    """Instantiate the graph model named by ``args.model``.

    The model parameters come from ``args.gcn_parameters``, with
    ``feats_per_node`` overridden by the tasker's value. Handled models are
    'gcn', 'lstmA', 'egcn_h' and 'egcn_o'; every model except 'gcn' requires
    ``args.num_hist_steps > 0``. The other upstream variants (skipgcn,
    skipfeatsgcn, gruA, gruB, lstmB, egcn, skipfeatsegcn_h) are disabled in
    this notebook and end in NotImplementedError.
    """
    gcn_args = u.Namespace(args.gcn_parameters)  # model parameters
    # Original note on this value: maximum node degree, used to build one-hot features.
    gcn_args.feats_per_node = tasker.feats_per_node
    print('feats_per_node:',gcn_args.feats_per_node)

    model_name = args.model
    if model_name == 'gcn':
        return mls.Sp_GCN(gcn_args,activation = torch.nn.RReLU()).to(args.device)

    # Every remaining model needs at least one history step.
    assert args.num_hist_steps > 0, 'more than one step is necessary to train LSTM'

    if model_name == 'lstmA':
        return mls.Sp_GCN_LSTM_A(gcn_args,activation = torch.nn.RReLU()).to(args.device)
    if model_name == 'egcn_h':
        return egcn_h.EGCN(gcn_args, activation = torch.nn.RReLU(), device = args.device)
    if model_name == 'egcn_o':
        return egcn_o.EGCN(gcn_args, activation = torch.nn.RReLU(), device = args.device)
    raise NotImplementedError('need to finish modifying the models')

In [25]:
# build the graph model selected by args.model
gcn = build_gcn(args, tasker)

feats_per_node: 162


### 构建分类器模型

In [26]:
def build_classifier(args,tasker):
    """Create the classifier that sits on top of the graph model.

    The input width is a per-node feature size times a multiplier: 1 for the
    node-classification tasks, 2 otherwise (e.g. link_pred). The per-node size
    is ``lstm_l2_feats`` for gru/lstm models, ``layer_2_feats + feats_per_node``
    for the skipfeats models and ``layer_2_feats`` for everything else
    (including EvolveGCN).
    """
    params = args.gcn_parameters
    mult = 1 if args.task in ('node_cls', 'static_node_cls') else 2

    if 'gru' in args.model or 'lstm' in args.model:
        node_width = params['lstm_l2_feats']
    elif args.model in ('skipfeatsgcn', 'skipfeatsegcn_h'):
        node_width = params['layer_2_feats'] + params['feats_per_node']
    else:
        node_width = params['layer_2_feats']
    in_feats = node_width * mult

    classifier = mls.Classifier(args,in_features = in_feats, out_features = tasker.num_classes)
    return classifier.to(args.device)

In [27]:
classifier = build_classifier(args,tasker)  # classifier for link_pred

CLS num_feats 200


### 构建损失函数

In [28]:
# build the loss module and move it to the selected device
cross_entropy = ce.Cross_Entropy(args, dataset).to(args.device)

### 构建训练器

In [29]:
# trainer
trainer = tr.Trainer(args,
                        splitter = splitter,  # training, test and validation sets
                        gcn = gcn,  # Evolve_GCN model
                        classifier = classifier,  # link_pred classifier
                        comp_loss = cross_entropy,  # loss
                        dataset = dataset,  # dataset
                        num_classes = tasker.num_classes)  # binary classification (2 classes)

Log: STDOUT
INFO:root:*** PARAMETERS ***
INFO:root:{'adj_mat_time_window': 1,
 'class_weights': [0.1, 0.9],
 'comments': ['comments'],
 'data': 'sbm50',
 'data_loading_params': {'batch_size': 1, 'num_workers': 0},
 'dev_proportion': 0.1,
 'device': 'cpu',
 'early_stop_patience': 50,
 'eval_after_epochs': 5,
 'gcn_parameters': {'cls_feats': 100,
                    'cls_feats_max': 800,
                    'cls_feats_min': 100,
                    'feats_per_node': 100,
                    'feats_per_node_max': 256,
                    'feats_per_node_min': 50,
                    'k_top_grcu': 200,
                    'layer_1_feats': 100,
                    'layer_1_feats_max': 200,
                    'layer_1_feats_min': 10,
                    'layer_2_feats': 100,
                    'layer_2_feats_same_as_l1': True,
                    'lstm_l1_feats': 100,
                    'lstm_l1_feats_max': 200,
                    'lstm_l1_feats_min': 10,
                    'lstm_l1_lay

### 模型训练

In [46]:
trainer.train()

INFO:root:################ TRAIN epoch 0 ###################
INFO:root:TRAIN mean losses tensor(0.1249)
INFO:root:TRAIN mean errors 0.19567318260669708
INFO:root:TRAIN mean MRR 0.0 - mean MAP 0.15319414805181736
INFO:root:TRAIN tp {0: tensor(22879027), 1: tensor(423118)},fn {0: tensor(3256774), 1: tensor(2412072)},fp {0: tensor(2412072), 1: tensor(3256774)}
INFO:root:TRAIN measures microavg - precision 0.8043 - recall 0.8043 - f1 0.8043 
INFO:root:TRAIN measures for class 0 - precision 0.9046 - recall 0.8754 - f1 0.8898 
INFO:root:TRAIN measures for class 1 - precision 0.1150 - recall 0.1492 - f1 0.1299 
INFO:root:TRAIN measures@10 microavg - precision 0.7094 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@10 for class 0 - precision 0.7778 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@10 for class 1 - precision 0.3400 - recall 0.0000 - f1 0.0000 
INFO:root:TRAIN measures@100 microavg - precision 0.6781 - recall 0.0001 - f1 0.0001 
INFO:root:TRAIN measures@100 for class 0 