In [None]:
!pip install dgl-cu101
!pip install dynamicgem
!pip install keras==2.2.4

In [None]:
from google.colab import drive

drive.mount('/content/drive')

!ls /content/drive/'My Drive'/Colab
!ln -s /content/drive/'My Drive'/Colab /content/Colab

!ls -al

In [None]:
# %rm DBLPE_importance.npz
# %ls -hl ./

In [None]:
!cp /content/Colab/Clustering-RGCN/*.py .
!cp /content/Colab/Clustering-RGCN/*.npz .
!ls -al ./

In [None]:
# !cp /content/Colab/Clustering-RGCN/DBLPE_importance_2011.npz .
!ls -al ./

In [None]:
from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim
import random
from utils import encode_onehot
from models import GCNLSTM,GCN,GAT,GraphSage,EGCN,LSTMGCN,RNNGCN,TRNNGCN
# from models import RGCN,GCNLSTM,GCN,dgl_GCN,GAT,GraphSage,EGCN,LSTMGCN,RNNGCN,TRNNGCN

import tensorflow
# from dynamicgem.embedding.dynAERNN  import DynAERNN

import dgl

import scipy as sp
import scipy.linalg as linalg
import networkx as nx
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans,vq
from scipy import stats  

from sklearn.cluster import SpectralClustering
from sklearn import metrics

from itertools import permutations 

# Detect whether the notebook is running inside Google Colab.
try:
  import google.colab
  IN_COLAB = True
except ImportError:
  # Only a failed import means "not Colab"; any other error should surface.
  IN_COLAB = False


### Model Training & Testing

In [None]:
def one_hot(l,classnum=1): #classnum fix some special case
    """Encode integer class labels as a dense one-hot matrix.

    Args:
        l: 1-D sequence of non-negative integer labels that supports ``len()``,
            ``.max()`` and iteration (e.g. a NumPy array).
        classnum: Minimum number of columns. Pass the real class count when the
            highest class may be absent from ``l`` so the width still matches.

    Returns:
        A float ``numpy.ndarray`` of shape ``(len(l), max(l.max() + 1, classnum))``
        holding a single 1 per row, or an empty ``(0, classnum)`` array when
        ``l`` is empty.
    """
    if len(l)==0:
        # .max() raises on an empty array, so return the empty encoding directly.
        return np.zeros((0,classnum))
    one_hot_l=np.zeros((len(l),max(l.max()+1,classnum)))
    for row,label in enumerate(l):
        one_hot_l[row][label]=1
    return one_hot_l

In [None]:
def train(epoch, model, optimizer, features, adj, labels, idx_train, idx_val, model_type):
    """Run one optimisation step and return the validation accuracy.

    Args:
        epoch: Index of the current epoch. Unused; kept for caller compatibility.
        model: Module called as ``model(features, adj)``. Its output is fed to
            ``F.nll_loss``, so it is presumably log-probabilities per class.
        optimizer: Optimizer that owns the model parameters.
        features: Feature tensor passed to the model.
        adj: Adjacency input passed to the model.
        labels: Integer class label per node.
        idx_train: Indices of the training nodes.
        idx_val: Indices of the validation nodes.
        model_type: Model name; selects the extra regularisation term.

    Returns:
        Validation accuracy (float) measured after the parameter update with
        the model in eval mode. The model is left in eval mode.
    """
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)

    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    if model_type == "RNNGCN_LL":
        # L1 penalty on the learned LL parameter.
        loss_train += 0.05*torch.norm(model.LL, p=1)
    if model_type == "RNNGCN_SE_decay":
        # L2 penalty on the learned decay parameter.
        loss_train += 0.05 *torch.norm(model.Lambda, p=2)

    # NOTE(review): retain_graph=True is kept from the original; it is only
    # needed if a model reuses graph state across calls - confirm and drop if not.
    loss_train.backward(retain_graph=True)
    optimizer.step()

    # Validation: score the *updated* model in eval mode. The original reused
    # the predictions of the training-mode forward pass made before the step.
    model.eval()
    with torch.no_grad():
        output = model(features, adj)
    pred_labels=torch.argmax(output,axis=1)
    acc_val = metrics.accuracy_score(pred_labels[idx_val].cpu().detach().numpy(),labels[idx_val].cpu().detach().numpy())
    return acc_val

In [None]:
def test(model, features, adj, labels, idx_test):
    """Evaluate the model on the test nodes.

    Args:
        model: Module called as ``model(features, adj)``. Its output is fed to
            ``F.nll_loss``, so it is presumably log-probabilities per class.
        features: Feature tensor passed to the model.
        adj: Adjacency input passed to the model.
        labels: Integer class label per node.
        idx_test: Indices of the test nodes.

    Returns:
        Tuple ``(loss, accuracy, weighted F1, weighted one-vs-rest ROC AUC)``,
        in that order.
    """
    model.eval()
    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        output = model(features, adj)
    pred_labels=torch.argmax(output,axis=1)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])

    y_true=labels[idx_test].cpu().detach().numpy()
    y_pred=pred_labels[idx_test].cpu().detach().numpy()
    y_score=output[idx_test].cpu().detach().numpy()

    acc_test = metrics.accuracy_score(y_true, y_pred)
    f1_test=metrics.f1_score(y_true, y_pred,average='weighted')
    # Force the one-hot width to the model's class count so the label matrix and
    # the score matrix have the same number of columns even when the highest
    # class does not occur in the test split.
    auc_test=metrics.roc_auc_score(one_hot(y_true,classnum=y_score.shape[1]), y_score,multi_class='ovr',average='weighted')

    return loss_test.item(), acc_test, f1_test, auc_test

### Generating datasets

In [None]:
def getNormLaplacian(W):
    """Return the symmetrically normalised Laplacian of a weight matrix.

    With D = diag(d_1, ..., d_n) the row sums of W and L = D - W, the result is
    Lbar = D^(-1/2) L D^(-1/2).

    Args:
        W: Square weight/adjacency matrix (array-like).

    Returns:
        ``numpy.ndarray`` holding Lbar. Rows and columns of zero-degree nodes
        are zero (their D^(-1/2) entry is taken as 0) instead of raising a
        singular-matrix error.
    """
    W = np.asarray(W, dtype=float)
    d = W.sum(axis=1)
    L = np.diag(d) - W

    # Compute d^(-1/2) directly rather than inverting the full diagonal matrix.
    d_inv_sqrt = np.zeros_like(d)
    has_degree = d != 0
    d_inv_sqrt[has_degree] = np.power(d[has_degree], -0.5)

    # Row/column scaling is the same as multiplying by diag(d^-1/2) on both sides.
    Lbar = d_inv_sqrt[:, None] * L * d_inv_sqrt[None, :]
    return Lbar
 
def getKlargestEigVec(Lbar,k):
    """Return the k largest eigenvalues of Lbar and their eigenvectors.

    Args:
        Lbar: Square matrix.
        k: Number of eigenpairs to return.

    Returns:
        Tuple ``(eigenvalues, eigenvectors)``: the eigenvalues in descending
        order and a matrix whose columns are the matching eigenvectors.
        Eigenvalues come from ``scipy.linalg.eig`` and are ordered with
        NumPy's sort order for complex numbers.
    """
    eigval,eigvec=linalg.eig(Lbar)

    # Sort positions, not values: a value->index dict collapses repeated
    # eigenvalues onto one index and would return the same eigenvector twice.
    ix=np.argsort(eigval)[::-1][:k]
    return eigval[ix],eigvec[:,ix]
 
def getKlargestSigVec(Lbar,k):
    """Return the k largest singular values of Lbar and their left singular vectors.

    Args:
        Lbar: Matrix to decompose.
        k: Number of singular triplets to keep.

    Returns:
        Tuple ``(singular_values, left_singular_vectors)``: the k largest
        singular values in descending order and a matrix whose columns are the
        matching left singular vectors.
    """
    lsigvec,sigval,_=linalg.svd(Lbar)

    # scipy.linalg.svd returns singular values in non-increasing order, so the
    # leading k columns are the answer. Slicing also keeps repeated singular
    # values distinct, which a value->index dict lookup does not.
    return sigval[:k],lsigvec[:,:k]

def checkResult(Lbar,eigvec,eigval,k):
    """Print how well the first k eigenpairs satisfy Lbar*v = lambda*v.

    For each of the first k columns v of ``eigvec`` with eigenvalue ``eigval[i]``
    the residual norm ||Lbar*v - lambda*v|| is computed and printed as a
    multiple of the floating-point spacing at 1.0.

    Args:
        Lbar: Square matrix.
        eigvec: Matrix whose columns are eigenvectors.
        eigval: Eigenvalues aligned with the columns of ``eigvec``.
        k: Number of leading eigenpairs to check.
    """
    eps=np.spacing(1)
    residual_norms=[]
    for col in range(k):
        vec=eigvec[:,col]
        residual=np.dot(Lbar,vec)-eigval[col]*vec
        residual_norms.append(np.linalg.norm(residual))
    # Dividing a list by a NumPy scalar yields an array of scaled norms.
    length=residual_norms/eps
    print("Lbar*v-lamda*v are %s*%s" % (length,eps))

In [None]:
#setting of data generation

def generate_data(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector):
    """Simulate a dynamic graph whose node classes drift over time.

    At every time step each node changes to a different, uniformly drawn class
    with probability ``epsilon_vector[current_class]``. A symmetric adjacency
    matrix without self-loops is then sampled: a pair in the same class is
    linked with probability ``link_inclass_prob``, any other pair with
    ``link_outclass_prob``.

    Args:
        number_of_nodes: Number of nodes n.
        Time_steps: Number of snapshots t.
        class_num: Number of classes.
        link_inclass_prob: Edge probability for two nodes of the same class.
        link_outclass_prob: Edge probability for two nodes of different classes.
        epsilon_vector: Per-class probability of changing class at each step.

    Returns:
        Tuple ``(features, adj, labels, idx_train, idx_val, idx_test,
        Probability_matrix)`` where ``features`` is an n*t*n tensor holding an
        identity matrix per step, ``adj`` is the n*t*n adjacency tensor,
        ``labels`` are the classes after the last step, the index tensors split
        the nodes into the first fifth / up to half / the rest, and
        ``Probability_matrix`` is the n*n edge-probability matrix implied by
        the final labels (zero diagonal).
    """
    adj=torch.zeros(number_of_nodes,Time_steps,number_of_nodes) #n*t*n adj matrix

    #assign initial labels with equal probability
    labels=torch.randint(0,class_num,(number_of_nodes,)).to(dtype=torch.long)

    for step in range(int(Time_steps)):
        # Let labels drift. The random draw is made first so the RNG stream is
        # the same as before; with a single class there is no other class to
        # move to, so the resampling loop (which could never end) is skipped.
        for node in range(len(labels)):
            current=int(labels[node])
            if random.random()<epsilon_vector[current] and class_num>1:
                while True:
                    candidate=int(torch.randint(0,class_num,(1,))[0])
                    if candidate!=current:
                        labels[node]=candidate
                        break

        # Group node ids by class to speed up edge generation.
        label_list=labels.tolist()
        label_node_dict={c:[] for c in range(class_num)}
        for node,lab in enumerate(label_list):
            label_node_dict[lab].append(node)

        #generate symmetric adj matrix at this time step; only pairs with
        #z>node_id are sampled so every undirected pair is drawn once
        for node_id in range(number_of_nodes):
            own_class=label_list[node_id]
            for cls,members in label_node_dict.items():
                link_prob=link_inclass_prob if cls==own_class else link_outclass_prob
                for z in members:
                    if z>node_id and random.random()<link_prob:
                        adj[node_id,step,z]= 1
                        adj[z,step,node_id]= 1

    #features: one identity matrix per time step
    features=torch.eye(number_of_nodes).unsqueeze(1).repeat(1,Time_steps,1)

    #separate train,val,test
    idx_train = torch.LongTensor(range(number_of_nodes//5))
    idx_val = torch.LongTensor(range(number_of_nodes//5, number_of_nodes//2))
    idx_test = torch.LongTensor(range(number_of_nodes//2, number_of_nodes))

    #probability matrix at last time_step
    same_class=labels.unsqueeze(0)==labels.unsqueeze(1)
    Probability_matrix=torch.full((number_of_nodes,number_of_nodes),float(link_outclass_prob))
    Probability_matrix[same_class]=link_inclass_prob
    Probability_matrix.fill_diagonal_(0)

    return features.float(), adj.float(), labels, idx_train, idx_val, idx_test, Probability_matrix



### Train and test

In [None]:

def single_train_and_test(lambda_matrix, Probability_matrix, features, adj, labels, idx_train, idx_val, idx_test, model_type,normalize=False):
    """Train and evaluate one model (or clustering baseline) on a temporal graph.

    Besides its arguments the function reads these notebook-level globals:
    ``class_num``, ``dataset_name``, ``args_cuda``, ``args_epochs``,
    ``args_hidden``, ``args_dropout``, ``args_lr`` and ``args_weight_decay``.
    It also writes the globals ``SE_2ws_adj`` / ``SE_2ws_x`` for the
    'RNNGCN_SE_2ws' model. Several model classes selected below are looked up
    by name at call time and must already be defined in the notebook.

    Args:
        lambda_matrix: Class-by-class matrix of decay rates used to fuse the
            snapshots for 'SPEC' and 'GCN', or ``None`` to use a plain sum
            ('SPEC') or the ``attention_w`` weights ('GCN').
        Probability_matrix: n*n edge-probability matrix, only used to compute
            ``spec_norm``.
        features: n*t*n feature tensor.
        adj: n*t*n adjacency tensor.
        labels: Integer class per node.
        idx_train, idx_val, idx_test: Index tensors of the three splits.
        model_type: Name of the model or baseline to run.
        normalize: When ``True``, the 'SPEC' baselines add self-loops and apply
            D^(-1/2) A D^(-1/2) to the fused adjacency.

    Returns:
        For 'SPEC', 'SPEC_sklearn' and 'DynAERNN': ``(0, acc_test, spec_norm)``.
        For every other model:
        ``(loss, acc, spec_norm, test_results, SE_w, matrix_w)`` where
        ``test_results`` is ``[loss, accuracy, f1, auc]`` of the final model
        and ``SE_w`` / ``matrix_w`` are -1 unless the model exposes them.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    if model_type=='SPEC' or model_type=='SPEC_sklearn':
        if lambda_matrix is not None:
            # Per-pair decay rate looked up from the classes of both endpoints.
            decay_adj=torch.zeros(adj.shape[0],adj.shape[2])
            for j in range(adj.shape[0]):
                for k in range(adj.shape[2]):
                    decay_adj[j][k]=lambda_matrix[labels[j]][labels[k]]
            # Exponentially-decayed fusion of the snapshots.
            now_adj=adj[:,0,:].clone()
            for i in range(1,adj.shape[1]):  #time_steps
                tmp_adj=adj[:,i,:].clone()
                now_adj=(1-decay_adj)*now_adj+decay_adj*tmp_adj
            adj=now_adj
        else:
            # No decay rates given: sum all snapshots.
            now_adj=adj[:,0,:].clone()
            for i in range(1,adj.shape[1]):  #time_steps
                now_adj+=adj[:,i,:].clone()
            adj=now_adj
        if normalize==True:
            #normalize in both cases
            adj+=torch.eye(adj.shape[0],adj.shape[1])
            d=torch.sum(adj,axis=1)
            D_minus_one_over_2=torch.zeros(adj.shape[0],adj.shape[0])
            D_minus_one_over_2[range(len(D_minus_one_over_2)), range(len(D_minus_one_over_2))] = d**(-0.5)
            adj=torch.mm(torch.mm(D_minus_one_over_2,adj),D_minus_one_over_2)

        Lbar=np.array(adj)
        top_k=class_num
        kSigVal,kSigVec=getKlargestSigVec(Lbar,top_k)
        centroid=kmeans(kSigVec.astype(float),class_num)[0] #change kSigvec from complex64 to float
        result=vq(kSigVec.astype(float),centroid)[0]

        acc_test,f1_test,auc_test=_best_permutation_scores(result,labels,class_num)
        try:
            spec_norm=getKlargestSigVec(adj-Probability_matrix,2)[0]
        except Exception:
            # Best effort: the norm is informational only.
            spec_norm=[]
        return 0,acc_test,spec_norm

    elif model_type=="DynAERNN":
        # NOTE(review): DynAERNN is not imported in the visible imports cell
        # (the import is commented out) - it must be made available first.
        length=adj.shape[1]
        lookup=length-2

        dim_emb  = class_num
        if args_cuda:
            tensorflow.device('/gpu:0')
        embedding = DynAERNN(d   = dim_emb,
            beta           = 5,
            n_prev_graphs  = lookup,
            nu1            = 1e-6,
            nu2            = 1e-6,
            n_aeunits      = [50, 30],
            n_lstmunits    = [50,dim_emb],
            rho            = 0.3,
            n_iter         = args_epochs,
            xeta           = 1e-3,
            n_batch        = 10,
            modelfile      = ['./intermediate/enc_model_dynAERNN.json',
                              './intermediate/dec_model_dynAERNN.json'],
            weightfile     = ['./intermediate/enc_weights_dynAERNN.hdf5',
                              './intermediate/dec_weights_dynAERNN.hdf5'],
            savefilesuffix = "testing")
        embs = []

        graphs     = [nx.Graph(adj[:,l,:].numpy()) for l in range(length)]
        for temp_var in range(lookup, length):
            emb, _ = embedding.learn_embeddings(graphs[:temp_var])
            embs.append(emb)
        # Cluster the last learned embedding.
        centroid=kmeans(embs[-1],class_num)[0]
        result=vq(embs[-1],centroid)[0]

        acc_test,f1_test,auc_test=_best_permutation_scores(result,labels,class_num)
        try:
            spec_norm=getKlargestSigVec(adj-Probability_matrix,2)[0]
        except Exception:
            # Best effort: the norm is informational only.
            spec_norm=[]
        return 0,acc_test,spec_norm

    #choose adj matrix
    #GCN:n*n, Others: n*t*n
    # Hard-coded per-dataset weights, indexed by time step, used to fuse the
    # snapshots into one adjacency for the static models.
    # NOTE(review): the origin of these numbers is not visible here; each list
    # must have at least adj.shape[1] entries.
    attention_w = dict()
    attention_w['simulated'] = [0.05907838, 0.05908475, 0.05908549, 0.0590926,  0.05907808, 0.05907955,
                                0.05909814, 0.05977109, 0.06782387, 0.13779935, 0.16049674, 0.16051194]
    attention_w["None"] = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

    attention_w["DBLP3"] = [0.13257283, 0.11761762, 0.09207272, 0.09068672,
                            0.13907257, 0.08077238, 0.07735384, 0.08926542,
                            0.07579254, 0.10479335]
    attention_w["DBLP5"] = [0.08490249, 0.07058566, 0.07522732, 0.093458354, 0.1454923, 0.13680391, 0.08714645, 0.11872233, 0.12575606, 0.061905116]
    attention_w["Reddit"] = [0.10407882, 0.087858014, 0.103267744, 0.11869549, 0.111122675, 0.11745782, 0.09139953, 0.08404839, 0.089088246, 0.09298327]
    attention_w["Brain"] = [0.10703684, 0.08153439, 0.06159713, 0.10095412,
                             0.05306721, 0.07350213, 0.12013485, 0.08990081,
                             0.05808821, 0.10553119, 0.07727117, 0.07138196]

    if model_type=='GCN':
        if lambda_matrix is not None:
            decay_adj=torch.zeros(adj.shape[0],adj.shape[0])
            for j in range(adj.shape[0]):
                for k in range(adj.shape[2]):
                    decay_adj[j][k]=lambda_matrix[labels[j]][labels[k]]
            now_adj=adj[:,0,:].clone()

            for i in range(1,adj.shape[1]):  #time_steps
                tmp_adj=adj[:,i,:].clone()
                now_adj=(1-decay_adj)*now_adj+decay_adj*tmp_adj
            adj=now_adj
        else:
            now_adj=attention_w[dataset_name][0] * adj[:,0,:].clone()
            for i in range(1,adj.shape[1]):  #time_steps
                now_adj+= attention_w[dataset_name][i] * adj[:,i,:].clone()
            adj=now_adj

        # Static models only see the features of the last time step.
        features=features[:,-1,:]

    elif model_type=='GAT' or model_type=='GraphSage':
        now_adj=attention_w[dataset_name][0] * adj[:,0,:].clone()
        for i in range(1,adj.shape[1]):  #time_steps
            now_adj+= attention_w[dataset_name][i] * adj[:,i,:].clone()
        adj=now_adj

        # Static models only see the features of the last time step.
        features=features[:,-1,:]
    elif model_type=='EGCN':
        # EGCN receives time as the leading dimension.
        adj=torch.transpose(adj,0,1)
        features=torch.transpose(features,0,1)

    #define model
    if model_type=='GCN':
        model = GCN(nfeat=features.shape[1],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=='new_TRNNGCN':
        model = new_TRNNGCN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                nnode=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='original_RNNGCN':
        model = original_RNNGCN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=='RNNGCN_2':
        model = RNNGCN_2(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=='RNNGCN_2_preNN':
        model = RNNGCN_2_preNN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_1_preNN':
        model = RNNGCN_1_preNN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_LL':
        model = RNNGCN_LL(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=features.shape[1],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_SE_decay':
        model = RNNGCN_SE_decay(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_SE':
        model = RNNGCN_SE(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_SE_2ws':
        print("building model: %s" % model_type)
        model = RNNGCN_SE_2ws(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_SE_back_pe':
        print("building model: %s" % model_type)
        model = RNNGCN_SE_back_pe(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_SE_back':
        print("building model: %s" % model_type)
        model = RNNGCN_SE_back(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='TRNNGCN_SE':
        model = TRNNGCN_SE(nfeat=features.shape[2],
                nhid=args_hidden,
                class_num=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='TRNNGCN_SE_full':
        model = TRNNGCN_SE_full(nfeat=features.shape[2],
                nhid=args_hidden,
                class_num=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='TRNNGCN_LL':
        model = TRNNGCN_LL(nfeat=features.shape[2],
                nhid=args_hidden,
                class_num=class_num,
                dropout=args_dropout,
                tot_timestep=adj.shape[1],
                node_num=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='TRNNGCN_LL_exp':
        model = TRNNGCN_LL_exp(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                nnode=features.shape[0],
                tot_timestep=features.shape[1],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN_LL_exp':
        model = RNNGCN_LL_exp(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                tot_timestep=features.shape[1],
                use_cuda=args_cuda)
    elif model_type=='RNNGCN':
        model = RNNGCN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=='original_TRNNGCN':
        model = original_TRNNGCN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout,
                nnode=features.shape[0],
                use_cuda=args_cuda)
    elif model_type=='GCNLSTM':
        model = GCNLSTM(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=='RGCN':
        model = RGCN(nfeat=features.shape[2],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=="GAT":
        adj=dgl.from_networkx(nx.Graph(adj.numpy())) #fit in dgl
        model = GAT(nfeat=features.shape[1],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=="GraphSage":
        adj=dgl.from_networkx(nx.Graph(adj.numpy())) #fit in dgl
        model = GraphSage(nfeat=features.shape[1],
                nhid=args_hidden,
                nclass=class_num,
                dropout=args_dropout)
    elif model_type=="EGCN":
        model = EGCN(nfeat=features.shape[2],
                    nhid=args_hidden,
                    nclass=class_num,
                    device=torch.device('cpu'))
    else:
        # Fail with a clear message instead of an unbound `model` further down.
        raise ValueError("Unsupported model_type: %r" % (model_type,))

    # The SPEC/DynAERNN baselines have already returned above, so everything
    # from here on is the neural-model path.
    if args_cuda:
        if model_type != 'EGCN':
            # EGCN is built for the CPU device above and is left there.
            model=model.to(torch.device('cuda:0'))#.cuda()
            features = features.cuda()
            adj = adj.to(torch.device('cuda:0'))
            labels = labels.cuda()
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()
    #optimizer and train
    optimizer = optim.Adam(model.parameters(),
                          lr=args_lr, weight_decay=args_weight_decay)
    # Train model
    best_val=0
    # Test metrics at the epoch with the best validation accuracy so far;
    # stays None when no epoch ever beats the initial best_val of 0.
    test_best_val=None
    for epoch in range(args_epochs):
        acc_val=train(epoch, model, optimizer, features, adj, labels, idx_train, idx_val, model_type)
        if epoch % 50 == 0 :
            if model_type=='RNNGCN' or model_type=='TRNNGCN' or model_type=='new_TRNNGCN':
                print("epoch %d, Lambda:" % epoch)
                print(model.Lambda)
            if model_type=='RNNGCN_LL_exp':
                print("LL:", model.LL)
                print("lambda:", model.Lambda)
            if model_type=='RNNGCN_SE_decay':
                print("lambda:", model.Lambda)
        if acc_val>best_val:
            best_val=acc_val
            # test() returns (loss, accuracy, f1, auc) in that order.
            loss, acc, f1, auc = test(model, features, adj, labels, idx_test)
            test_best_val=[loss,acc,f1,auc]

    # Testing
    loss, acc, f1, auc = test(model, features, adj, labels, idx_test)
    test_results = [loss, acc, f1, auc]
    if test_best_val is None:
        # No epoch improved on the initial validation score (or zero epochs):
        # report the final model instead of failing on an unbound name.
        test_best_val=test_results

    SE_w = -1
    matrix_w = -1
    global SE_2ws_x
    global SE_2ws_adj

    if model_type=='RNNGCN' or model_type=='TRNNGCN' or model_type=='new_TRNNGCN':
        print(model.Lambda, end='\t')
    if model_type=='RNNGCN_SE_decay':
        SE_w = model.attention_weight
        print(model.Lambda, end='\t')
        print("attetion: ", model.attention_weight, end='\t')
        print("c_weight: ", model.c_weight, end='\t')
    if 'TRNNGCN_SE' in model_type:
        matrix_w = model.matrix_weight
    if 'RNNGCN_SE' in model_type:
        if '2ws' in model_type:
            SE_2ws_adj = model.channel_adj
            SE_2ws_x = model.channel_x
            print("adj: ", SE_2ws_adj, end='\t')
            print("x: ", SE_2ws_x, end='\t')
            SE_w = model.channel_adj
        else:
            SE_w = model.attention_weight
            print("attetion: ", model.attention_weight, end='\t')
            print("c_weight: ", model.c_weight, end='\t')
    if model_type=='RNNGCN_2':
        print(model.x_Lambda, end='\t')
        print(model.adj_Lambda, end='\t')
    if model_type=='RNNGCN_LL':
        print(model.LL, end='\t')
    if model_type=='RNNGCN_LL_exp':
        print(model.LL_index, end='\t')
        print(model.LL_softmax, end='\t')
    if model_type=='TRNNGCN_LL_exp':
        print(model.LL_index, end='\t')
        print(model.Lambda, end='\t')
    # Metrics at the best-validation epoch: accuracy, f1, auc.
    print(str(test_best_val[1])+'\t'+str(test_best_val[2])+'\t'+str(test_best_val[3]))
    try:
        # now_adj only exists for the models that fuse snapshots (GCN, GAT,
        # GraphSage); for the others this fails and the fallback is used.
        spec_norm=getKlargestSigVec(now_adj-Probability_matrix,2)[0]
    except Exception:
        spec_norm=0 #temperal adj

    if model_type == "GCN" or model_type == "GraphSage":
        print("Saving model....")
        torch.save(model.state_dict(), "./Eval_Mask_Model.tar")

    del model
    return loss, acc, spec_norm, test_results, SE_w, matrix_w


def _best_permutation_scores(result, labels, class_num):
    """Score cluster assignments under the best cluster-to-class relabelling.

    Cluster ids are arbitrary, so every permutation of ``range(class_num)`` is
    tried and the maximum of each metric is kept (each metric is maximised
    independently). Progress is printed every 10000 permutations and the final
    scores are printed tab-separated.

    Returns:
        Tuple ``(accuracy, weighted f1, weighted one-vs-rest auc)``.
    """
    one_hot_result=torch.tensor(one_hot(result,class_num))
    acc_test=0
    f1_test=0
    auc_test=0
    count=0
    for i in permutations(range(class_num)):
        count+=1
        # Permutation matrix that relabels the clusters.
        one_hot_i=one_hot(np.array(i))
        perm_result=torch.mm(one_hot_result,torch.tensor(one_hot_i))
        pred_labels=torch.argmax(perm_result,axis=1)
        acc_test = max(metrics.accuracy_score(labels,pred_labels),acc_test)
        f1_test=max(metrics.f1_score(labels, pred_labels,average='weighted'),f1_test)
        auc_test=max(metrics.roc_auc_score(one_hot(labels), perm_result,multi_class='ovr',average='weighted'),auc_test)
        if count%10000==0:
            print(count)
            print(acc_test,f1_test,auc_test)
    print(str(acc_test)+'\t'+str(f1_test)+'\t'+str(auc_test))
    return acc_test,f1_test,auc_test

### Run experiments for spectral clustering and GCN with decay rates

In [None]:
# def test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time):  
#     for times in range(sample_time):     
#         try:
#             features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix=generate_data(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector)               
#             for i in np.arange(0.0, 1.01, 0.01):
#                 file_name='uncombined'+'_'+model_type+"_" +"number_of_nodes_"+str(number_of_nodes)+'_' +"Time_steps_"+str(Time_steps)+'_'\
#                           +"class_num_"+str(class_num)+'_' +"link_inclass_prob_"+str(link_inclass_prob)+'_'\
#                           +"link_outclass_prob_"+str(link_outclass_prob)+'_'+"epsilon_vector_"+str(epsilon_vector)+'_'\
#                           +"sample_time_"+str(sample_time)+".txt"
#                 if IN_COLAB==True:
#                     summary_file = open("/content/drive/My Drive/"+file_name,"a+")
#                 else:
#                     summary_file = open(file_name,"a+")
#                 t=time.time()
#                 lambda_matrix=np.full((class_num,class_num),i)
                
#                 total_loss=0
#                 total_acc=0
#                 total_norm=[]
#                 loss, acc, specnorm = single_train_and_test(lambda_matrix,Probability_matrix,features, adj, labels, idx_train, idx_val, idx_test, model_type)

#                 summary_file.write("Weight decay: {}".format(lambda_matrix.flatten()) +
#                         "\tTest set results:" +
#                         "\tloss= {:.6f}".format(loss) + 
#                         "\taccuracy= {:.6f}".format(acc)+
#                         "\tspecnorm= {}\n".format(specnorm))
#                 summary_file.close()
#         except:
#             error=1
                
# def test_epsilon_vector_kxklambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time):  
#     for times in range(sample_time):     
#         try:
#             features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix=generate_data(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector)               
#             for i in np.arange(0.0, 1.01, 0.1):
#                 for j in np.arange(0, 1.01, 0.1):
#                     for k in np.arange(0, 1.01, 0.1):
#                         for l in np.arange(0, 1.01, 0.1):
#                             file_name='uncombined'+'_'+'kxklambda'+'_'+model_type+"_" +"number_of_nodes_"+str(number_of_nodes)+'_' +"Time_steps_"+str(Time_steps)+'_'\
#                                           +"class_num_"+str(class_num)+'_' +"link_inclass_prob_"+str(link_inclass_prob)+'_'\
#                                           +"link_outclass_prob_"+str(link_outclass_prob)+'_'+"epsilon_vector_"+str(epsilon_vector)+'_'\
#                                           +"sample_time_"+str(sample_time)+".txt"
#                             if IN_COLAB==True:
#                                 summary_file = open("/content/drive/My Drive/"+file_name,"a+")
#                             else:
#                                 summary_file = open(file_name,"a+")
#                             t=time.time()
#                             lambda_matrix=np.array([[i,j],[k,l]])
#                             total_loss=0
#                             total_acc=0
#                             total_norm=[]
#                             loss, acc, specnorm = single_train_and_test(lambda_matrix,Probability_matrix,features, adj, labels, idx_train, idx_val, idx_test, model_type)

#                             summary_file.write("Weight decay: {}".format(lambda_matrix.flatten()) +
#                                     "\tTest set results:" +
#                                     "\tloss= {:.6f}".format(loss) + 
#                                     "\taccuracy= {:.6f}".format(acc)+
#                                     "\tspecnorm= {}\n".format(specnorm))
                            
#                             summary_file.close()
#         except:
#             error=1        
            
# def test_kxk_neural_network(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time):  
#     for times in range(sample_time):     
#         features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix=generate_data(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector)               
#         for i in range(1):
#             file_name='uncombined'+'_'+'kxklambda'+'_'+model_type+"_" +"number_of_nodes_"+str(number_of_nodes)+'_' +"Time_steps_"+str(Time_steps)+'_'\
#                           +"class_num_"+str(class_num)+'_' +"link_inclass_prob_"+str(link_inclass_prob)+'_'\
#                           +"link_outclass_prob_"+str(link_outclass_prob)+'_'+"epsilon_vector_"+str(epsilon_vector)+'_'\
#                           +"sample_time_"+str(sample_time)+".txt"
#             if IN_COLAB==True:
#                 summary_file = open("/content/drive/My Drive/"+file_name,"a+")
#             else:
#                 summary_file = open(file_name,"a+")
#             t=time.time()
#             lambda_matrix=np.full((class_num,class_num),0.2)
#             #print("current matrix: {}".format(lambda_matrix))
#             total_loss=0
#             total_acc=0
#             total_norm=[]
#             loss, acc, specnorm = single_train_and_test(lambda_matrix,Probability_matrix,features, adj, labels, idx_train, idx_val, idx_test, model_type)
            
#             summary_file.write("Weight decay: {}".format(lambda_matrix.flatten()) +
#                     "\tTest set results:" +
#                     "\tloss= {:.6f}".format(loss) + 
#                     "\taccuracy= {:.6f}".format(acc)+
#                     "\tspecnorm= {}\n".format(specnorm))
#             print(i,loss,acc,specnorm)
#             #print(time.time()-t)
#             summary_file.close()


                        
# #For simulated graphs

# sample_time=100
# number_of_nodes=200
# Time_steps=500
# class_num=2
# link_inclass_prob=20/number_of_nodes/5  #when calculation , remove the link in itself
# link_outclass_prob=link_inclass_prob/20

# epsilon_vector=[10/number_of_nodes,20/number_of_nodes] # prob of changing labels



# model_type='SPEC'    #GCN, GAT, GraphSage #SPEC(DynSPEC), DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
# args_hidden = class_num
# args_dropout = 0.5
# args_lr = 0.01
# args_weight_decay = 5e-4
# args_epochs = 250
# args_no_cuda=False
# args_cuda = not args_no_cuda and torch.cuda.is_available()







# ##Different setting on simulated graphs

# # test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)



# # for number_of_nodes in [100,250,500]:
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # for link_inclass_prob in [10/number_of_nodes/5,20/number_of_nodes/5,30/number_of_nodes/5]:
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # for epsilon_vector in [[10/number_of_nodes,10/number_of_nodes],[20/number_of_nodes,20/number_of_nodes],[30/number_of_nodes,30/number_of_nodes],[40/number_of_nodes,40/number_of_nodes],[50/number_of_nodes,50/number_of_nodes],[60/number_of_nodes,60/number_of_nodes]]:
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # for Time_steps in [1000,2000,5000,10000]: #already have 500
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # for sample_time in [1,10,1000]: #already have 100
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # for epsilon_vector in [[10/number_of_nodes,20/number_of_nodes],[10/number_of_nodes,30/number_of_nodes],[20/number_of_nodes,30/number_of_nodes]]:
# #    test_epsilon_vector_onelambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)

# # # for epsilon_vector in [[10/number_of_nodes,20/number_of_nodes],[10/number_of_nodes,30/number_of_nodes],[20/number_of_nodes,30/number_of_nodes]]:
# # for epsilon_vector in [[10/number_of_nodes,40/number_of_nodes],[20/number_of_nodes,40/number_of_nodes],[30/number_of_nodes,40/number_of_nodes]]:
# #    test_epsilon_vector_kxklambda(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)


# # test_kxk_neural_network(model_type,number_of_nodes,Time_steps,class_num,link_inclass_prob,link_outclass_prob, epsilon_vector,sample_time)


### Editing datasets


In [None]:
# Build a periodic variant of the Brain dataset: the first `period` snapshots
# are tiled over the whole time axis, so every later snapshot repeats an
# earlier one. `period` is the single knob; the loop bounds and the output file
# name are derived from it.
period = 3

# Read every array once and close the archive straight away.
with np.load("Brain.npz") as new_d:
    brain_arrays = {key: new_d[key] for key in new_d.keys()}

for key, value in brain_arrays.items():
    print(key)
    print(value.shape)

adjs = brain_arrays['adjs']        # time is axis 0 (printed shape: (12, 5000, 5000))
attmats = brain_arrays['attmats']  # time is axis 1 (printed shape: (5000, 12, 20))
labels = brain_arrays['labels']

n_time = adjs.shape[0]
for start in range(period, n_time, period):
    # The final chunk may be shorter than `period` when it does not divide
    # n_time (e.g. period=5 with 12 snapshots copies 5 and then 2 snapshots).
    width = min(period, n_time - start)
    adjs[start:start + width, ...] = adjs[:width, ...]
    attmats[:, start:start + width, :] = attmats[:, :width, :]

np.savez_compressed("{}_Periodic_Brain.npz".format(period), adjs=adjs, attmats=attmats, labels=labels)

adjs
(12, 5000, 5000)
attmats
(5000, 12, 20)
labels
(5000, 10)


In [None]:
# # new_d = np.load("DBLPE.npz")
# # new_d = np.load("hospital.npz")
# new_d = np.load("DBLPE_importance.npz")
# for i in new_d.keys():
#     print(i)
#     print(new_d[i].shape)

# # print(new_d.files)
# adjs = new_d['adjs']
# # attmats = new_d['attmats']
# labels = new_d['labels']

# print(labels)

# # abnormal_indices = [3,8,9]

# # for i in abnormal_indices:
# #     print(adjs[i, ...])
# #     adj_sp = adjs[i, ...]
# #     indices = np.random.choice([True, False], adj_sp.shape, [0.1, 0.9])
# #     indices = np.tril(indices, -1)
# #     adj_sp[indices] = 0
# #     adj_sp[indices.T] = 0

# #     adjs[i, ...] = adj_sp
# #     print(adjs[i, ...])

# # np.savez_compressed("sparse_DBLPE.npz", adjs=adjs, attmats=attmats, labels=labels)
# # np.savez_compressed("sparse_DBLPE.npz", adjs=adjs, labels=labels)

In [None]:
# ! ls
# ! cp ./gpu_mem.log /content/Colab/Clustering-RGCN/
# ! cp ./3_Periodic_Brain.npz /content/Colab/Clustering-RGCN/
# ! cp ./sparse_DBLPE.npz /content/Colab/Clustering-RGCN/
! ls -al  /content/Colab/Clustering-RGCN/

# Run Experiments on Simulated and Real Datasets

### Load and Test

In [None]:
def load_real_data(dataset_name):
    """Load a real dynamic-graph dataset from its ``.npz`` file and split its nodes.

    Nodes whose summed adjacency row (over all time steps) is zero are removed
    from the graphs, the features and the labels. The remaining nodes are
    shuffled with ``random.shuffle`` and split 70% / 20% / 10% into train,
    validation and test indices.

    Args:
        dataset_name: key of the dataset; must be one of the names in the
            table below, otherwise a ``KeyError`` is raised.

    Returns:
        A 7-tuple ``(Features, Graphs, Labels, idx_train, idx_val, idx_test, [])``:
        ``Features`` and ``Graphs`` are float tensors, ``Labels`` and the three
        index tensors are long tensors, and the last item is always an empty
        list.
    """
    dataset_files = {
        "DBLP3": "DBLP3.npz",
        "DBLP5": "DBLP5.npz",
        "Brain": "Brain.npz",
        "Reddit": "reddit.npz",
        "DBLPE": "DBLPE.npz",
        "3_Periodic": "3_Periodic_Brain.npz",
        "5_Periodic": "5_Periodic_Brain.npz",
        "sparse_Brain": "sparse_Brain.npz",
        "sparse_DBLPE": "sparse_DBLPE.npz",
        "hospital": "hospital.npz",
        "political_retweet": "political_retweet.npz",
        "reality_call": "reality_call.npz",
        "DBLPE_importance": "DBLPE_importance.npz",
    }
    # Datasets for which 'attmats' is not read; identity features are used instead.
    no_atts_list = ["DBLPE", "DBLPE_importance", "sparse_DBLPE"]          # labels argmax over axis 2
    fixed_no_atts_list = ["hospital", "political_retweet", "reality_call"]  # labels argmax over axis 1
    has_attributes = (dataset_name not in fixed_no_atts_list
                      and dataset_name not in no_atts_list)

    # Pull the arrays out inside a context manager so the archive is closed
    # instead of staying open for the lifetime of the kernel.
    with np.load(dataset_files[dataset_name]) as dataset:
        raw_adjs = dataset['adjs']
        raw_labels = dataset['labels']
        raw_attmats = dataset['attmats'] if has_attributes else None

    Graphs = torch.LongTensor(raw_adjs)      # (n_time, n_node, n_node)
    Graphs = torch.transpose(Graphs, 0, 1)   # (n_node, n_time, n_node)

    # Row sums of the adjacency accumulated over all time steps; rows that
    # sum to zero identify the nodes to drop.
    d = torch.sum(torch.sum(Graphs, dim=1), dim=1)
    non_zero_index = torch.nonzero(d, as_tuple=True)[0]
    Graphs = Graphs[non_zero_index, :, :]
    Graphs = Graphs[:, :, non_zero_index]

    if dataset_name in fixed_no_atts_list:
        Labels = torch.LongTensor(np.argmax(raw_labels, axis=1))  # (n_node, num_classes) argmax
        # One identity feature matrix per time step.
        Features = torch.zeros(Graphs.shape[0], Graphs.shape[1], Graphs.shape[2])
        print("Features", Features.shape)
        for step in range(Features.shape[1]):
            Features[:, step, :] = torch.eye(Features.shape[0], Features.shape[2])
    elif dataset_name in no_atts_list:
        Labels = torch.LongTensor(np.argmax(raw_labels, axis=2))  # (n_node, n_time, num_classes) argmax
        # A single identity feature matrix (time axis of length 1).
        Features = torch.zeros(Graphs.shape[0], 1, Graphs.shape[2])
        print("Features", Features.shape)
        Features[:, 0, :] = torch.eye(Features.shape[0], Features.shape[2])
    else:
        Labels = torch.LongTensor(np.argmax(raw_labels, axis=1))  # (n_node, num_classes) argmax
        # NOTE(review): attributes pass through an integer tensor before the
        # final .float(); confirm 'attmats' is integer-valued, otherwise the
        # fractional part is presumably lost here.
        Features = torch.LongTensor(raw_attmats)  # (n_node, n_time, att_dim)
        Features = Features[non_zero_index]
    Labels = Labels[non_zero_index]

    # Shuffle the surviving nodes and split them 70 / 20 / 10.
    number_of_nodes = Graphs.shape[0]
    nodes_id = list(range(number_of_nodes))
    random.shuffle(nodes_id)

    train_end = (7 * number_of_nodes) // 10
    val_end = (9 * number_of_nodes) // 10
    idx_train = torch.LongTensor(nodes_id[:train_end])
    idx_val = torch.LongTensor(nodes_id[train_end:val_end])
    idx_test = torch.LongTensor(nodes_id[val_end:number_of_nodes])

    return Features.float(), Graphs.float(), Labels.long(), idx_train, idx_val, idx_test, []

In [None]:
def test_real_dataset():
    """Train and evaluate the current model on the loaded real dataset and log the result.

    Reads the notebook globals ``dataset_name``, ``model_type``,
    ``Probability_matrix``, ``features``, ``adj``, ``labels``, ``idx_train``,
    ``idx_val``, ``idx_test``, ``args_normalize`` and ``IN_COLAB``.

    One line with loss, accuracy and spectral norm is appended to
    ``<dataset_name>_<model_type>.txt`` (under the Drive results folder when
    running in Colab).

    Returns:
        ``(test_results, SE_w, matrix_w)`` as produced by
        ``single_train_and_test``.
    """
    file_name = dataset_name + '_' + model_type + ".txt"
    if IN_COLAB:
        summary_path = "/content/drive/My Drive/RNNGCN_results/" + file_name
    else:
        summary_path = file_name

    # Real datasets are always run without a fixed decay matrix, so the
    # logged "Weight decay" value is the constant 0.
    lambda_matrix = None

    # The file is opened before training, as before, so a bad path fails fast;
    # the context manager closes it even if training raises.
    with open(summary_path, "a+") as summary_file:
        loss, acc, specnorm, test_results, SE_w, matrix_w = single_train_and_test(
            lambda_matrix, Probability_matrix, features, adj, labels,
            idx_train, idx_val, idx_test, model_type, normalize=args_normalize)
        summary_file.write("Weight decay: {}".format(0) +
                           "\tTest set results:" +
                           "\tloss= {:.6f}".format(loss) +
                           "\taccuracy= {:.6f}".format(acc) +
                           "\tspecnorm= {}\n".format(specnorm))

    return test_results, SE_w, matrix_w

### Simulated data

### AH_GC



In [None]:
# import math

# import torch

# from torch.nn.parameter import Parameter
# from torch.nn.modules.module import Module

# class GraphConvolution_AH(Module):
#     """
#     Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
#     """

#     def __init__(self, in_features, out_features, bias=True):
#         super(GraphConvolution_AH, self).__init__()
#         self.in_features = in_features
#         self.out_features = out_features
#         self.weight = Parameter(torch.FloatTensor(in_features, out_features))
#         if bias:
#             self.bias = Parameter(torch.FloatTensor(out_features))
#         else:
#             self.register_parameter('bias', None)
#         self.reset_parameters()

#     def reset_parameters(self):
#         stdv = 1. / math.sqrt(self.weight.size(1))
#         self.weight.data.uniform_(-stdv, stdv)
#         if self.bias is not None:
#             self.bias.data.uniform_(-stdv, stdv)

#     def forward(self, AH):
#         # support = torch.mm(input, self.weight)
#         # output = torch.spmm(adj, support)
#         output = torch.spmm(AH, self.weight)
#         if self.bias is not None:
#             return output + self.bias
#         else:
#             return output

#     def __repr__(self):
#         return self.__class__.__name__ + ' (' \
#                + str(self.in_features) + ' -> ' \
#                + str(self.out_features) + ')'

### Various Baselines

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import math
from torch.nn.parameter import Parameter
from layers import GraphConvolution

from torch.autograd import Variable
from dgl.nn.pytorch.conv import GraphConv,GATConv,SAGEConv
import dgl

def reset_parameters(self):
    """Re-draw ``self.weight`` in place, uniformly from ``[-b, b]``.

    The bound is ``b = 1 / sqrt(self.weight.size(1))``, i.e. it depends on the
    size of the weight's second dimension.
    """
    second_dim = self.weight.size(1)
    bound = 1. / math.sqrt(second_dim)
    self.weight.data.uniform_(-bound, bound)

class RNNGCN_1_preNN(nn.Module):
    """Two-layer GCN on a decayed adjacency, with an MLP applied to the node features first.

    The adjacency snapshots are blended with a single learnable decay rate
    ``adj_Lambda``; the features of the last time step are passed through a
    three-layer MLP (``preNN0``..``preNN2``) before the graph convolutions.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes (also the MLP output dimension).
        dropout: dropout probability applied between the two graph convolutions.
        use_cuda: stored on the instance for callers; the forward pass itself
            follows the device of its inputs.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, use_cuda=False):
        super(RNNGCN_1_preNN, self).__init__()

        nfeat_hid = nfeat
        nfeat_out = nclass

        self.class_num = nclass
        self.preNN0 = torch.nn.Sequential(nn.Linear(nfeat, nfeat_hid))
        self.preNN1 = torch.nn.Sequential(nn.Linear(nfeat_hid, nfeat_hid))
        self.preNN2 = torch.nn.Sequential(nn.Linear(nfeat_hid, nfeat_out))

        self.gc1 = GraphConvolution(nfeat_out, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

        # uniform_(0.2, 0.2) is a constant initialisation to 0.2.
        self.adj_Lambda = Parameter(torch.FloatTensor(1))
        self.adj_Lambda.data.uniform_(0.2, 0.2)

        # Not used by forward(); kept so parameter lists and checkpoints stay
        # compatible.
        self.x_Lambda = Parameter(torch.FloatTensor(1))
        self.x_Lambda.data.uniform_(0.2, 0.2)

        self.use_cuda = use_cuda

    def forward(self, feats, adj):
        """Return per-node log-probabilities.

        Args:
            feats: node features indexed as ``feats[:, t, :]`` per time step.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        tot_timestep = adj.shape[1]

        # Only the last time step's embedding feeds the GCN, so the MLP is
        # evaluated on that step alone instead of filling a buffer for all
        # steps (which also required choosing a device by hand).
        now_x = F.relu(self.preNN0(feats[:, -1, :]))
        now_x = F.relu(self.preNN1(now_x))
        now_x = self.preNN2(now_x)

        # Exponentially decayed blend of the adjacency snapshots.
        now_adj = adj[:, 0, :].clone()
        for i in range(1, tot_timestep):  # time_steps
            now_adj = (1 - self.adj_Lambda) * now_adj + self.adj_Lambda * adj[:, i, :]

        one_out = F.relu(self.gc1(now_x, now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)

        return F.log_softmax(one_out, dim=1)


class RNNGCN_2_preNN(nn.Module):
    """Two-layer GCN with separate learnable decay rates for features and adjacency.

    Node features of every time step are embedded by a three-layer MLP
    (``preNN0``..``preNN2``); the embeddings and the adjacency snapshots are
    then each blended over time with their own decay rate (``x_Lambda`` and
    ``adj_Lambda``) before two graph convolutions.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes (also the MLP output dimension).
        dropout: dropout probability applied between the two graph convolutions.
        use_cuda: stored on the instance for callers; the forward pass itself
            follows the device of its inputs.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, use_cuda=False):
        super(RNNGCN_2_preNN, self).__init__()

        nfeat_hid = nfeat
        nfeat_out = nclass

        self.preNN0 = torch.nn.Sequential(nn.Linear(nfeat, nfeat_hid))
        self.preNN1 = torch.nn.Sequential(nn.Linear(nfeat_hid, nfeat_hid))
        self.preNN2 = torch.nn.Sequential(nn.Linear(nfeat_hid, nfeat_out))

        self.gc1 = GraphConvolution(nfeat_out, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

        # uniform_(0.2, 0.2) is a constant initialisation to 0.2.
        self.adj_Lambda = Parameter(torch.FloatTensor(1))
        self.adj_Lambda.data.uniform_(0.2, 0.2)

        self.x_Lambda = Parameter(torch.FloatTensor(1))
        self.x_Lambda.data.uniform_(0.2, 0.2)

        self.use_cuda = use_cuda

    def forward(self, feats, adj):
        """Return per-node log-probabilities.

        Args:
            feats: node features indexed as ``feats[:, t, :]`` per time step;
                must provide at least as many time steps as ``adj``.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        tot_timestep = adj.shape[1]

        # Embed all used time steps at once (nn.Linear acts on the last
        # dimension). The result has the MLP's output width; the previous
        # buffer was allocated with the input width and broke whenever
        # nfeat != nclass.
        x_out = F.relu(self.preNN0(feats[:, :tot_timestep, :]))
        x_out = F.relu(self.preNN1(x_out))
        x_out = self.preNN2(x_out)

        now_adj = adj[:, 0, :].clone()
        now_x = x_out[:, 0, :]

        # Two independent exponential decays: one over embeddings, one over
        # adjacency snapshots.
        for i in range(1, tot_timestep):  # time_steps
            now_x = (1 - self.x_Lambda) * now_x + self.x_Lambda * x_out[:, i, :]
            now_adj = (1 - self.adj_Lambda) * now_adj + self.adj_Lambda * adj[:, i, :]  # weight decay

        one_out = F.relu(self.gc1(now_x, now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)

        return F.log_softmax(one_out, dim=1)

class RNNGCN_2(nn.Module):
    """Two-layer GCN with separate learnable decay rates for raw features and adjacency.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes.
        dropout: dropout probability applied between the two graph convolutions.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super(RNNGCN_2, self).__init__()

        self.h_n = None  # not used by forward(); kept for callers that read it
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

        # uniform_(0.2, 0.2) is a constant initialisation to 0.2.
        self.adj_Lambda = Parameter(torch.FloatTensor(1))
        self.adj_Lambda.data.uniform_(0.2, 0.2)

        self.x_Lambda = Parameter(torch.FloatTensor(1))
        self.x_Lambda.data.uniform_(0.2, 0.2)

    def forward(self, feats, adj):
        """Return per-node log-probabilities.

        Args:
            feats: node features indexed as ``feats[:, t, :]`` per time step.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        tot_timestep = adj.shape[1]
        now_adj = adj[:, 0, :].clone()
        now_x = feats[:, 0, :].clone()

        # Two independent exponential decays: one over features, one over
        # adjacency snapshots.
        for i in range(1, tot_timestep):  # time_steps
            now_x = (1 - self.x_Lambda) * now_x + self.x_Lambda * feats[:, i, :]
            now_adj = (1 - self.adj_Lambda) * now_adj + self.adj_Lambda * adj[:, i, :]  # weight decay

        one_out = F.relu(self.gc1(now_x, now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)

        return F.log_softmax(one_out, dim=1)

class RNNGCN_RNN(nn.Module):
    """Two-layer GCN on a decayed adjacency, with node features summarised by an RNN.

    A 2-layer ``nn.RNN`` (``batch_first=True``, nodes as the batch dimension)
    runs over each node's feature sequence; its output at the last time step
    is the input of the first graph convolution. The RNN hidden size equals
    ``nfeat`` so it matches the input width of ``gc1``.

    Args:
        nfeat: input feature dimension (and RNN hidden size).
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes.
        dropout: dropout probability applied between the two graph convolutions.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super(RNNGCN_RNN, self).__init__()

        # Previously undefined, so construction raised NameError. Chosen equal
        # to nfeat, as in the sibling models, so the RNN output fits gc1.
        nfeat_hid = nfeat

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

        # uniform_(0.2, 0.2) is a constant initialisation to 0.2.
        self.Lambda = Parameter(torch.FloatTensor(1))
        self.Lambda.data.uniform_(0.2, 0.2)
        self.x_rnn = nn.RNN(input_size=nfeat,
                            hidden_size=nfeat_hid,
                            num_layers=2,
                            batch_first=True)

    def forward(self, x, adj):
        """Return per-node log-probabilities.

        Args:
            x: node features, ``(n_node, n_time, nfeat)`` as required by the
                batch-first RNN.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        # No initial hidden state is passed: nn.RNN then starts from zeros.
        # (The old code passed a local `h_n` before it was assigned.)
        x_out, _ = self.x_rnn(x)

        # Exponentially decayed blend of the adjacency snapshots.
        now_adj = adj[:, 0, :].clone()
        for i in range(1, adj.shape[1]):  # time_steps
            now_adj = (1 - self.Lambda) * now_adj + self.Lambda * adj[:, i, :]  # weight decay

        # Use the RNN summary of the sequence; previously the RNN output was
        # computed and then ignored in favour of the raw last-step features.
        one_out = F.relu(self.gc1(x_out[:, -1, :], now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)

        return F.log_softmax(one_out, dim=1)

# class RNNGCN_LL(nn.Module):
#     def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, use_cuda=False):
#         super(RNNGCN_LL, self).__init__()

#         self.tot_timestep = tot_timestep

#         self.LL = Parameter(torch.FloatTensor(self.tot_timestep))
#         self.LL.data.uniform_(0.2, 0.2)
        
#         self.gc1 = GraphConvolution(nfeat, nhid)
#         self.gc2 = GraphConvolution(nhid, nclass)
#         self.dropout = dropout

#         self.use_cuda=use_cuda

#     def forward(self, feats, adj):
#         #out=[]
#         # x_in = Variable(torch.Tensor().type(torch.FloatTensor))

#         tot_timestep = adj.shape[1]
#         assert tot_timestep == self.tot_timestep, "timestep not consistent"
        
#         # now_adj = adj[:,0,:].clone()

#         now_adj = torch.zeros_like(adj[:,0,:])
#         now_x = feats[:,-1,:].clone()

#         for i in range(0, tot_timestep):  #time_steps
#             now_adj += self.LL[i]*adj[:,i,:]  # weighted adj matrix

#         # print("now_x: ", now_x.shape)
#         # print("now_adj:", now_adj.shape)
#         one_out=self.gc1(now_x, now_adj)
#         one_out=F.relu(one_out)

#         one_out = F.dropout(one_out, self.dropout)
#         one_out = self.gc2(one_out, now_adj)

#         return F.log_softmax(one_out, dim=1)


class original_RNNGCN(nn.Module):
    """Two-layer GCN on an adjacency blended over time with one learnable decay rate.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes.
        dropout: dropout probability applied between the two graph convolutions.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super(original_RNNGCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

        # uniform_(0.2, 0.2) is a constant initialisation to 0.2.
        self.Lambda = Parameter(torch.FloatTensor(1))
        self.Lambda.data.uniform_(0.2, 0.2)

    def forward(self, x, adj):
        """Return per-node log-probabilities.

        Args:
            x: node features; only the last time step ``x[:, -1, :]`` is used.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        # Exponentially decayed blend of the adjacency snapshots.
        now_adj = adj[:, 0, :].clone()
        for i in range(1, adj.shape[1]):  # time_steps
            now_adj = (1 - self.Lambda) * now_adj + self.Lambda * adj[:, i, :]  # weight decay

        one_out = F.relu(self.gc1(x[:, -1, :], now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)

        return F.log_softmax(one_out, dim=1)

# class new_TRNNGCN(nn.Module):
#     def __init__(self, nfeat, nhid, nclass, dropout,nnode,use_cuda=False):
#         super(new_TRNNGCN, self).__init__()

#         self.gc1 = GraphConvolution_AH(nfeat, nhid)
#         self.gc2 = GraphConvolution(nhid, nclass)
        
#         self.dropout = dropout
#         self.Lambda = Parameter(torch.FloatTensor(nclass,nclass))
#         self.Lambda.data.uniform_(0.5, 0.5)
#         self.use_cuda=use_cuda
        
#         y=torch.randint(0,nclass,(nnode,1)).flatten()
        
#         if self.use_cuda:
#             self.H = torch.zeros(nnode, nclass).cuda()
#         else:
#             self.H = torch.zeros(nnode, nclass)
#         self.H[range(self.H.shape[0]), y]=1
  
        
#     def forward(self, x, adj):

#         w=self.Lambda.data
#         w=w.clamp(0,1)
#         self.Lambda.data=w
#         if self.use_cuda:
#             decay_adj=torch.mm(torch.mm(self.H,self.Lambda),self.H.T).cuda()
#         else:
#             decay_adj=torch.mm(torch.mm(self.H,self.Lambda),self.H.T)
#         # -- H is theta

#         tot_timestep = adj.shape[1]
#         now_adj = adj[:,0,:].clone()
#         now_x = x[:,0,:].clone()
#         now_AH = torch.spmm(now_adj, now_x)

#         # print("now_AH: ", now_AH.shape)
#         # print("decay_adj: ", (1-decay_adj).shape)
#         for i in range(1, tot_timestep):  #time_steps
#             next_AH = torch.spmm(adj[:, i, :], x[:, i, :])
#             now_AH = torch.spmm((1-decay_adj), now_AH) + torch.spmm(decay_adj, next_AH)
#             # now_adj=(1-self.Lambda)*now_adj+self.Lambda*adj[:,i,:]  #weight decay

#             now_adj=(1-decay_adj)*now_adj+decay_adj*adj[:,i,:]       
#         # now_adj=adj[:,0,:].clone()#torch.zeros(adj.shape[0], adj.shape[2])
#         # for i in range(1,adj.shape[1]):  #time_steps
#         #     now_adj=(1-decay_adj)*now_adj+decay_adj*adj[:,i,:]
#         # del decay_adj


#         # one_out=F.relu(self.gc1(x[:,-1,:],now_adj))
#         one_out=F.relu(self.gc1(now_AH))

#         one_out = F.dropout(one_out, self.dropout)
#         one_out = self.gc2(one_out,now_adj)
#         # output=F.log_softmax(one_out, dim=1)
#         output=F.softmax(one_out, dim=1)

#         del self.H
#         self.H = output.clone().detach()
        
#         del now_adj
#         return output


class original_TRNNGCN(nn.Module):
    """Two-layer GCN whose adjacency decay rate depends on the classes of the two endpoints.

    ``Lambda`` is an ``nclass x nclass`` matrix of decay rates. ``H`` is a
    one-hot node-to-class membership matrix: it starts from random classes and
    is replaced after every forward pass by the argmax of the model's own
    output. ``H`` is a plain attribute, not a parameter or buffer.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes.
        dropout: dropout probability applied between the two graph convolutions.
        nnode: number of nodes (rows of ``H``).
        use_cuda: if true, the initial ``H`` is created on the GPU.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, nnode, use_cuda=False):
        super(original_TRNNGCN, self).__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout
        # uniform_(0.5, 0.5) is a constant initialisation to 0.5.
        self.Lambda = Parameter(torch.FloatTensor(nclass, nclass))
        self.Lambda.data.uniform_(0.5, 0.5)
        self.use_cuda = use_cuda

        # Random initial class assignment, stored one-hot.
        y = torch.randint(0, nclass, (nnode, 1)).flatten()
        initial_H = torch.zeros(nnode, nclass)
        self.H = initial_H.cuda() if self.use_cuda else initial_H
        self.H[range(self.H.shape[0]), y] = 1

    def forward(self, x, adj):
        """Return per-node log-probabilities and refresh ``self.H`` from them.

        Args:
            x: node features; only the last time step ``x[:, -1, :]`` is used.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        # Keep the decay rates inside [0, 1].
        self.Lambda.data.clamp_(0, 1)

        # Node-pair decay rates: entry (u, v) is Lambda[class(u), class(v)].
        # H follows Lambda's device rather than the use_cuda flag.
        membership = self.H.to(self.Lambda.device)
        decay_adj = torch.mm(torch.mm(membership, self.Lambda), membership.T)

        now_adj = adj[:, 0, :].clone()
        for i in range(1, adj.shape[1]):  # time_steps
            now_adj = (1 - decay_adj) * now_adj + decay_adj * adj[:, i, :]

        one_out = F.relu(self.gc1(x[:, -1, :], now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)
        output = F.log_softmax(one_out, dim=1)

        # Replace the membership matrix with the one-hot predicted classes.
        y = torch.argmax(output, dim=1)
        H_shape = self.H.shape
        new_H = torch.zeros(H_shape, device=output.device)
        new_H[range(H_shape[0]), y] = 1
        self.H = new_H
        return output
        
class TRNNGCN_LL_exp(nn.Module):
    """Two-layer GCN on an adjacency weighted by matrix powers of a class-pair rate matrix.

    ``Lambda`` (``nclass x nclass``) is expanded through the one-hot membership
    matrix ``theta`` into a node-pair matrix; the powers of that matrix plus
    identity, divided elementwise by their sum, weight the adjacency
    snapshots. Snapshots are visited in the order given by sorting ``LL``
    (largest first). ``theta`` starts from random classes and is replaced
    after every forward pass by the argmax of the model's own output.

    Args:
        nfeat: input feature dimension.
        nhid: hidden dimension of the first graph convolution.
        nclass: number of classes.
        dropout: dropout probability applied between the two graph convolutions.
        nnode: number of nodes.
        tot_timestep: number of adjacency snapshots that are combined.
        use_cuda: if true, the initial ``theta`` is created on the GPU.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, nnode, tot_timestep, use_cuda=False):
        super(TRNNGCN_LL_exp, self).__init__()
        self.nclass = nclass
        self.nnode = nnode
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout
        # uniform_(0.5, 0.5) is a constant initialisation to 0.5.
        self.Lambda = Parameter(torch.FloatTensor(nclass, nclass))
        self.Lambda.data.uniform_(0.5, 0.5)

        # Only the ordering of LL is used in forward() (through topk indices).
        self.LL = Parameter(torch.FloatTensor(tot_timestep))
        self.LL.data.uniform_()
        self.tot_timestep = tot_timestep

        self.use_cuda = use_cuda

        # theta is the membership matrix: random initial classes, one-hot.
        y = torch.randint(0, nclass, (nnode, 1)).flatten()
        initial_theta = torch.zeros(nnode, nclass)
        self.theta = initial_theta.cuda() if self.use_cuda else initial_theta
        self.theta[range(self.theta.shape[0]), y] = 1

    def forward(self, x, adj):
        """Return per-node log-probabilities and refresh ``self.theta`` from them.

        Also stores ``self.LL_index`` (snapshot order) and ``self.LL_softmax``
        (the list of per-step weight matrices) for inspection.

        Args:
            x: node features; only the last time step ``x[:, -1, :]`` is used.
            adj: adjacency snapshots indexed as ``adj[:, t, :]`` per time step.

        Returns:
            ``log_softmax`` over dim 1 of the second graph convolution's output.
        """
        # Everything below follows Lambda's device; the previous hard-coded
        # .cuda() calls failed on CPU-only runs even with use_cuda=False.
        device = self.Lambda.device
        theta = self.theta.to(device)
        lambda_matrix = torch.mm(torch.mm(theta, self.Lambda), theta.T)

        now_adj = torch.zeros_like(adj[:, 0, :])
        mem_reach = self.tot_timestep

        self.LL_index = torch.topk(self.LL, k=mem_reach)[1]

        # Loop-invariant base of the matrix powers.
        base = lambda_matrix + torch.eye(self.nnode, device=device, dtype=lambda_matrix.dtype)

        # Normaliser: sum of base^0 .. base^(mem_reach - 1).
        mem_sum = 0
        for i in range(0, mem_reach):
            mem_sum = mem_sum + torch.matrix_power(base, i)

        self.LL_softmax = []
        for i in range(0, mem_reach):  # time_steps
            # Elementwise ratio; the first visited snapshot gets the highest power.
            LL_exp = torch.matrix_power(base, mem_reach - i) / mem_sum
            self.LL_softmax.append(LL_exp)
            LL_i = self.LL_index[i]
            now_adj = now_adj + LL_exp * adj[:, LL_i, :]  # weighted adj matrix

        one_out = F.relu(self.gc1(x[:, -1, :], now_adj))
        # training=self.training: F.dropout defaults to training=True, which
        # would keep dropping units during evaluation.
        one_out = F.dropout(one_out, self.dropout, training=self.training)
        one_out = self.gc2(one_out, now_adj)
        output = F.log_softmax(one_out, dim=1)

        # Replace the membership matrix with the one-hot predicted classes.
        y = torch.argmax(output, dim=1)
        theta_shape = self.theta.shape
        new_theta = torch.zeros(theta_shape, device=output.device)
        new_theta[range(theta_shape[0]), y] = 1
        self.theta = new_theta
        return output


### RNNGCN_SE

In [None]:
class RNNGCN_SE(nn.Module):
    """Squeeze-and-excitation attention over per-timestep graph embeddings.

    Every timestep is treated as a channel: a two-layer graph network
    (``gc1`` -> ReLU -> dropout -> ``gc2``) embeds each snapshot, the mean of
    each embedding is the "squeeze" descriptor, and a bottleneck MLP
    (``W1`` -> ReLU -> ``W2`` -> sigmoid -> softmax) produces one attention
    weight per timestep.  The embeddings are blended with those weights and
    passed through ``last_linear``.

    ``GraphConvolution`` and the global ``dataset_name`` are defined elsewhere
    in the notebook.

    Args:
        nfeat: input feature width handed to ``gc1``.
        nhid: hidden width of the graph layers.
        nclass: number of classes (stored as ``class_num``).
        dropout: dropout probability applied between ``gc1`` and ``gc2``.
        tot_timestep: number of snapshots the model expects.
        node_num: number of nodes (stored; printed at construction).
        use_cuda: stored for interface compatibility; tensors created in
            ``forward`` follow the device of the inputs.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
        super(RNNGCN_SE, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = nclass

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 0.5  # bottleneck ratio of the excitation MLP
        self.attention_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (nclass, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.W1 = nn.Linear(self.C, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C)
        # NOTE(review): the output width is nhid, whereas the sibling models
        # project to the number of classes -- confirm this is intended.
        self.last_linear = nn.Linear(nhid, nhid)

    def forward(self, feats, adj):
        """Return log-softmax scores for every node.

        Args:
            feats: features indexed as ``feats[:, t, :]``.  For datasets whose
                name contains ``"DBLPE"`` exactly one snapshot is required;
                otherwise one snapshot per adjacency timestep.
            adj: adjacency snapshots indexed as ``adj[:, t, :]``;
                ``adj.shape[1]`` must equal ``self.tot_timestep``.

        Returns:
            ``log_softmax`` over dim 1 of the ``last_linear`` output.

        Side effects:
            Stores the sigmoid gate in ``self.c_weight`` and the softmax
            attention over timesteps in ``self.attention_weight``.
        """
        tot_timestep = adj.shape[1]

        if "DBLPE" in dataset_name:
            assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
        else:
            assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"

        assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)

        mem_reach = self.tot_timestep

        # Datasets listed here (and every DBLPE variant) always use the last
        # feature snapshot.
        # NOTE(review): RNNGCN_SE_2ws lists "political_retweet" where this list
        # has "retweet" -- confirm which dataset name is correct.
        fixed_no_atts_list = ["hospital", "reality_call", "retweet"]
        use_last_feats = ("DBLPE" in dataset_name) or (dataset_name in fixed_no_atts_list)

        # Consider timesteps as channels: keep each embedding on its own device
        # instead of staging it through a CPU buffer.
        snapshot_outs = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            now_x = feats[:, -1, :] if use_last_feats else feats[:, i, :]

            one_out = F.relu(self.gc1(now_x, now_adj))
            # F.dropout defaults to training=True; tie it to the module mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)
            snapshot_outs.append(one_out)

        tensor_u = torch.stack(snapshot_outs, dim=0)
        # Squeeze: global average pooling of every snapshot embedding.
        tensor_c = torch.stack([snap.mean() for snap in snapshot_outs])

        # Excitation
        c_weight = F.relu(self.W1(tensor_c))
        c_weight = torch.sigmoid(self.W2(c_weight))  # or softmax
        self.c_weight = c_weight

        f_weight = F.softmax(c_weight, dim=0)

        # Attention-weighted sum of the snapshot embeddings.
        out = (f_weight.view(-1, 1, 1) * tensor_u).sum(dim=0)

        self.attention_weight = f_weight
        out = self.last_linear(out)

        torch.cuda.empty_cache()

        return F.log_softmax(out, dim=1)

### RNNGCN_SE_back

In [None]:
# Variant that ignores dynamic attributes: only the last feature snapshot is used.

class RNNGCN_SE_back(nn.Module):
    """Squeeze-and-excitation attention used to blend adjacency snapshots.

    ``gc1``/``gc2`` embed every snapshot (always with the last feature
    snapshot) to obtain one pooled descriptor per timestep; a bottleneck MLP
    turns the descriptors into softmax attention weights; the adjacency
    snapshots are blended with those weights and a second graph network
    (``gc3`` -> ReLU -> dropout -> ``gc4`` -> ``last_linear``) classifies the
    nodes on the blended graph.

    ``GraphConvolution`` is defined elsewhere in the notebook.

    Args:
        nfeat: input feature width handed to ``gc1``/``gc3``.
        nhid: hidden width of the graph layers.
        nclass: number of output classes.
        dropout: dropout probability.
        tot_timestep: number of adjacency snapshots to attend over.
        node_num: number of nodes (stored; printed at construction).
        use_cuda: stored for interface compatibility; tensors created in
            ``forward`` follow the device of the inputs.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
        super(RNNGCN_SE_back, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = nclass

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 0.5  # bottleneck ratio of the excitation MLP
        self.attention_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (nclass, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)

        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)
        self.W1 = nn.Linear(self.C, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C)
        # Use the constructor argument; the previous code read a name
        # `class_num` that is not defined in this scope.
        self.last_linear = nn.Linear(nhid, nclass)

    def forward(self, feats, adj):
        """Return log-softmax class scores for every node.

        Args:
            feats: features indexed as ``feats[:, t, :]``; only the last
                snapshot ``feats[:, -1, :]`` is used.
            adj: adjacency snapshots indexed as ``adj[:, t, :]``; the first
                ``self.tot_timestep`` snapshots are read.

        Returns:
            ``log_softmax`` over dim 1 of the ``last_linear`` output.

        Side effects:
            Stores the sigmoid gate in ``self.c_weight`` and the softmax
            attention over timesteps in ``self.attention_weight``.
        """
        mem_reach = self.tot_timestep

        # For evaluation (with GCN): every timestep sees the last features.
        last_x = feats[:, -1, :]

        # Squeeze: one pooled scalar per timestep.
        pooled = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            one_out = F.relu(self.gc1(last_x, now_adj))
            # F.dropout defaults to training=True; tie it to the module mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)
            pooled.append(torch.mean(one_out))
        tensor_c = torch.stack(pooled)

        # Excitation
        c_weight = F.relu(self.W1(tensor_c))
        c_weight = torch.sigmoid(self.W2(c_weight))  # or softmax
        self.c_weight = c_weight

        f_weight = F.softmax(c_weight, dim=0)

        # Attention-weighted blend of the adjacency snapshots, built on the
        # same device as `adj`.
        adj_out = torch.zeros_like(adj[:, 0, :])
        for i in range(mem_reach):
            adj_out = adj_out + torch.mul(f_weight[i], adj[:, i, :])

        re_out = self.gc3(last_x, adj_out)
        re_out = F.relu(re_out)
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, adj_out)

        re_out = self.last_linear(re_out)

        self.attention_weight = f_weight

        torch.cuda.empty_cache()

        return F.log_softmax(re_out, dim=1)

In [None]:
# # Disabled variant: aggregates both attributes and adjacencies over time, sharing a single set of attention weights.

# class RNNGCN_SE_back(nn.Module):
#     def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
#         super(RNNGCN_SE_back, self).__init__()
#         self.tot_timestep = tot_timestep
#         self.node_num = node_num
#         self.nhid = nhid
#         self.class_num = nclass

#         self.dropout = dropout
#         self.use_cuda=use_cuda
#         self.C = self.tot_timestep # numbers of channel
#         self.r = 0.5
#         self.attention_weight = None
#         self.hid_C = int(self.C*self.r) + 1

#         print("nhid:", nhid)
#         print("nfeat:", nfeat)
#         print("class_num = %d, node_num = %d" % (nclass, node_num))
#         self.gc1 = GraphConvolution(nfeat, nhid)
#         self.gc2 = GraphConvolution(nhid, nhid)

#         self.gc3 = GraphConvolution(nfeat, nhid)
#         self.gc4 = GraphConvolution(nhid, nhid)        
#         # self.global_pooling = nn.AvgPool2d(self.node_num)
#         self.W1 = nn.Linear(self.C, self.hid_C)
#         self.W2 = nn.Linear(self.hid_C, self.C)
#         self.last_linear = nn.Linear(nhid, class_num)

#         # self.Lambda = Parameter(torch.FloatTensor(1))
#         # self.Lambda.data.uniform_()        
#         # self.Lambda = Parameter(torch.FloatTensor(1))
#         # torch.nn.init.normal_(self.Lambda.data, mean=0.0, std=1.0)
#         # print("lambda:", self.Lambda.data)

#     def forward(self, feats, adj):
#         tot_timestep = adj.shape[1]

#         # if "DBLPE" in dataset_name:
#         #     # print("feats:", feats.shape, "adj:", adj.shape) 
#         #     # feats = feats[:, :tot_timestep, :]  
#         #     assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
#         # else:
#         #     assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"
        
#         # assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)
     
#         mem_reach = self.tot_timestep

#         # Consider timesteps as channels:
#         list_u = torch.zeros((self.tot_timestep, self.node_num, self.nhid))
#         list_c = torch.zeros(self.tot_timestep)

#         for i in range(0, mem_reach):  #time_steps
#             now_adj = adj[:,i,:]
#             if "DBLPE" in dataset_name:
#                 now_x = feats[:, -1, :] # identity matrix
#             else:
#                 now_x = feats[:,i,:]
#             one_out=self.gc1(now_x, now_adj)
#             one_out = F.relu(one_out)
#             one_out = F.dropout(one_out, self.dropout)
#             one_out = self.gc2(one_out, now_adj)
#             # squeezing:
#             list_u[i, ...] = one_out
#             # average pooling
#             pooling_out = torch.mean(one_out)
#             list_c[i] = pooling_out
#             # del now_x, now_adj
#             # one_out: node_num X tot_timestep X node_num

#         tensor_u = list_u.cuda()
#         tensor_c = list_c.cuda()

#         # print("tensor_u: ", tensor_u.shape)
#         # print("tensor_c: ", tensor_c.shape)

#         # exitation
#         c_weight = F.relu(self.W1(tensor_c))
#         c_weight = torch.sigmoid(self.W2(c_weight)) # or softmax
#         self.c_weight = c_weight

#         f_weight = c_weight.clone()
          
#         f_weight = F.softmax(f_weight, dim=0)

#         adj_out = torch.zeros_like(adj[:,0,:]).cuda()
#         for i in range(mem_reach):        
#             adj_out += torch.mul(f_weight[i], adj[:,i,:])
        
#         x_out = torch.zeros_like(feats[:,0,:]).cuda()
#         for i in range(mem_reach):        
#             x_out += torch.mul(f_weight[i], feats[:,i,:])            
#         # out = torch.mul(c_weight, tensor_u)
        
#         re_adj = adj_out
#         # re_x = feats[:, -1, :]
#         re_x = x_out

#         re_out = self.gc3(re_x, re_adj)
#         # re_out = self.gc1(re_x, re_adj)
#         re_out = F.relu(re_out)
#         re_out = F.dropout(re_out, self.dropout)
#         re_out = self.gc4(re_out, re_adj)
#         # re_out = self.gc2(re_out, re_adj)        
        
#         re_out = self.last_linear(re_out)
        
#         self.attention_weight = f_weight
#         del tensor_u
#         del tensor_c
#         del list_u
#         del list_c

#         torch.cuda.empty_cache()

#         return F.log_softmax(re_out, dim=1)

### Transformer

In [None]:
# class RNNGCN_SE_trans(nn.Module):
#     def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
#         super(RNNGCN_SE_trans, self).__init__()
#         self.tot_timestep = tot_timestep
#         self.node_num = node_num
#         self.nhid = nhid
#         self.class_num = nclass

#         self.dropout = dropout
#         self.use_cuda=use_cuda
#         self.C = self.tot_timestep # numbers of channel
#         self.r = 0.5
#         self.attention_weight = None
#         self.hid_C = int(self.C*self.r) + 1

#         print("nhid:", nhid)
#         print("nfeat:", nfeat)
#         print("class_num = %d, node_num = %d" % (nclass, node_num))
#         self.gc1 = GraphConvolution(nfeat, nhid)
#         self.gc2 = GraphConvolution(nhid, nhid)

#         self.gc3 = GraphConvolution(nfeat, nhid)
#         self.gc4 = GraphConvolution(nhid, nhid)        
#         # self.global_pooling = nn.AvgPool2d(self.node_num)
#         self.W1 = nn.Linear(self.C, self.hid_C)
#         self.W2 = nn.Linear(self.hid_C, self.C)
#         self.last_linear = nn.Linear(nhid, class_num)

#         # self.Lambda = Parameter(torch.FloatTensor(1))
#         # self.Lambda.data.uniform_()        
#         # self.Lambda = Parameter(torch.FloatTensor(1))
#         # torch.nn.init.normal_(self.Lambda.data, mean=0.0, std=1.0)
#         # print("lambda:", self.Lambda.data)

#     def forward(self, feats, adj):
#         tot_timestep = adj.shape[1]

#         # if "DBLPE" in dataset_name:
#         #     # print("feats:", feats.shape, "adj:", adj.shape) 
#         #     # feats = feats[:, :tot_timestep, :]  
#         #     assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
#         # else:
#         #     assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"
        
#         # assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)
     
#         mem_reach = self.tot_timestep

#         # Consider timesteps as channels:
#         list_u = torch.zeros((self.tot_timestep, self.node_num, self.nhid))
#         list_c = torch.zeros(self.tot_timestep)

#         for i in range(0, mem_reach):  #time_steps
#             now_adj = adj[:,i,:]
#             if "DBLPE" in dataset_name:
#                 now_x = feats[:, -1, :] # identity matrix
#             else:
#                 now_x = feats[:,i,:]
#             one_out=self.gc1(now_x, now_adj)
#             one_out = F.relu(one_out)
#             one_out = F.dropout(one_out, self.dropout)
#             one_out = self.gc2(one_out, now_adj)
#             # squeezing:
#             list_u[i, ...] = one_out
#             # average pooling
#             pooling_out = torch.mean(one_out)
#             list_c[i] = pooling_out
#             # del now_x, now_adj
#             # one_out: node_num X tot_timestep X node_num

#         tensor_u = list_u.cuda()
#         tensor_c = list_c.cuda()

#         # print("tensor_u: ", tensor_u.shape)
#         # print("tensor_c: ", tensor_c.shape)

#         # exitation
#         c_weight = F.relu(self.W1(tensor_c))
#         c_weight = torch.sigmoid(self.W2(c_weight)) # or softmax
#         self.c_weight = c_weight

#         f_weight = c_weight.clone()
          
#         f_weight = F.softmax(f_weight, dim=0)

#         adj_out = torch.zeros_like(adj[:,0,:]).cuda()
#         for i in range(mem_reach):        
#             adj_out += torch.mul(f_weight[i], adj[:,i,:])
        
#         x_out = torch.zeros_like(feats[:,0,:]).cuda()
#         for i in range(mem_reach):        
#             x_out += torch.mul(f_weight[i], feats[:,i,:])            
#         # out = torch.mul(c_weight, tensor_u)
        
#         re_adj = adj_out
#         # re_x = feats[:, -1, :]
#         re_x = x_out

#         re_out = self.gc3(re_x, re_adj)
#         # re_out = self.gc1(re_x, re_adj)
#         re_out = F.relu(re_out)
#         re_out = F.dropout(re_out, self.dropout)
#         re_out = self.gc4(re_out, re_adj)
#         # re_out = self.gc2(re_out, re_adj)        
        
#         re_out = self.last_linear(re_out)
        
#         self.attention_weight = f_weight
#         del tensor_u
#         del tensor_c
#         del list_u
#         del list_c

#         torch.cuda.empty_cache()

#         return F.log_softmax(re_out, dim=1)

### RNNGCN_SE_2ws

In [None]:
# Aggregates both attributes and adjacencies over time, using two separate sets of
# attention weights (W1_adj/W2_adj for adjacencies, W1_x/W2_x for attributes).

class RNNGCN_SE_2ws(nn.Module):
    """Squeeze-and-excitation with separate attention for graphs and features.

    ``gc1``/``gc2`` embed every snapshot to obtain one pooled descriptor per
    timestep.  Two independent bottleneck MLPs (``W1_adj``/``W2_adj`` and
    ``W1_x``/``W2_x``) turn the descriptors into two softmax attention
    vectors: one blends the adjacency snapshots, the other blends the feature
    snapshots.  ``gc3`` -> ReLU -> dropout -> ``gc4`` -> ``last_linear``
    classifies the nodes on the blended inputs.

    ``GraphConvolution`` and the global ``dataset_name`` are defined elsewhere
    in the notebook.

    Args:
        nfeat: input feature width handed to ``gc1``/``gc3``.
        nhid: hidden width of the graph layers.
        nclass: number of output classes.
        dropout: dropout probability.
        tot_timestep: number of snapshots to attend over.
        node_num: number of nodes (stored; printed at construction).
        use_cuda: stored for interface compatibility; tensors created in
            ``forward`` follow the device of the inputs.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
        super(RNNGCN_SE_2ws, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = nclass

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 0.5  # bottleneck ratio of the excitation MLPs
        self.attention_weight = None
        self.channel_adj = None
        self.channel_x = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (nclass, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)

        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)
        self.W1_adj = nn.Linear(self.C, self.hid_C)
        self.W2_adj = nn.Linear(self.hid_C, self.C)

        self.W1_x = nn.Linear(self.C, self.hid_C)
        self.W2_x = nn.Linear(self.hid_C, self.C)

        # Use the constructor argument; the previous code read a name
        # `class_num` that is not defined in this scope.
        self.last_linear = nn.Linear(nhid, nclass)

    def forward(self, feats, adj):
        """Return log-softmax class scores for every node.

        Args:
            feats: features indexed as ``feats[:, t, :]``.  When fewer than
                ``self.tot_timestep`` snapshots are supplied (e.g. a single
                static snapshot), the last snapshot is used as the aggregated
                feature.
            adj: adjacency snapshots indexed as ``adj[:, t, :]``; the first
                ``self.tot_timestep`` snapshots are read.

        Returns:
            ``log_softmax`` over dim 1 of the ``last_linear`` output.

        Side effects:
            Stores the two attention vectors in ``self.channel_adj`` and
            ``self.channel_x``.
        """
        mem_reach = self.tot_timestep

        # Datasets listed here (and every DBLPE variant) always use the last
        # feature snapshot for the per-timestep embedding.
        # NOTE(review): RNNGCN_SE lists "retweet" where this list has
        # "political_retweet" -- confirm which dataset name is correct.
        fixed_no_atts_list = ["hospital", "reality_call", "political_retweet"]
        use_last_feats = ("DBLPE" in dataset_name) or (dataset_name in fixed_no_atts_list)

        # Squeeze: one pooled scalar per timestep.
        pooled = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            now_x = feats[:, -1, :] if use_last_feats else feats[:, i, :]
            one_out = F.relu(self.gc1(now_x, now_adj))
            # F.dropout defaults to training=True; tie it to the module mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)
            pooled.append(torch.mean(one_out))
        tensor_c = torch.stack(pooled)

        # Excitation, adjacency branch.
        c_weight_adj = F.relu(self.W1_adj(tensor_c))
        c_weight_adj = torch.sigmoid(self.W2_adj(c_weight_adj))  # or softmax
        channel_adj = F.softmax(c_weight_adj, dim=0)
        self.channel_adj = channel_adj

        # Excitation, feature branch.
        c_weight_x = F.relu(self.W1_x(tensor_c))
        c_weight_x = torch.sigmoid(self.W2_x(c_weight_x))  # or softmax
        channel_x = F.softmax(c_weight_x, dim=0)
        self.channel_x = channel_x

        # Attention-weighted blends, built on the same device as the inputs.
        adj_out = torch.zeros_like(adj[:, 0, :])
        for i in range(mem_reach):
            adj_out = adj_out + torch.mul(channel_adj[i], adj[:, i, :])

        if feats.shape[1] >= mem_reach:
            x_out = torch.zeros_like(feats[:, 0, :])
            for i in range(mem_reach):
                x_out = x_out + torch.mul(channel_x[i], feats[:, i, :])
        else:
            # Static features: indexing feats[:, i, :] would run out of range.
            # channel_x sums to one, so a weighted sum of identical copies of
            # the last snapshot is that snapshot itself.
            x_out = feats[:, -1, :]

        re_out = self.gc3(x_out, adj_out)
        re_out = F.relu(re_out)
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, adj_out)

        re_out = self.last_linear(re_out)

        torch.cuda.empty_cache()

        return F.log_softmax(re_out, dim=1)

### TRNNGCN_LL

In [None]:
class TRNNGCN_LL(nn.Module):
    """Graph classifier with a learnable class-pair weight per timestep.

    ``Lambda`` holds one ``class_num x class_num`` matrix per timestep.  With
    the one-hot assignment ``theta`` (``node_num x class_num``), the product
    ``theta @ Lambda[t] @ theta.T`` gives a ``node_num x node_num`` weight
    that scales snapshot ``t`` element-wise; the weighted snapshots are summed
    and classified by ``gc3`` -> ReLU -> dropout -> ``gc4`` -> ``last_linear``.
    After every forward pass ``theta`` is replaced by the one-hot argmax of
    the prediction.

    ``gc1``, ``gc2``, ``W1`` and ``W2`` are constructed but not used by
    ``forward``; they are kept so the registered parameters stay unchanged.

    ``GraphConvolution`` and the global ``dataset_name`` are defined elsewhere
    in the notebook.

    Args:
        nfeat: input feature width handed to ``gc1``/``gc3``.
        nhid: hidden width of the graph layers.
        class_num: number of output classes.
        dropout: dropout probability.
        tot_timestep: number of adjacency snapshots.
        node_num: number of nodes.
        use_cuda: when true the initial ``theta`` is created on the GPU;
            ``forward`` always moves it to the device of ``adj``.
    """

    def __init__(self, nfeat, nhid, class_num, dropout, tot_timestep, node_num, use_cuda=False):
        super(TRNNGCN_LL, self).__init__()
        self.c_weight = None
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = class_num

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 1.0
        self.attention_weight = None
        self.matrix_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (class_num, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)

        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)

        self.Lambda = Parameter(torch.FloatTensor(self.C, class_num, class_num))
        # Equal bounds: every entry starts at exactly 0.5.
        self.Lambda.data.uniform_(0.5, 0.5)

        self.W1 = nn.Linear(self.C, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C)
        self.last_linear = nn.Linear(nhid, class_num)

        # Random initial one-hot class assignment; only placed on the GPU when
        # requested so the model can also be built on CPU-only machines.
        y = torch.randint(0, class_num, (node_num, 1)).flatten()
        theta = torch.zeros(node_num, class_num)
        theta[range(theta.shape[0]), y] = 1
        self.theta = theta.cuda() if use_cuda else theta

    def forward(self, feats, adj):
        """Return log-softmax class scores and refresh ``self.theta``.

        Args:
            feats: features indexed as ``feats[:, t, :]``; only the last
                snapshot is used.  For datasets whose name contains
                ``"DBLPE"`` exactly one snapshot is required; otherwise one
                per adjacency timestep.
            adj: adjacency snapshots indexed as ``adj[:, t, :]``, each
                ``node_num x node_num``; ``adj.shape[1]`` must equal
                ``self.tot_timestep``.

        Returns:
            ``log_softmax`` over dim 1 of the ``last_linear`` output.

        Side effects:
            Clamps ``self.Lambda`` to ``[0, 1]``, stores it in
            ``self.matrix_weight`` and replaces ``self.theta`` with the
            one-hot argmax of the returned scores.
        """
        # Keep the class-pair weights inside [0, 1].
        self.Lambda.data = self.Lambda.data.clamp(0, 1)

        node_num = self.node_num
        if "DBLPE" in dataset_name:
            assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
        else:
            assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"

        tot_timestep = adj.shape[1]
        assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)

        mem_reach = self.tot_timestep

        self.matrix_weight = self.Lambda

        device = adj.device
        theta = self.theta.to(device)

        # Sum of the snapshots, each scaled element-wise by its node-pair weight.
        re_adj = torch.zeros(node_num, node_num, device=device)
        for i in range(mem_reach):
            tmp_w = torch.mm(torch.mm(theta, self.Lambda[i, ...]), theta.T)
            re_adj = re_adj + tmp_w * adj[:, i, :]

        re_x = feats[:, -1, :]

        re_out = self.gc3(re_x, re_adj)
        re_out = F.relu(re_out)
        # F.dropout defaults to training=True; tie it to the module mode.
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, re_adj)

        re_out = self.last_linear(re_out)

        out_for_y = F.log_softmax(re_out, dim=1)

        # Re-estimate the class assignment from the current prediction.
        y = torch.argmax(out_for_y, dim=1)
        new_theta = torch.zeros(self.theta.shape, device=out_for_y.device)
        new_theta[range(new_theta.shape[0]), y] = 1
        self.theta = new_theta

        return out_for_y

### TRNNGCN_SE

In [None]:
class TRNNGCN_SE(nn.Module):
    """Squeeze-and-excitation producing class-pair weights per timestep.

    ``gc1``/``gc2`` embed every snapshot to obtain one pooled descriptor per
    timestep.  ``W1`` -> ReLU -> ``W2`` -> sigmoid maps the descriptors to
    ``tot_timestep * class_num * class_num`` values, which are soft-maxed over
    the time axis and reshaped into one ``class_num x class_num`` matrix per
    timestep.  With the one-hot assignment ``theta`` these matrices weight the
    adjacency snapshots element-wise; the weighted sum is classified by
    ``gc3`` -> ReLU -> dropout -> ``gc4`` -> ``last_linear``.  After every
    forward pass ``theta`` is replaced by the one-hot argmax of the prediction.

    ``GraphConvolution`` and the global ``dataset_name`` are defined elsewhere
    in the notebook.

    Args:
        nfeat: input feature width handed to ``gc1``/``gc3``.
        nhid: hidden width of the graph layers.
        class_num: number of output classes.
        dropout: dropout probability.
        tot_timestep: number of adjacency snapshots.
        node_num: number of nodes.
        use_cuda: when true the initial ``theta`` is created on the GPU;
            ``forward`` always moves it to the device of ``adj``.
    """

    def __init__(self, nfeat, nhid, class_num, dropout, tot_timestep, node_num, use_cuda=False):
        super(TRNNGCN_SE, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = class_num

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 1.0
        self.attention_weight = None
        self.matrix_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (class_num, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)

        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)

        self.W1 = nn.Linear(self.C, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C * class_num * class_num)
        self.last_linear = nn.Linear(nhid, class_num)

        # Random initial one-hot class assignment; only placed on the GPU when
        # requested so the model can also be built on CPU-only machines.
        y = torch.randint(0, class_num, (node_num, 1)).flatten()
        theta = torch.zeros(node_num, class_num)
        theta[range(theta.shape[0]), y] = 1
        self.theta = theta.cuda() if use_cuda else theta

    def forward(self, feats, adj):
        """Return log-softmax class scores and refresh ``self.theta``.

        Args:
            feats: features indexed as ``feats[:, t, :]``.  For datasets whose
                name contains ``"DBLPE"`` exactly one snapshot is required;
                otherwise one per adjacency timestep.
            adj: adjacency snapshots indexed as ``adj[:, t, :]``, each
                ``node_num x node_num``; ``adj.shape[1]`` must equal
                ``self.tot_timestep``.

        Returns:
            ``log_softmax`` over dim 1 of the ``last_linear`` output.

        Side effects:
            Stores the sigmoid gate in ``self.c_weight``, the per-timestep
            class-pair matrices in ``self.matrix_weight``, the softmax of
            their Frobenius norms in ``self.attention_weight``, and replaces
            ``self.theta`` with the one-hot argmax of the returned scores.
        """
        class_num = self.class_num
        node_num = self.node_num
        is_dblpe = "DBLPE" in dataset_name
        if is_dblpe:
            assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
        else:
            assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"

        tot_timestep = adj.shape[1]
        assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)

        mem_reach = self.tot_timestep

        # Squeeze: one pooled scalar per timestep (timesteps act as channels).
        pooled = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            now_x = feats[:, 0, :] if is_dblpe else feats[:, i, :]
            one_out = F.relu(self.gc1(now_x, now_adj))
            # F.dropout defaults to training=True; tie it to the module mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)
            pooled.append(torch.mean(one_out))
        tensor_c = torch.stack(pooled)

        # Excitation
        c_weight = F.relu(self.W1(tensor_c))
        c_weight = torch.sigmoid(self.W2(c_weight))
        self.c_weight = c_weight

        # One row per timestep, one column per class pair; normalise every
        # class pair across time.
        square_c = class_num * class_num
        line_weight = c_weight.view(mem_reach, square_c)
        line_weight_softmax = F.softmax(line_weight, dim=0)
        f_weight = line_weight_softmax.view(mem_reach, class_num, class_num)

        # Frobenius norm of every per-timestep matrix (2-norm of its entries).
        w_norm = torch.linalg.norm(f_weight.reshape(mem_reach, -1), dim=1)
        self.attention_weight = F.softmax(w_norm, dim=0)

        self.matrix_weight = f_weight

        device = adj.device
        theta = self.theta.to(device)

        # Sum of the snapshots, each scaled element-wise by its node-pair weight.
        re_adj = torch.zeros(node_num, node_num, device=device)
        for i in range(mem_reach):
            tmp_w = torch.mm(torch.mm(theta, f_weight[i, ...]), theta.T)
            re_adj = re_adj + tmp_w * adj[:, i, :]

        re_x = feats[:, -1, :]

        re_out = self.gc3(re_x, re_adj)
        re_out = F.relu(re_out)
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, re_adj)

        re_out = self.last_linear(re_out)

        out_for_y = F.log_softmax(re_out, dim=1)

        # Re-estimate the class assignment from the current prediction.
        y = torch.argmax(out_for_y, dim=1)
        new_theta = torch.zeros(self.theta.shape, device=out_for_y.device)
        new_theta[range(new_theta.shape[0]), y] = 1
        self.theta = new_theta

        return out_for_y

### TRNNGCN_SE_full

In [None]:
class TRNNGCN_SE_full(nn.Module):
    """Two-stage GCN that re-weights every timestep's adjacency with a class-by-class matrix.

    Stage 1 ("squeeze"): ``gc1``/``gc2`` are run on each timestep, the result is
    masked with the current one-hot class assignment ``theta`` and averaged over
    the nodes.  The pooled vectors of all timesteps are concatenated.

    Stage 2 ("excitation"): ``W1``/``W2`` turn the pooled vector into one
    ``class_num x class_num`` matrix per timestep, softmax-normalised across
    timesteps.  Each adjacency slice is scaled element-wise by
    ``theta @ matrix @ theta.T`` and the slices are summed.

    Stage 3: ``gc3``/``gc4`` and ``last_linear`` classify the nodes on the
    combined adjacency, using the features of the last timestep.

    Notes:
        * ``one_out * theta`` multiplies a ``gc2`` output with a
          ``(node_num, class_num)`` tensor, so ``nhid`` has to be
          broadcast-compatible with ``class_num`` (in practice equal).
        * ``forward`` reads the module-level variable ``dataset_name``.
        * ``theta`` is a plain attribute (not a parameter/buffer) and is
          replaced by the arg-max prediction at the end of every ``forward``.
        * All intermediate tensors are created on ``adj.device``; ``use_cuda``
          only decides where the initial ``theta`` is placed.

    Attributes set by ``forward`` for later inspection:
        c_weight: raw sigmoid output of ``W2``.
        matrix_weight: ``(tot_timestep, class_num, class_num)`` weights.
        attention_weight: softmax over the per-timestep norms of ``matrix_weight``.
    """

    def __init__(self, nfeat, nhid, class_num, dropout, tot_timestep, node_num, use_cuda=False):
        super(TRNNGCN_SE_full, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = class_num

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 1.0                # bottleneck ratio of the W1/W2 pair
        self.attention_weight = None
        self.matrix_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (class_num, node_num))
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)

        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)

        self.W1 = nn.Linear(self.C * class_num, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C * class_num * class_num)
        self.last_linear = nn.Linear(nhid, class_num)

        # Random one-hot class assignment used until the first prediction exists.
        y = torch.randint(0, class_num, (node_num, 1)).flatten()
        theta = torch.zeros(node_num, class_num)
        theta[range(node_num), y] = 1
        # Only touch CUDA when asked to, so the model can be built on CPU-only hosts.
        self.theta = theta.cuda() if use_cuda else theta

    def forward(self, feats, adj):
        """Return per-node log-probabilities of shape ``(node_num, class_num)``.

        Args:
            feats: node features indexed as ``feats[:, t, :]``.  When
                ``"DBLPE"`` is part of the global ``dataset_name`` exactly one
                feature slice is expected and reused for every timestep.
            adj: adjacency tensor indexed as ``adj[:, t, :]``; ``adj.shape[1]``
                must equal ``tot_timestep``.

        Side effects:
            Updates ``self.c_weight``, ``self.matrix_weight``,
            ``self.attention_weight`` and replaces ``self.theta`` with the
            one-hot encoding of the arg-max prediction.
        """
        class_num = self.class_num
        node_num = self.node_num
        shared_feats = "DBLPE" in dataset_name
        if shared_feats:
            assert feats.shape[1] == 1, "feats shape is not 1, instead %d" % feats.shape[1]
        else:
            assert feats.shape[1] == adj.shape[1], "feats and adj has different timestep"

        tot_timestep = adj.shape[1]
        assert tot_timestep == self.tot_timestep, "timestep not consistent, %d, %d" % (tot_timestep, self.tot_timestep)

        mem_reach = self.tot_timestep
        device = adj.device
        theta = self.theta.to(device)

        # Squeeze: one pooled vector per timestep (timesteps act as channels).
        pooled = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            now_x = feats[:, 0, :] if shared_feats else feats[:, i, :]
            one_out = self.gc1(now_x, now_adj)
            one_out = F.relu(one_out)
            # training=self.training: without it dropout stays active in eval mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)

            one_out = one_out * theta
            pooled.append(torch.mean(one_out, dim=0))  # average pooling over nodes
        tensor_c = torch.cat(pooled)

        # Excitation.
        c_weight = F.relu(self.W1(tensor_c))
        c_weight = torch.sigmoid(self.W2(c_weight))
        self.c_weight = c_weight

        square_c = class_num * class_num
        line_weight = c_weight.view(mem_reach, square_c)
        # Normalise every (class, class) entry across the timesteps.
        line_weight_softmax = F.softmax(line_weight, dim=0)
        f_weight = line_weight_softmax.reshape(mem_reach, class_num, class_num)

        w_norm = torch.stack([torch.linalg.norm(f_weight[i, ...]) for i in range(mem_reach)])
        self.attention_weight = F.softmax(w_norm, dim=0)
        self.matrix_weight = f_weight

        # Combine the adjacencies; entry (u, v) of step i is scaled by the
        # weight of the (class(u), class(v)) pair.
        out = torch.zeros(node_num, node_num, device=device)
        for i in range(mem_reach):
            tmp_w = torch.mm(torch.mm(theta, f_weight[i, ...]), theta.T)
            out += tmp_w * adj[:, i, :]

        re_adj = out
        re_x = feats[:, -1, :]

        re_out = self.gc3(re_x, re_adj)
        re_out = F.relu(re_out)
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, re_adj)

        re_out = self.last_linear(re_out)

        out_for_y = F.log_softmax(re_out, dim=1)
        y = torch.argmax(out_for_y, dim=1)

        # Next call masks with the classes predicted now.
        new_theta = torch.zeros(theta.shape, device=device)
        new_theta[range(theta.shape[0]), y] = 1
        self.theta = new_theta

        return out_for_y

###  RNNGCN_SE_back_pe

In [None]:
class RNNGCN_SE_back_pe(nn.Module):
    """GCN with squeeze-and-excitation weighting of timesteps plus positional encoding.

    For every timestep ``gc1`` is applied, a sinusoidal positional encoding of
    that timestep is added, and ``gc2`` produces an embedding that is averaged
    to a single scalar.  ``W1``/``W2`` map the vector of scalars to one weight
    per timestep (softmax-normalised).  The adjacency slices are summed with
    these weights and ``gc3``/``gc4``/``last_linear`` classify the nodes on the
    combined graph, using the features of the last timestep.

    Notes:
        * ``forward`` reads the module-level variable ``dataset_name``.
        * ``pooling_nn`` is registered but not used by ``forward``.
        * All intermediate tensors are created on ``adj.device``.

    Attributes set by ``forward`` for later inspection:
        c_weight: sigmoid output of ``W2`` (before the softmax).
        attention_weight: softmax-normalised weight of every timestep.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, tot_timestep, node_num, use_cuda=False):
        super(RNNGCN_SE_back_pe, self).__init__()
        self.tot_timestep = tot_timestep
        self.node_num = node_num
        self.nhid = nhid
        self.class_num = nclass

        self.dropout = dropout
        self.use_cuda = use_cuda
        self.C = self.tot_timestep  # number of channels (one per timestep)
        self.r = 0.5                # bottleneck ratio of the W1/W2 pair
        self.attention_weight = None
        self.hid_C = int(self.C * self.r) + 1

        print("nhid:", nhid)
        print("nfeat:", nfeat)
        print("class_num = %d, node_num = %d" % (nclass, node_num))
        self.pooling_nn = nn.Linear(nhid, 1)
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nhid)
        self.gc3 = GraphConvolution(nfeat, nhid)
        self.gc4 = GraphConvolution(nhid, nhid)
        self.W1 = nn.Linear(self.C, self.hid_C)
        self.W2 = nn.Linear(self.hid_C, self.C)
        # Use the constructor argument; the former `class_num` was a notebook global.
        self.last_linear = nn.Linear(nhid, nclass)

    def get_pe(self, max_len, d_model):
        """Return a sinusoidal positional encoding of shape ``(1, max_len, d_model)``.

        Even columns hold ``sin(position * div_term)``, odd columns the matching
        cosine.  For odd ``d_model`` there is one cosine column fewer than sine
        columns, hence the slice on the cosine table.
        """
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles)[:, :d_model // 2]
        return pe.unsqueeze(0)

    def forward(self, feats, adj):
        """Return per-node log-probabilities of shape ``(node_num, nclass)``.

        Args:
            feats: node features indexed as ``feats[:, t, :]``.  When
                ``"DBLPE"`` is part of the global ``dataset_name`` the last
                feature slice is reused for every timestep.
            adj: adjacency tensor indexed as ``adj[:, t, :]`` for
                ``t < tot_timestep``.

        Side effects:
            Updates ``self.c_weight`` and ``self.attention_weight``.
        """
        mem_reach = self.tot_timestep
        device = adj.device
        shared_feats = "DBLPE" in dataset_name

        pe = self.get_pe(mem_reach, self.nhid).to(device)

        # Squeeze: one scalar per timestep (timesteps act as channels).
        pooled = []
        for i in range(mem_reach):
            now_adj = adj[:, i, :]
            now_x = feats[:, -1, :] if shared_feats else feats[:, i, :]
            one_out = self.gc1(now_x, now_adj)
            one_out = one_out + pe[0][i]

            one_out = F.relu(one_out)
            # training=self.training: without it dropout stays active in eval mode.
            one_out = F.dropout(one_out, self.dropout, training=self.training)
            one_out = self.gc2(one_out, now_adj)
            pooled.append(torch.mean(one_out))  # average pooling over nodes and channels
        tensor_c = torch.stack(pooled)

        # Excitation.
        c_weight = F.relu(self.W1(tensor_c))
        c_weight = torch.sigmoid(self.W2(c_weight))
        self.c_weight = c_weight

        f_weight = F.softmax(c_weight, dim=0)

        # Weighted sum of the adjacency slices.
        out = torch.zeros_like(adj[:, 0, :])
        for i in range(mem_reach):
            out += torch.mul(f_weight[i], adj[:, i, :])

        re_adj = out
        re_x = feats[:, -1, :]

        re_out = self.gc3(re_x, re_adj)
        re_out = F.relu(re_out)
        re_out = F.dropout(re_out, self.dropout, training=self.training)
        re_out = self.gc4(re_out, re_adj)

        re_out = self.last_linear(re_out)

        self.attention_weight = f_weight

        if device.type == "cuda":
            torch.cuda.empty_cache()

        return F.log_softmax(re_out, dim=1)

In [None]:
# pe testing:

# def get_pe(max_len, d_model):
#     pe = torch.zeros(max_len, d_model)
#     position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
#     div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
#     pe[:, 0::2] = torch.sin(position * div_term)
#     pe[:, 1::2] = torch.cos(position * div_term)
#     pe = pe.unsqueeze(0).transpose(0, 1)
#     return pe

# pe = get_pe(4, 10)
# print(pe)


### Count changed labels

In [None]:
# def get_label_stat(x):
#     total_labels = x.clone().detach().cpu().numpy()
#     print(total_labels.shape)
#     res = []
#     for i in range(1, total_labels.shape[1]): # timestep
#         cur_labels = total_labels[:, i]
#         pre_labels = total_labels[:, i-1]
#         diff = np.abs(cur_labels - pre_labels) > 1e-5
#         res.append(np.sum(diff))
#     return np.array(res)

# # get_label_stat(total_labels)

### With same data sampling

In [None]:
# One data, multi runs


# from gpu_memory_log import gpu_memory_log

# target_res = []
# SE_w_list = []
# SE_w = 0


# # dataset_name="DBLPE"
# # dataset_name="DBLPE_importance"
# # dataset_name="3_Periodic"
# # dataset_name="5_Periodic"
# # dataset_name="DBLP3"
# # dataset_name="DBLP5"
# dataset_name="Reddit"
# # dataset_name="Brain"
# # dataset_name="sparse_Brain"
# # dataset_name="sparse_DBLPE"
# # dataset_name="hospital"

# features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix=load_real_data(dataset_name) 

# class_num=int(labels.max())+1

# print("class_num =", class_num)
# total_adj=adj
# total_labels=labels
# print("total_adjsize: ", total_adj.shape)
# print("total_labelsize: ", total_labels.shape)
# print("features size: ", features.shape)


# for i in range(10):

#     torch.autograd.set_detect_anomaly(True)
#     # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN

#     # model_type = 'RNNGCN_1_preNN'    
#     # model_type = 'RNNGCN_2_preNN'    
#     # model_type = 'RNNGCN_LL'
#     # model_type = 'RNNGCN_LL_exp'
#     # model_type = 'RNNGCN_2'
#     # model_type = 'RNNGCN_RNN'
#     model_type = 'RNNGCN_SE_back'
#     # model_type = 'RNNGCN_SE'
#     # model_type = 'RNNGCN_SE_decay'
#     # model_type = 'new_TRNNGCN' #AH_TRNNGCN
#     # model_type = 'original_RNNGCN'
#     # model_type = 'TRNNGCN_LL_exp'
#     # model_type = 'original_TRNNGCN'
#     # model_type = 'TRNNGCN_SE'
#     # model_type = 'SPEC'
#     # model_type = 'GCN'

#     args_hidden = class_num
#     args_dropout = 0.5
#     args_weight_decay = 5e-4    
    
#     # args_lr = 0.0025
#     args_lr = 0.0025
    
#     # args_epochs = 500
#     args_epochs = 500 

#     # args_no_cuda=True
#     args_no_cuda=False
    
#     args_cuda = not args_no_cuda and torch.cuda.is_available()
#     print("CUDA:", args_cuda)
#     args_normalize=True

#     print(dataset_name)

#     # gpu_memory_log()
#     if mode=='real':
#         res_one, SE_w = test_real_dataset()
#         target_res.append(res_one)
#         if "RNNGCN_SE" in model_type:
#             SE_w_list.append(SE_w.detach().cpu().numpy())


In [None]:
# while 1:
#     pass

### Simulated Mode

#### 2-stage

In [None]:
def generate_data_2stage(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep):
    """Simulate a dynamic block-model graph whose labels may change only at selected timesteps.

    Nodes start with uniformly random labels.  At every timestep contained in
    ``special_timestep`` each node switches to a different, uniformly drawn
    label with probability ``epsilon_vector[current_label]``.  At every
    timestep an undirected edge is drawn for each node pair with probability
    ``link_inclass_prob`` (same label) or ``link_outclass_prob`` (different
    labels).

    The order of the ``random`` / ``torch`` random draws is fixed (nodes in
    ascending order, classes in ascending order, class members in ascending
    order), so results are reproducible under fixed seeds.

    Args:
        number_of_nodes: number of nodes ``n``.
        Time_steps: number of timesteps ``t``.
        class_num: number of classes.
        link_inclass_prob: edge probability for nodes with equal labels.
        link_outclass_prob: edge probability for nodes with different labels.
        epsilon_vector: per-class label-change probability, indexed by class id.
        special_timestep: container of timestep indices at which labels may change.

    Returns:
        Tuple ``(features, adj, total_labels, idx_train, idx_val, idx_test,
        Probability_matrix)`` where ``features`` is ``(n, t, n)`` with an
        identity matrix per timestep, ``adj`` is the ``(n, t, n)`` 0/1
        adjacency, ``total_labels`` is the ``(n, t)`` long tensor of labels,
        the index tensors cover the first fifth / up to the first half / the
        second half of the nodes, and ``Probability_matrix`` is the ``(n, n)``
        edge-probability matrix implied by the final labels (zero diagonal).

    Raises:
        ValueError: if a label change is triggered while ``class_num < 2``
            (there is no other label to switch to).
    """
    adj = torch.zeros(number_of_nodes, Time_steps, number_of_nodes)  # n*t*n adj matrix

    # Initial labels, uniform over the classes.
    labels = torch.randint(0, class_num, (number_of_nodes,)).to(dtype=torch.long)
    total_labels = torch.zeros(number_of_nodes, Time_steps)

    for i in range(int(Time_steps)):
        # Labels only move at the special timesteps.
        if i in special_timestep:
            for j in range(len(labels)):
                old_label = int(labels[j])
                if random.random() < epsilon_vector[old_label]:
                    if class_num < 2:
                        raise ValueError("cannot change a label when class_num < 2")
                    # Rejection sampling until a different label is drawn.
                    while True:
                        new_label = int(torch.randint(0, class_num, (1,))[0])
                        if new_label != old_label:
                            labels[j] = new_label
                            break
        total_labels[:, i] = labels

        # Index nodes by class to speed up edge generation.
        step_labels = labels.tolist()
        members = [[] for _ in range(class_num)]
        for node, lab in enumerate(step_labels):
            members[lab].append(node)

        # Symmetric adjacency: only pairs with z > node_id are sampled.
        for node_id in range(number_of_nodes):
            own = step_labels[node_id]
            for cls in range(class_num):
                prob = link_inclass_prob if cls == own else link_outclass_prob
                for z in members[cls]:
                    if z > node_id and random.random() < prob:
                        adj[node_id, i, z] = 1
                        adj[z, i, node_id] = 1

    # One-hot (identity) node features at every timestep.
    features = torch.eye(number_of_nodes).unsqueeze(1).repeat(1, Time_steps, 1)

    # Separate train, val, test.
    idx_train = torch.LongTensor(range(number_of_nodes // 5))
    idx_val = torch.LongTensor(range(number_of_nodes // 5, number_of_nodes // 2))
    idx_test = torch.LongTensor(range(number_of_nodes // 2, number_of_nodes))

    # Edge-probability matrix at the last timestep.
    same_class = labels.unsqueeze(0) == labels.unsqueeze(1)
    Probability_matrix = torch.full((number_of_nodes, number_of_nodes), float(link_outclass_prob))
    Probability_matrix[same_class] = float(link_inclass_prob)
    Probability_matrix.fill_diagonal_(0)

    return features.float(), adj.float(), total_labels.long(), idx_train, idx_val, idx_test, Probability_matrix


#### Abnormal behavior

In [None]:
def generate_data_glitch(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep):
    """Simulate a dynamic block-model graph with fixed labels and "glitch" timesteps.

    Node labels are drawn once and never change.  At ordinary timesteps edges
    follow the true labels; at every timestep contained in ``special_timestep``
    the edges are generated from a freshly drawn random labelling instead,
    while the recorded labels stay the true ones.  An undirected edge is drawn
    for each node pair with probability ``link_inclass_prob`` (same label in
    the labelling used for that step) or ``link_outclass_prob`` (different).

    The order of the ``random`` / ``torch`` random draws is fixed (nodes in
    ascending order, classes in ascending order, class members in ascending
    order), so results are reproducible under fixed seeds.

    Args:
        number_of_nodes: number of nodes ``n``.
        Time_steps: number of timesteps ``t``.
        class_num: number of classes.
        link_inclass_prob: edge probability for nodes with equal labels.
        link_outclass_prob: edge probability for nodes with different labels.
        epsilon_vector: not used by this generator; kept so the signature
            matches the other ``generate_data_*`` functions.
        special_timestep: container of timestep indices that use glitch labels.

    Returns:
        Tuple ``(features, adj, total_labels, idx_train, idx_val, idx_test,
        Probability_matrix)`` where ``features`` is ``(n, t, n)`` with an
        identity matrix per timestep, ``adj`` is the ``(n, t, n)`` 0/1
        adjacency, ``total_labels`` is the ``(n, t)`` long tensor of (true)
        labels, the index tensors cover the first fifth / up to the first half
        / the second half of the nodes, and ``Probability_matrix`` is the
        ``(n, n)`` edge-probability matrix implied by the true labels (zero
        diagonal).
    """
    adj = torch.zeros(number_of_nodes, Time_steps, number_of_nodes)  # n*t*n adj matrix

    # True labels, uniform over the classes; they never change.
    labels = torch.randint(0, class_num, (number_of_nodes,)).to(dtype=torch.long)
    true_labels = labels.tolist()

    total_labels = torch.zeros(number_of_nodes, Time_steps)
    for i in range(int(Time_steps)):
        total_labels[:, i] = labels

        # Pick the labelling that drives the edges of this step.
        if i in special_timestep:
            step_labels = torch.randint(0, class_num, (number_of_nodes,)).to(dtype=torch.long).tolist()
        else:
            step_labels = true_labels

        # Index nodes by class to speed up edge generation.
        members = [[] for _ in range(class_num)]
        for node, lab in enumerate(step_labels):
            members[lab].append(node)

        # Symmetric adjacency: only pairs with z > node_id are sampled.
        for node_id in range(number_of_nodes):
            own = step_labels[node_id]
            for cls in range(class_num):
                prob = link_inclass_prob if cls == own else link_outclass_prob
                for z in members[cls]:
                    if z > node_id and random.random() < prob:
                        adj[node_id, i, z] = 1
                        adj[z, i, node_id] = 1

    # One-hot (identity) node features at every timestep.
    features = torch.eye(number_of_nodes).unsqueeze(1).repeat(1, Time_steps, 1)

    # Separate train, val, test.
    idx_train = torch.LongTensor(range(number_of_nodes // 5))
    idx_val = torch.LongTensor(range(number_of_nodes // 5, number_of_nodes // 2))
    idx_test = torch.LongTensor(range(number_of_nodes // 2, number_of_nodes))

    # Edge-probability matrix implied by the true labels.
    same_class = labels.unsqueeze(0) == labels.unsqueeze(1)
    Probability_matrix = torch.full((number_of_nodes, number_of_nodes), float(link_outclass_prob))
    Probability_matrix[same_class] = float(link_inclass_prob)
    Probability_matrix.fill_diagonal_(0)

    return features.float(), adj.float(), total_labels.long(), idx_train, idx_val, idx_test, Probability_matrix


#### Fixed labels and Changing labels

In [None]:
#simulated data: setting of data generation

def generate_data_totallabel(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector):
    """Simulate a dynamic block-model graph with changing labels and time-scaled edge rates.

    Nodes start with uniformly random labels.  At every timestep each node
    switches to a different, uniformly drawn label with probability
    ``epsilon_vector[current_label]``.  Afterwards an undirected edge is drawn
    for each node pair ``(node_id, z)`` with ``z > node_id`` with probability
    ``link_inclass_prob * time_vector[label(node_id), i]`` (same label) or
    ``link_outclass_prob * time_vector[label(node_id), i]`` (different labels).

    The order of the ``random`` / ``torch`` random draws is fixed (nodes in
    ascending order, classes in ascending order, class members in ascending
    order), so results are reproducible under fixed seeds.

    Args:
        number_of_nodes: number of nodes ``n``.
        Time_steps: number of timesteps ``t``.
        class_num: number of classes.
        link_inclass_prob: base edge probability for nodes with equal labels.
        link_outclass_prob: base edge probability for nodes with different labels.
        epsilon_vector: per-class label-change probability, indexed by class id.
        time_vector: scaling factors indexed as ``time_vector[class_id, timestep]``
            (e.g. a 2-D array or tensor).

    Returns:
        Tuple ``(features, adj, total_labels, idx_train, idx_val, idx_test,
        Probability_matrix)`` where ``features`` is ``(n, t, n)`` with an
        identity matrix per timestep, ``adj`` is the ``(n, t, n)`` 0/1
        adjacency, ``total_labels`` is the ``(n, t)`` long tensor of labels,
        the index tensors cover the first fifth / up to the first half / the
        second half of the nodes, and ``Probability_matrix`` is the ``(n, n)``
        matrix of unscaled edge probabilities implied by the final labels
        (zero diagonal).

    Raises:
        ValueError: if a label change is triggered while ``class_num < 2``
            (there is no other label to switch to).
    """
    adj = torch.zeros(number_of_nodes, Time_steps, number_of_nodes)  # n*t*n adj matrix

    # Initial labels, uniform over the classes.
    labels = torch.randint(0, class_num, (number_of_nodes,)).to(dtype=torch.long)
    total_labels = torch.zeros(number_of_nodes, Time_steps)

    for i in range(int(Time_steps)):
        # Label changes.
        for j in range(len(labels)):
            old_label = int(labels[j])
            if random.random() < epsilon_vector[old_label]:
                if class_num < 2:
                    raise ValueError("cannot change a label when class_num < 2")
                # Rejection sampling until a different label is drawn.
                while True:
                    new_label = int(torch.randint(0, class_num, (1,))[0])
                    if new_label != old_label:
                        labels[j] = new_label
                        break
        total_labels[:, i] = labels

        # Index nodes by class to speed up edge generation.
        step_labels = labels.tolist()
        members = [[] for _ in range(class_num)]
        for node, lab in enumerate(step_labels):
            members[lab].append(node)

        # Symmetric adjacency: only pairs with z > node_id are sampled.
        for node_id in range(number_of_nodes):
            own = step_labels[node_id]
            # Both rates are scaled by the factor of node_id's own class.
            scale = time_vector[own, i]
            inclass_prob = link_inclass_prob * scale
            outclass_prob = link_outclass_prob * scale
            for cls in range(class_num):
                prob = inclass_prob if cls == own else outclass_prob
                for z in members[cls]:
                    if z > node_id and random.random() < prob:
                        adj[node_id, i, z] = 1
                        adj[z, i, node_id] = 1

    # One-hot (identity) node features at every timestep.
    features = torch.eye(number_of_nodes).unsqueeze(1).repeat(1, Time_steps, 1)

    # Separate train, val, test.
    idx_train = torch.LongTensor(range(number_of_nodes // 5))
    idx_val = torch.LongTensor(range(number_of_nodes // 5, number_of_nodes // 2))
    idx_test = torch.LongTensor(range(number_of_nodes // 2, number_of_nodes))

    # Edge-probability matrix at the last timestep (without time scaling).
    same_class = labels.unsqueeze(0) == labels.unsqueeze(1)
    Probability_matrix = torch.full((number_of_nodes, number_of_nodes), float(link_outclass_prob))
    Probability_matrix[same_class] = float(link_inclass_prob)
    Probability_matrix.fill_diagonal_(0)

    return features.float(), adj.float(), total_labels.long(), idx_train, idx_val, idx_test, Probability_matrix


#simulated data: setting of data generation

def generate_data_totallabel_fixed(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector):
    """Generate a simulated dynamic graph whose node labels stay fixed over time.

    Each node is assigned one uniformly random class, and that same labelling
    is used at every time step. At time step ``t`` every unordered node pair
    ``(a, b)`` with ``a < b`` is linked with probability
    ``p * time_vector[label[a], t]``, where ``p`` is ``link_inclass_prob``
    when both nodes share a class and ``link_outclass_prob`` otherwise.

    Args:
        number_of_nodes: number of nodes ``n``.
        Time_steps: number of graph snapshots ``T``.
        class_num: number of node classes.
        link_inclass_prob: base link probability for two nodes of one class.
        link_outclass_prob: base link probability for nodes of different classes.
        epsilon_vector: unused here because labels never change; kept so the
            signature stays identical to ``generate_data_totallabel``.
        time_vector: object indexable as ``time_vector[class, t]`` giving the
            multiplier applied to the base link probability.

    Returns:
        Tuple ``(features, adj, total_labels, idx_train, idx_val, idx_test,
        Probability_matrix)``:
        features is a float ``(n, T, n)`` tensor holding an identity matrix per
        time step; adj is a float ``(n, T, n)`` symmetric 0/1 adjacency tensor;
        total_labels is a long ``(n, T)`` tensor; the idx tensors split the node
        ids into the first fifth / up to the first half / the rest;
        Probability_matrix is ``(n, n)`` with the base link probability of each
        node pair and a zero diagonal.
    """
    n = number_of_nodes
    steps = int(Time_steps)

    # Assign every node a random label with equal probability.
    labels = torch.randint(0, class_num, (n,)).to(dtype=torch.long)
    label_list = labels.tolist()

    # Group node ids by label once; labels are constant, so this lookup is
    # valid for every time step.
    nodes_by_label = {cls: [] for cls in range(class_num)}
    for node_id, cls in enumerate(label_list):
        nodes_by_label[cls].append(node_id)

    # The same label column is repeated for every time step.
    total_labels = labels.unsqueeze(1).repeat(1, steps)

    # n*t*n adjacency tensor, filled symmetrically.
    adj = torch.zeros(n, steps, n)
    for t in range(steps):
        for node_id, own_label in enumerate(label_list):
            for cls, members in nodes_by_label.items():
                base_prob = link_inclass_prob if cls == own_label else link_outclass_prob
                for z in members:
                    # Only pairs with z > node_id are sampled, so each
                    # unordered pair is drawn exactly once per time step.
                    if z > node_id and random.random() < base_prob * time_vector[own_label, t]:
                        adj[node_id, t, z] = 1
                        adj[z, t, node_id] = 1

    # One-hot (identity) node features at every time step.
    features = torch.eye(n).unsqueeze(1).repeat(1, steps, 1)

    # Separate train / val / test node ids.
    idx_train = torch.LongTensor(range(n // 5))
    idx_val = torch.LongTensor(range(n // 5, n // 2))
    idx_test = torch.LongTensor(range(n // 2, n))

    # Base link probability of every node pair (no self links).
    same_class = labels.unsqueeze(0) == labels.unsqueeze(1)
    Probability_matrix = torch.zeros(n, n)
    Probability_matrix[same_class] = float(link_inclass_prob)
    Probability_matrix[~same_class] = float(link_outclass_prob)
    Probability_matrix.fill_diagonal_(0.0)

    return features.float(), adj.float(), total_labels.long(), idx_train, idx_val, idx_test, Probability_matrix


#### Run on simulated graphs

In [None]:
# mode="simulated"

# if mode == 'simulated':   
#     target_res = []
#     SE_w_list = []
#     SE_w = None
#     matrix_w_list = []
#     matrix_w = None
#     SE_2ws_adj_list = []
#     SE_2ws_x_list = []

#     target_res_baseline = []
#     SE_w_list_baseline = []
#     matrix_w_list_baseline = []
    
#     for i in range(10):
#         print("="*20)
#         dataset_name='simulated'
#         number_of_nodes=200
#         Time_steps = 12
#         class_num = 4
#         link_inclass_prob = 20/number_of_nodes  #when calculation , remove the link in itself
#         #EGCN good when network is dense 20/number_of_nodes  #fails when network is sparse. 20/number_of_nodes/5

#         link_outclass_prob=link_inclass_prob/20
#         # epsilon_vector=[10/number_of_nodes,20/number_of_nodes, 40/number_of_nodes, 50/number_of_nodes]
#         epsilon_vector=[0.05, 0.10, 0.20, 0.25]
#         # epsilon_vector=[0.8, 0.8, 0.8, 0.8]

#         # time_vector = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
#         special_timestep = [0,1,2,3,4]

#         # time_vector = np.ones((class_num, Time_steps))
#         # time_vector[0, 1] = 3.
#         # time_vector[1, 1] = 3.
#         # time_vector[2, 4] = 3.
#         # time_vector[3, 4] = 3.
        
#         features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_glitch(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep)   
#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_2stage(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep)   
#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_totallabel(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector)               
#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_totallabel_fixed(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector)               
#         total_adj=adj
#         total_labels=labels

#         tot_timestep = total_labels.shape[1]
#         print("adj:", total_adj.shape)
#         print("labels:", total_labels.shape)
#         print("timestep:", tot_timestep)
#         print("idx_train:", idx_train.shape)
#         print("idx_test:", idx_test.shape)
#         print("idx_val:", idx_val.shape)

#         args_hidden = class_num
#         args_dropout = 0.5
#         args_weight_decay = 5e-4    
#         args_lr = 0.0025
#         args_epochs = 500 
#         # args_no_cuda=True
#         args_no_cuda=False
#         args_cuda = not args_no_cuda and torch.cuda.is_available()
#         print("CUDA:", args_cuda)
#         args_normalize=True
#         print(dataset_name)

#         # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
#         # model_type = 'RNNGCN_SE_2ws'
#         model_type = 'RNNGCN_SE_back'

#         for target_time in range(tot_timestep-1, tot_timestep):
#             print(target_time,end='\t')
#             adj = total_adj[:,:target_time+1,:]
#             labels = total_labels[:,target_time]
      
#             res_one, SE_w, matrix_w = test_real_dataset()
#             target_res.append(res_one)
#             if "TRNNGCN_SE" in model_type:
#                 matrix_w_list.append(matrix_w.detach().detach().cpu().numpy())
#             if "RNNGCN_SE" in model_type:
#                 if "2ws" in model_type:
#                     SE_2ws_adj_list.append(SE_2ws_adj.detach().cpu().numpy())
#                     SE_2ws_x_list.append(SE_2ws_x.detach().cpu().numpy())
#                 elif SE_w != None:
#                     SE_w_list.append(SE_w.detach().cpu().numpy())            
#             print(' ',end='\n')

#         # model_type = 'RNNGCN_SE_back'
#         # for target_time in range(tot_timestep-1, tot_timestep):
#         #     print(target_time,end='\t')
#         #     adj = total_adj[:,:target_time+1,:]
#         #     labels = total_labels[:,target_time]
      
#         #     res_one, SE_w, matrix_w = test_real_dataset()
#         #     target_res_baseline.append(res_one)
#         #     if "TRNNGCN_SE" in model_type:
#         #         matrix_w_list_baseline.append(matrix_w.detach().cpu().numpy())
#         #     if "RNNGCN_SE" in model_type:
#         #         if "2ws" in model_type:
#         #             SE_2ws_adj_list.append(SE_2ws_adj.detach().cpu().numpy())
#         #             SE_2ws_x_list.append(SE_2ws_x.detach().cpu().numpy())                
#         #         elif SE_w != None:
#         #             SE_w_list_baseline.append(SE_w.detach().cpu().numpy())            
#         #     print(' ',end='\n')        

# SE_array = np.array(SE_w_list)
# print(np.mean(SE_array, axis=0))
# plt.plot(np.mean(SE_array, axis=0))
# plt.show()

In [None]:

# while 1:
#     pass

### Real Mode

In [None]:
mode = 'real'

# Real-data experiment driver: repeats load -> train/test several times and
# collects the metrics and learned attention weights of every run.
#
# NOTE(review): test_real_dataset() takes no arguments, so it presumably reads
# the module-level names assigned below (features, adj, labels, idx_*,
# model_type, args_*, dataset_name, ...). Keep these names and the order in
# which they are assigned unchanged unless that function is updated too.
if mode == 'real':

    # Attention tensors of the "2ws" model variant. Nothing in this cell
    # assigns them after this reset; presumably test_real_dataset() updates
    # them as globals — TODO confirm.
    SE_2ws_adj = None
    SE_2ws_x = None

    # Per-run result accumulators (consumed by the plotting / report cells).
    SE_2ws_adj_list = []
    SE_2ws_x_list = []
    target_res = []
    SE_w_list = []
    SE_w = None
    matrix_w_list = []
    matrix_w = None
    # Accumulators for the optional second ("baseline") run, see end of loop.
    target_res_baseline = []
    SE_w_list_baseline = []
    matrix_w_list_baseline = []

    acc_drop_list = []

    for i in range(5):
        print("-"*20)
        # Other dataset names that were used with this cell:
        #   "DBLPE", "DBLPE_importance", "3_Periodic", "5_Periodic", "DBLP3",
        #   "Reddit", "Brain", "sparse_Brain", "sparse_DBLPE", "hospital",
        #   "reality_call", "political_retweet"
        dataset_name = "DBLP5"

        features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = load_real_data(dataset_name)
        class_num = int(labels.max()) + 1
        print("class_num =", class_num)
        # Keep the full tensors: some branches below overwrite adj / labels
        # with a slice up to the target time step.
        total_adj = adj
        total_labels = labels
        print("total_adjsize: ", total_adj.shape)
        print("total_labelsize: ", total_labels.shape)
        print("features size: ", features.shape)

        # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
        # Alternatives tried here: 'RNNGCN_SE_back', 'GraphSage', 'GCNLSTM', 'GCN'
        model_type = 'RNNGCN_SE_2ws'

        # Hyper-parameters.
        args_hidden = class_num
        # args_hidden = 10
        args_dropout = 0.5
        args_weight_decay = 5e-4
        args_lr = 0.0025
        args_epochs = 500
        # args_no_cuda=True
        args_no_cuda = False
        args_cuda = not args_no_cuda and torch.cuda.is_available()
        print("CUDA:", args_cuda)
        args_normalize = True
        print(dataset_name)

        no_atts_list = ["DBLPE_importance", "DBLPE", "sparse_DBLPE"]
        fixed_no_atts_list = ["hospital", "reality_call", "political_retweet"]

        if dataset_name in fixed_no_atts_list:
            res_one, SE_w, matrix_w = test_real_dataset()
            target_res.append(res_one)
            # `is not None` instead of `!= None`: identity check, never
            # dispatched to the tensor's comparison operator.
            if "RNNGCN_SE" in model_type and SE_w is not None:
                SE_w_list.append(SE_w.detach().cpu().numpy())
            print(' ', end='\n')

        elif dataset_name in no_atts_list:
            # target_time=13 #0-13
            for target_time in range(13, 14):
                # gpu_memory_log()
                torch.cuda.empty_cache()

                print(target_time)
                # Use the snapshots up to and including target_time and the
                # labels at target_time.
                adj = total_adj[:, :target_time+1, :]
                labels = total_labels[:, target_time]

                res_one, SE_w, matrix_w = test_real_dataset()

                if "RNNGCN_SE" in model_type and SE_w is not None:
                    SE_w_list.append(SE_w.detach().cpu().numpy())
                target_res.append(res_one)
                # gpu_memory_log()
                print(' ', end='\n')
        else:
            res_one, SE_w, matrix_w = test_real_dataset()
            target_res.append(res_one)
            # if "TRNNGCN_SE" in model_type:
            #     matrix_w_list.append(matrix_w.detach().cpu().numpy())
            if "RNNGCN_SE" in model_type:
                if "2ws" in model_type:
                    SE_2ws_adj_list.append(SE_2ws_adj.detach().cpu().numpy())
                    SE_2ws_x_list.append(SE_2ws_x.detach().cpu().numpy())
                elif SE_w is not None:
                    SE_w_list.append(SE_w.detach().cpu().numpy())

            # # Another one
            # model_type = 'RNNGCN_SE_back'
            # res_one, SE_w, matrix_w = test_real_dataset()
            # target_res_baseline.append(res_one)
            # if "TRNNGCN_SE" in model_type:
            #     matrix_w_list_baseline.append(matrix_w.detach().cpu().numpy())
            # if "RNNGCN_SE" in model_type:
            #     if SE_w is not None:
            #         SE_w_list_baseline.append(SE_w.detach().cpu().numpy())
            # print(' ',end='\n')
        
    

In [None]:
# Plot the run-averaged attention weights collected by the experiment cell.
# A list that stayed empty (its branch never ran for the chosen model/dataset)
# is skipped: np.mean over an empty list yields NaN plus a RuntimeWarning.
attention_series = [
    ("SE_2ws_x", SE_2ws_x_list),
    ("SE_2ws_adj", SE_2ws_adj_list),
    ("SE_w", SE_w_list),
]
any_series_plotted = False
for series_label, series_runs in attention_series:
    if len(series_runs) == 0:
        continue
    plt.plot(np.mean(series_runs, axis=0), label=series_label)
    any_series_plotted = True

plt.xlabel('Timesteps')
plt.ylabel('Attention Weights')
if any_series_plotted:
    plt.legend()



### ACC / AUC / F1 output

In [None]:
# arr_res = np.array(target_res)
# # arr_res = np.array(target_res_baseline)

# print(arr_res)
# print(len(arr_res))
# print("\nModel: **%s**;\nDataset: **%s**\n" % (model_type, dataset_name))
# print("args_dropout: %.4f" % args_dropout)
# print("args_lr: %.6f" % args_lr)
# # print("args_weight_decay: %f" % args_weight_decay)
# print("args_epochs: %d\n" % args_epochs)

# no_atts_list = [ "DBLPE"]
# if dataset_name in no_atts_list:
#     print("|Time step| ACC | AUC | F1 |")
#     print("|:--- |:--- |:------|:-----|")
#     for i in range(len(target_res)):
#         print("|step #%d |%.6f | %.6f | %.6f|" % (i, target_res[i][1], target_res[i][2], target_res[i][3])) # ACC, AUC, F1

# else:
#     print("| ACC | AUC | F1 |")
#     print("|:--- |:------|:-----|")
#     for i in range(len(arr_res)):
#         print("|%.6f | %.6f | %.6f|" % (arr_res[i][1], arr_res[i][2], arr_res[i][3])) # ACC, AUC, F1
#     print("\nAVG: ")
#     print((np.sum(arr_res, axis=0)/len(arr_res))[1:])


##### RNNGCN_SE_back / Mean


In [None]:
# tot_timestep = adj.shape[1]
# model_type = "RNNGCN_SE_back"

# # w_all = np.array(SE_2ws_adj_list) # weights
# w_all = np.array(SE_w_list)
# w_mean = np.mean(w_all, axis=0)
# print(dataset_name, ", attention", w_mean)
# # tot_timestep = Features.shape[1]
# tot = w_all.shape[0] 
# x = range(tot_timestep)

# plt.xlabel('Timesteps')
# plt.ylabel('Attention Weights (adj)')
# plt.grid(True)    
# plot_w_all, = plt.plot(x, w_mean, label="attention weights")
# plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
# plt.legend(handles=[plot_w_all])    

In [None]:
# while 1:
#     pass

### Masking attention in trained GCN / GraphSage


### Mask one

In [None]:
# def eval_test(original_features, original_adj, original_labels, idx_train, idx_val, idx_test, model_type,normalize=False):
#     t=time.time()
#     lambda_matrix=None 
#     total_loss=0
#     total_acc=0
#     total_norm=[]    

#     #choose adj matrix
#     #GCN:n*n, Others: n*t*n
    
#     if dataset_name == "Brain" or dataset_name == "simulated":
#         meta_timestep = 12
#     else:
#         meta_timestep = 10

#     attention_w = dict()
#     # attention_w = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
#     attention_w['simulated'] = [0.05907838, 0.05908475, 0.05908549, 0.0590926,  0.05907808, 0.05907955,
#                                 0.05909814, 0.05977109, 0.06782387, 0.13779935, 0.16049674, 0.16051194]
#     attention_w["None"] = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    
#     attention_w["DBLP3"] = [0.13257283, 0.11761762, 0.09207272, 0.09068672, 
#                             0.13907257, 0.08077238, 0.07735384, 0.08926542, 
#                             0.07579254, 0.10479335]
#     attention_w["DBLP5"] = [0.08490249, 0.07058566, 0.07522732, 0.093458354, 0.1454923, 0.13680391, 0.08714645, 0.11872233, 0.12575606, 0.061905116]
#     attention_w["Reddit"] = [0.10407882, 0.087858014, 0.103267744, 0.11869549, 0.111122675, 0.11745782, 0.09139953, 0.08404839, 0.089088246, 0.09298327]
#     attention_w["Brain"] = [0.10703684, 0.08153439, 0.06159713, 0.10095412,
#                              0.05306721, 0.07350213, 0.12013485, 0.08990081,
#                              0.05808821, 0.10553119, 0.07727117, 0.07138196]

#     adj = original_adj
#     features = original_features
#     labels = original_labels

#     print("adj shape:", adj.shape)
#     test_results_list = []
#     for spec_t in range(meta_timestep):
#         adj = original_adj
#         features = original_features
#         labels = original_labels
    
#         att_w = np.array(attention_w[dataset_name])
#         att_w[spec_t] = 0.
#         att_w /= np.sum(att_w)
#         print("att_w", att_w)

#         if model_type=='GCN':  
#             now_adj=att_w[0] * adj[:,0,:].clone()
#             for i in range(1,adj.shape[1]):  #time_steps
#                 now_adj+= att_w[i] * adj[:,i,:].clone()
#             adj=now_adj
#             features=features[:,-1,:]
#         elif model_type=='GraphSage':
#             now_adj=att_w[0] * adj[:,0,:].clone()
#             for i in range(1,adj.shape[1]):  #time_steps
#                 now_adj+= att_w[i] * adj[:,i,:].clone()
#             adj=now_adj
#             features=features[:,-1,:]

#         #define model
#         if model_type=='GCN':
#             model = GCN(nfeat=features.shape[1],
#                     nhid=args_hidden,
#                     nclass=class_num,
#                     dropout=args_dropout)
#             model.load_state_dict(torch.load("./Eval_Mask_Model.tar"))
#             model.eval()
#         elif model_type == 'GraphSage':
#             adj=dgl.from_networkx(nx.Graph(adj.numpy())) #fit in dgl
#             model = GraphSage(nfeat=features.shape[1],
#                     nhid=args_hidden,
#                     nclass=class_num,
#                     dropout=args_dropout)
#             model.load_state_dict(torch.load("./Eval_Mask_Model.tar"))
#             model.eval()
        
        
#         if args_cuda:
#             if model_type != 'EGCN':
#                 model=model.to(torch.device('cuda:0'))#.cuda()
#                 features = features.cuda()
#                 adj = adj.to(torch.device('cuda:0'))
#                 labels = labels.cuda()
#                 idx_train = idx_train.cuda()
#                 idx_val = idx_val.cuda()
#                 idx_test = idx_test.cuda()

#         # #optimizer and train
#         # optimizer = optim.Adam(model.parameters(),
#         #                         lr=args_lr, weight_decay=args_weight_decay)
#         # Train model
#         t_total = time.time()

#         # Testing
#         loss, acc, auc, f1 = test(model, features, adj, labels, idx_test)
#         test_results = [loss, acc, auc, f1]
#         test_results_list.append(test_results[1])

#         print("masking timestep", spec_t, ":", str(test_results[1])+'\t'+str(test_results[2])+'\t'+str(test_results[3]))#,end='\t')
#         # try:
#         #     spec_norm=getKlargestSigVec(now_adj-Probability_matrix,2)[0]
#         # except:
#         #     spec_norm=0 #temperal adj

#         del model
#     return test_results_list

### Mask all but one

In [None]:
# def eval_test_oneleft(original_features, original_adj, original_labels, idx_train, idx_val, idx_test, model_type,normalize=False):
#     t=time.time()
#     lambda_matrix=None 
#     total_loss=0
#     total_acc=0
#     total_norm=[]    

#     #choose adj matrix
#     #GCN:n*n, Others: n*t*n
    
#     if dataset_name == "Brain" or dataset_name == "simulated":
#         meta_timestep = 12
#     else:
#         meta_timestep = 10

#     attention_w = dict()
#     attention_w['simulated'] = [0.05907838, 0.05908475, 0.05908549, 0.0590926,  0.05907808, 0.05907955,
#                                 0.05909814, 0.05977109, 0.06782387, 0.13779935, 0.16049674, 0.16051194]
#     attention_w["None"] = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
    
#     attention_w["DBLP3"] = [0.13257283, 0.11761762, 0.09207272, 0.09068672, 
#                             0.13907257, 0.08077238, 0.07735384, 0.08926542, 
#                             0.07579254, 0.10479335]
#     attention_w["DBLP5"] = [0.08490249, 0.07058566, 0.07522732, 0.093458354, 0.1454923, 0.13680391, 0.08714645, 0.11872233, 0.12575606, 0.061905116]
#     attention_w["Reddit"] = [0.10407882, 0.087858014, 0.103267744, 0.11869549, 0.111122675, 0.11745782, 0.09139953, 0.08404839, 0.089088246, 0.09298327]
#     attention_w["Brain"] = [0.10703684, 0.08153439, 0.06159713, 0.10095412,
#                              0.05306721, 0.07350213, 0.12013485, 0.08990081,
#                              0.05808821, 0.10553119, 0.07727117, 0.07138196]

#     adj = original_adj
#     features = original_features
#     labels = original_labels

#     print("adj shape:", adj.shape)
#     test_results_list = []
#     for spec_t in range(meta_timestep):
#         adj = original_adj
#         features = original_features
#         labels = original_labels
    
#         att_w = np.array(attention_w[dataset_name])
#         for mask_t in range(meta_timestep):
#             if mask_t != spec_t:
#                 att_w[mask_t] = 0
#         att_w /= np.sum(att_w)
#         print("att_w", att_w)

#         if model_type=='GCN':  
#             now_adj=att_w[0] * adj[:,0,:].clone()
#             for i in range(1,adj.shape[1]):  #time_steps
#                 now_adj+= att_w[i] * adj[:,i,:].clone()
#             adj=now_adj
#             features=features[:,-1,:]
#         elif model_type=='GraphSage':
#             now_adj=att_w[0] * adj[:,0,:].clone()
#             for i in range(1,adj.shape[1]):  #time_steps
#                 now_adj+= att_w[i] * adj[:,i,:].clone()
#             adj=now_adj
#             features=features[:,-1,:]

#         #define model
#         if model_type=='GCN':
#             model = GCN(nfeat=features.shape[1],
#                     nhid=args_hidden,
#                     nclass=class_num,
#                     dropout=args_dropout)
#             model.load_state_dict(torch.load("./Eval_Mask_Model.tar"))
#             model.eval()
#         elif model_type == 'GraphSage':
#             adj=dgl.from_networkx(nx.Graph(adj.numpy())) #fit in dgl
#             model = GraphSage(nfeat=features.shape[1],
#                     nhid=args_hidden,
#                     nclass=class_num,
#                     dropout=args_dropout)
#             model.load_state_dict(torch.load("./Eval_Mask_Model.tar"))
#             model.eval()
        
        
#         if args_cuda:
#             if model_type != 'EGCN':
#                 model=model.to(torch.device('cuda:0'))#.cuda()
#                 features = features.cuda()
#                 adj = adj.to(torch.device('cuda:0'))
#                 labels = labels.cuda()
#                 idx_train = idx_train.cuda()
#                 idx_val = idx_val.cuda()
#                 idx_test = idx_test.cuda()
                
#         # #optimizer and train
#         # optimizer = optim.Adam(model.parameters(),
#         #                         lr=args_lr, weight_decay=args_weight_decay)
#         # Train model
#         t_total = time.time()

#         # Testing
#         loss, acc, auc, f1 = test(model, features, adj, labels, idx_test)
#         test_results = [loss, acc, auc, f1]
#         test_results_list.append(test_results[1])

#         print("masking timestep", spec_t, ":", str(test_results[1])+'\t'+str(test_results[2])+'\t'+str(test_results[3]))#,end='\t')
#         # try:
#         #     spec_norm=getKlargestSigVec(now_adj-Probability_matrix,2)[0]
#         # except:
#         #     spec_norm=0 #temperal adj

#         del model
#     return test_results_list

### Mask one on Simulated data

In [None]:
# #simulated data: setting of data generation

# def generate_data_totallabel(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector):
    
#     transit_matrix=[]
#     for i in range(class_num):
#         transit_one=[epsilon_vector[i]]*i+[1-epsilon_vector[i]]+[epsilon_vector[i]]*(class_num-1-i)
#         transit_matrix+=[transit_one]
#     #print((number_of_nodes*link_inclass_prob*epsilon_vector[0])**0.5)
    
#     adj=torch.zeros(number_of_nodes,Time_steps,number_of_nodes) #n*t*n adj matrix

#     #assign initial labels
#     labels=torch.randint(0,class_num,(number_of_nodes,)) #assign random label with equal probability
#     labels=labels.to(dtype=torch.long)
#     #label_node, speed up the generation of edges
#     label_node_dict=dict()

#     for j in range(class_num):
#         label_node_dict[j]=[]

#     for i in range(len(labels)):
#         label_node_dict[int(labels[i])]+=[int(i)]

#     total_labels=torch.zeros(number_of_nodes,Time_steps)
#     #generate graph
#     for i in range(int(Time_steps)):
#         # print("for timestep %d, p = %.4f" % (i, time_vector[i]))
#         #change node
#         change_nodes=[]
#         for j in range(len(labels)):
#             if random.random() < epsilon_vector[labels[j]]:
#                 #less than change probability
#                 tmp=int(labels[j])
#                 #print(j)
#                 while(1): #change label
#                     labels[j]=torch.tensor(int(torch.randint(0,class_num,(1,))[0]))
#                     if labels[j]!=tmp:
#                         change_nodes+=[j]
#                         break
#                 #labels[j]=torch.tensor(not tmp)
#         total_labels[:,i]=labels.clone()
#         label_node_dict=dict()
#         for j in range(class_num):
#             label_node_dict[j]=[]

#         for j in range(len(labels)):
#             label_node_dict[int(labels[j])]+=[int(j)]
#         #
#         #generate symmetrix adj matrix at each time step
#         for node_id in range(number_of_nodes):
#             j=labels[node_id]
#             for l in label_node_dict:
#                 if l==j:
#                     for z in label_node_dict[l]:  #z>node_id,  symmetrix matrix, no repeat
#                         if z>node_id and random.random()<link_inclass_prob:
#                             adj[node_id,i,z]= 1
#                             adj[z,i,node_id]= 1
#                 else:
#                     for z in label_node_dict[l]:
#                         if z>node_id and random.random()<link_outclass_prob:
#                             adj[node_id,i,z]= 1
#                             adj[z,i,node_id]= 1

#     #generate feature use eye matrix
#     features=torch.zeros(number_of_nodes,Time_steps,number_of_nodes)
#     for i in range(features.shape[1]):
#         features[:,i,:]=torch.eye(features.shape[0],features.shape[2])

#     #seprate train,val,test
#     idx_train = torch.LongTensor(range(number_of_nodes//5))
#     idx_val = torch.LongTensor(range(number_of_nodes//5, number_of_nodes//2))
#     idx_test = torch.LongTensor(range(number_of_nodes//2, number_of_nodes))

#     #probability matrix at last time_step
#     Probability_matrix=torch.zeros(number_of_nodes,number_of_nodes)
#     for j in range(number_of_nodes):
#         for k in range(number_of_nodes):
#             if j==k:
#                 continue
#             elif labels[j]==labels[k]:
#                 Probability_matrix[j][k]=link_inclass_prob
#             else:
#                 Probability_matrix[j][k]=link_outclass_prob

#     return features.float(), adj.float(), total_labels.long(), idx_train, idx_val, idx_test, Probability_matrix



In [None]:
# mode="simulated"

# if mode == 'simulated':   
#     target_res = []
#     SE_w_list = []
#     SE_w = None
#     matrix_w_list = []
#     matrix_w = None
#     SE_2ws_adj_list = []
#     SE_2ws_x_list = []

#     target_res_baseline = []
#     SE_w_list_baseline = []
#     matrix_w_list_baseline = []
#     acc_drop_list = []

#     for i in range(10):
#         print("="*20)
#         dataset_name='simulated'
#         number_of_nodes=200
#         Time_steps = 12
#         class_num = 4
#         link_inclass_prob = 20/number_of_nodes  #when calculation , remove the link in itself
#         #EGCN good when network is dense 20/number_of_nodes  #fails when network is sparse. 20/number_of_nodes/5

#         link_outclass_prob=link_inclass_prob/20
#         # epsilon_vector=[10/number_of_nodes,20/number_of_nodes, 40/number_of_nodes, 50/number_of_nodes]
#         epsilon_vector=[0.05, 0.10, 0.20, 0.25]
#         # epsilon_vector=[0.8, 0.8, 0.8, 0.8]

#         time_vector = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]
#         # special_timestep = [5]

#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_glitch(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep)   
#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_2stage(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, special_timestep)   
#         features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_totallabel(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector)               
#         # features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix = generate_data_totallabel_fixed(number_of_nodes, Time_steps, class_num, link_inclass_prob, link_outclass_prob, epsilon_vector, time_vector)               
#         total_adj=adj
#         total_labels=labels

#         tot_timestep = total_labels.shape[1]
#         print("adj:", total_adj.shape)
#         print("labels:", total_labels.shape)
#         print("timestep:", tot_timestep)
#         print("idx_train:", idx_train.shape)
#         print("idx_test:", idx_test.shape)
#         print("idx_val:", idx_val.shape)

#         args_hidden = class_num
#         args_dropout = 0.5
#         args_weight_decay = 5e-4    
#         args_lr = 0.0025
#         args_epochs = 500 
#         # args_no_cuda=True
#         args_no_cuda=False
#         args_cuda = not args_no_cuda and torch.cuda.is_available()
#         print("CUDA:", args_cuda)
#         args_normalize=True
#         print(dataset_name)

#         # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
#         # model_type = 'RNNGCN_SE_2ws'
#         model_type = 'GCN'
#         # model_type = 'RNNGCN_SE_back'

#         for target_time in range(tot_timestep-1, tot_timestep):
#             print(target_time,end='\t')
#             adj = total_adj[:,:target_time+1,:]
#             labels = total_labels[:,target_time]
      
#             res_one, SE_w, matrix_w = test_real_dataset()
#             target_res.append(res_one)
#             if "TRNNGCN_SE" in model_type:
#                 matrix_w_list.append(matrix_w.detach().detach().cpu().numpy())
#             if "RNNGCN_SE" in model_type:
#                 if "2ws" in model_type:
#                     SE_2ws_adj_list.append(SE_2ws_adj.detach().cpu().numpy())
#                     SE_2ws_x_list.append(SE_2ws_x.detach().cpu().numpy())
#                 elif SE_w != None:
#                     SE_w_list.append(SE_w.detach().cpu().numpy())            
#             print(' ',end='\n')

#         eval_res = eval_test_oneleft(features, adj, labels, idx_train, idx_val, idx_test, model_type,normalize=args_normalize)
#         acc_drop = res_one[1] - np.array(eval_res)
#         print("acc_drop:", acc_drop)
#         acc_drop_list.append(acc_drop) 

In [None]:
# tot_timestep = adj.shape[1]
# model_type = "RNNGCN_SE_2ws"
# if "2ws" in model_type: 
#     w_all = np.array(SE_2ws_adj_list) # weights
#     # w_all = np.array(SE_w_list)
#     w_mean = np.mean(w_all, axis=0)
#     print(dataset_name, ", adj:", w_mean)
#     # tot_timestep = Features.shape[1]
#     tot = w_all.shape[0]
#     x = range(tot_timestep)
    
#     plt.xlabel('Timesteps')
#     plt.ylabel('Attention Weights (adj)')
#     plt.grid(True)    
#     plot_w_all, = plt.plot(x, w_mean, label="attention for adj")
#     plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
#     plt.legend(handles=[plot_w_all])    

In [None]:
# while 1:
#     pass

### Evaluation: training & testing and output

#### Output attention W

In [None]:
# tot_timestep = adj.shape[1]

# # w_all = np.array(SE_2ws_adj_list) # for RNNGCN_SE_2ws
# w_all = np.array(SE_w_list) # weights
# w_mean = np.mean(w_all, axis=0)

# print(dataset_name, ", acc_drop")
# print("["+", ".join(str(bit) for bit in w_mean)+"]")

# # tot_timestep = Features.shape[1]
# tot = w_all.shape[0] 
# x = range(tot_timestep)

# plt.xlabel('Timesteps')
# plt.ylabel('Attention Weights (adj)')
# plt.grid(True)    
# plot_w_all, = plt.plot(x, w_mean, label="attention weights")
# plt.ylim(-0.02 + np.min(w_mean), 0.02 + np.max(w_mean))
# plt.legend(handles=[plot_w_all])    

In [None]:
# # print(eval_res)
# # print(res_one[1])
# # acc_drop = res_one[1] - np.array(eval_res)
# # print("acc_drop:", acc_drop)


# # w_all = np.array(SE_2ws_adj_list) # for RNNGCN_SE_2ws
# w_all = np.array(SE_w_list) # weights
# # print("["+", ".join(str(bit) for bit in w_all)+"]")
# # print(SE_w_list)
# tot_timestep = w_all.shape[1]
# tot_num = w_all.shape[0]
# x = range(tot_timestep)

# plt.xlabel('Timesteps')
# plt.ylabel('Weights Learned (adj)')
# plt.grid(True)    
# plt.ylim(-0.01 + np.min(w_all), 0.01 + np.max(w_all))


# for i in range(tot_num):
#     w_i = w_all[i, ...]
#     plt.plot(x, w_i)  


#### Output Acc Drop


In [None]:
# Plot the accuracy drop averaged over the first axis of acc_drop_list.
# NOTE(review): each entry of acc_drop_list is presumably one run's
# per-timestep accuracy-drop vector -- confirm against the cell that fills it.
tot_timestep = adj.shape[1]

# w_all = np.array(SE_2ws_adj_list) # weights
print(len(acc_drop_list))
w_all = np.array(acc_drop_list)

# Optional outlier trimming (disabled): drop the rows with the highest and
# lowest per-row mean before averaging.
# w_mean = np.mean(w_all, axis=1)
# print(w_mean.shape)

# bi = np.argmax(w_mean)
# print(w_all[bi])
# w_all = np.delete(w_all, bi, 0)
# print(w_all.shape)

# ci = np.argmin(w_mean)
# print(w_all[ci])
# w_all = np.delete(w_all, ci, 0)
# print(w_all.shape)

w_mean = np.mean(w_all, axis=0)

print(dataset_name, ", acc_drop")
print("["+", ".join(str(bit) for bit in w_mean)+"]")
# tot_timestep = Features.shape[1]
tot = w_all.shape[0]
x = range(tot_timestep)

# The curve drawn here is the mean accuracy drop, so label it as such
# (the previous labels were copied from the attention-weight cell).
plt.xlabel('Timesteps')
plt.ylabel('Accuracy Drop')
plt.grid(True)
plot_w_all, = plt.plot(x, w_mean, label="mean acc drop")
plt.ylim(-0.002 + np.min(w_mean), 0.002 + np.max(w_mean))
plt.legend(handles=[plot_w_all])

In [None]:
# Overlay every row of acc_drop_list as its own curve on one figure.
w_all = np.array(acc_drop_list)
print(acc_drop_list, len(acc_drop_list))
# w_all = np.array(SE_w_list) # weights

# Per-row mean (one value per row of w_all); only its shape is reported here.
w_mean = np.mean(w_all, axis=1)
print(w_mean.shape)

# Optional outlier trimming (disabled): drop the rows with the highest and
# lowest per-row mean before plotting.
# bi = np.argmax(w_mean)
# print(w_all[bi])
# w_all = np.delete(w_all, bi, 0)
# print(w_all.shape)

# ci = np.argmin(w_mean)
# print(w_all[ci])
# w_all = np.delete(w_all, ci, 0)
# print(w_all.shape)

tot_timestep = w_all.shape[1]
tot_num = w_all.shape[0]
x = range(tot_timestep)

# The data plotted is the accuracy drop, so label the axis accordingly
# (the previous label was copied from the weight-curve cells).
plt.xlabel('Timesteps')
plt.ylabel('Accuracy Drop')
plt.grid(True)
plt.ylim(-0.01 + np.min(w_all), 0.01 + np.max(w_all))


for i in range(tot_num):
    w_i = w_all[i, ...]
    plt.plot(x, w_i)
  

In [None]:
# This loop never exits on its own, so execution stays in this cell until it is
# interrupted (presumably a deliberate stop point before the cells below).
# Sleep between wake-ups instead of spinning so the wait does not burn a CPU core;
# `time` is imported in the notebook's import cell.
while True:
    time.sleep(60)

### Evaluation: masking out each channel

In [None]:
# mode = 'real'

# if mode=='real':
#     SE_2ws_adj = None
#     SE_2ws_x = None

#     SE_2ws_adj_list = []
#     SE_2ws_x_list = []
#     target_res = []
#     SE_w_list = []
#     SE_w = None
#     matrix_w_list = []
#     matrix_w = None
#     target_res_baseline = []
#     SE_w_list_baseline = []
#     matrix_w_list_baseline = []

#     all_drop_list = []

#     for i in range(15):
#         dataset_name="DBLP3"
#         # dataset_name="DBLP5"
#         # dataset_name="Reddit"
#         # dataset_name="Brain"
#         # dataset_name="reality_call"
#         # dataset_name="political_retweet"

#         features_origin, adj_origin, labels_origin, idx_train, idx_val, idx_test, Probability_matrix = load_real_data(dataset_name) 
#         class_num=int(labels_origin.max())+1
#         print("class_num =", class_num)
#         # total_adj=adj_origin
#         # total_labels=labels_origin
#         print("total_adjsize: ", adj_origin.shape)
#         print("total_labelsize: ", labels_origin.shape)
#         print("features size: ", features_origin.shape)

#         # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
#         # model_type = 'RNNGCN_SE_2ws'
#         # model_type = 'RNNGCN_SE'
#         # model_type = 'original_RNNGCN'
#         # model_type = 'GraphSage'
#         # model_type = 'GCN'
#         model_type = 'GCNLSTM'

#         args_hidden = class_num
#         # args_hidden = 10
#         args_dropout = 0.5
#         args_weight_decay = 5e-4    
#         args_lr = 0.0025
#         args_epochs = 500 
#         # args_no_cuda=True
#         args_no_cuda=False
#         args_cuda = not args_no_cuda and torch.cuda.is_available()
#         print("CUDA:", args_cuda)
#         args_normalize=True
#         print(dataset_name)
        
#         no_atts_list = ["DBLPE_importance", "DBLPE", "sparse_DBLPE"]
#         fixed_no_atts_list = ["hospital", "reality_call", "political_retweet"]

#         meta_time_step = 10
#         mean_drop = np.zeros(meta_time_step)
#         single_drop_list = []

#         mean_drop_t = 0

#         for t in range(meta_time_step):
#             print(' ---------- %d ----------' % t) 
        
#             # Without masking:
#             features = features_origin
#             adj = adj_origin
#             labels = labels_origin

#             res_one, SE_w, matrix_w = test_real_dataset()
#             target_res_baseline.append(res_one)
#             drop_acc_res_baseline = res_one[1]
            

#             # Applying a mask...
#             eval_mask = []
#             eval_mask.extend(range(t))
#             eval_mask.extend(range(t+1, meta_time_step))

#             adj = adj_origin[:, eval_mask, :]
#             features = features_origin[:, eval_mask, :]
#             # No masks for labels except for DBLPE
#             if dataset_name in no_atts_list:            # DBLPE
#                 labels = labels_origin[eval_mask, :]
#             # elif dataset_name in fixed_no_atts_list:    # hospital etc
#             #     total_label = labels[:, eval_mask, :]
#             # else:                                       # Brain etc
#             #     total_label = labels[:, eval_mask, :]

#             # Masking complete, start running..

#             res_one, SE_w, matrix_w = test_real_dataset()
#             target_res.append(res_one)
#             drop_acc_res = res_one[1]
#             drop_this_time = drop_acc_res_baseline - drop_acc_res
#             print("#%d: acc drop %.4f" % (i, drop_this_time))
#             single_drop_list.append(drop_this_time)
#             print("len of single:", len(single_drop_list))
#         all_drop_list.append(single_drop_list)


In [None]:
# drop_arr = np.array(all_drop_list)
# print(drop_arr.shape)
# print(model_type, ",", dataset_name, ":")
# print(np.mean(drop_arr, axis=0))
# x = range(meta_time_step)
# plt.xlabel('Timesteps')
# plt.ylabel('Acc drop: ' + model_type + " / " + dataset_name)
# plt.grid(True)    
# plot_w_all, = plt.plot(x, np.mean(drop_arr, axis=0))
# # plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
# plt.legend(handles=[plot_w_all])    


In [None]:
# arr_res = np.array(target_res)
# arr_res_baseline = np.array(target_res_baseline)

# print(arr_res)
# print(len(arr_res))
# print("\nModel: **%s**;\nDataset: **%s**\n" % (model_type, dataset_name))
# print("args_dropout: %.4f" % args_dropout)
# print("args_lr: %.6f" % args_lr)
# # print("args_weight_decay: %f" % args_weight_decay)
# print("args_epochs: %d\n" % args_epochs)

# print("| ACC drop | AUC drop | F1 drop |")
# print("|:--- |:------|:-----|")
# for i in range(len(arr_res)):
#     print("|%.6f | %.6f | %.6f|" % (arr_res_baseline[i][1] - arr_res[i][1], 
#                                     arr_res_baseline[i][2] - arr_res[i][2], 
#                                     arr_res_baseline[i][3] - arr_res[i][3])) # ACC, AUC, F1
# print("\nAVG: ")
# print((np.sum(arr_res, axis=0)/len(arr_res))[1:])


# # no_atts_list = [ "DBLPE"]
# # if dataset_name in no_atts_list:
# #     print("|Time step| ACC | AUC | F1 |")
# #     print("|:--- |:--- |:------|:-----|")
# #     for i in range(len(target_res)):
# #         print("|step #%d |%.6f | %.6f | %.6f|" % (i, target_res[i][1], target_res[i][2], target_res[i][3])) # ACC, AUC, F1

# # else:
#     # print("| ACC | AUC | F1 |")
#     # print("|:--- |:------|:-----|")
#     # for i in range(len(arr_res)):
#     #     print("|%.6f | %.6f | %.6f|" % (arr_res[i][1], arr_res[i][2], arr_res[i][3])) # ACC, AUC, F1
#     # print("\nAVG: ")
#     # print((np.sum(arr_res, axis=0)/len(arr_res))[1:])

# drop_arr = np.array(arr_res_baseline[:, 1] - arr_res[:, 1])
# x = range(meta_time_step)
# plt.xlabel('Timesteps')
# plt.ylabel('Acc drop')
# plt.grid(True)    
# plot_w_all, = plt.plot(x, drop_arr)
# # plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
# plt.legend(handles=[plot_w_all])    


### Weight Curve (all)

##### 2ws: adj_w and x_w / Mean

In [None]:
# tot_timestep = adj.shape[1]
# model_type = "RNNGCN_SE_2ws"
# if "2ws" in model_type: 
#     w_all = np.array(SE_2ws_adj_list) # weights
#     # w_all = np.array(SE_w_list)
#     w_mean = np.mean(w_all, axis=0)
#     print(dataset_name, ", adj:", w_mean)
#     # tot_timestep = Features.shape[1]
#     tot = w_all.shape[0]
#     x = range(tot_timestep)
    
#     plt.xlabel('Timesteps')
#     plt.ylabel('Attention Weights (adj)')
#     plt.grid(True)    
#     plot_w_all, = plt.plot(x, w_mean, label="attention for adj")
#     plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
#     plt.legend(handles=[plot_w_all])    

In [None]:
# if "2ws" in model_type: 
#     w_all = np.array(SE_2ws_x_list) # weights
#     w_mean = np.mean(w_all, axis=0)

#     # tot_timestep = Features.shape[1]
#     tot = w_all.shape[0]
#     x = range(tot_timestep)
    
#     print(dataset_name, ", x:", w_mean)
#     plt.xlabel('Timesteps')
#     plt.ylabel("Attention Weights (x)")
#     plt.grid(True)    
#     plot_w_all, = plt.plot(x, w_mean, label="attention for x")
#     plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
#     plt.legend(handles=[plot_w_all])        


##### All:

In [None]:
# if "2ws" in model_type:
#     w_all = np.array(SE_2ws_adj_list)
#     # w_all = np.array(SE_w_list) # weights
#     print(SE_w_list)
#     tot_timestep = w_all.shape[1]
#     tot_num = w_all.shape[0]
#     x = range(tot_timestep)

#     plt.xlabel('Timesteps')
#     plt.ylabel('Weights Learned (adj)')
#     plt.grid(True)    
#     plt.ylim(-0.01 + np.min(w_all), 0.01 + np.max(w_all))


#     for i in range(tot_num):
#         w_i = w_all[i, ...]
#         plt.plot(x, w_i)  


In [None]:
# if "2ws" in model_type:
#     w_all = np.array(SE_2ws_x_list)
#     # w_all = np.array(SE_w_list_baseline) # weights
#     # print(SE_w_list)
#     tot_timestep = w_all.shape[1]
#     tot_num = w_all.shape[0]
#     x = range(tot_timestep)

#     plt.xlabel('Timesteps')
#     plt.ylabel('Weights Learned (x)')
#     plt.grid(True)    
#     plt.ylim(-0.01 + np.min(w_all), 0.01 + np.max(w_all))


#     for i in range(tot_num):
#         w_i = w_all[i, ...]
#         plt.plot(x, w_i)          

In [None]:
# while 1:
#     pass

#### Other models

In [None]:
# Plot learned SE weights for "SE" models: a single weight vector for the
# datasets in no_atts_list, or every recorded weight vector for all others.
no_atts_list = ["DBLPE_importance", "DBLPE", "sparse_DBLPE", "hospital", "retweet"]
import matplotlib.pyplot as plt

if "SE" in model_type:
    if dataset_name in no_atts_list:
        # x = range(14)
        timestep = 14
        x = range(timestep)
        # Weight vector stored at index timestep-1; it must have `timestep` entries.
        weights = SE_w_list[timestep-1]
        # weights = SE_w_list_basline[timestep-1]
        assert len(weights) == timestep, "weights len: %d " % len(weights)
        weights_y = weights
        print(weights_y)

        # Label statistics normalised by the sum of their first `timestep` entries,
        # shifted right by one position and cut to the length of `weights`.
        # NaN marks the leading position, which has no value, so nothing is drawn there.
        stat = get_label_stat(total_labels)
        stat_y = stat / np.sum(stat[:timestep])
        stat_y = [np.nan] + list(stat_y)
        stat_y = stat_y[:len(weights)]

        plot_w, = plt.plot(x, weights_y, label="weights")
        # NOTE(review): the legend referenced `plot_stat`, which this cell never
        # defined (NameError on a fresh kernel), while `stat_y` was computed but
        # never drawn. The curve is restored here -- confirm the legend text.
        plot_stat, = plt.plot(x, stat_y, label="label stat")

        plt.legend(handles=[plot_w, plot_stat])
        plt.xlabel('Timesteps')
        plt.ylabel('Weights Learned')

        plt.grid(True)
        # plt.ylim(0.05, 0.16)

    else:
        # One curve per recorded weight vector.
        w_all = np.array(SE_w_list)
        # w_all = np.array(SE_w_list_baseline) # weights
        # print(SE_w_list)
        tot_timestep = w_all.shape[1]
        tot_num = w_all.shape[0]
        x = range(tot_timestep)

        plt.xlabel('Timesteps')
        plt.ylabel('Weights Learned')
        plt.grid(True)
        plt.ylim(-0.01 + np.min(w_all), 0.01 + np.max(w_all))


        for i in range(tot_num):
            w_i = w_all[i, ...]
            plt.plot(x, w_i)


### Weight Curve (mean)

In [None]:
# target res
tot_timestep = adj.shape[1]

if target_res_baseline != []:
    if "SE" in model_type:
        if dataset_name != "DBLPE" or dataset_name != "DBLPE_importance":    
            w_all = np.array(SE_w_list) # weights
            w_mean = np.mean(w_all, axis=0)

            w_all_baseline = np.array(SE_w_list_baseline) # weights
            w_mean_baseline = np.mean(w_all_baseline, axis=0)  

            # tot_timestep = Features.shape[1]
            tot = w_all.shape[0]
            x = range(tot_timestep)
            
            plt.xlabel('Timesteps')
            plt.ylabel('Weights Learned')
            plt.grid(True)    
            plot_w_all, = plt.plot(x, w_mean, label="with pe")
            plot_w_all_baseline, = plt.plot(x, w_mean_baseline, label="without pe")
            plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
            plt.legend(handles=[plot_w_all, plot_w_all_baseline])
else:
    if "SE" in model_type:
        if dataset_name != "DBLPE" or dataset_name != "DBLPE_importance":    
            w_all = np.array(SE_w_list) # weights
            w_mean = np.mean(w_all, axis=0)

            # tot_timestep = Features.shape[1]
            tot = w_all.shape[0]
            x = range(tot_timestep)
            
            plt.xlabel('Timesteps')
            plt.ylabel('Weights Learned')
            plt.grid(True)    
            plot_w_all, = plt.plot(x, w_mean, label=model_type)
            plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))
            plt.legend(handles=[plot_w_all])    
    else:
        x = range(len(target_res))
        plt.xlabel('Timesteps')
        plt.ylabel('Accuracy')
        plt.grid(True)    
        y = []
        
        for i in target_res:
            y.append(i[1])
        curve1,  = plt.plot(x, y, label=model_type)    
        # plt.ylim(0, 0.05 + np.max(w_all))
        plt.legend(handles=[curve1], loc='upper left')

In [None]:
# weight_exp = np.array(w_mean)

# print(weight_exp.shape)
# print(np.sum(weight_exp))

# exp_name = "EVL_" + dataset_name + "_" + model_type + "_" + str(len(weight_exp)) +".npy"
# with open(exp_name, 'wb') as f:
#     np.save(f, weight_exp)
# print(exp_name)

In [None]:
# %cp ./EVL_Brain_RNNGCN_SE_12.npy /content/Colab/Clustering-RGCN/
# %ls  /content/Colab/Clustering-RGCN/

In [None]:
# This loop never exits on its own, so execution stays in this cell until it is
# interrupted (presumably a deliberate stop point before the cells below).
# Sleep between wake-ups instead of spinning so the wait does not burn a CPU core;
# `time` is imported in the notebook's import cell.
while True:
    time.sleep(60)

In [None]:
# w_all = np.array(SE_w_list) # weights
# print(SE_w_list)
# tot_timestep = w_all.shape[1]
# tot_num = w_all.shape[0]
# x = range(tot_timestep)

# plt.xlabel('Timesteps')
# plt.ylabel('Weights Learned')
# plt.grid(True)    
# plt.ylim(-0.02 + np.min(w_all), 0.02 + np.max(w_all))


# for i in range(tot_num):
#     w_i = w_all[i, ...]
#     plt.plot(x, w_i)

# x = range(len(target_res))
# plt.xlabel('Timesteps')
# plt.ylabel('Accuracy')
# plt.grid(True)    
# y = []

# for i in target_res:
#     y.append(i[1])
# curve1,  = plt.plot(x, y, label=model_type)    
# # plt.ylim(0, 0.05 + np.max(w_all))
# plt.legend(handles=[curve1], loc='upper left')

# DBLPE plotting

In [None]:
# Train/evaluate every model in baseline_model_list on DBLPE, once per target
# timestep, and collect the per-step results (plus SE weights for "SE" models).
# Results: baseline_res_list[m][t] is the result row of model m at step t;
# baseline_SE_w_all[m] is the list of SE weight vectors recorded for model m.
baseline_res_list = []
baseline_SE_w_all = []
# baseline_model_list = ["RNNGCN_SE_back", "original_RNNGCN", "RNNGCN_SE_back_pe"]
baseline_model_list = ["GCN"]
# Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN

dataset_name="DBLPE"
features, adj, labels, idx_train, idx_val, idx_test, Probability_matrix=load_real_data(dataset_name)
# Number of classes = largest label value + 1.
class_num=int(labels.max())+1
print("class_num =", class_num)
# Keep the full tensors; `adj` and `labels` are overwritten per timestep below.
total_adj=adj
total_labels=labels
print("total_adjsize: ", total_adj.shape)
print("total_labelsize: ", total_labels.shape)
print("features size: ", features.shape)

for model_name_i in baseline_model_list:
    # Per-model accumulators, reset for every baseline.
    baseline_res = []
    SE_w_list = []
    SE_w = 0
    mode="real"
    print(model_name_i)

    # NOTE(review): test_real_dataset() is called without arguments, so it
    # presumably reads model_type, adj, labels and the args_* values below as
    # module-level names -- do not rename them without checking that function.
    model_type = model_name_i
    # NOTE(review): anomaly detection is a debugging aid that the PyTorch docs
    # say slows autograd down; consider disabling it for full 500-epoch runs.
    torch.autograd.set_detect_anomaly(True)

    # Options: GCN, GAT, GraphSage #dynamic_spec, DynAERNN #GCNLSTM, EGCN, RNNGCN, TRNNGCN
    args_hidden = class_num
    args_dropout = 0.5
    args_lr = 0.0025
    # args_lr = 0.025
    args_weight_decay = 5e-4
    # args_epochs = 500
    args_epochs = 500 

    # args_no_cuda=True
    args_no_cuda=False
    
    # Use CUDA only when it is both allowed and available.
    args_cuda = not args_no_cuda and torch.cuda.is_available()
    print("CUDA:", args_cuda)
    args_normalize=True

    print(dataset_name)

    if mode=='real':
        if dataset_name=="DBLPE":
            #target_time=13 #0-13
            for target_time in range(0, 14):
                print(target_time)
                # Keep indices 0..target_time along axis 1 of the adjacency data
                # and take column target_time of the labels.
                adj = total_adj[:,:target_time+1,:]
                labels = total_labels[:,target_time]
                res_one, SE_w, matrix_w = test_real_dataset()     
                # SE weights are recorded only for models whose name contains "SE";
                # for the current ["GCN"] list SE_w_list therefore stays empty.
                if "SE" in model_type:
                    print("SE is here!")
                    SE_w_list.append(SE_w.detach().cpu().numpy())
                baseline_res.append(res_one)

                # gpu_memory_log()
                print(' ',end='\n')


    # Report the hyper-parameters and a markdown table of per-step metrics.
    print("\nModel: **%s**;\nDataset: **%s**\n" % (model_type, dataset_name))
    print("args_dropout: %.4f" % args_dropout)
    print("args_lr: %.6f" % args_lr)
    # print("args_weight_decay: %f" % args_weight_decay)
    print("args_epochs: %d\n" % args_epochs)

    print("|Time step| ACC | AUC | F1 |")
    print("|:--- |:--- |:------|:-----|")
    for i in range(len(baseline_res)):
        print("|step #%d |%.6f | %.6f | %.6f|" % (i, baseline_res[i][1], 
                                                  baseline_res[i][2], 
                                                  baseline_res[i][3])) # ACC, AUC, F1    
    baseline_res_list.append(baseline_res)
    baseline_SE_w_all.append(SE_w_list)



In [None]:

# Plot one accuracy-vs-timestep curve per baseline model for DBLPE.
if dataset_name == "DBLPE":
    plt.xlabel('Timesteps')
    plt.ylabel('Accuracy')
    plt.grid(True)
    print(len(baseline_res_list))
    x = range(len(baseline_res_list[0]))

    # Only plotting Acc: element 1 of every per-step result row.
    # Built from the actual number of models instead of a fixed-size list of 3,
    # which raised IndexError as soon as more than three baselines were run.
    baseline_y = [np.array(model_res)[:, 1] for model_res in baseline_res_list]

    curve_legend = []
    for model_name, acc_curve in zip(baseline_model_list, baseline_y):
        curve_i,  = plt.plot(x, acc_curve, label=model_name)
        curve_legend.append(curve_i)
    plt.legend(handles=curve_legend, loc='lower right')

In [None]:
# Plot the last recorded SE weight vector of every baseline model that has one.
# The previous version hard-coded models 0 and 2, which raised IndexError for
# any other baseline_model_list (including the current single-model list) and
# for models that recorded no SE weights.
SE_w_list = baseline_SE_w_all[0] if baseline_SE_w_all else []

# Number of recorded SE weight vectors per model (0 for non-"SE" models).
print([len(model_se_w) for model_se_w in baseline_SE_w_all])

# (model name, last recorded weight vector) for models with recorded weights.
final_se_weights = [
    (model_name, np.asarray(model_se_w[-1]))
    for model_name, model_se_w in zip(baseline_model_list, baseline_SE_w_all)
    if len(model_se_w) > 0
]

if not final_se_weights:
    print("No SE weights recorded for", baseline_model_list, "- nothing to plot")
else:
    plt.xlabel('Timesteps')
    plt.ylabel('Weights Learned')
    plt.grid(True)
    # Y-limits cover every plotted curve, not only the first model's.
    w_min = min(np.min(w_last) for _, w_last in final_se_weights)
    w_max = max(np.max(w_last) for _, w_last in final_se_weights)
    plt.ylim(-0.02 + w_min, 0.02 + w_max)

    curve_legend = []
    for model_name, w_last in final_se_weights:
        curve_i, = plt.plot(range(w_last.shape[0]), w_last, label=model_name)
        curve_legend.append(curve_i)

    plt.legend(handles=curve_legend, loc='upper left')