In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from google.colab import drive
drive.mount('/content/drive')
device='cuda' if torch.cuda.is_available() else 'cpu'
x=np.load('/content/drive/MyDrive/gnn/data/data_2383_linkpred/2383_linkpred_x.npy').transpose()
y=np.load('/content/drive/MyDrive/gnn/data/data_2383_linkpred/2383_linkpred_y.npy').transpose()
w=np.load('/content/drive/MyDrive/gnn/data/data_2383_linkpred/2383_linkpred_w.npy').transpose()
print(torch.cuda.get_device_name(0))

Mounted at /content/drive
Tesla P100-PCIE-16GB


In [None]:
# sort activity
activity=np.sum(y,axis=0)
# edges=np.argsort(activity)[::-1]
edges=np.argsort(activity)[::-1]
edge=edges[:10]
y=y[:,edge]

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2, random_state=18)
print('Training data size:',x_train.shape)
print('Training label size:',y_train.shape)

class Dataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, device='cpu'):
        self.features=torch.from_numpy(features).float()
        self.labels=torch.from_numpy(labels).float()
    def __len__(self):
        return len(self.features)
    def __getitem__(self, idx):
        if torch.is_tensor(idx): idx = idx.tolist()
        # Select sample
        X = self.features[idx]  # shape = (24,)
        y = self.labels[idx]    # shape = (24,)
        X = torch.reshape(X,(3,2383))
        return X, y
params = {'batch_size': 256,
          'shuffle': True,
          'num_workers': 2}
# Dataset Generators
training_set = Dataset(features=x_train,labels=y_train,device=device)
training_generator = torch.utils.data.DataLoader(training_set,**params)
validation_set = Dataset(features=x_test,labels=y_test,device=device)
validation_generator = torch.utils.data.DataLoader(validation_set,**params)

Training data size: (12844, 3, 2383)
Training label size: (12844, 10)


In [None]:
W1 = w.copy()
W1 = np.asarray(W1)
W1 = W1 / 1
W2 = W1.copy()
print(type(w))
lam,v = np.linalg.eig(W1)
print(lam[0])
for i in range(2):
  W1 = np.matmul(W1,W2) / np.max(np.matmul(W1,W2))
  lam,v = np.linalg.eig(W1)
  print(lam[0])

<class 'numpy.ndarray'>
11.028229112322602
(1.3513537483764269+0j)
(1.434517098823738+0j)


In [None]:
class v2v(nn.Module):
    def __init__(self,in_feats,out_feats,w,n,bias=True):
        super(v2v,self).__init__()
        self.register_buffer('w',torch.from_numpy(w).float())
        self.register_buffer('n0',torch.tensor(n))
        # self.mapping=nn.Parameter(torch.Tensor(n,W.shape[0],W.shape[1]))
        # torch.nn.init.xavier_uniform_(self.mapping.data)
        self.scale0=nn.Parameter(torch.Tensor(out_feats,in_feats))
        torch.nn.init.xavier_uniform_(self.scale0.data)
        self.scale=nn.Parameter(torch.Tensor(n-1,out_feats,out_feats))
        torch.nn.init.xavier_uniform_(self.scale.data)
        self.bias=nn.Parameter(torch.Tensor(out_feats,1))
        torch.nn.init.xavier_uniform_(self.bias.data)

    def forward(self,input):
        # h=torch.mul(self.mapping[0,:,:],self.w) # element-wise
        # print(h.shape,input.shape)
        h=torch.matmul(input,self.w) # transpose due to batch generator
        h=torch.matmul(self.scale0,h)
        for i in range(self.n0 - 1):
          W1 = torch.matrix_power(self.w,i+2)/torch.max(torch.matrix_power(self.w,i+2))
          h1=torch.matmul(h,W1) # transpose due to batch generator 
          h=torch.matmul(self.scale[i,:,:],h)
        return h+self.bias 

# GNN using DGL v2v graph convolution and our own v2e graph convolution
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, W, num_bus, n_hop, fc_params):
        super(GCN, self).__init__()
        self.v2v1=v2v(in_feats,hidden_size[0],W,n_hop)
        self.v2v2=v2v(hidden_size[0],hidden_size[1],W,n_hop)
        self.v2v3=v2v(hidden_size[1],hidden_size[2],W,n_hop)
        self.v2v4=v2v(hidden_size[2],hidden_size[3],W,n_hop)
        # self.v2v5=v2v(hidden_size[3],hidden_size[4],W,n_hop)
        # self.v2v6=v2v(hidden_size[4],hidden_size[5],W,n_hop)
        self.linear1=nn.Linear(num_bus*hidden_size[-1],fc_params[0])
        self.linear2=nn.Linear(fc_params[0],2*fc_params[1])
         
    def forward(self, inputs):
        h=self.v2v1(inputs)
        h=torch.relu(h)
        h=self.v2v2(h)
        h=torch.relu(h)
        h=self.v2v3(h)
        h=torch.relu(h)
        h=self.v2v4(h)
        # h=torch.relu(h)
        # h=self.v2v5(h)
        # h=torch.relu(h)
        # h=self.v2v6(h)
        h=torch.reshape(h,(h.shape[0],-1))
        h=self.linear1(h)
        h=torch.relu(h)
        h=self.linear2(h)
        h=torch.reshape(h,(h.shape[0],2,-1))
        return h
n_bus=x.shape[2]
w_params=[10,20,20,1]
n_hop = 4
fc_params=[1000,y_train.shape[1]]
# W = W / 10 # normalize
net=GCN(3,w_params,w,n_bus,n_hop,fc_params) # Laplacian
net=net.to(device)

# set loss_func & optimizer
optimizer=torch.optim.Adam(net.parameters())#,weight_decay=0.01)
loss_optm=[]
loss_val=[]
print(net)
print('number of params: %d'%(sum(temp.numel() for temp in net.parameters() if temp.requires_grad)))

GCN(
  (v2v1): v2v()
  (v2v2): v2v()
  (v2v3): v2v()
  (v2v4): v2v()
  (linear1): Linear(in_features=2383, out_features=1000, bias=True)
  (linear2): Linear(in_features=1000, out_features=20, bias=True)
)
number of params: 2407424


In [None]:
for local_batch,local_label in training_generator:
  local_batch,local_label=local_batch.to(device),local_label.to(device)
  logits=net(local_batch)
  break
print(logits.shape)
print(local_label.shape)

torch.Size([256, 2, 10])
torch.Size([256, 10])


In [None]:
## Training
t0=time.time()
max_epochs=50
eval_epoch=5

# earlystopping
tolerance=5
min_delta=1e-5
previous=0

W_tensor = torch.from_numpy(w).float().to(device)
my_loss_func=nn.CrossEntropyLoss()
for epoch in range(max_epochs):
  # training loop
  train_loss=0.0
  for local_batch,local_label in training_generator:
    optimizer.zero_grad() # clear the past gradient
    local_batch,local_label=local_batch.to(device),local_label.to(device)
    logits=net(local_batch)
    loss=my_loss_func(logits,local_label.long())
    loss.backward()
    train_loss+=loss.item()
    optimizer.step() # update parameters of net
  loss_optm.append(train_loss/len(training_generator.dataset))
  print("Epoch %d | Training loss: %.8f"%(epoch,train_loss/len(training_generator.dataset)))
  # eval
  if (epoch+1)%eval_epoch==0:
    net.eval()
    eval_loss=0.0
    for eval_batch,eval_label in validation_generator:
      eval_batch,eval_label=eval_batch.to(device),eval_label.to(device)
      logits=net(eval_batch)
      loss=my_loss_func(logits,eval_label.long())
      eval_loss+=loss.item()
    eval_avg=eval_loss/len(validation_generator.dataset)
    if (epoch==0): previous=eval_avg
    else:
      if previous-eval_avg<min_delta: tolerance-=1
      if tolerance==0: break
      previous=eval_avg
    print("Epoch %d | Eval loss: %.8f" % (epoch, eval_avg))
    loss_val.append([epoch, eval_loss/len(validation_generator.dataset)])
    net.train()
t1=time.time()
print("Training time:%.4fs"%(t1-t0))
path='/content/drive/MyDrive/gnn/linkpred/2383_gnn.pickle'
torch.save(net.state_dict(),path)

Epoch 0 | Training loss: 0.00181210
Epoch 1 | Training loss: 0.00162874
Epoch 2 | Training loss: 0.00161955
Epoch 3 | Training loss: 0.00162133
Epoch 4 | Training loss: 0.00162042
Epoch 4 | Eval loss: 0.00166512
Epoch 5 | Training loss: 0.00157874
Epoch 6 | Training loss: 0.00151862
Epoch 7 | Training loss: 0.00146273
Epoch 8 | Training loss: 0.00138984
Epoch 9 | Training loss: 0.00136052
Epoch 9 | Eval loss: 0.00135628
Epoch 10 | Training loss: 0.00132365
Epoch 11 | Training loss: 0.00126654
Epoch 12 | Training loss: 0.00123694
Epoch 13 | Training loss: 0.00118599
Epoch 14 | Training loss: 0.00113746
Epoch 14 | Eval loss: 0.00111755
Epoch 15 | Training loss: 0.00110016
Epoch 16 | Training loss: 0.00106728
Epoch 17 | Training loss: 0.00107680
Epoch 18 | Training loss: 0.00103870
Epoch 19 | Training loss: 0.00101060
Epoch 19 | Eval loss: 0.00103106
Epoch 20 | Training loss: 0.00102431
Epoch 21 | Training loss: 0.00102713
Epoch 22 | Training loss: 0.00098941
Epoch 23 | Training loss: 0.0

In [None]:
net.load_state_dict(torch.load(path))
# validate on test set
net.eval()
x_test_feed = torch.from_numpy(x_test).float()
x_test_feed = x_test_feed.to(device)
y_pred = net(x_test_feed)
y_pred=torch.argmax(y_pred,dim=1)
y_pred1 = y_pred.cpu().detach()
y_pred1 = y_pred1.numpy().transpose()
y_test=y_test.transpose()
print('Validation dataset size:',x_test_feed.shape)
print(y_pred.shape)
print(y_test.shape)

Validation dataset size: torch.Size([3212, 3, 2383])
torch.Size([3212, 10])
(10, 3212)


In [None]:
y_diff = np.abs(y_test - y_pred1)
print(np.sum(y_diff)/np.sum(y_test))
# print(y_pred1,y_test)
print(np.sum(y_pred1),np.sum(y_test))
print('--')
print('positive accuracy:',np.sum(y_pred1==y_test)/y_test.shape[0]/y_test.shape[1])
print('--')
for edge in range(10):
  print(f1_score(y_pred1[edge,:],y_test[edge,:]))
print('--')
print(f1_score(y_pred1.reshape(-1,),y_test.reshape(-1,)))

0.3600313655203316
8329 8927.0
--
positive accuracy: 0.8999377334993773
--
0.9036832775260981
0.9690771852934016
0.8315930388219545
0.7005758157389637
0.3364485981308411
0.39545454545454545
0.6197916666666667
0.6019417475728155
0.06722689075630252
0.1176470588235294
--
0.813745943439963


In [None]:
y_all=np.load('/content/drive/MyDrive/gnn/data/data_2383_linkpred/2383_linkpred_y.npy').transpose()

print(np.sum(y_test)/(np.size(y_test,0)*np.size(y_test,1)))
print(np.sum(y_all)/(np.size(y_all,0)*np.size(y_all,1)))

0.2779265255292653
0.0009633756738840453


In [None]:
y_test

array([[1., 1., 1., ..., 0., 0., 0.],
       [1., 1., 1., ..., 0., 1., 0.],
       [1., 1., 1., ..., 0., 1., 1.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])