In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from google.colab import drive
from sklearn.metrics import f1_score
drive.mount('/content/drive')
device='cuda' if torch.cuda.is_available() else 'cpu'
dataset=118
x=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_x.npy'%(dataset,dataset))
y=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_y.npy'%(dataset,dataset)).transpose()
y[y==2]=1
W=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_w.npy'%(dataset,dataset))
print(torch.cuda.get_device_name(0))

Mounted at /content/drive
Tesla V100-SXM2-16GB


In [2]:
# sort activity
activity=np.sum(y,axis=0)
edges=np.argsort(activity)[::-1]
edge=edges[:10]
y=y[:,edge]

gpw=(W!=0).astype(int)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x.transpose(),y,test_size=0.2, random_state=18)
x_train=x_train.transpose()
x_test=x_test.transpose()
y_train=y_train.transpose()
y_test=y_test.transpose()
print('Training data size:',x_train.shape)
print('Training label size:',y_train.shape)

class Dataset(torch.utils.data.Dataset):
    def __init__(self, features, labels, device='cpu'):
        self.features=torch.from_numpy(np.transpose(features)).float()
        self.labels=torch.from_numpy(np.transpose(labels)).float()
    def __len__(self):
        return len(self.features)
    def __getitem__(self, idx):
        if torch.is_tensor(idx): idx = idx.tolist()
        # Select sample
        X = self.features[idx]  # shape = (24,)
        y = self.labels[idx]    # shape = (24,)
        X = torch.reshape(X.t(),(1,-1))
        return X,y
params = {'batch_size':128,
          'shuffle': True,
          'num_workers':2}
# Dataset Generators
training_set=Dataset(features=x_train,labels=y_train,device=device)
training_generator=torch.utils.data.DataLoader(training_set,**params)
validation_set=Dataset(features=x_test,labels=y_test,device=device)
validation_generator=torch.utils.data.DataLoader(validation_set,**params)

Training data size: (118, 4, 8000)
Training label size: (10, 8000)


In [3]:
W1=W.copy()
W1=np.asarray(W1)
W1=W1/1
W2=W1.copy()
print(type(W))
lam,v = np.linalg.eig(W1)
print(lam[0])
for i in range(5):
  W1 = np.matmul(W1,W2) / np.max(np.matmul(W1,W2))
  lam,v = np.linalg.eig(W1)
  print(lam[0])

<class 'numpy.ndarray'>
10.391198194095404
1.1997444434330222
1.2195765287586433
1.2295963414197528
1.2352295835641862
1.2386748374057457


In [4]:
from torch.autograd import Variable
# One layer Graph convolution from nodes to edges
class graph_prune(nn.Module):
    def __init__(self,in_features,out_features,w,bias=True):
        super(graph_prune,self).__init__()
        # enlarge w
        W=np.zeros([w.shape[0]*in_features,w.shape[0]*out_features])
        for i in range(w.shape[0]):
          for j in range(w.shape[1]):
            W[i*in_features:(i+1)*in_features,j*out_features:(j+1)*out_features]+=1
        self.register_buffer('w',torch.from_numpy(W).float())
        self.mapping=nn.Parameter(torch.Tensor(W.shape[0],W.shape[1]))
        self.bias=nn.Parameter(torch.Tensor(1,W.shape[1]))
        torch.nn.init.xavier_uniform_(self.mapping.data)
        torch.nn.init.xavier_uniform_(self.bias.data)

    def forward(self,input):
        h=torch.mul(self.mapping,self.w)
        h=torch.matmul(input,h)
        h=h+self.bias
        return h 

# graph prune NN
class GP(nn.Module):
    def __init__(self,in_feats,hidden_size,W,fc_params):
        super(GP, self).__init__()
        self.conv_v2v1=graph_prune(in_feats,hidden_size[0],W)
        self.conv_v2v2=graph_prune(hidden_size[0],hidden_size[1],W)
        self.conv_v2v3=graph_prune(hidden_size[1],hidden_size[2],W)
        self.conv_v2v4=graph_prune(hidden_size[2],hidden_size[3],W)
        self.conv_v2v5=graph_prune(hidden_size[3],hidden_size[4],W)
        self.conv_v2v6=graph_prune(hidden_size[4],hidden_size[5],W)
        self.linear1=nn.Linear(hidden_size[-1]*x_train.shape[0],fc_params[0])
        self.linear2=nn.Linear(fc_params[0],2*fc_params[1])
         
    def forward(self, inputs):
        # m = nn.ELU()
        # m = nn.LeakyReLU()
        # m = nn.Tanh()
        m = nn.ReLU()
        h=self.conv_v2v1(inputs)
        h=m(h)
        h=self.conv_v2v2(h)
        h=m(h)
        h=self.conv_v2v3(h)
        h=m(h)
        h=self.conv_v2v4(h)
        h=m(h)
        h=self.conv_v2v5(h)
        h=m(h)
        h=self.conv_v2v6(h)
        h=m(h)
        h=torch.squeeze(h)
        h=self.linear1(h)
        h=m(h)
        h=self.linear2(h)
        h=torch.reshape(h,(h.shape[0],2,-1))
        return h
hidden_size=[10,20,20,20,20,10]
net=GP(x_train.shape[1],hidden_size,gpw,[1000,len(edge)]) # Laplacian
net=net.to(device)

optimizer=torch.optim.Adam(net.parameters())
loss_optm=[]
loss_val=[]
print(net)
print('number of params: %d'%(sum(temp.numel() for temp in net.parameters() if temp.requires_grad)))

GP(
  (conv_v2v1): graph_prune()
  (conv_v2v2): graph_prune()
  (conv_v2v3): graph_prune()
  (conv_v2v4): graph_prune()
  (conv_v2v5): graph_prune()
  (conv_v2v6): graph_prune()
  (linear1): Linear(in_features=1180, out_features=1000, bias=True)
  (linear2): Linear(in_features=1000, out_features=20, bias=True)
)
number of params: 24048180


In [5]:
t0=time.time()
max_epochs=200
eval_epoch=5

# earlystopping
tolerance=10
min_delta=1e-3
previous=0

W_tensor=torch.from_numpy(W).float().to(device)
my_loss_func=nn.CrossEntropyLoss()
for epoch in range(max_epochs):
  # training loop
  train_loss=0.0
  for local_batch,local_label in training_generator:
    optimizer.zero_grad() # clear the past gradient
    local_batch,local_label=local_batch.to(device),local_label.to(device)
    logits=net(local_batch)
    loss=my_loss_func(logits,local_label.long())
    loss.backward()
    train_loss+=loss.item()
    optimizer.step() # update parameters of net
  loss_optm.append(train_loss/len(training_generator.dataset))
  print("Epoch %d | Training loss: %.8f"%(epoch,train_loss/len(training_generator.dataset)))
  # eval
  if (epoch+1)%eval_epoch==0:
    net.eval()
    eval_loss=0.0
    for eval_batch,eval_label in validation_generator:
      eval_batch,eval_label=eval_batch.to(device),eval_label.to(device)
      logits=net(eval_batch)
      loss=loss=my_loss_func(logits,eval_label.long())
      eval_loss+=loss.item()
    eval_avg=eval_loss/len(validation_generator.dataset)
    if (epoch==0): previous=eval_avg
    else:
      if previous-eval_avg<min_delta: tolerance-=1
      if tolerance==0: break
      previous=eval_avg
    print("Epoch %d | Eval loss: %.8f" % (epoch, eval_avg))
    loss_val.append([epoch, eval_loss/len(validation_generator.dataset)])
    net.train()
t1=time.time()
print("Training time:%.4fs"%(t1-t0))
path='/content/drive/MyDrive/gnn/linkpred/%d_gp.pickle'%(dataset)
torch.save(net.state_dict(),path)

Epoch 0 | Training loss: 0.00796363
Epoch 1 | Training loss: 0.00055859
Epoch 2 | Training loss: 0.00054976
Epoch 3 | Training loss: 0.00054015
Epoch 4 | Training loss: 0.00053983
Epoch 4 | Eval loss: 0.00051349
Epoch 5 | Training loss: 0.00050672
Epoch 6 | Training loss: 0.00051367
Epoch 7 | Training loss: 0.00051070
Epoch 8 | Training loss: 0.00049619
Epoch 9 | Training loss: 0.00051307
Epoch 9 | Eval loss: 0.00047553
Epoch 10 | Training loss: 0.00049341
Epoch 11 | Training loss: 0.00051003
Epoch 12 | Training loss: 0.00049269
Epoch 13 | Training loss: 0.00050377
Epoch 14 | Training loss: 0.00049002
Epoch 14 | Eval loss: 0.00048882
Epoch 15 | Training loss: 0.00050473
Epoch 16 | Training loss: 0.00048915
Epoch 17 | Training loss: 0.00049634
Epoch 18 | Training loss: 0.00048611
Epoch 19 | Training loss: 0.00047892
Epoch 19 | Eval loss: 0.00050540
Epoch 20 | Training loss: 0.00048697
Epoch 21 | Training loss: 0.00048413
Epoch 22 | Training loss: 0.00048322
Epoch 23 | Training loss: 0.0

In [6]:
net.load_state_dict(torch.load(path))
# validate on test set
net.eval()
x_test_feed=torch.from_numpy(x_test.transpose()).float()
x_test_feed=torch.reshape(x_test_feed,(x_test_feed.shape[0],1,-1))
x_test_feed=x_test_feed.to(device)
y_pred=net(x_test_feed)
y_pred=torch.argmax(y_pred,dim=1)
y_pred1=y_pred.cpu().detach()
y_pred1=y_pred1.numpy().transpose()
print('Validation dataset size:',x_test_feed.shape)
print(y_pred.shape)
print(y_test.shape)

Validation dataset size: torch.Size([2000, 1, 472])
torch.Size([2000, 10])
(10, 2000)


In [7]:
y_diff = np.abs(y_test - y_pred1)
print(np.sum(y_diff)/np.sum(y_test))
# print(y_pred1,y_test)
print(np.sum(y_pred1),np.sum(y_test))
print('--')
print('positive accuracy:',np.sum(y_pred1==y_test)/y_test.shape[0]/y_test.shape[1])
print('--')
for edge in range(10):
  print(f1_score(y_pred1[edge,:],y_test[edge,:]))
print('--')
print(f1_score(y_pred1.reshape(-1,),y_test.reshape(-1,)))

0.07456445993031359
4000 4305.0
--
positive accuracy: 0.98395
--
1.0
0.9979959919839679
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
--
0.9613485851896448


  average, "true nor predicted", 'F-score is', len(true_sum)
