In [1]:
import pickle
from os import path
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from time import time
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataset=118
x=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_x.npy'%(dataset,dataset)).transpose([2,0,1]).astype('float32')
y=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_y.npy'%(dataset,dataset)).transpose([1,0]).astype('float32')
y[y==2]=1
w=np.load('/content/drive/MyDrive/gnn/data/data_%d_linkpred/%d_linkpred_w.npy'%(dataset,dataset)).astype('float32')

# sort activity
activity=np.sum(y,axis=0)
edges=np.argsort(activity)[::-1]
edge=edges[:10]
y=y[:,edge]

print(torch.cuda.get_device_name(0))

Mounted at /content/drive
Tesla P100-PCIE-16GB


In [2]:
n_bus=x.shape[1]
x=np.reshape(x,(x.shape[0],-1))
# train val test split
x_total,x_test,y_total,y_test=train_test_split(x,y,test_size=0.2,random_state=23)
kf=KFold(n_splits=10,shuffle=True)
for train_index,val_index in kf.split(x_total):
  x_train=x_total[train_index]
  y_train=y_total[train_index]
  x_val=x_total[val_index]
  y_val=y_total[val_index]
  break
# data loader
class Dataset(torch.utils.data.Dataset):
    def __init__(self,x,y,device):
        self.x=torch.from_numpy(x).float()
        self.y=torch.from_numpy(y).float()
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
          idx=idx.tolist()
        # Select sample
        return self.x[idx],self.y[idx]
params={'batch_size':128,
        'shuffle': True,
        'num_workers':2}
train=Dataset(x_train,y_train,device)
train_set=torch.utils.data.DataLoader(train,**params)
val=Dataset(x_val,y_val,device)
val_set=torch.utils.data.DataLoader(val,**params)

In [3]:
class dnn(torch.nn.Module):
  def __init__(self,shape,dropout=0):
    super(dnn,self).__init__()
    layers=[]
    for idx in range(len(shape)-3):
      layers.extend([
        nn.Linear(shape[idx],shape[idx+1],bias=True),
        nn.ReLU(),
        nn.BatchNorm1d(shape[idx+1]),
        nn.Dropout(dropout)
      ])
    layers.extend([
      nn.Linear(shape[-3],shape[-2],bias=True),
      nn.ReLU(),
      nn.Linear(shape[-2],2*shape[-1],bias=True),
      ])
    self.features=nn.Sequential(*layers)
    # initialize
    for temp in self.features:
      if type(temp)==nn.Linear:
        torch.nn.init.xavier_uniform_(temp.weight)
  def forward(self,x):
    x=self.features(x)
    x=torch.reshape(x,(x.shape[0],2,-1))
    return x
net=dnn([x.shape[1],n_bus*10,n_bus*50,n_bus*50,n_bus*50,n_bus*50,2000,y_train.shape[1]])
net=net.to(device)
print('number of params: %d'%(sum(temp.numel() for temp in net.parameters() if temp.requires_grad)))

number of params: 123865320


In [4]:
train_loss=[]
val_loss=[]
optimizer=torch.optim.Adam(net.parameters())
my_loss_func=nn.CrossEntropyLoss()
epochs=500
val_epoch=5

# early stop
tolerance=5
min_delta=1e-4
previous=0
t0=time()
for epoch in range(epochs):
  epoch_loss=0.0
  for local_batch,local_label in train_set:
    optimizer.zero_grad()
    local_batch,local_label=local_batch.to(device),local_label.to(device)
    output=net(local_batch)
    loss=my_loss_func(output,local_label.long())
    loss.backward()
    epoch_loss+=loss.item()
    # update parameters of net
    optimizer.step()
  train_loss.append(epoch_loss/len(train_set.dataset))
  print("Epoch %d | Train loss: %.8f"%(epoch,train_loss[-1]))
  # val
  if (epoch+1)%val_epoch==0:
    net.eval()
    epoch_loss=0.0
    for local_batch,local_label in val_set:
      local_batch,local_label=local_batch.to(device),local_label.to(device)
      output=net(local_batch)
      loss=my_loss_func(output,local_label.long())
      epoch_loss+=loss.item()
    val_avg=epoch_loss/len(val_set.dataset)
    if (epoch==0): previous=val_avg
    else:
      if previous-val_avg<min_delta:
        tolerance-=1
        if tolerance==0:
          break
      previous=val_avg
    print("Epoch %d | Eval loss: %.8f" %(epoch,val_avg))
    val_loss.append([epoch,val_avg])
    net.train()
t1=time()
print('Training time: %.4f'%(t1-t0))
path='/content/drive/MyDrive/gnn/linkpred/%d_dnn.pickle'%(dataset)
torch.save(net.state_dict(),path)

Epoch 0 | Train loss: 0.00297867
Epoch 1 | Train loss: 0.00150397
Epoch 2 | Train loss: 0.00091552
Epoch 3 | Train loss: 0.00099318
Epoch 4 | Train loss: 0.00090821
Epoch 4 | Eval loss: 0.00156173
Epoch 5 | Train loss: 0.00063982
Epoch 6 | Train loss: 0.00055789
Epoch 7 | Train loss: 0.00043344
Epoch 8 | Train loss: 0.00039654
Epoch 9 | Train loss: 0.00044990
Epoch 9 | Eval loss: 0.00619422
Epoch 10 | Train loss: 0.00055400
Epoch 11 | Train loss: 0.00077429
Epoch 12 | Train loss: 0.00099251
Epoch 13 | Train loss: 0.00098868
Epoch 14 | Train loss: 0.00096421
Epoch 14 | Eval loss: 0.00080326
Epoch 15 | Train loss: 0.00049967
Epoch 16 | Train loss: 0.00045032
Epoch 17 | Train loss: 0.00037002
Epoch 18 | Train loss: 0.00036170
Epoch 19 | Train loss: 0.00049102
Epoch 19 | Eval loss: 0.00046764
Epoch 20 | Train loss: 0.00039448
Epoch 21 | Train loss: 0.00033988
Epoch 22 | Train loss: 0.00032810
Epoch 23 | Train loss: 0.00037230
Epoch 24 | Train loss: 0.00033259
Epoch 24 | Eval loss: 0.000934

In [5]:
net.load_state_dict(torch.load(path))
# validate on test set
net.eval()
x_test_feed = torch.from_numpy(x_test).float()
x_test_feed = x_test_feed.to(device)
y_pred = net(x_test_feed)
y_pred=torch.argmax(y_pred,dim=1)
y_pred1 = y_pred.cpu().detach()
y_pred1 = y_pred1.numpy().transpose()
y_test=y_test.transpose()
print('Validation dataset size:',x_test_feed.shape)
print(y_pred.shape)
print(y_test.shape)

Validation dataset size: torch.Size([2000, 472])
torch.Size([2000, 10])
(10, 2000)


In [8]:
y_diff = np.abs(y_test - y_pred1)
print(np.sum(y_diff)/np.sum(y_test))
# print(y_pred1,y_test)
print(np.sum(y_pred1),np.sum(y_test))
print('--')
print('positive accuracy:',np.sum(y_pred1==y_test)/y_test.shape[0]/y_test.shape[1])
print('--')
for edge in range(10):
  print(f1_score(y_pred1[edge,:],y_test[edge,:]))
print('--')
print(f1_score(y_pred1.reshape(-1,),y_test.reshape(-1,)))

0.10692113129455047
4300 4349.0
--
positive accuracy: 0.97675
--
1.0
0.9982469321312297
0.30718954248366015
0.3880597014925373
0.24691358024691357
0.27906976744186046
0.0
0.0
0.0
0.0
--
0.946236559139785


  average, "true nor predicted", 'F-score is', len(true_sum)
