In [86]:
from __future__ import division
from __future__ import print_function
import numpy as np
import torch

import sys
BASE_PATH = globals()['_dh'][0].parent.parent.parent.parent.absolute()
sys.path.insert(1, str(BASE_PATH))
# import torch
from src.utils.utils import exp_vocsp, make_uniform_schedule
# from src.models.iterativeModels import iterativeGCN_vocsp
# from src.utils.metrics import MAD
from torch_geometric.datasets import LRGBDataset
# from torch_geometric.loader import DataLoader
# from torch.optim import AdamW
# from torch.optim.lr_scheduler import ReduceLROnPlateau, OneCycleLR


import wandb
wandb.login()


train_dataset = LRGBDataset(root="data/", name="Peptides-func", split="train")
val_dataset = LRGBDataset(root="data/", name="Peptides-func", split="val")
test_dataset = LRGBDataset(root="data/", name="Peptides-func", split="test")

In [74]:
from sklearn.metrics import average_precision_score
def eval_ap(y_true, y_pred):
        '''
            compute Average Precision (AP) averaged across tasks
        '''

        ap_list = []

        for i in range(y_true.shape[1]):
            #AUC is only defined when there is at least one positive data.
            if np.sum(y_true[:,i] == 1) > 0 and np.sum(y_true[:,i] == 0) > 0:
                # ignore nan values
                is_labeled = y_true[:,i] == y_true[:,i]
                ap = average_precision_score(y_true[is_labeled,i], y_pred[is_labeled,i])
                print("ap",ap)
                ap_list.append(ap)

        if len(ap_list) == 0:
            raise RuntimeError('No positively labeled data available. Cannot compute Average Precision.')

        return sum(ap_list)/len(ap_list)


In [77]:
from torch_geometric.loader import DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [87]:
from src.models.iterativeModels import iterativeGCN_peptides
model = iterativeGCN_peptides(out_dim=train_dataset.num_classes,
                              hidden_dim=32,
                              train_schedule=[0.5, 0.5]
                              )

In [88]:
y_true = []
preds = []
for batched_data in val_loader:
    y_true.append(batched_data.y.detach())
    pred = model(batched_data.x, batched_data.edge_index, batched_data.edge_attr,batched_data.batch)
    preds.append(pred)
y_true = torch.concat(y_true)
preds = torch.concat(preds)
print(y_true.shape)
print(preds.shape)

torch.Size([2331, 10])
torch.Size([2331, 10])


In [91]:
eval_ap(y_true.detach().numpy(), preds.detach().numpy())

true [[1. 0. 0. ... 1. 1. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
pred [[-0.08293815  0.08614053  0.14155483 ... -0.45164433 -0.31345788
   0.09716818]
 [-0.0948697   0.02251315  0.13115355 ... -0.42408094 -0.37856725
   0.11396343]
 [-0.04704923  0.19769663  0.25790745 ... -0.39948735 -0.45777595
   0.10346394]
 ...
 [-0.08958013  0.06446217  0.10771471 ... -0.41582912 -0.26132357
   0.12289611]
 [-0.06635819  0.10537678  0.12020095 ... -0.4801312  -0.26969352
   0.04356698]
 [-0.01072649  0.00812696 -0.09428862 ... -0.4327428  -0.17169869
   0.21387328]]
10
ap 0.09341149143819545
ap 0.025261973430720123
ap 0.07245924881817692
ap 0.08782007913893959
ap 0.5905330246901417
ap 0.23832441643906108
ap 0.26279590846862066
ap 0.150919427048299
ap 0.01720393030304569
ap 0.26957900745843066


0.18083085072336308

In [99]:
from sklearn.metrics import f1_score
y_true = []
y_pred = []
preds = []

for step, batched_data in enumerate(val_loader):  # Iterate in batches over the training dataset.
        
    pred = model(batched_data.x, batched_data.edge_index, batched_data.edge_attr,batched_data.batch) # size of pred is [number of nodes, number of features]
    true = batched_data.y

    pred_val = pred.max(dim=1)[1] # pred_val contains actually class predictions
    y_pred.append(pred_val.detach())
    y_true.append(true.detach())
    preds.append(pred.detach())

    
y_true = torch.cat(y_true, dim = 0).cpu().numpy()
y_pred = torch.cat(y_pred, dim = 0).cpu().numpy()
preds = torch.cat(preds, dim=0).cpu().numpy()
print(y_pred.shape)
# val_f1 = f1_score(y_true, y_pred, average="macro")
# val_ap = eval_ap(y_true, preds)
    

(2331,)


In [54]:
batch = next(iter(train_dataset))

In [62]:
pred = torch.rand(batch.y.shape)

10

In [75]:
eval_ap(batch.y.numpy(), pred.numpy())

true [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
pred [[0.4729731  0.48898375 0.00603795 0.40174198 0.33372813 0.14399242
  0.7643688  0.7350955  0.01468372 0.07714814]]
10


RuntimeError: No positively labeled data available. Cannot compute Average Precision.