In [1]:
import torch
from tqdm import tqdm
from ogb.graphproppred import Evaluator
import torch.optim as optim
import wandb
from ogb.graphproppred import PygGraphPropPredDataset
from torch_geometric.loader import DataLoader
import sys
BASE_PATH = globals()['_dh'][0].parent.parent.parent.parent.absolute()
sys.path.insert(1, str(BASE_PATH))
from src.models.models import GCN
from src.utils.utils import make_uniform_schedule, count_parameters
import torch.nn.functional as F
from torch_geometric.datasets import LRGBDataset
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.nn import MLP, Linear
from sklearn.metrics import f1_score
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the three PascalVOC-SP splits of the LRGB benchmark, in the order
# train -> val -> test, all rooted at data/LRGB.
train_dataset, val_dataset, test_dataset = (
    LRGBDataset(root="data/LRGB", name="PascalVOC-SP", split=split_name)
    for split_name in ("train", "val", "test")
)
print("train:", len(train_dataset))
print("val:", len(val_dataset))
print("test:", len(test_dataset))

Downloading https://www.dropbox.com/s/8x722ai272wqwl4/pascalvocsp.zip?dl=1
Extracting data/LRGB/pascalvocsp.zip
Processing...
Processing train dataset: 100%|██████████| 8498/8498 [00:00<00:00, 10813.73it/s]
Processing val dataset: 100%|██████████| 1428/1428 [00:00<00:00, 9368.10it/s]
Processing test dataset: 100%|██████████| 1429/1429 [00:00<00:00, 14948.65it/s]
Done!


train: 8498
val: 1428
test: 1429


In [3]:
# Number of target classes reported by the dataset (used later as the model's out_dim).
train_dataset.num_classes

21

In [14]:
from sklearn.metrics import f1_score
import numpy as np

# Trivial baseline: predict label 0 for every validation node and report the
# macro-averaged F1 that this constant prediction achieves.
y_pred = np.zeros(val_dataset.y.shape[0])
f1_score(y_true=val_dataset.y.numpy(), y_pred=y_pred, average="macro")

0.03909621650257977

In [None]:
import random

# Seed every RNG this notebook relies on. `random` drives the subset sampling
# in `small_dataset`; torch drives weight initialisation, dropout and the
# shuffling done by the training DataLoader. Seeding only `random` leaves the
# training runs non-reproducible.
seed = 1
random.seed(seed)
torch.manual_seed(seed)

In [None]:
def small_dataset(train_len, val_len, test_len):
    """Draw a random subset of each split for quick experiments.

    Indices are sampled without replacement with ``random.sample`` from the
    module-level ``train_dataset``, ``val_dataset`` and ``test_dataset`` (in
    that order), so the result depends on the current state of ``random``.

    Args:
        train_len: number of items to keep from the training split.
        val_len: number of items to keep from the validation split.
        test_len: number of items to keep from the test split.

    Returns:
        Tuple ``(train, val, test)`` of ``torch.utils.data.Subset`` objects.
    """
    def _subsample(full_set, keep):
        # One random.sample call per split, so RNG consumption order is fixed.
        picked = random.sample(range(len(full_set)), keep)
        return torch.utils.data.Subset(full_set, picked)

    return (
        _subsample(train_dataset, train_len),
        _subsample(val_dataset, val_len),
        _subsample(test_dataset, test_len),
    )
    
# Keep roughly 1% of every split (85 / 14 / 14 graphs).
train_small_set, val_small_set, test_small_set = small_dataset(
    train_len=85,
    val_len=14,
    test_len=14,
)
print("train:", len(train_small_set))
print("val:", len(val_small_set))
print("test:", len(test_small_set))

In [None]:
# Batch the subsets 32 graphs at a time; only the training loader shuffles.
train_loader = DataLoader(train_small_set, batch_size=32, shuffle=True)
val_loader = DataLoader(val_small_set, batch_size=32, shuffle=False)
test_loader = DataLoader(test_small_set, batch_size=32, shuffle=False)

In [None]:
# Number of input features per node as reported by the dataset.
train_dataset.num_features

In [None]:
import torch

"""
=== Description of the VOCSuperpixels dataset === 
Each graph is a tuple (x, edge_attr, edge_index, y)
Shape of x : [num_nodes, 14]
Shape of edge_attr : [num_edges, 1] or [num_edges, 2]
Shape of edge_index : [2, num_edges]
Shape of y : [num_nodes]
"""

VOC_node_input_dim = 14
# VOC_edge_input_dim = 1 or 2; defined in class VOCEdgeEncoder


class VOCNodeEncoder(torch.nn.Module):
    """Project raw VOC superpixel node features into the embedding space.

    Args:
        emb_dim: size of the embedding produced for every node.
    """

    def __init__(self, emb_dim):
        super().__init__()
        # Single affine map from the VOC_node_input_dim raw features to
        # emb_dim, left at PyTorch's default initialisation.
        self.encoder = torch.nn.Linear(
            in_features=VOC_node_input_dim,
            out_features=emb_dim,
        )

    def forward(self, x):
        """Return the embedding of ``x`` (last dimension VOC_node_input_dim)."""
        return self.encoder(x)


In [None]:
class GNNInductiveNodeHead(nn.Module):
    """
    Prediction head applied to node embeddings for inductive node-level tasks.

    Args:
        in_dim (int): Size of the incoming node embeddings.
        hid_dim (int): Width of the MLP stage (ignored when num_layers <= 1).
        out_dim (int): Number of outputs produced per node.
        num_layers (int): Depth of the head. With more than one layer the head
            is an ``MLP`` of ``num_layers - 1`` layers followed by a final
            ``Linear``; otherwise it is a single ``Linear``.
    """

    def __init__(self, in_dim, hid_dim, out_dim, num_layers):
        super().__init__()
        if num_layers <= 1:
            stages = [Linear(in_channels=in_dim, out_channels=out_dim, bias=True)]
        else:
            # MLP first, then the output projection: the order fixes both the
            # Sequential indices and the parameter-initialisation order.
            stages = [
                MLP(in_channels=in_dim,
                    hidden_channels=hid_dim,
                    out_channels=hid_dim,
                    num_layers=num_layers - 1,
                    bias=True),
                Linear(in_channels=hid_dim, out_channels=out_dim, bias=True),
            ]
        self.layer_post_mp = nn.Sequential(*stages)

    def forward(self, x):
        """Map node embeddings ``x`` to per-node outputs."""
        return self.layer_post_mp(x)


In [None]:
class iterativeGCN_vocsp(nn.Module):
    '''
    Iterative GCN for per-node prediction on the VOC superpixel graphs.

    One ``GCNConv`` layer (followed by ReLU, batch norm and dropout) is applied
    repeatedly with shared weights. After every application the fresh node
    states are blended with the previous ones,

        x <- smooth_fac * x_old + (1 - smooth_fac) * x_new,

    where ``smooth_fac`` is that iteration's entry of the schedule, so the
    number of iterations equals the schedule length. Node features enter
    through ``VOCNodeEncoder`` and the final states are mapped to ``out_dim``
    scores per node by ``GNNInductiveNodeHead``; there is no graph pooling.

    Args:
        out_dim: number of outputs per node.
        hidden_dim: width of the node embeddings and of the shared GCN layer.
        train_schedule: iterable of smoothing factors used in training mode
            (assumed to be numbers in [0, 1] -- TODO confirm against
            ``make_uniform_schedule``).
        MLP_layers: depth passed to the prediction head.
        dropout: dropout probability applied to the freshly computed states.
        eval_schedule: schedule used in eval mode; defaults to
            ``train_schedule`` when None.
        xavier_init: when True, re-initialise every ``nn.Linear`` with
            Xavier-normal weights and zero biases.
    '''
    def __init__(self,
                 out_dim: int,
                 hidden_dim: int,
                 train_schedule,
                 MLP_layers=3,
                 dropout=0.5,
                 eval_schedule=None,
                 xavier_init=False
                 ):
        super().__init__()
        self.out_dim = out_dim
        self.dropout = dropout
        self.hidden_dim = hidden_dim
        self.train_schedule = train_schedule
        if eval_schedule is not None:
            self.eval_schedule = eval_schedule
        else:
            self.eval_schedule = self.train_schedule

        self.atom_encoder = VOCNodeEncoder(hidden_dim)
        self.graph_conv = GCNConv(hidden_dim, hidden_dim)
        self.batch_norm = nn.BatchNorm1d(hidden_dim)

        self.graph_pred_linear = GNNInductiveNodeHead(in_dim=hidden_dim, hid_dim=hidden_dim, out_dim=out_dim, num_layers=MLP_layers)

        # Previously the flag was accepted but silently ignored.
        if xavier_init:
            self._init_xavier()

    def _init_xavier(self):
        """Xavier-normal weights and zero biases for every ``nn.Linear`` submodule."""
        for m in self.modules():
            if isinstance(m, nn.Linear): # GCNConv layers are already Xavier initialized
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def _next_x(self, old_x, new_x, smooth_fac):
        """Convex blend of the previous and the freshly computed node states."""
        next_x = smooth_fac * old_x + (1 - smooth_fac) * new_x
        return next_x

    def forward(self, x, edge_index, edge_attr, batch):
        """Compute per-node outputs.

        ``edge_attr`` and ``batch`` are accepted so the call signature matches
        the training/evaluation loops, but they are not used here.
        """
        if self.training:
            schedule = self.train_schedule
        else:
            schedule = self.eval_schedule

        x = self.atom_encoder(x)

        # Iterate over the schedule *values*: each entry is the smoothing
        # factor of one iteration. (Looping over range(len(schedule)) would
        # feed the integer index 0, 1, 2, ... into the blend instead.)
        for smooth_fac in schedule:
            old_x = x
            x = self.graph_conv(x, edge_index)
            x = F.relu(x)
            x = self.batch_norm(x)
            new_x = F.dropout(x, self.dropout, training=self.training)
            x = self._next_x(old_x, new_x, smooth_fac)
        x = self.graph_pred_linear(x)

        return x

In [None]:
def weighted_cross_entropy(pred, true):
    """Cross-entropy whose per-class weights counteract label imbalance.

    The weight of class ``c`` is ``(V - n_c) / V`` where ``V`` is the number of
    entries in ``true`` and ``n_c`` the number of them carrying label ``c``;
    classes that do not occur in ``true`` get weight 0.

    Args:
        pred: logits. With more than one dimension the classes are read from
            dimension 1 (multiclass case); a 1-D tensor is treated as binary
            logits.
        true: 1-D tensor of integer labels.

    Returns:
        Scalar loss tensor.
    """
    total = true.size(0)
    is_multiclass = pred.ndim > 1
    num_classes = pred.shape[1] if is_multiclass else 2

    # Occurrence count of every label that is actually present, scattered
    # into a dense per-class vector (absent classes stay at 0).
    counts = torch.bincount(true)
    present_counts = counts[counts.nonzero(as_tuple=True)].squeeze()
    class_sizes = torch.zeros(num_classes, device=pred.device).long()
    class_sizes[torch.unique(true)] = present_counts

    class_weight = (total - class_sizes).float() / total
    class_weight *= (class_sizes > 0).float()

    if not is_multiclass:
        # binary: one weight per sample, looked up by its label
        return F.binary_cross_entropy_with_logits(pred, true.float(),
                                                  weight=class_weight[true])

    # multiclass
    log_probs = F.log_softmax(pred, dim=-1)
    return F.nll_loss(log_probs, true, weight=class_weight)


In [None]:
from sklearn.metrics import accuracy_score
from tqdm import tqdm

def train_vocsp_epoch(model, loader, optimizer, scheduler, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: network called as ``model(x, edge_index, edge_attr, batch)``.
        loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.
        optimizer: optimiser stepped once per batch.
        scheduler: unused here; the caller steps it once per epoch.
        device: device every batch is moved to.

    Returns:
        Sum over batches of (batch loss * number of labels in the batch).
        The value is not divided by the total number of labels.
    """
    model.train()
    running_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        # logits: one row per node, one column per class
        logits = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
        targets = batch.y
        loss = weighted_cross_entropy(logits, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * targets.size()[0]

    return running_loss

def eval_vocsp(model, loader, device):
    """Evaluate ``model`` on every batch of ``loader``.

    Args:
        model: network called as ``model(x, edge_index, edge_attr, batch)``.
        loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.
        device: device every batch is moved to.

    Returns:
        Tuple ``(loss, macro_f1, accuracy)``. ``loss`` is the sum over batches
        of (batch loss * number of labels in the batch), i.e. it is not divided
        by the total number of labels; F1 and accuracy are computed over all
        labels of all batches together.
    """
    model.eval()
    y_true = []
    y_pred = []
    criterion = weighted_cross_entropy
    val_loss = 0
    # Evaluation never back-propagates, so run without autograd bookkeeping.
    with torch.no_grad():
        for batched_data in loader:
            batched_data = batched_data.to(device)
            # size of pred is [number of nodes, number of classes]
            pred = model(batched_data.x, batched_data.edge_index, batched_data.edge_attr, batched_data.batch)
            true = batched_data.y
            loss = criterion(pred, true)
            val_loss += loss.item() * true.size()[0]

            pred_val = pred.max(dim=1)[1]  # index of the highest-scoring class per node

            y_pred.append(pred_val.detach())
            y_true.append(true.detach())

    y_true = torch.cat(y_true, dim=0).cpu().numpy()
    y_pred = torch.cat(y_pred, dim=0).cpu().numpy()
    val_f1 = f1_score(y_true, y_pred, average="macro")
    val_acc = accuracy_score(y_true, y_pred)

    return val_loss, val_f1, val_acc

def train_vocsp(model, optimizer, scheduler, train_loader, valid_loader, num_epochs, device):
    """Train for ``num_epochs`` epochs, validating and printing after each one.

    After every epoch the validation loss is handed to ``scheduler.step`` so a
    plateau-based scheduler can adapt the learning rate.
    """
    # wandb.watch(model, log="all", log_freq=10)
    report = "Epoch {}: train loss {:.4}, valid loss {:.4}, valid F1 {:.4}, valid accuracy {:.4}"
    for epoch_idx in tqdm(range(num_epochs)):
        train_loss = train_vocsp_epoch(model, train_loader, optimizer, scheduler, device)
        val_loss, val_f1, val_acc = eval_vocsp(model, valid_loader, device)

        # wandb.log({
        #     "Train loss": train_loss,
        #     "Validate f1": val_f1,
        #     "Validate loss": val_loss,
        #     "epoch": epoch_idx+1
        # })
        print(report.format(epoch_idx + 1, train_loss, val_loss, val_f1, val_acc))
        scheduler.step(val_loss)
        
def exp_vocsp(model, optimizer, scheduler,train_loader, valid_loader, test_loader, num_epochs,device):
    """Run a full experiment: train, then evaluate once on the test loader.

    Prints the wall-clock time of training plus testing together with the test
    loss, macro F1 and accuracy. Returns nothing.
    """
    # Only consumed by the (currently disabled) wandb logging below.
    num_params = count_parameters(model)
    # wandb.log({
    #         'num_param': num_params
    # })
    start = time.time()
    train_vocsp(model, optimizer, scheduler,train_loader, valid_loader, num_epochs, device)
    test_loss, test_f1, test_acc = eval_vocsp(model, test_loader, device)
    # wandb.log({
    #     "Test loss": test_loss,
    #     "Test f1": test_f1
    # })
    end = time.time()
    # `.2f` gives seconds with two decimals (plain `.2` would print two
    # significant digits, e.g. 1.2e+02); the accuracy now has its own field.
    print("Experiment ends! Time elapsed: {:.2f}s, Test loss: {:.4}, Test F1: {:.6}, Test accuracy: {:.4}".format(end-start, test_loss, test_f1, test_acc))
    
    

In [None]:
# Schedule built by make_uniform_schedule(5, 0.5) -- presumably five iterations
# with smoothing factor 0.5 each; confirm against src.utils.utils.
train_schedule = make_uniform_schedule(5, 0.5)
iGCN = iterativeGCN_vocsp(
    out_dim=train_dataset.num_classes,
    hidden_dim=220,
    train_schedule=train_schedule,
    MLP_layers=3,
    dropout=0.5,
)


In [None]:
# Sanity check: evaluate the still-untrained model on the validation subset.
eval_vocsp(iGCN, val_loader, "cpu")

In [None]:
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
# Print numpy arrays in full instead of eliding long ones.
np.set_printoptions(threshold=sys.maxsize)
# wandb.init(job_type="Run", 
#                project="IterativeMethods", 
#                notes="try small vocsp",
#                tags=["iGCN"])
# AdamW without weight decay; the scheduler halves the learning rate after 10
# epochs without a decrease of the monitored value, never going below 1e-5.
optimizer = AdamW(iGCN.parameters(), lr=0.0005, weight_decay=0.0)
scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=10, min_lr=1e-5, verbose=True)
# Train the iterative GCN for 200 epochs on CPU, then report test metrics.
exp_vocsp(iGCN, optimizer, scheduler, train_loader, val_loader, test_loader, num_epochs=200, device="cpu")

In [None]:
from src.models.models import GCN_vocsp
# Comparison run with the project's GCN_vocsp model (num_layers=8), using the
# same hidden size, head depth and dropout as the iterative model above.
gcn = GCN_vocsp(out_dim=train_dataset.num_classes,
                          hidden_dim=220,
                          MLP_layers=3,
                          num_layers=8,
                          dropout=0.5)
# NOTE: rebinds `optimizer` and `scheduler`; AdamW and ReduceLROnPlateau come
# from the imports of the previous experiment cell.
optimizer = AdamW(gcn.parameters(), lr=0.0005, weight_decay=0.0)
scheduler = ReduceLROnPlateau(optimizer=optimizer, mode='min', factor=0.5, patience=10, min_lr=1e-5, verbose=True)
exp_vocsp(gcn, optimizer, scheduler, train_loader, val_loader, test_loader, num_epochs=200, device="cpu")