In [1]:
import torch

In [2]:
from begin.algorithms.bare.nc_class import NCClassILBareTrainer
from begin.scenarios.nodes import NCScenarioLoader

# Build the Cora node-classification scenario: 3 tasks, class-incremental
# (`incr_type='class'`), evaluated with accuracy. Data is read from / written
# to the local `data` directory.
scenario = NCScenarioLoader(
    dataset_name='cora',
    num_tasks=3,
    metric='accuracy',
    save_path='data',
    incr_type='class',
    task_shuffle=1,
)

Using backend: pytorch


  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
class split information: (tensor([6, 1]), tensor([5, 3]), tensor([0, 4]))


In [3]:
# Inspection only: echo the imported trainer class so its fully qualified
# name is shown in the cell output. Safe to delete from a finished notebook.
NCClassILBareTrainer

begin.algorithms.bare.nc_class.NCClassILBareTrainer

In [4]:
import torch
from torch import nn
from dgl.nn import GraphConv, SumPooling, AvgPooling, MaxPooling
import torch.nn.functional as F
from dgl.base import DGLError
from dgl.utils import expand_as_pair
import dgl.function as fn
from torch_scatter import segment_csr

class AdaptiveLinear(nn.Module):
    """Linear output layer whose set of usable outputs can change over time.

    The layer wraps an ``nn.Linear`` and keeps, per call to
    :meth:`observe_outputs`, a boolean mask over the output units. Outputs
    that are masked out are overwritten with ``-1e12`` in :meth:`forward`.
    When an output index beyond the current width is observed, the wrapped
    linear layer is replaced by a wider one and the rows of the already
    observed outputs are copied over.

    Args:
        in_channels: Size of each input sample.
        out_channels: Initial number of output units.
        bias: Whether the wrapped linear layer has a bias term.
        accum: If True, each new mask is the union of the previous mask and
            the newly observed outputs; if False, each mask contains only the
            outputs passed to that call.
    """

    def __init__(self, in_channels, out_channels, bias=True, accum=True):
        super().__init__()
        self.lin = nn.Linear(in_channels, out_channels, bias)
        self.bias = bias
        self.accum = accum
        self.num_outputs = out_channels
        # One mask per observe_outputs() call; None until the first call.
        self.output_masks = None
        # Union of every output index observed so far. This is a plain
        # attribute (not a registered buffer), so it does not follow
        # ``module.to(...)``; the methods below move it explicitly.
        self.observed = torch.zeros(out_channels, dtype=torch.bool)

    def observe_outputs(self, new_outputs, verbose=True):
        """Register a new set of output indices and grow the layer if needed.

        Args:
            new_outputs: Integer tensor of output indices (duplicates allowed).
                An empty tensor registers a mask without new outputs.
                NOTE(review): negative values index from the end of the mask,
                so callers should filter out sentinel labels such as -1.
            verbose: Unused; kept for interface compatibility.

        Note:
            When the layer is widened, ``self.lin`` is replaced by a new
            ``nn.Linear``. Anything holding references to the old parameters
            (for example an optimizer created earlier) is not updated here.
        """
        device = self.lin.weight.device
        new_outputs = torch.unique(new_outputs).to(device)

        # Convert to a plain int so sizes below are never 0-d tensors, and
        # tolerate an empty tensor (``.max()`` would raise on it).
        required = int(new_outputs.max()) + 1 if new_outputs.numel() > 0 else 0
        new_num_outputs = max(self.num_outputs, required)
        num_added = new_num_outputs - self.num_outputs

        new_output_mask = torch.zeros(new_num_outputs, dtype=torch.bool, device=device)
        new_output_mask[new_outputs] = True

        prv_observed = self.observed.to(device)

        if self.output_masks is None:
            self.output_masks = [new_output_mask]
        else:
            if num_added > 0:
                # Pad every stored mask so all masks share the new width.
                padding = torch.zeros(num_added, dtype=torch.bool, device=device)
                self.output_masks = [
                    torch.cat((output_mask.to(device), padding), dim=-1)
                    for output_mask in self.output_masks
                ]
            if self.accum:
                self.output_masks.append(self.output_masks[-1].to(device) | new_output_mask)
            else:
                self.output_masks.append(new_output_mask)

        observed = prv_observed
        if num_added > 0:
            old_lin = self.lin
            observed = torch.cat(
                (prv_observed, torch.zeros(num_added, dtype=torch.bool, device=device)),
                dim=-1,
            )
            # Build the wider layer on the same device/dtype as the old one.
            # The input width comes from the existing layer.
            new_lin = nn.Linear(old_lin.in_features, new_num_outputs, bias=self.bias)
            new_lin = new_lin.to(device=device, dtype=old_lin.weight.dtype)
            with torch.no_grad():
                # Only rows of previously observed outputs are carried over;
                # all other rows keep their fresh initialisation.
                new_lin.weight[observed] = old_lin.weight[prv_observed]
                if self.bias:
                    new_lin.bias[observed] = old_lin.bias[prv_observed]
            self.lin = new_lin
            self.num_outputs = new_num_outputs

        self.observed = observed | new_output_mask

    def get_output_mask(self, task_ids=None):
        """Return the output mask(s) for the given task ids.

        Args:
            task_ids: ``None`` or a 1-D integer tensor of task ids.

        Returns:
            The most recent mask if ``task_ids`` is None. Otherwise a boolean
            tensor of shape ``(len(task_ids), num_outputs)`` on the device of
            ``task_ids``; rows whose id is not smaller than the number of
            stored masks are all False.
        """
        if task_ids is None:
            return self.output_masks[-1]

        mask = torch.zeros(
            task_ids.shape[0], self.num_outputs, dtype=torch.bool, device=task_ids.device
        )
        observed_mask = task_ids < len(self.output_masks)
        stacked = torch.stack(self.output_masks, dim=0).to(mask.device)
        mask[observed_mask] = stacked[task_ids[observed_mask]]
        return mask

    def forward(self, x, task_masks=None):
        """Apply the linear layer and suppress masked-out outputs.

        Args:
            x: Input tensor accepted by the wrapped ``nn.Linear``.
            task_masks: Optional boolean tensor with the same shape as the
                output; entries that are False are overwritten. If None, the
                outputs never observed so far are overwritten instead.

        Returns:
            The layer output with suppressed entries set to ``-1e12``.
        """
        out = self.lin(x)
        if task_masks is None:
            out[..., ~self.observed.to(out.device)] = -1e12
        else:
            out[~task_masks] = -1e12

        return out
    
class GCN(nn.Module):
    """Stack of graph convolutions with an optional adaptive classifier head.

    Each layer is ``GraphConv -> BatchNorm1d -> activation -> dropout``.
    Dropout is also applied to the input features before the first layer.

    Args:
        in_feats: Input feature size of the first convolution.
        n_classes: Initial number of outputs of the classifier head.
        n_hidden: Output size of every convolution.
        activation: Callable applied after each batch norm.
        dropout: Dropout probability.
        n_layers: Number of convolution layers.
        incr_type: When equal to ``'task'`` the classifier keeps per-task
            masks separate (``accum=False``); otherwise masks accumulate.
        use_classifier: If False, no classifier head is created and
            ``forward`` returns the hidden representation.
    """

    def __init__(self, in_feats, n_classes, n_hidden, activation = F.relu, dropout=0.0, n_layers=3, incr_type='class', use_classifier=True):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()

        for layer_idx in range(n_layers):
            # Only the first layer reads the raw input features.
            width_in = in_feats if layer_idx == 0 else n_hidden
            self.convs.append(
                GraphConv(width_in, n_hidden, "both", bias=False, allow_zero_in_degree=True)
            )
            self.norms.append(nn.BatchNorm1d(n_hidden))

        self.dropout = nn.Dropout(dropout)
        self.activation = activation

        self.classifier = None
        if use_classifier:
            self.classifier = AdaptiveLinear(
                n_hidden, n_classes, bias=True, accum=(incr_type != 'task')
            )

    def forward(self, graph, feat, task_masks=None):
        """Run the convolution stack and, if present, the classifier head.

        ``task_masks`` is forwarded unchanged to the classifier.
        """
        hidden = self.dropout(feat)
        for layer_idx in range(self.n_layers):
            hidden = self.convs[layer_idx](graph, hidden)
            hidden = self.norms[layer_idx](hidden)
            hidden = self.dropout(self.activation(hidden))

        if self.classifier is None:
            return hidden
        return self.classifier(hidden, task_masks)

    def observe_labels(self, new_labels, verbose=True):
        """Forward newly seen labels to the classifier head."""
        self.classifier.observe_outputs(new_labels, verbose=verbose)

    def get_observed_labels(self, tid=None):
        """Return the observed-label mask.

        With ``tid`` None or negative, the classifier's overall ``observed``
        mask is returned; otherwise the mask stored for task ``tid``.
        """
        wants_overall = tid is None or tid < 0
        if wants_overall:
            return self.classifier.observed
        return self.classifier.output_masks[tid]


In [5]:
# 3-layer GCN (the class default) with 256 hidden units and no dropout.
model = GCN(
    in_feats=scenario.num_feats,
    n_classes=scenario.num_classes,
    n_hidden=256,
    dropout=0.0,
)

In [6]:
# Trainer configuration:
#   - Adam with lr=1e-3 and no weight decay.
#   - Cross-entropy loss that ignores targets equal to -1.
#   - ReduceLROnPlateau in 'min' mode, patience 20, min_lr = 1e-3 * 0.001 * 2 = 2e-6.
#   - The first CUDA device when one is available; otherwise the CPU, so the
#     cell also runs on machines without a GPU.
benchmark = NCClassILBareTrainer(model = model,
                                 scenario = scenario,
                                 optimizer_fn = lambda x: torch.optim.Adam(x, lr=1e-3, weight_decay=0),
                                 loss_fn = torch.nn.CrossEntropyLoss(ignore_index=-1),
                                 device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
                                 scheduler_fn = lambda x: torch.optim.lr_scheduler.ReduceLROnPlateau(x, mode='min', patience=20, min_lr=1e-3 * 0.001 * 2., verbose=True),
                                 benchmark = True, seed = 42)

In [7]:
# Run the benchmark with `epoch_per_task = 1000`.
# NOTE(review): the saved output of this cell ends with KeyError: 'exp_AP' and
# no traceback is shown, so `results` may never have been assigned. Confirm
# where the 'exp_AP' key is expected before relying on this cell.
results = benchmark.run(epoch_per_task = 1000)

task_id: 0 Epoch #0: train_acc: 0.7 val_acc: 0.5538 train_loss: 0.6196 val_loss: 0.6893
task_id: 0 Epoch #10: train_acc: 1.0 val_acc: 0.5538 train_loss: 0.0004 val_loss: 0.7089
task_id: 0 Epoch #20: train_acc: 1.0 val_acc: 0.5538 train_loss: 0.0 val_loss: 0.866
Epoch    27: reducing learning rate of group 0 to 1.0000e-04.
task_id: 0 Epoch #30: train_acc: 1.0 val_acc: 0.5538 train_loss: 0.0 val_loss: 0.9603
task_id: 0 Epoch #40: train_acc: 1.0 val_acc: 0.5538 train_loss: 0.0 val_loss: 0.944
Epoch    48: reducing learning rate of group 0 to 1.0000e-05.
task_id: 0 Epoch #50: train_acc: 1.0 val_acc: 0.5538 train_loss: 0.0 val_loss: 0.8493
task_id: 0 Epoch #60: train_acc: 1.0 val_acc: 0.5692 train_loss: 0.0 val_loss: 0.6745
task_id: 0 Epoch #70: train_acc: 1.0 val_acc: 0.7077 train_loss: 0.0 val_loss: 0.4325
task_id: 0 Epoch #80: train_acc: 1.0 val_acc: 0.8769 train_loss: 0.0 val_loss: 0.228
task_id: 0 Epoch #90: train_acc: 1.0 val_acc: 0.9385 train_loss: 0.0 val_loss: 0.132
task_id: 0 Epoc

KeyError: 'exp_AP'