In [2]:
import torch
from src.data.data_utils import load_data, load_node_to_nearest_training
from src.utils import set_global_seeds, arg_parse, name_model, create_nested_defaultdict, \
    metric_mean, metric_std, default_cal_wdecay, save_prediction
from src.model.model import create_model
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [3]:
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Dataset
from torch_geometric.nn import MLP

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
import os.path as osp
import os
import time
from src.model.model import create_model
from pathlib import Path
from src.calibrator import rbs
from torch_geometric.utils import to_networkx
import scipy
import numpy as np
from torch import nn
import torch.nn.functional as F
import copy

In [6]:
import argparse
from src.calibloss import ECE

parser = argparse.ArgumentParser(description='train.py and calibration.py share the same arguments')
parser.add_argument('--seed', type=int, default=10, help='Random Seed')
parser.add_argument('--dataset', type=str, default='Cora', choices=['Cora','Citeseer', 'Pubmed', 
                    'Computers', 'Photo', 'CS', 'Physics', 'CoraFull'])
parser.add_argument('--split_type', type=str, default='5_3f_85', help='k-fold and test split')
parser.add_argument('--model', type=str, default='GCN', choices=['GCN', 'GAT'])
parser.add_argument('--verbose', action='store_true', default=False, help='Show training and validation loss')
parser.add_argument('--wdecay', type=float, default=5e-4, help='Weight decay for training phase')
parser.add_argument('--dropout_rate', type=float, default=0.5, help='Dropout rate. 1.0 denotes drop all the weights to zero')
parser.add_argument('--calibration', type=str, default='GATS',  help='Post-hoc calibrators')
parser.add_argument('--cal_wdecay', type=float, default=None, help='Weight decay for calibration phase')
parser.add_argument('--cal_dropout_rate', type=float, default=0.5, help='Dropout rate for calibrators (CaGCN)')
parser.add_argument('--folds', type=int, default=3, help='K folds cross-validation for calibration')
parser.add_argument('--ece-bins', type=int, default=15, help='number of bins for ece')
parser.add_argument('--ece-scheme', type=str, default='equal_width', choices=ECE.binning_schemes, help='binning scheme for ece')
parser.add_argument('--ece-norm', type=float, default=1.0, help='norm for ece')
parser.add_argument('--save_prediction', action='store_true', default=False)
parser.add_argument('--config', action='store_true', default=False)

gats_parser = parser.add_argument_group('optional GATS arguments')
gats_parser.add_argument('--heads', type=int, default=2, help='Number of heads for GATS. Hyperparameter set: {1,2,4,8,16}')
gats_parser.add_argument('--bias', type=float, default=1, help='Bias initialization for GATS')
args = parser.parse_args(['--dataset', 'Cora', '--calibration', 'TS', '--model', 'GAT'])

parser.add_argument("--alpha", type=float, default=0.98)
parser.add_argument("--lmbda", type=float, default=1.0)
parser.add_argument("--num_bins_rbs", type=int, default=2)

args_dict = {}
for group in parser._action_groups:
    if group.title == 'optional GATS arguments':
        group_dict={a.dest:getattr(args,a.dest,None) for a in group._group_actions}
        args_dict['gats_args'] = argparse.Namespace(**group_dict)
    else:
        group_dict={a.dest:getattr(args,a.dest,None) for a in group._group_actions}
        args_dict.update(group_dict)
args = argparse.Namespace(**args_dict)

In [7]:
args

Namespace(alpha=None, cal_dropout_rate=0.5, cal_wdecay=None, calibration='TS', config=False, dataset='Cora', dropout_rate=0.5, ece_bins=15, ece_norm=1.0, ece_scheme='equal_width', folds=3, gats_args=Namespace(bias=1, heads=2), help=None, lmbda=None, model='GAT', num_bins_rbs=None, save_prediction=False, seed=10, split_type='5_3f_85', verbose=False, wdecay=0.0005)

In [8]:
def load_base_data(name: str) -> Dataset:
    """
    name: str, the name of the dataset
    """
    transform = T.NormalizeFeatures()
    if name in ['Cora','Citeseer', 'Pubmed']:
        dataset = Planetoid(root='./data/', name=name, transform=transform)
    return dataset

file_name = 'model_labelrate/model/Cora/labelrate_20/GAT_run3.pt'
checkpoint = torch.load(file_name)
dataset = load_base_data(args.dataset)
data = dataset.data.to(device)
data.train_mask = checkpoint['train_mask']
data.val_mask = checkpoint['val_mask']
data.test_mask = checkpoint['test_mask']

model1 = create_model(dataset, args).to(device)
model1.load_state_dict(checkpoint['model_state_dict'])
torch.cuda.empty_cache()

In [9]:
print(data)
print(data.train_mask.sum().item())
print(data.val_mask.sum().item())
print(data.test_mask.sum().item())

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])
140
500
1000


In [12]:
model1

GAT(
  (layer_list): ModuleDict(
    (conv1): GATConv(1433, 8, heads=8)
    (conv2): GATConv(64, 7, heads=1)
  )
)

In [238]:
from torch_geometric.nn import GCNConv
class GCN_dcgc(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_edge, dropout):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

        self.edge_weight = torch.ones(num_edge).cuda()

    def forward(self, x, edge_index, edge_weight=None):
        if edge_weight is None:
            edge_weight = torch.ones(len(edge_index[0])).cuda()
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv1(x=x, edge_index=edge_index, edge_weight=edge_weight).relu()
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x=x, edge_index=edge_index, edge_weight=edge_weight)
        return x

In [239]:
model_dcgc = GCN_dcgc(in_channels=data.x.shape[1], hidden_channels=16, out_channels=dataset.num_classes, num_edge=data.num_edges, dropout=0.7).to(device)

In [16]:
edge_weight = torch.ones(len(data.edge_index[0])).cuda()
edge_weight.shape
data.num_features

1433

In [276]:
print(model)
print(model_dcgc)
from torchsummary import summary
# summary(model_dcgc, [(2708, 1433), (2,10566)] ) 
len(model.feature_list)

GCN(
  (layer_list): ModuleDict(
    (conv1): GCNConv(1433, 64)
    (conv2): GCNConv(64, 7)
  )
)
GCN_dcgc(
  (conv1): GCNConv(1433, 16)
  (conv2): GCNConv(16, 7)
)


3

In [19]:
from torch_geometric.nn import GCNConv, GATConv
class GAT(torch.nn.Module):
    def __init__(self, in_channels, num_classes, num_hidden, attention_head, drop_rate, num_layers):
        super().__init__()
        self.drop_rate = drop_rate
        self.feature_list = [in_channels, num_hidden, num_classes]
        for _ in range(num_layers-2):
            self.feature_list.insert(-1, num_hidden)
        attention_head = [1] + attention_head
        layer_list = []
        for i in range(len(self.feature_list)-1):
            concat = False if i == num_layers-1 else True 
            layer_list.append(["conv"+str(i+1), GATConv(self.feature_list[i]* attention_head[i], self.feature_list[i+1], 
                                                        heads=attention_head[i+1], dropout=drop_rate, concat=concat)])
        self.layer_list = torch.nn.ModuleDict(layer_list)

    def forward(self, x, edge_index, edge_weight=None):
        for i in range(len(self.feature_list)-1):
            x = F.dropout(x, self.drop_rate, self.training)
            x = self.layer_list["conv"+str(i+1)](x, edge_index)
            if i < len(self.feature_list)-2:
                x = F.elu(x)
        return x

In [20]:
model2 = GAT(in_channels=dataset.num_features, num_classes = dataset.num_classes, num_hidden=8,
                    attention_head=[8,1], drop_rate=0.6, num_layers=2)

In [21]:
model2

GAT(
  (layer_list): ModuleDict(
    (conv1): GATConv(1433, 8, heads=8)
    (conv2): GATConv(64, 7, heads=1)
  )
)

In [22]:
model1

GAT(
  (layer_list): ModuleDict(
    (conv1): GATConv(1433, 8, heads=8)
    (conv2): GATConv(64, 7, heads=1)
  )
)

In [23]:
from src.calibloss import NodewiseECE

ece_fn = NodewiseECE(data.test_mask, bins=15, scheme='equal_width', norm=1)

In [24]:
class Edge_Weight(torch.nn.Module):
    def __init__(self, model, out_channels, dropout):
        super(Edge_Weight, self).__init__()
        self.model = model
        self.extractor = MLP([out_channels*2, out_channels*4, 1], dropout=dropout)

        for para in self.model.parameters():
            para.requires_grad = False

    def forward(self, x, edge_index, edge_weight=None):
        if edge_weight is None:
            edge_weight = self.get_weight(x, edge_index)
        print('edge_weight requires_grad: ', edge_weight.requires_grad)
        logist = self.model(x, edge_index, edge_weight)
        print('logist requires_grad: ', logist.requires_grad)
        return logist
    
    def fit(self, data, train_mask, test_mask, wdecay, lr=0.01, edge_weight=None, verbose=False):
        self.to(device)
        self.optimizer = torch.optim.Adam(self.extractor.parameters(),lr=lr, weight_decay=wdecay)
        fit_calibration_dcgc(self, data, train_mask, test_mask, edge_weight=edge_weight, verbose=verbose)
        return self

    def get_weight(self, x, edge_index):

        emb = self.model(x, edge_index)
        col, row = edge_index
        f1, f2 = emb[col], emb[row]
        f12 = torch.cat([f1, f2], dim=-1)
        edge_weight = self.extractor(f12)
        return edge_weight.relu()

In [25]:
def fit_calibration_dcgc(temp_model, data, train_mask, test_mask, edge_weight=None, patience = 100, verbose=False):
    """
    Train calibrator dcgc
    """    
    vlss_mn = float('Inf')

    labels = data.y
    model_dict = temp_model.state_dict()
    parameters = {k: v for k,v in model_dict.items() if k.split(".")[0] != "model"}
    for epoch in range(2000):

        temp_model.train()
        temp_model.optimizer.zero_grad()
        logits = temp_model(data.x, data.edge_index, edge_weight)
        print()
        
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])
        print(logits.requires_grad)
        # print(loss)
        print('loss requires_grad: ', loss.requires_grad)
        loss.backward()
        temp_model.optimizer.step()

        with torch.no_grad():
            temp_model.eval()
            # logits = temp_model(data.x, data.edge_index, edge_weight)
            
            val_loss = F.cross_entropy(logits[test_mask], labels[test_mask])
            if val_loss <= vlss_mn:
                state_dict_early_model = copy.deepcopy(parameters)
                vlss_mn = np.min((val_loss.cpu().numpy(), vlss_mn))
                # for debug
                preds = torch.argmax(logits, dim=1)[test_mask]
                acc = torch.mean((preds == data.y[test_mask]).to(torch.get_default_dtype())).item()
                ece = ece_fn(logits, data.y)
                curr_step = 0
            else:
                curr_step += 1
                if curr_step >= patience:
                    break
            if verbose:
                print(f'Epoch: : {epoch+1:03d}, Accuracy: {acc:.4f}, NNL: {val_loss:.4f}, ECE: {ece:.4f}')
    model_dict.update(state_dict_early_model)
    
    temp_model.load_state_dict(model_dict)

In [28]:
# ew = Edge_Weight(model, dataset.num_classes, dropout=0.7).to(device)
ew1 = Edge_Weight(model1, dataset.num_classes, dropout=0.7).to(device)
ew2 = Edge_Weight(model2, dataset.num_classes, dropout=0.7).to(device)
# ew.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None, verbose=False)


In [29]:
ew1.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None, verbose=False)

edge_weight requires_grad:  True


TypeError: forward() takes 3 positional arguments but 4 were given

In [31]:
ew2.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None, verbose=False)

edge_weight requires_grad:  True
x requires_grad:  False
logist requires_grad:  False

loss requires_grad:  False


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [19]:
class TS(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.temperature = nn.Parameter(torch.ones(1) * 1.5)

    def forward(self, x, edge_index, edge_weight=None):
        logits = self.model(x, edge_index, edge_weight)
        temperature = self.temperature_scale(logits)
        return logits / temperature

    def temperature_scale(self, logits):
        """
        Expand temperature to match the size of logits
        """
        temperature = self.temperature.unsqueeze(1).expand(logits.size(0), logits.size(1))
        return temperature

    def fit(self, data, train_mask, test_mask, wdecay, edge_weight=None, verbose=False):
        self.to(device)
        def eval(logits):
            temperature = self.temperature_scale(logits)
            calibrated = logits / temperature
            return calibrated

        self.train_param = [self.temperature]
        self.optimizer = torch.optim.Adam(self.train_param, lr=0.01, weight_decay=wdecay)
        fit_calibration(self, eval, data, train_mask, test_mask, edge_weight=edge_weight, verbose=verbose)
        return self

In [20]:
def fit_calibration(temp_model, eval, data, train_mask, test_mask, edge_weight=None, patience = 100, verbose=False):
    """
    Train calibrator
    """    
    vlss_mn = float('Inf')
    with torch.no_grad():
        logits = temp_model.model(data.x, data.edge_index, edge_weight)
        labels = data.y
        edge_index = data.edge_index
        model_dict = temp_model.state_dict()
        parameters = {k: v for k,v in model_dict.items() if k.split(".")[0] != "model"}
    for epoch in range(2000):
        temp_model.optimizer.zero_grad()
        temp_model.train()
        # Post-hoc calibration set the classifier to the evaluation mode
        temp_model.model.eval()
        assert not temp_model.model.training
        calibrated = eval(logits)
        loss = F.cross_entropy(calibrated[train_mask], labels[train_mask])
        # dist_reg = intra_distance_loss(calibrated[train_mask], labels[train_mask])
        # margin_reg = 0.
        # loss = loss + margin_reg * dist_reg
        loss.backward()
        temp_model.optimizer.step()

        with torch.no_grad():
            temp_model.eval()
            calibrated = eval(logits)
            val_loss = F.cross_entropy(calibrated[test_mask], labels[test_mask])
            # dist_reg = intra_distance_loss(calibrated[train_mask], labels[train_mask])
            # val_loss = val_loss + margin_reg * dist_reg
            if val_loss <= vlss_mn:
                state_dict_early_model = copy.deepcopy(parameters)
                vlss_mn = np.min((val_loss.cpu().numpy(), vlss_mn))
                # for debug
                preds = torch.argmax(logits, dim=1)[test_mask]
                acc = torch.mean((preds == data.y[test_mask]).to(torch.get_default_dtype())).item()
                ece = ece_fn(logits, data.y)
                curr_step = 0
            else:
                curr_step += 1
                if curr_step >= patience:
                    break
            if verbose:
                print(f'Epoch: : {epoch+1:03d}, Accuracy: {acc:.4f}, NNL: {val_loss:.4f}, ECE:{ece:.4f}')
    model_dict.update(state_dict_early_model)
    temp_model.load_state_dict(model_dict)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [21]:
dcgc_beta=10
dcgc_alpha = 0.5
for run in range(10):
    model1 = create_model(dataset, args).to(device)
    model1.load_state_dict(checkpoint['model_state_dict'])
    
    ew = Edge_Weight(model1, dataset.num_classes, dropout=0.7).to(device)
    # training 
    ew.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None, verbose=False)
    with torch.no_grad():
        ew.eval()
        logits = ew(data.x, data.edge_index)
        prob = F.softmax(logits, dim=1)
        edge_weight = ew.get_weight(data.x, data.edge_index)
    ece1 = ece_fn(logits, data.y)
    print(f'run {run} ECE1 : ', ece1.item())
    
    pred = torch.exp(dcgc_beta * prob)
    pred /= torch.sum(pred, dim=1, keepdim=True)

    col, row = data.edge_index
    coefficient = torch.norm(pred[col] - pred[row], dim=1)
    coefficient = 1 / (coefficient + dcgc_alpha)
    
    edge_weight = edge_weight.reshape(-1)
    edge_weight = edge_weight * coefficient
    edge_weight = edge_weight.reshape([data.num_edges, 1])
    
    with torch.no_grad():
        ew.eval()
        logits = ew(data.x, data.edge_index, edge_weight)
        prob = F.softmax(logits, dim=1)
    ece2 = ece_fn(logits, data.y)
    print(f'run {run} DCGC ECE : ', ece2.item())
    
    ts = TS(model1)
    ts.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None)
    with torch.no_grad():
        ts.eval()
        logits = ts(data.x, data.edge_index)
    ece3 = ece_fn(logits, data.y)
    print(f'run {run} TS ECE : ', ece3.item())
    
    
    ts = TS(model1)
    ts.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=edge_weight)
    with torch.no_grad():
        ts.eval()
        logits = ts(data.x, data.edge_index, edge_weight)
    ece4 = ece_fn(logits, data.y)
    print(f'run {run} TS+DCGC ECE : ', ece4.item())
    print('----------------------------------------------')
    

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
ew = Edge_Weight(model, dataset.num_classes, dropout=0.7).to(device)

In [177]:
ew

Edge_Weight(
  (model): GCN(
    (layer_list): ModuleDict(
      (conv1): GCNConv(1433, 64)
      (conv2): GCNConv(64, 7)
    )
  )
  (extractor): MLP(14, 28, 1)
)

In [178]:
for name, param in ew.named_parameters():
    # if param.requires_grad:
    #     print(name)
    print(name, 'requires_grad: ', param.requires_grad)

model.layer_list.conv1.bias requires_grad:  False
model.layer_list.conv1.lin.weight requires_grad:  False
model.layer_list.conv2.bias requires_grad:  False
model.layer_list.conv2.lin.weight requires_grad:  False
extractor.lins.0.weight requires_grad:  True
extractor.lins.0.bias requires_grad:  True
extractor.lins.1.weight requires_grad:  True
extractor.lins.1.bias requires_grad:  True
extractor.norms.0.module.weight requires_grad:  True
extractor.norms.0.module.bias requires_grad:  True


In [180]:
for name, param in ts.named_parameters():
    # if param.requires_grad:
    #     print(name)
    print(name, 'requires_grad: ', param.requires_grad)

temperature requires_grad:  True
model.layer_list.conv1.att_src requires_grad:  True
model.layer_list.conv1.att_dst requires_grad:  True
model.layer_list.conv1.bias requires_grad:  True
model.layer_list.conv1.lin_src.weight requires_grad:  True
model.layer_list.conv2.att_src requires_grad:  True
model.layer_list.conv2.att_dst requires_grad:  True
model.layer_list.conv2.bias requires_grad:  True
model.layer_list.conv2.lin_src.weight requires_grad:  True


In [158]:
optimizer1 = torch.optim.Adam(ew.extractor.parameters(),lr=0.01, weight_decay=5e-4)

optimizer2 = torch.optim.Adam(filter(lambda p: p.requires_grad, ew.parameters()),lr=0.01, weight_decay=5e-4)

optimizer3 = torch.optim.Adam(filter(lambda p: p.requires_grad, ts.parameters()),lr=0.01, weight_decay=5e-4)

In [168]:
for param_group in optimizer1.param_groups:
    print(param_group['params'][0].shape) 
    
for param_group in optimizer2.param_groups:
    print(param_group['params'][0].shape) 
    
for param_group in optimizer3.param_groups:
    print(param_group['params'][2]) 

torch.Size([28, 14])
torch.Size([28, 14])
Parameter containing:
tensor([[[ 0.3796, -0.1712,  0.2955, -0.4129,  0.3349, -0.3706,  0.1070,
           0.2653],
         [-0.3712, -0.1141,  0.2433, -0.3011,  0.0007, -0.0482, -0.2178,
           0.4061],
         [-0.1434,  0.0167,  0.4106,  0.2659,  0.4136,  0.1064, -0.1923,
          -0.1996],
         [-0.4281, -0.0906, -0.1002, -0.4382,  0.3606, -0.1047,  0.1988,
          -0.0653],
         [ 0.0889,  0.3991, -0.1549,  0.2003, -0.0655,  0.4254, -0.2080,
           0.1142],
         [ 0.0125,  0.5047,  0.0303,  0.2931,  0.2347,  0.1075, -0.3888,
          -0.1143],
         [-0.3364, -0.0560,  0.0879, -0.1971, -0.2688,  0.2203, -0.0550,
           0.3173],
         [-0.4845, -0.5075,  0.2733,  0.1145,  0.0184,  0.3727,  0.3794,
          -0.0495]]], device='cuda:0', requires_grad=True)


In [182]:
model_dict = ew.state_dict()
parameters = {k: v for k,v in model_dict.items() if k.split(".")[0] != "model"}

In [183]:
model_dict.items()

odict_items([('model.layer_list.conv1.bias', tensor([0.1471, 0.0852, 0.1140, 0.1511, 0.1553, 0.1403, 0.1438, 0.1173, 0.1504,
        0.1091, 0.1358, 0.0904, 0.1467, 0.1499, 0.0903, 0.0788, 0.1143, 0.1191,
        0.1619, 0.1390, 0.1049, 0.1211, 0.1010, 0.1368, 0.0684, 0.0936, 0.1228,
        0.1543, 0.1843, 0.0738, 0.0934, 0.0971, 0.1567, 0.1015, 0.1315, 0.0935,
        0.1566, 0.0909, 0.0914, 0.1183, 0.0942, 0.0845, 0.1034, 0.1373, 0.0609,
        0.1030, 0.1177, 0.0831, 0.1021, 0.1106, 0.0708, 0.1199, 0.1163, 0.1396,
        0.1266, 0.0947, 0.1399, 0.0789, 0.1100, 0.1114, 0.0961, 0.1010, 0.0808,
        0.1446], device='cuda:0')), ('model.layer_list.conv1.lin.weight', tensor([[-0.0105, -0.0289,  0.0527,  ..., -0.0041, -0.0372, -0.0432],
        [-0.0093,  0.0293, -0.0208,  ..., -0.0130, -0.0551,  0.0081],
        [-0.0080,  0.0388, -0.0297,  ..., -0.0002,  0.0090, -0.0137],
        ...,
        [-0.0652,  0.0062,  0.0349,  ..., -0.0067, -0.0839,  0.0179],
        [-0.0358,  0.0062,  

In [184]:
parameters = {k: v for k,v in model_dict.items() if k.split(".")[0] != "model"}
print(parameters.keys())
print('------------------------------------------')
print(model_dict.keys())

dict_keys(['extractor.lins.0.weight', 'extractor.lins.0.bias', 'extractor.lins.1.weight', 'extractor.lins.1.bias', 'extractor.norms.0.module.weight', 'extractor.norms.0.module.bias', 'extractor.norms.0.module.running_mean', 'extractor.norms.0.module.running_var', 'extractor.norms.0.module.num_batches_tracked'])
------------------------------------------
odict_keys(['model.layer_list.conv1.bias', 'model.layer_list.conv1.lin.weight', 'model.layer_list.conv2.bias', 'model.layer_list.conv2.lin.weight', 'extractor.lins.0.weight', 'extractor.lins.0.bias', 'extractor.lins.1.weight', 'extractor.lins.1.bias', 'extractor.norms.0.module.weight', 'extractor.norms.0.module.bias', 'extractor.norms.0.module.running_mean', 'extractor.norms.0.module.running_var', 'extractor.norms.0.module.num_batches_tracked'])


In [156]:

# fit_calibration_dcgc(ew, data, data.train_mask, data.test_mask, edge_weight=None, verbose=True)
ew.fit(data, data.val_mask, data.train_mask, wdecay=0)

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [155]:

for run in range(10):
    model1 = create_model(dataset, args).to(device)
    model1.load_state_dict(checkpoint['model_state_dict'])
    
    ts = TS(model1)
    ts.fit(data, data.val_mask, data.train_mask, wdecay=0, edge_weight=None, verbose=True)
    print(f'run {run+1} TS tempreture: ', ts.temperature.item())

Epoch: : 001, Accuracy: 0.9143, NNL: 0.4729, ECE:0.0751
Epoch: : 002, Accuracy: 0.9143, NNL: 0.4704, ECE:0.0751
Epoch: : 003, Accuracy: 0.9143, NNL: 0.4679, ECE:0.0751
Epoch: : 004, Accuracy: 0.9143, NNL: 0.4654, ECE:0.0751
Epoch: : 005, Accuracy: 0.9143, NNL: 0.4629, ECE:0.0751
Epoch: : 006, Accuracy: 0.9143, NNL: 0.4604, ECE:0.0751
Epoch: : 007, Accuracy: 0.9143, NNL: 0.4579, ECE:0.0751
Epoch: : 008, Accuracy: 0.9143, NNL: 0.4554, ECE:0.0751
Epoch: : 009, Accuracy: 0.9143, NNL: 0.4529, ECE:0.0751
Epoch: : 010, Accuracy: 0.9143, NNL: 0.4504, ECE:0.0751
Epoch: : 011, Accuracy: 0.9143, NNL: 0.4479, ECE:0.0751
Epoch: : 012, Accuracy: 0.9143, NNL: 0.4454, ECE:0.0751
Epoch: : 013, Accuracy: 0.9143, NNL: 0.4430, ECE:0.0751
Epoch: : 014, Accuracy: 0.9143, NNL: 0.4405, ECE:0.0751
Epoch: : 015, Accuracy: 0.9143, NNL: 0.4381, ECE:0.0751
Epoch: : 016, Accuracy: 0.9143, NNL: 0.4357, ECE:0.0751
Epoch: : 017, Accuracy: 0.9143, NNL: 0.4333, ECE:0.0751
Epoch: : 018, Accuracy: 0.9143, NNL: 0.4309, ECE

In [187]:

    
    # print(f'run {run+1} ES tempreture: ', ts.temperature.item())

run 0 ECE1 :  0.07813651859760284
run 0 DCGC ECE :  0.11692075431346893
run 0 TS ECE :  0.047383226454257965
run 0 TS+DCGC ECE :  0.05302067846059799
----------------------------------------------
run 1 ECE1 :  0.06994426250457764
run 1 DCGC ECE :  0.1017957553267479
run 1 TS ECE :  0.047383226454257965
run 1 TS+DCGC ECE :  0.06746464222669601
----------------------------------------------
run 2 ECE1 :  0.06502808630466461
run 2 DCGC ECE :  0.09834396839141846
run 2 TS ECE :  0.047383226454257965
run 2 TS+DCGC ECE :  0.061257507652044296
----------------------------------------------
run 3 ECE1 :  0.06497398018836975
run 3 DCGC ECE :  0.09130603820085526
run 3 TS ECE :  0.04738324135541916
run 3 TS+DCGC ECE :  0.056458279490470886
----------------------------------------------
run 4 ECE1 :  0.06122978776693344
run 4 DCGC ECE :  0.08220722526311874
run 4 TS ECE :  0.047383226454257965
run 4 TS+DCGC ECE :  0.06695885956287384
----------------------------------------------
run 5 ECE1 :  0

In [154]:
ew


Edge_Weight(
  (model): GAT(
    (layer_list): ModuleDict(
      (conv1): GATConv(1433, 8, heads=8)
      (conv2): GATConv(64, 7, heads=1)
    )
  )
  (extractor): MLP(14, 28, 1)
)

In [215]:
from torch.utils.data import random_split
def get_dataset(data):
    # 10 5 85
    torch.manual_seed(np.random.randint(0, 10000))
    train_num = int(data.num_nodes * 0.1)
    val_num = int(data.num_nodes * 0.05)
    test_num = data.num_nodes - train_num - val_num
    idx = range(data.num_nodes)
    train_idx, test_idx = random_split(dataset=idx, lengths=[train_num, val_num + test_num])
    val_idx, test_idx = random_split(dataset=test_idx, lengths=[val_num, test_num])

    return list(train_idx), list(val_idx), list(test_idx)

dataset_cora = Planetoid('./data/', args.dataset, transform=T.NormalizeFeatures())
data_cora = dataset_cora[0].to(device)
idx_train, idx_val, idx_test = get_dataset(data_cora)

In [218]:
print(len(idx_train))
print(len(idx_val))
print(len(idx_test))

270
135
2303


In [225]:
train_label = [(data_cora.y[idx_train]==k).sum().item() for k in range(dataset_cora.num_classes)]
val_label = [(data_cora.y[idx_val]==k).sum().item() for k in range(dataset_cora.num_classes)]
test_label = [(data_cora.y[idx_test]==k).sum().item() for k in range(dataset_cora.num_classes)]
print(train_label)
print(val_label)
print(test_label)

[29, 30, 48, 78, 45, 26, 14]
[21, 17, 13, 40, 24, 13, 7]
[301, 170, 357, 700, 357, 259, 159]


In [221]:
dataset_cora.num_classes

7