# Node Classification

In [8]:
import argparse
from types import SimpleNamespace
import sys
sys.path.append('/data/lige/HKN')# Please change accordingly!

from __future__ import division
from __future__ import print_function

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

import datetime
import json
import logging
from optim import RiemannianAdam, RiemannianSGD
import os
import pickle
import time

import numpy as np
import torch
from config import parser
from models.base_models import NCModel, LPModel, GCModel
from utils.data_utils import load_data, get_nei, GCDataset, split_batch
from utils.train_utils import get_dir_name, format_metrics
from utils.eval_utils import acc_f1

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter


# Configure PyTorch's CUDA caching allocator through its environment variable,
# then drop any cached GPU blocks.
# NOTE(review): presumably meant to reduce fragmentation-related OOMs; the
# variable has to be set before the first CUDA allocation — confirm nothing
# touches the GPU earlier in the session.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
torch.cuda.empty_cache()



# Hyper-parameters for the node-classification run. Each option maps to a
# (value, help text) pair; only the value is consumed when `args` is built.
_training_config = {
    'lr': (1e-3, 'learning rate'),
    'dropout': (0.3, 'dropout probability'),
    'cuda': (1, 'which cuda device to use (-1 for cpu training)'),
    'epochs': (1000, 'maximum number of epochs to train for'),
    'weight_decay': (1e-3, 'l2 regularization strength'),
    'optimizer': ('radam', 'which optimizer to use, can be any of [rsgd, radam]'),
    'momentum': (0.999, 'momentum in optimizer'),
    'patience': (30, 'patience for early stopping'),
    'seed': (28, 'seed for training'),
    'log_freq': (1, 'how often to compute print train/val metrics (in epochs)'),
    'eval_freq': (1, 'how often to compute val metrics (in epochs)'),
    'save': (0, '1 to save model and logs and 0 otherwise'),
    'save_dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
    'sweep_c': (0, ''),
    'lr_reduce_freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
    'gamma': (0.5, 'gamma for lr scheduler'),
    'print_epoch': (True, ''),
    'grad_clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
    'min_epochs': (300, 'do not early stop before min-epochs'),
}

_model_config = {
    'use_geoopt': (False, "which manifold class to use, if false then use basd.manifold"),
    'AggKlein': (True, "if false, then use hyperboloid centorid for aggregation"),
    'corr': (1, '0: Agg{x_ik,d(x_i ominus x, x_k)}, 1: Agg{x_ik,d(x_ik,x_k)}, 2:Agg{x_i ominus x,d(x_i ominus x, x_k)}'),
    'task': ('nc', 'which tasks to train on, can be any of [lp, nc]'),
    'model': ('BKNet', 'which encoder to use, can be any of [Shallow, MLP, HNN, GCN, GAT, HyperGCN, HyboNet,BKNet,BMLP]'),
    # Final embedding dimension (32 is the alternative that was tried).
    'dim': (64, 'embedding dimension'),
    'manifold': ('PoincareBall', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, Lorentz]'),
    'c': (1.0, 'hyperbolic radius, set to None for trainable curvature'),
    'r': (2., 'fermi-dirac decoder parameter for lp'),
    't': (1., 'fermi-dirac decoder parameter for lp'),
    'margin': (2., 'margin of MarginLoss'),
    'pretrained_embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
    'pos_weight': (0, 'whether to upweight positive class in node classification tasks'),
    'num_layers': (4, 'number of hidden layers in encoder'),
    'bias': (1, 'whether to use bias (1) or not (0)'),
    'act': ('relu', 'which activation function to use (or None for no activation)'),
    'n_heads': (4, 'number of attention heads for graph attention networks, must be a divisor dim'),
    'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
    'double_precision': ('1', 'whether to use double precision'),
    'use_att': (0, 'whether to use hyperbolic attention or not'),
    'local_agg': (0, 'whether to local tangent space aggregation or not'),
    'kernel_size': (6, 'number of kernels'),
    'KP_extent': (0.66, 'influence radius of each kernel point'),
    'radius': (1, 'radius used for kernel point init'),
    'deformable': (False, 'deformable kernel'),
    # Width after the initial linear projection (32 is the alternative).
    'linear_before': (64, 'dim of linear before gcn'),
}

_data_config = {
    # 'film' is another dataset that was tried here.
    'dataset': ('cornell', 'which dataset to use(cornell,wisconsin,texas,squirrel,cora)'),
    'batch_size': (32, 'batch size for gc'),
    'val_prop': (0.05, 'proportion of validation edges for link prediction'),
    'test_prop': (0.1, 'proportion of test edges for link prediction'),
    'use_feats': (1, 'whether to use node features or not'),
    'normalize_feats': (1, 'whether to normalize input node features'),
    'normalize_adj': (1, 'whether to row-normalize the adjacency matrix'),
    'split_seed': (28, 'seed for data splits (train/test/val)'),
    'split_graph': (False, 'whether to split the graph'),
}

config_args = {
    'training_config': _training_config,
    'model_config': _model_config,
    'data_config': _data_config,
}

# Flatten every section into a single SimpleNamespace of option values
# (a later section would win if two sections shared an option name).
_flat_options = {}
for _section in config_args.values():
    for _option, _entry in _section.items():
        _flat_options[_option] = _entry[0]
args = SimpleNamespace(**_flat_options)

# Pick the ManifoldParameter class that the optimizer grouping later tests
# parameters against.
if not args.use_geoopt:
    ManifoldParameter = base_ManifoldParameter
else:
    ManifoldParameter = geoopt_ManifoldParameter

# Seed NumPy and PyTorch for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Double precision is controlled by its own option. The previous check read
# `args.cuda`, which turned float64 off on GPU 0 and forced it on for CPU (-1).
if int(args.double_precision):
    torch.set_default_dtype(torch.float64)

# A negative `cuda` value selects CPU training; otherwise it is the GPU index.
if int(args.cuda) >= 0:
    torch.cuda.manual_seed(args.seed)
args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu'

# A falsy patience means "never stop early": wait for the full epoch budget.
args.patience = args.epochs if not args.patience else args.patience

print(f'Using: {args.device}')
print("Using seed {}.".format(args.seed))
print(f"Dataset: {args.dataset}")

# Load the dataset and derive the size-dependent model arguments from it.
data = load_data(args, os.path.join('data', args.dataset))
if args.task == 'gc':
    # Graph classification keeps one feature matrix per graph; the first one
    # supplies the dimensions.
    args.n_nodes, args.feat_dim = data['features'][0].shape
else:
    args.n_nodes, args.feat_dim = data['features'].shape

# Choose the model class for the task.
if args.task == 'nc':
    Model = NCModel
    args.n_classes = int(data['labels'].max() + 1)
    args.data = data
    print(f'Num classes: {args.n_classes}')
elif args.task == 'gc':
    Model = GCModel
    args.n_classes = int(data['labels'].max() + 1)
    print(f'Num classes: {args.n_classes}')
elif args.task == 'lp':
    args.nb_false_edges = len(data['train_edges_false'])
    args.nb_edges = len(data['train_edges'])
    Model = LPModel
    args.n_classes = 2
else:
    # Previously an unknown task left `Model` unbound and failed later with a
    # NameError at model construction.
    raise ValueError(f"Unsupported task: {args.task!r} (expected 'nc', 'gc' or 'lp')")

# Without an explicit decay frequency the LR scheduler steps only once the
# epoch budget is exhausted, i.e. the learning rate stays constant.
if not args.lr_reduce_freq:
    args.lr_reduce_freq = args.epochs


# Quick sanity check on the loaded data.
print(data.keys())
if args.task != 'gc':
    # Read the shape directly; densifying the adjacency only to print its
    # shape allocates an N x N matrix for nothing. For 'gc' these entries are
    # indexed per graph above, so the whole-matrix shape prints are skipped.
    print(data['adj_train'].shape)
    print(data['features'].shape)

# Model and optimizer
model = Model(args)
print(str(model))

# Parameters whose name contains one of these substrings, and manifold
# parameters, go into the group without weight decay.
no_decay = ['bias', 'scale']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if p.requires_grad
        and not any(nd in n for nd in no_decay)
        and not isinstance(p, ManifoldParameter)
    ],
    'weight_decay': args.weight_decay
}, {
    # `and` binds tighter than `or`, so every ManifoldParameter lands in this
    # group whether or not it requires grad; the parentheses only make that
    # existing behaviour explicit.
    'params': [
        p for n, p in model.named_parameters()
        if (p.requires_grad and any(nd in n for nd in no_decay))
        or isinstance(p, ManifoldParameter)
    ],
    # This key used to be misspelled 'weight_descay', so the group never
    # actually carried an explicit weight-decay setting.
    'weight_decay': 0.0
}]

if args.optimizer == 'radam':
    optimizer = RiemannianAdam(params=optimizer_grouped_parameters,
                               lr=args.lr,
                               stabilize=10)
elif args.optimizer == 'rsgd':
    optimizer = RiemannianSGD(params=optimizer_grouped_parameters,
                              lr=args.lr,
                              stabilize=10)
else:
    # Previously an unknown name left `optimizer` unbound and failed on the
    # next line with a NameError.
    raise ValueError(
        f"Unknown optimizer {args.optimizer!r}; expected 'radam' or 'rsgd'")

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=int(args.lr_reduce_freq),
                                               gamma=float(args.gamma))

# Count parameters with numel() so the total is always an integer.
tot_params = sum(p.numel() for p in model.parameters())

# Move the model and every tensor-valued dataset entry to the target device.
model = model.to(args.device)
for key, value in data.items():
    if torch.is_tensor(value):
        data[key] = value.to(args.device)
print(f"Total number of parameters: {tot_params}")

# Bookkeeping for the node-classification training run.
t_total = time.time()
counter = 0  # consecutive evaluations without a validation improvement
best_val_metrics = model.init_metric_dict()
best_test_metrics = None
best_emb = None
f1_average = 'micro' if args.n_classes > 2 else 'binary'

# The HKPNet and BKNet encoders are fed the pair returned by get_nei() instead
# of the normalised adjacency, so build it once and move it to the device.
if args.model in ('HKPNet', 'BKNet'):
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device)


# Report the GPU memory currently allocated by PyTorch.
print(f"Memory allocated: {torch.cuda.memory_allocated() / 1024 ** 2:.2f} MB")


Using: cuda:1
Using seed 28.
Dataset: cornell
Num classes: 5
dict_keys(['adj_train', 'features', 'labels', 'idx_train', 'idx_val', 'idx_test', 'adj_train_norm'])
(183, 183)
torch.Size([183, 1703])
NCModel(
  (encoder): BKNet(
    (linear_before): BLinear(
      in_features=1703, out_features=64, c=tensor([1.], device='cuda:1'), use_bias=1, act=None, dropout_rate=0.3
      (dropout): Dropout(p=0.3, inplace=False)
      (E_linear): Linear(in_features=1703, out_features=64, bias=False)
    )
    (layers): Sequential(
      (0): KPGraphConvolution(
        (net): KernelPointAggregation(
          (linears): ModuleList(
            (0): BLinear(
              in_features=64, out_features=64, c=tensor([1.], device='cuda:1'), use_bias=1, act=None, dropout_rate=0.3
              (dropout): Dropout(p=0.3, inplace=False)
              (E_linear): Linear(in_features=64, out_features=64, bias=False)
            )
            (1): BLinear(
              in_features=64, out_features=64, c=tensor([1.

  adj = nx.adjacency_matrix(G, sorted(G.nodes()))


In [9]:
# Per-epoch diagnostics filled in by the training loop.
decoded_values, embedded_values, model_linear_weight = [], [], []


def check_weights(model, epoch):
    """Print every parameter of ``model`` whose name contains ``'weight'``.

    Used to eyeball whether the weights change from one epoch to the next.

    Args:
        model: object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs; each parameter's ``.data`` is printed.
        epoch: zero-based epoch index; reported one-based in the header line.
    """
    print(f"Epoch {epoch + 1} - Model Linear Weight:")
    weight_entries = (
        (label, tensor)
        for label, tensor in model.named_parameters()
        if 'weight' in label
    )
    for label, tensor in weight_entries:
        print(f"{label}: {tensor.data}")

# Node-classification training loop with per-epoch diagnostics, validation
# and early stopping.

# `save_dir` is not assigned anywhere in the visible notebook, so saving used
# to fail with a NameError; resolve it from the configuration up front.
if args.save:
    save_dir = args.save_dir
    if not save_dir:
        raise ValueError("args.save_dir must be set when args.save is enabled")
    os.makedirs(save_dir, exist_ok=True)

# HKPNet/BKNet encoders take the (nei, nei_mask) pair; every other encoder
# takes the normalised adjacency.
if args.model in ('HKPNet', 'BKNet'):
    encoder_input = (nei, nei_mask)
else:
    encoder_input = data['adj_train_norm']

for epoch in range(args.epochs):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    embeddings = model.encode(data['features'], encoder_input)

    idx = data['idx_train']
    output = model.decode(embeddings, data['adj_train_norm'], idx)
    train_metrics = model.compute_metrics(embeddings, data, 'train')

    # Keep detached snapshots for later inspection. Storing the live tensors
    # kept every epoch's autograd graph alive for the whole run.
    embedded_values.append(embeddings.detach())
    decoded_values.append(output.detach())
    # For corr == 0/1 the first layer exposes `linears[0]`; for corr == 2 use
    # model.encoder.layers[0].net.single_linear.E_linear.weight instead.
    model_linear_weight.append(
        model.encoder.layers[0].net.linears[0].E_linear.weight.detach().clone())

    train_metrics['loss'].backward()

    # Check that gradients were actually produced for each parameter.
    for name, param in model.named_parameters():
        if param.grad is not None:
            print(f"Grad of {name}: {param.grad.abs().mean()}")
        else:
            print(f"Grad of {name}: None")

    if args.grad_clip is not None:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)

    optimizer.step()
    lr_scheduler.step()

    # Print the weights to verify that they are being updated.
    check_weights(model, epoch)

    if (epoch + 1) % args.log_freq == 0:
        print(" ".join([
            'Epoch: {:04d}'.format(epoch + 1),
            'lr: {}'.format(lr_scheduler.get_last_lr()),
            format_metrics(train_metrics, 'train'),
            'time: {:.4f}s'.format(time.time() - t)
        ]))

    with torch.no_grad():
        if (epoch + 1) % args.eval_freq == 0:
            model.eval()
            embeddings = model.encode(data['features'], encoder_input)
            val_metrics = model.compute_metrics(embeddings, data, 'val')
            if (epoch + 1) % args.log_freq == 0:
                print(" ".join([
                    'Epoch: {:04d}'.format(epoch + 1),
                    format_metrics(val_metrics, 'val')
                ]))
            if model.has_improved(best_val_metrics, val_metrics):
                best_test_metrics = model.compute_metrics(
                    embeddings, data, 'test')
                best_emb = embeddings.cpu()
                if args.save:
                    np.save(os.path.join(save_dir, 'embeddings.npy'),
                            best_emb.detach().numpy())
                best_val_metrics = val_metrics
                counter = 0
            else:
                counter += 1
                # `>=` rather than `==`: if the counter passes `patience`
                # before `min_epochs` is reached, an equality test would never
                # fire again and early stopping would be lost.
                if counter >= args.patience and epoch > args.min_epochs:
                    print("Early stopping")
                    break

# Final report for the node-classification run, plus optional saving.
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
if not best_test_metrics:
    # Validation never improved: evaluate the final model instead. Feed the
    # encoder the same kind of input the training loop uses (the kernel-point
    # models take (nei, nei_mask), not the adjacency) and skip autograd.
    model.eval()
    if args.model in ('HKPNet', 'BKNet'):
        final_encoder_input = (nei, nei_mask)
    else:
        final_encoder_input = data['adj_train_norm']
    with torch.no_grad():
        best_emb = model.encode(data['features'], final_encoder_input)
        best_test_metrics = model.compute_metrics(best_emb, data, 'test')
print(" ".join(
    ["Val set results:",
     format_metrics(best_val_metrics, 'val')]))
print(" ".join(
    ["Test set results:",
     format_metrics(best_test_metrics, 'test')]))
if args.save:
    # `save_dir` is not assigned anywhere in the visible notebook; take it
    # from the configuration instead of failing with a NameError.
    save_dir = args.save_dir
    if not save_dir:
        raise ValueError("args.save_dir must be set when args.save is enabled")
    os.makedirs(save_dir, exist_ok=True)

    np.save(os.path.join(save_dir, 'embeddings.npy'),
            best_emb.cpu().detach().numpy())
    if hasattr(model.encoder, 'att_adj'):
        filename = os.path.join(save_dir, args.dataset + '_att_adj.p')
        with open(filename, 'wb') as att_file:
            pickle.dump(model.encoder.att_adj.cpu().to_dense(), att_file)
        print('Dumped attention adj: ' + filename)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
    # For the 'nc' task `args.data` holds the loaded dataset (tensors and
    # sparse matrices), which json cannot serialise; leave it out.
    saved_config = {k: v for k, v in vars(args).items() if k != 'data'}
    with open(os.path.join(save_dir, 'config.json'), 'w') as config_file:
        json.dump(saved_config, config_file)
    logging.info(f"Saved model in {save_dir}")


Grad of encoder.linear_before.bias: 4.190862199562999e-09
Grad of encoder.linear_before.E_linear.weight: 1.510900702058212e-09
Grad of encoder.layers.0.net.kernel_tangents: None
Grad of encoder.layers.0.net.linears.0.bias: 5.513522347299383e-07
Grad of encoder.layers.0.net.linears.0.E_linear.weight: 1.6016825808542643e-09
Grad of encoder.layers.0.net.linears.1.bias: 4.86450464136694e-07
Grad of encoder.layers.0.net.linears.1.E_linear.weight: 1.7452624292303352e-09
Grad of encoder.layers.0.net.linears.2.bias: 4.846719060880405e-07
Grad of encoder.layers.0.net.linears.2.E_linear.weight: 1.7573469961516177e-09
Grad of encoder.layers.0.net.linears.3.bias: 4.801840573310571e-07
Grad of encoder.layers.0.net.linears.3.E_linear.weight: 1.7283225568426197e-09
Grad of encoder.layers.0.net.linears.4.bias: 4.820955133827388e-07
Grad of encoder.layers.0.net.linears.4.E_linear.weight: 1.7385678877529608e-09
Grad of encoder.layers.0.net.linears.5.bias: 4.835543169610067e-07
Grad of encoder.layers.0.n

# Graph Classification

In [1]:
import argparse
from types import SimpleNamespace
import sys
sys.path.append('/data/lige/HKN')# Please change accordingly!

from __future__ import division
from __future__ import print_function

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

import datetime
import json
import logging
from optim import RiemannianAdam, RiemannianSGD
import os
import pickle
import time

import numpy as np
import torch
from config import parser
from models.base_models import NCModel, LPModel, GCModel
from utils.data_utils import load_data, get_nei, GCDataset, split_batch
from utils.train_utils import get_dir_name, format_metrics
from utils.eval_utils import acc_f1

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter


# Configure PyTorch's CUDA caching allocator through its environment variable,
# then drop any cached GPU blocks.
# NOTE(review): presumably meant to reduce fragmentation-related OOMs; the
# variable has to be set before the first CUDA allocation — confirm nothing
# touches the GPU earlier in the session.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
torch.cuda.empty_cache()



# Hyper-parameters for the graph-classification run. Each option maps to a
# (value, help text) pair; only the value is consumed when `args` is built.
_training_config = {
    'lr': (1e-3, 'learning rate'),
    'dropout': (0.3, 'dropout probability'),
    'cuda': (1, 'which cuda device to use (-1 for cpu training)'),
    'epochs': (1500, 'maximum number of epochs to train for'),
    'weight_decay': (1e-3, 'l2 regularization strength'),
    'optimizer': ('radam', 'which optimizer to use, can be any of [rsgd, radam]'),
    'momentum': (0.999, 'momentum in optimizer'),
    'patience': (20, 'patience for early stopping'),
    'seed': (20, 'seed for training'),
    'log_freq': (1, 'how often to compute print train/val metrics (in epochs)'),
    'eval_freq': (1, 'how often to compute val metrics (in epochs)'),
    'save': (0, '1 to save model and logs and 0 otherwise'),
    'save_dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
    'sweep_c': (0, ''),
    'lr_reduce_freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
    'gamma': (0.5, 'gamma for lr scheduler'),
    'print_epoch': (True, ''),
    'grad_clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
    'min_epochs': (300, 'do not early stop before min-epochs'),
}

_model_config = {
    'use_geoopt': (False, "which manifold class to use, if false then use basd.manifold"),
    'AggKlein': (True, "if false, then use hyperboloid centorid for aggregation"),
    'corr': (1, '0: d(x_i ominus x, x_k), 1: d(x_ik,x_k)'),
    'task': ('gc', 'which tasks to train on, can be any of [lp, nc]'),
    'model': ('BKNet', 'which encoder to use, can be any of [Shallow, MLP, HNN, GCN, GAT, HyperGCN, HyboNet,BKNet,BMLP]'),
    # Final embedding dimension (64 is the alternative that was tried).
    'dim': (32, 'embedding dimension'),
    'manifold': ('PoincareBall', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, Lorentz]'),
    'c': (1.0, 'hyperbolic radius, set to None for trainable curvature'),
    'r': (2., 'fermi-dirac decoder parameter for lp'),
    't': (1., 'fermi-dirac decoder parameter for lp'),
    'margin': (2., 'margin of MarginLoss'),
    'pretrained_embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
    'pos_weight': (0, 'whether to upweight positive class in node classification tasks'),
    'num_layers': (2, 'number of hidden layers in encoder'),
    'bias': (1, 'whether to use bias (1) or not (0)'),
    'act': ('relu', 'which activation function to use (or None for no activation)'),
    'n_heads': (4, 'number of attention heads for graph attention networks, must be a divisor dim'),
    'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
    'double_precision': ('1', 'whether to use double precision'),
    'use_att': (0, 'whether to use hyperbolic attention or not'),
    'local_agg': (0, 'whether to local tangent space aggregation or not'),
    'kernel_size': (6, 'number of kernels'),
    'KP_extent': (0.66, 'influence radius of each kernel point'),
    'radius': (1, 'radius used for kernel point init'),
    'deformable': (False, 'deformable kernel'),
    # Width after the initial linear projection (64 is the alternative).
    'linear_before': (32, 'dim of linear before gcn'),
}

_data_config = {
    # 'pubmed' is another dataset that was tried here.
    'dataset': ('PTC', 'which dataset to use(cornell,wisconsin,texas,squirrel,cora)'),
    'batch_size': (344, 'batch size for gc'),
    'val_prop': (0.05, 'proportion of validation edges for link prediction'),
    'test_prop': (0.1, 'proportion of test edges for link prediction'),
    'use_feats': (1, 'whether to use node features or not'),
    'normalize_feats': (1, 'whether to normalize input node features'),
    'normalize_adj': (1, 'whether to row-normalize the adjacency matrix'),
    'split_seed': (20, 'seed for data splits (train/test/val)'),
    'split_graph': (False, 'whether to split the graph'),
}

config_args = {
    'training_config': _training_config,
    'model_config': _model_config,
    'data_config': _data_config,
}

# Flatten every section into a single SimpleNamespace of option values
# (a later section would win if two sections shared an option name).
_flat_options = {}
for _section in config_args.values():
    for _option, _entry in _section.items():
        _flat_options[_option] = _entry[0]
args = SimpleNamespace(**_flat_options)

# Pick the ManifoldParameter class that the optimizer grouping later tests
# parameters against.
if not args.use_geoopt:
    ManifoldParameter = base_ManifoldParameter
else:
    ManifoldParameter = geoopt_ManifoldParameter

# Seed NumPy and PyTorch for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Double precision is controlled by its own option. The previous check read
# `args.cuda`, which turned float64 off on GPU 0 and forced it on for CPU (-1).
if int(args.double_precision):
    torch.set_default_dtype(torch.float64)

# A negative `cuda` value selects CPU training; otherwise it is the GPU index.
if int(args.cuda) >= 0:
    torch.cuda.manual_seed(args.seed)
args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu'

# A falsy patience means "never stop early": wait for the full epoch budget.
args.patience = args.epochs if not args.patience else args.patience

print(f'Using: {args.device}')
print("Using seed {}.".format(args.seed))
print(f"Dataset: {args.dataset}")

# Load the dataset and derive the size-dependent model arguments from it.
data = load_data(args, os.path.join('data', args.dataset))
if args.task == 'gc':
    # Graph classification keeps one feature matrix per graph; the first one
    # supplies the dimensions.
    args.n_nodes, args.feat_dim = data['features'][0].shape
else:
    args.n_nodes, args.feat_dim = data['features'].shape

# Choose the model class for the task.
if args.task == 'nc':
    Model = NCModel
    args.n_classes = int(data['labels'].max() + 1)
    args.data = data
    print(f'Num classes: {args.n_classes}')
elif args.task == 'gc':
    Model = GCModel
    args.n_classes = int(data['labels'].max() + 1)
    print(f'Num classes: {args.n_classes}')
elif args.task == 'lp':
    args.nb_false_edges = len(data['train_edges_false'])
    args.nb_edges = len(data['train_edges'])
    Model = LPModel
    args.n_classes = 2
else:
    # Previously an unknown task left `Model` unbound and failed later with a
    # NameError at model construction.
    raise ValueError(f"Unsupported task: {args.task!r} (expected 'nc', 'gc' or 'lp')")

# Without an explicit decay frequency the LR scheduler steps only once the
# epoch budget is exhausted, i.e. the learning rate stays constant.
if not args.lr_reduce_freq:
    args.lr_reduce_freq = args.epochs


# Quick sanity check on the loaded data.
print(data.keys())

# Model and optimizer
model = Model(args)
print(str(model))

# Parameters whose name contains one of these substrings, and manifold
# parameters, go into the group without weight decay.
no_decay = ['bias', 'scale']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if p.requires_grad
        and not any(nd in n for nd in no_decay)
        and not isinstance(p, ManifoldParameter)
    ],
    'weight_decay': args.weight_decay
}, {
    # `and` binds tighter than `or`, so every ManifoldParameter lands in this
    # group whether or not it requires grad; the parentheses only make that
    # existing behaviour explicit.
    'params': [
        p for n, p in model.named_parameters()
        if (p.requires_grad and any(nd in n for nd in no_decay))
        or isinstance(p, ManifoldParameter)
    ],
    # This key used to be misspelled 'weight_descay', so the group never
    # actually carried an explicit weight-decay setting.
    'weight_decay': 0.0
}]

if args.optimizer == 'radam':
    optimizer = RiemannianAdam(params=optimizer_grouped_parameters,
                               lr=args.lr,
                               stabilize=10)
elif args.optimizer == 'rsgd':
    optimizer = RiemannianSGD(params=optimizer_grouped_parameters,
                              lr=args.lr,
                              stabilize=10)
else:
    # Previously an unknown name left `optimizer` unbound and failed on the
    # next line with a NameError.
    raise ValueError(
        f"Unknown optimizer {args.optimizer!r}; expected 'radam' or 'rsgd'")

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=int(args.lr_reduce_freq),
                                               gamma=float(args.gamma))

# Count parameters with numel() so the total is always an integer.
tot_params = sum(p.numel() for p in model.parameters())

# Move the model and every tensor-valued dataset entry to the target device.
model = model.to(args.device)
for key, value in data.items():
    if torch.is_tensor(value):
        data[key] = value.to(args.device)
print(f"Total number of parameters: {tot_params}")

# Bookkeeping for the graph-classification training run.
t_total = time.time()
counter = 0  # consecutive evaluations without a validation improvement
best_val_metrics = model.init_metric_dict()
best_test_metrics = None
best_emb = None
f1_average = 'micro' if args.n_classes > 2 else 'binary'

# Report the GPU memory currently allocated by PyTorch.
print(f"Memory allocated: {torch.cuda.memory_allocated() / 1024 ** 2:.2f} MB")


  from .autonotebook import tqdm as notebook_tqdm


Using: cuda:1
Using seed 20.
Dataset: PTC
loading data
# classes: 2
# maximum node tag: 19
# data: 344
Num classes: 2
dict_keys(['adj_train', 'features', 'labels', 'idx_train', 'idx_val', 'idx_test'])


  adjs.append(sp.csr_matrix(nx.adjacency_matrix(g.g)))


GCModel(
  (encoder): BKNet(
    (linear_before): BLinear(
      in_features=19, out_features=32, c=tensor([1.], device='cuda:1'), use_bias=1, act=None, dropout_rate=0.3
      (dropout): Dropout(p=0.3, inplace=False)
      (E_linear): Linear(in_features=19, out_features=32, bias=False)
    )
    (layers): Sequential(
      (0): KPGraphConvolution(
        (net): KernelPointAggregation(
          (linears): ModuleList(
            (0): BLinear(
              in_features=32, out_features=32, c=tensor([1.], device='cuda:1'), use_bias=1, act=None, dropout_rate=0.3
              (dropout): Dropout(p=0.3, inplace=False)
              (E_linear): Linear(in_features=32, out_features=32, bias=False)
            )
            (1): BLinear(
              in_features=32, out_features=32, c=tensor([1.], device='cuda:1'), use_bias=1, act=None, dropout_rate=0.3
              (dropout): Dropout(p=0.3, inplace=False)
              (E_linear): Linear(in_features=32, out_features=32, bias=False)
        

In [2]:
# Graph-classification training cell: build the batched dataset, train with
# per-batch gradient diagnostics, validate every `eval_freq` epochs, and
# report the test metrics of the best validation epoch.
if args.task == 'gc':
    dataset = GCDataset((data['adj_train'], data['features'], data['labels']),
                        KP=(args.model == 'HKPNet' or args.model == 'BKNet'),
                        normlize=args.normalize_adj, device=args.device)


def _encode_batch(selected_idx):
    """Fetch the graphs in ``selected_idx`` and encode them.

    Returns ``(embeddings, labels, ed_idx)``. The HKPNet/BKNet encoders are
    fed the ``(nei, nei_mask)`` pair from the dataset; every other encoder is
    fed the adjacency the dataset returns.
    """
    if args.model in ('HKPNet', 'BKNet'):
        nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
        embeddings = model.encode(features, (nei, nei_mask))
    else:
        adj, features, labels, ed_idx = dataset[selected_idx]
        embeddings = model.encode(features, adj)
    return embeddings, labels, ed_idx


def _evaluate_split(split_idx):
    """Return ``{'loss', 'acc', 'f1'}`` for ``split_idx`` in eval mode.

    Runs under ``torch.no_grad()`` so evaluation builds no autograd graphs.
    Loss is the mean over batches; acc/f1 are computed over all outputs of
    the split. An empty split yields all-zero metrics instead of dividing by
    zero.
    """
    model.eval()
    metrics = {'loss': 0, 'acc': 0, 'f1': 0}
    outs, labs = [], []
    with torch.no_grad():
        for i in range(0, len(split_idx), args.batch_size):
            selected_idx = split_idx[i: i + args.batch_size]
            if len(selected_idx) == 0:
                continue
            embeddings, labels, ed_idx = _encode_batch(selected_idx)
            batch_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)
            metrics['loss'] += batch_metrics['loss'].detach().cpu().numpy()
            outs.append(batch_metrics['output'])
            labs.append(labels)
    if outs:
        metrics['loss'] /= len(outs)
        metrics['acc'], metrics['f1'] = acc_f1(
            torch.cat(outs, 0), torch.cat(labs, 0), f1_average)
    return metrics


for epoch in range(args.epochs):
    t = time.time()
    model.train()
    tot_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
    outs, labs = [], []

    for i in range(0, len(data['idx_train']), args.batch_size):
        optimizer.zero_grad()
        selected_idx = data['idx_train'][i: i + args.batch_size]
        if len(selected_idx) == 0:
            continue
        embeddings, labels, ed_idx = _encode_batch(selected_idx)

        train_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)
        tot_metrics['loss'] += train_metrics['loss'].detach().cpu().numpy()
        train_metrics['loss'].backward()

        # Print the gradient norm of every trainable parameter.
        print(f"Epoch {epoch + 1}, Batch {i // args.batch_size + 1}:")
        for name, param in model.named_parameters():
            if param.grad is not None:
                print(f"Parameter: {name}, Gradient: {param.grad.norm().item()}")

        if args.grad_clip is not None:
            max_norm = float(args.grad_clip)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        optimizer.step()

        # Detach before storing: the outputs are only needed for acc/f1, and
        # keeping them attached held every batch's graph until epoch end.
        outs.append(train_metrics['output'].detach())
        labs.append(labels)

    lr_scheduler.step()
    if outs:
        tot_metrics['acc'], tot_metrics['f1'] = acc_f1(
            torch.cat(outs, 0), torch.cat(labs, 0), f1_average)
        tot_metrics['loss'] /= len(outs)

    if (epoch + 1) % args.log_freq == 0:
        print(" ".join(['Epoch: {:04d}'.format(epoch + 1),
                        'lr: {}'.format(lr_scheduler.get_last_lr()),
                        format_metrics(tot_metrics, 'train'),
                        'time: {:.4f}s'.format(time.time() - t)
                        ]))

    if (epoch + 1) % args.eval_freq == 0:
        val_metrics = _evaluate_split(data['idx_val'])

        if (epoch + 1) % args.log_freq == 0:
            print(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(val_metrics, 'val')]))

        if model.has_improved(best_val_metrics, val_metrics):
            best_val_metrics = val_metrics
            best_test_metrics = _evaluate_split(data['idx_test'])
            counter = 0
        else:
            counter += 1
            # `>=` rather than `==`: if the counter passes `patience` before
            # `min_epochs` is reached, an equality test would never fire
            # again and early stopping would be lost.
            if counter >= args.patience and epoch > args.min_epochs:
                print("Early stopping")
                break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Fall back to evaluating the final model when no test metrics were recorded.
# `best_test_metrics` starts as None, so it must be checked before indexing.
if best_test_metrics is None or best_test_metrics['loss'] == 0:
    best_test_metrics = _evaluate_split(data['idx_test'])

print(" ".join(["Val set results:", format_metrics(best_val_metrics, 'val')]))
print(" ".join(["Test set results:", format_metrics(best_test_metrics, 'test')]))


Epoch 1, Batch 1:
Parameter: encoder.linear_before.bias, Gradient: 0.08408150170085271
Parameter: encoder.linear_before.E_linear.weight, Gradient: 0.15023446712741065
Parameter: encoder.layers.0.net.linears.0.bias, Gradient: 0.0034354669110489935
Parameter: encoder.layers.0.net.linears.0.E_linear.weight, Gradient: 0.032000898437907224
Parameter: encoder.layers.0.net.linears.1.bias, Gradient: 0.003946888639333101
Parameter: encoder.layers.0.net.linears.1.E_linear.weight, Gradient: 0.030693774704803986
Parameter: encoder.layers.0.net.linears.2.bias, Gradient: 3.021955994209549e-153
Parameter: encoder.layers.0.net.linears.2.E_linear.weight, Gradient: 0.033927660423161005
Parameter: encoder.layers.0.net.linears.3.bias, Gradient: 0.0035181417324065692
Parameter: encoder.layers.0.net.linears.3.E_linear.weight, Gradient: 0.03356566499047886
Parameter: encoder.layers.0.net.linears.4.bias, Gradient: 0.003947037008060052
Parameter: encoder.layers.0.net.linears.4.E_linear.weight, Gradient: 0.0314

In [2]:
# Build the graph-classification dataset; the KP flag is switched on for the
# HKPNet and BKNet encoders.
if args.task == 'gc':
    uses_kernel_points = args.model in ('HKPNet', 'BKNet')
    dataset = GCDataset(
        (data['adj_train'], data['features'], data['labels']),
        KP=uses_kernel_points,
        normlize=args.normalize_adj,
        device=args.device,
    )
"""
# This is the old training script, kept here for reference only.

for epoch in range(args.epochs):
    t = time.time()
    model.train()
    tot_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
    outs = None
    labs = None
    bats = 0
    for i in range(0, len(data['idx_train']), args.batch_size):
        optimizer.zero_grad()
        selected_idx = data['idx_train'][i : i + args.batch_size]
        if len(selected_idx) == 0:
            continue
        if args.model == 'HKPNet':
            # Note ed_idx seems to be the index of graphs
            nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, (nei, nei_mask))
        elif args.model == 'BKNet':
            # shape(nei/nei_mask) = (sum(n), max_nei_num), i.e. like concatenating the neighbors of all graphs together.
            # This works as long as every node of every graph is represented by a different number.
            nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, (nei, nei_mask))
        else:
            adj, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, adj)
        # print(embeddings.shape,labels.shape,ed_idx.shape,len(ed_idx))
        train_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)
        tot_metrics['loss'] += train_metrics['loss'].detach().cpu().numpy()
        bats += 1
        train_metrics['loss'].backward()
        
        # Print gradients of each trainable parameter
        print(f"Epoch {epoch + 1}, Batch {i//args.batch_size + 1}:")
        for name, param in model.named_parameters():
            if param.grad is not None:
                print(f"Parameter: {name}, Gradient: {param.grad.norm().item()}")

        if args.grad_clip is not None:
            max_norm = float(args.grad_clip)
            all_params = list(model.parameters())
            for param in all_params:
                torch.nn.utils.clip_grad_norm_(param, max_norm)
        optimizer.step()

        if outs is None:
            outs = train_metrics['output']
        else:
            outs = torch.cat([outs, train_metrics['output']], 0)
        if labs is None:
            labs = labels
        else:
            labs = torch.cat([labs, labels], 0)
    lr_scheduler.step()
    tot_metrics['acc'], tot_metrics['f1'] = acc_f1((outs), (labs), f1_average)
    tot_metrics['loss'] /= bats

    if (epoch + 1) % args.log_freq == 0:
        print(" ".join(['Epoch: {:04d}'.format(epoch + 1),
                        'lr: {}'.format(lr_scheduler.get_last_lr()),
                        format_metrics(tot_metrics, 'train'),
                        'time: {:.4f}s'.format(time.time() - t)
                        ]))
    if (epoch + 1) % args.eval_freq == 0:
        model.eval()
        tot_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
        minibatch = args.batch_size
        outs = None
        labs = None
        bats = 0
        for i in range(0, len(data['idx_val']), minibatch):
            selected_idx = data['idx_val'][i : i + minibatch]
            if len(selected_idx) == 0:
                continue
            if args.model == 'HKPNet':
                nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
                embeddings = model.encode(features, (nei, nei_mask))
            elif args.model == 'BKNet':
                nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
                embeddings = model.encode(features, (nei, nei_mask))
            else:
                adj, features, labels, ed_idx = dataset[selected_idx]
                embeddings = model.encode(features, adj)
            val_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)
            tot_metrics['loss'] += val_metrics['loss'].detach().cpu().numpy()
            bats += 1
            if outs is None:
                outs = val_metrics['output']
            else:
                outs = torch.cat([outs, val_metrics['output']], 0)
            if labs is None:
                labs = labels
            else:
                labs = torch.cat([labs, labels], 0)
        tot_metrics['acc'], tot_metrics['f1'] = acc_f1((outs), (labs), f1_average)
        tot_metrics['loss'] /= bats
        if (epoch + 1) % args.log_freq == 0:
            print(" ".join(['Epoch: {:04d}'.format(epoch + 1), format_metrics(tot_metrics, 'val')]))
        if model.has_improved(best_val_metrics, tot_metrics):
            best_val_metrics = tot_metrics
            best_test_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
            minibatch = args.batch_size
            outs = None
            labs = None
            bats = 0
            for i in range(0, len(data['idx_test']), minibatch):
                selected_idx = data['idx_test'][i : i + minibatch]
                if len(selected_idx) == 0:
                    continue
                if args.model == 'HKPNet':
                    nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
                    embeddings = model.encode(features, (nei, nei_mask))
                elif args.model == 'BKNet':
                    print('coming here')
                    nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
                    embeddings = model.encode(features, (nei, nei_mask))
                else:
                    adj, features, labels, ed_idx = dataset[selected_idx]
                    embeddings = model.encode(features, adj)
                test_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)

                bats += 1
                #print("test_metrics['loss']:", test_metrics['loss'])
                tot_metrics['loss'] += test_metrics['loss'].detach().cpu().numpy()
                if outs is None:
                    outs = test_metrics['output']
                else:
                    outs = torch.cat([outs, test_metrics['output']], 0)
                if labs is None:
                    labs = labels
                else:
                    labs = torch.cat([labs, labels], 0)
            
            print("tot_metrics['loss']:",tot_metrics['loss'])
            if outs is None:
                best_test_metrics = best_val_metrics
            else:
                best_val_metrics = tot_metrics
                best_test_metrics['acc'], best_test_metrics['f1'] = acc_f1((outs), (labs), f1_average)
                #print('Coming to the second place',best_test_metrics['loss'])
                best_test_metrics['loss'] /= bats
            counter = 0
        else:
            counter += 1
            if counter == args.patience and epoch > args.min_epochs:
                print("Early stopping")
                break
                
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
if not best_test_metrics:
    model.eval()
    best_test_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
    tot_metrics = {'loss': 0, 'acc': 0, 'f1': 0}
    minibatch = args.batch_size
    outs = None
    labs = None
    bats = 0
    for i in range(0, len(data['idx_test']), minibatch):
        selected_idx = data['idx_test'][i : i + minibatch]
        if len(selected_idx) == 0:
            continue
        if args.model == 'HKPNet':
            nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, (nei, nei_mask))
        elif args.model == 'BKNet':
            nei, nei_mask, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, (nei, nei_mask))
        else:
            adj, features, labels, ed_idx = dataset[selected_idx]
            embeddings = model.encode(features, adj)

        test_metrics = model.compute_metrics(embeddings, labels, ed_idx, type=2)

        bats += 1
        tot_metrics['loss'] += test_metrics['loss'].detach().cpu().numpy()
        if outs is None:
            outs = test_metrics['output']
        else:
            outs = torch.cat([outs, test_metrics['output']], 0)
        if labs is None:
            labs = labels
        else:
            labs = torch.cat([labs, labels], 0)
    if outs is None:
        best_test_metrics = best_val_metrics
    else:
        best_val_metrics = tot_metrics
        best_test_metrics['acc'], best_test_metrics['f1'] = acc_f1((outs), (labs), f1_average)
        best_test_metrics['loss'] /= bats

print(" ".join(["Val set results:", format_metrics(best_val_metrics, 'val')]))
print(" ".join(["Test set results:", format_metrics(best_test_metrics, 'test')]))
"""

Epoch 1, Batch 1:
Parameter: encoder.linear_before.bias, Gradient: 0.10144904025803587
Parameter: encoder.linear_before.E_linear.weight, Gradient: 0.037934807836700106
Parameter: encoder.layers.0.net.linears.0.bias, Gradient: 0.030856952560695577
Parameter: encoder.layers.0.net.linears.0.E_linear.weight, Gradient: 0.024039217029188084
Parameter: encoder.layers.0.net.linears.1.bias, Gradient: 0.04266819347199047
Parameter: encoder.layers.0.net.linears.1.E_linear.weight, Gradient: 0.0230375484261579
Parameter: encoder.layers.0.net.linears.2.bias, Gradient: 0.042489672384281814
Parameter: encoder.layers.0.net.linears.2.E_linear.weight, Gradient: 0.026938684278866094
Parameter: encoder.layers.0.net.linears.3.bias, Gradient: 0.04241248764698544
Parameter: encoder.layers.0.net.linears.3.E_linear.weight, Gradient: 0.025049108396838233
Parameter: encoder.layers.0.net.linears.4.bias, Gradient: 0.04249341395803177
Parameter: encoder.layers.0.net.linears.4.E_linear.weight, Gradient: 0.02421750522

KeyboardInterrupt: 