In [27]:
import argparse
from types import SimpleNamespace
import sys
sys.path.append('/data/lige/HKN')# Please change accordingly!

from __future__ import division
from __future__ import print_function

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

import datetime
import json
import logging
from optim import RiemannianAdam, RiemannianSGD
import os
import pickle
import time

import numpy as np
import torch
from config import parser
from models.base_models import NCModel, LPModel, GCModel
from utils.data_utils import load_data, get_nei, GCDataset, split_batch
from utils.train_utils import get_dir_name, format_metrics
from utils.eval_utils import acc_f1

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

#import torch.nn.functional as F


config_args = {
    'training_config': {
        'use_geoopt': (False, "which manifold class to use, if false then use basd.manifold"),
        'lr': (1e-4, 'learning rate'),
        'dropout': (0.25, 'dropout probability'),
        'cuda': (0, 'which cuda device to use (-1 for cpu training)'),
        'epochs': (1000, 'maximum number of epochs to train for'),
        'weight_decay': (1e-3, 'l2 regularization strength'),
        'optimizer': ('radam', 'which optimizer to use, can be any of [rsgd, radam]'),
        'momentum': (0.999, 'momentum in optimizer'),
        'patience': (15, 'patience for early stopping'),
        'seed': (1234, 'seed for training'),
        'log_freq': (5, 'how often to compute print train/val metrics (in epochs)'),
        'eval_freq': (1, 'how often to compute val metrics (in epochs)'),
        'save': (0, '1 to save model and logs and 0 otherwise'),
        'save_dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
        'sweep_c': (0, ''),
        'lr_reduce_freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
        'gamma': (0.5, 'gamma for lr scheduler'),
        'print_epoch': (True, ''),
        'grad_clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
        'min_epochs': (300, 'do not early stop before min-epochs')
    },
    'model_config': {
        'task': ('nc', 'which tasks to train on, can be any of [lp, nc]'),
        'model': ('BMLP', 'which encoder to use, can be any of [Shallow, MLP, HNN, GCN, GAT, HyperGCN, HyboNet,BKNet,BMLP]'),
        'dim': (64, 'embedding dimension'),
        'manifold': ('PoincareBall', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, Lorentz]'),
        'c': (1.0, 'hyperbolic radius, set to None for trainable curvature'),
        'r': (2., 'fermi-dirac decoder parameter for lp'),
        't': (1., 'fermi-dirac decoder parameter for lp'),
        'margin': (2., 'margin of MarginLoss'),
        'pretrained_embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
        'pos_weight': (0, 'whether to upweight positive class in node classification tasks'),
        'num_layers': (2, 'number of hidden layers in encoder'),
        'bias': (1, 'whether to use bias (1) or not (0)'),
        'act': ('relu', 'which activation function to use (or None for no activation)'),
        'n_heads': (4, 'number of attention heads for graph attention networks, must be a divisor dim'),
        'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
        'double_precision': ('1', 'whether to use double precision'),
        'use_att': (0, 'whether to use hyperbolic attention or not'),
        'local_agg': (0, 'whether to local tangent space aggregation or not'),
        'kernel_size': (8, 'number of kernels'),
        'KP_extent': (0.66, 'influence radius of each kernel point'),
        'radius': (1, 'radius used for kernel point init'),
        'deformable': (False, 'deformable kernel'),
        'linear_before': (64, 'dim of linear before gcn')#64
    },
    'data_config': {
        'dataset': ('wisconsin', 'which dataset to use(cornell,wisconsin,squirrel,cora)'),
        'batch_size': (32, 'batch size for gc'),
        'val_prop': (0.05, 'proportion of validation edges for link prediction'),
        'test_prop': (0.1, 'proportion of test edges for link prediction'),
        'use_feats': (1, 'whether to use node features or not'),
        'normalize_feats': (1, 'whether to normalize input node features'),
        'normalize_adj': (1, 'whether to row-normalize the adjacency matrix'),
        'split_seed': (1234, 'seed for data splits (train/test/val)'),
        'split_graph': (False, 'whether to split the graph')
    }
}

# 将所有参数转换为 SimpleNamespace
args = SimpleNamespace(
    **{k: v[0] for config in config_args.values() for k, v in config.items()}
)

#choose which manifold class to follow 
if args.use_geoopt == False:
    ManifoldParameter = base_ManifoldParameter
else:
    ManifoldParameter = geoopt_ManifoldParameter
np.random.seed(args.seed)#args.seed
torch.manual_seed(args.seed)#args.seed
if int(args.cuda):#args.double_precision
    torch.set_default_dtype(torch.float64)
if int(args.cuda) >= 0:#args.cuda
    torch.cuda.manual_seed(args.seed)#args.seed
args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu' #args.device actually,<-args.cuda
args.patience = args.epochs if not args.patience else args.patience #args.patience<-args.epochs|args.patience

print(f'Using: {args.device}')
print("Using seed {}.".format(args.seed))
print(f"Dataset: {args.dataset}")

# Load data
data = load_data(args, os.path.join('data', args.dataset))
if args.task == 'gc':
    args.n_nodes, args.feat_dim = data['features'][0].shape
else:
    args.n_nodes, args.feat_dim = data['features'].shape
if args.task == 'nc':
    Model = NCModel
    args.n_classes = int(data['labels'].max() + 1)
    args.data = data
    print(f'Num classes: {args.n_classes}')
elif args.task == 'gc':
    Model = GCModel
    args.n_classes = int(data['labels'].max() + 1)
    print(f'Num classes: {args.n_classes}')
else:
    args.nb_false_edges = len(data['train_edges_false'])
    args.nb_edges = len(data['train_edges'])
    if args.task == 'lp':
        Model = LPModel
        args.n_classes = 2

if not args.lr_reduce_freq:
    args.lr_reduce_freq = args.epochs


###A simple check on data
print(data.keys())
print(data['adj_train'].todense().shape)
print(data['features'].shape)
###A simple check on data

# Model and optimizer
model = Model(args)
print(str(model))
no_decay = ['bias', 'scale']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if p.requires_grad and not any(
            nd in n
            for nd in no_decay) and not isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    args.weight_decay
}, {
    'params': [
        p for n, p in model.named_parameters() if p.requires_grad and any(
            nd in n
            for nd in no_decay) or isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    0.0
}]
if args.optimizer == 'radam':
    optimizer = RiemannianAdam(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
elif args.optimizer == 'rsgd':
    optimizer = RiemannianSGD(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=int(
                                                args.lr_reduce_freq),
                                                gamma=float(args.gamma))
tot_params = sum([np.prod(p.size()) for p in model.parameters()])
model = model.to(args.device)
for x, val in data.items():
    if torch.is_tensor(data[x]):
        data[x] = data[x].to(args.device)
print(f"Total number of parameters: {tot_params}")

# Train model for nc:
t_total = time.time()
counter = 0
best_val_metrics = model.init_metric_dict()
best_test_metrics = None
best_emb = None
if args.n_classes > 2:
    f1_average = 'micro'
else:
    f1_average = 'binary'

if args.model == 'HKPNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device)
elif args.model == 'BKNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device) #nei/nei_mask on cuda now

Using: cuda:0
Using seed 1234.
Dataset: wisconsin
Num classes: 5
dict_keys(['adj_train', 'features', 'labels', 'idx_train', 'idx_val', 'idx_test', 'adj_train_norm'])
(251, 251)
torch.Size([251, 1703])
NCModel(
  (encoder): BMLP(
    (layers): Sequential(
      (0): BMLP(
        (linear): BLinear(
          in_features=1703, out_features=64, c=tensor([1.], device='cuda:0'), use_bias=1, act=<function relu at 0x7f11307432e0>, dropout_rate=0.25
          (E_linear): Linear(in_features=1703, out_features=64, bias=True)
          (dropout): Dropout(p=0.25, inplace=False)
        )
      )
    )
  )
  (decoder): PoincareDecoder()
)
Total number of parameters: 109445


In [28]:
if config_args['model_config']['model'][0] == 'BMLP':
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data)
    print(model.encode(data['features'], data['adj_train_norm']))#[2]
    for name, param in model.named_parameters():
        print(name, param.requires_grad)  # 确保所有参数的 requires_grad 都是 True
elif config_args['model_config']['model'][0] == 'BKNet':
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data)
    print(model.encode(data['features'], (nei, nei_mask)))#[2]
    for name, param in model.named_parameters():
        print(name, param.requires_grad)  # 确保所有参数的 requires_grad 都是 True


encoder.layers.0.linear.E_linear.weight tensor([[ 4.1756e-02, -1.8398e-02,  5.8131e-02,  ..., -7.5607e-02,
         -7.7221e-02,  6.1167e-02],
        [ 6.5736e-02, -4.8146e-02, -6.1437e-02,  ..., -9.6945e-03,
         -7.6701e-02,  4.9474e-05],
        [ 3.6671e-02, -2.2466e-02, -4.6759e-02,  ...,  5.6713e-02,
         -3.2809e-02,  3.4089e-02],
        ...,
        [ 3.2771e-02, -8.7429e-04,  1.9891e-02,  ..., -5.7626e-02,
         -5.3839e-02, -6.5566e-02],
        [-6.9515e-02,  3.3123e-02,  6.7455e-02,  ..., -6.1532e-02,
         -3.4309e-02, -2.0445e-02],
        [-7.1281e-02, -3.0733e-02,  4.4339e-02,  ...,  4.7381e-02,
         -5.6454e-02, -9.1399e-03]], device='cuda:0', dtype=torch.float64)
encoder.layers.0.linear.E_linear.bias tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [29]:
decoded_values=[]
embedded_values=[]
model_linear_weight=[]

# 检查权重更新
def check_weights(model, epoch):
    print(f"Epoch {epoch + 1} - Model Linear Weight:")
    for name, param in model.named_parameters():
        if 'weight' in name:
            print(f"{name}: {param.data}")

for epoch in range(args.epochs):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    if args.model == 'HKPNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    elif args.model == 'BKNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    else:
        embeddings = model.encode(data['features'], data['adj_train_norm'])

    idx=data[f'idx_train']
    output=model.decode(embeddings, data['adj_train_norm'], idx)
    train_metrics = model.compute_metrics(embeddings, data, 'train')
    
    # 检查 decoded_values 和 embeddings
    embedded_values.append(embeddings)
    #idx = data[f'idx_train']
    #output = model.decode(embeddings, data['adj_train_norm'], idx)
    decoded_values.append(output)
    model_linear_weight.append(model.encoder.layers[0].linear.E_linear.weight.clone())
    
    # 检查梯度是否被正确计算
    #train_metrics['loss'].backward()
    loss = F.cross_entropy(output, data['labels'][idx])
    loss.backward()

    for name, param in model.named_parameters():
        if param.grad is not None:
            print(f"Grad of {name}: {param.grad.abs().mean()}")
        else:
            print(f"Grad of {name}: None")  # 检查梯度是否为 None
    
    if args.grad_clip is not None:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
    optimizer.step()
    lr_scheduler.step()
    
    # 打印和检查权重是否被更新
    check_weights(model, epoch)
    
    if (epoch + 1) % args.log_freq == 0:
        print(" ".join([
            'Epoch: {:04d}'.format(epoch + 1),
            'lr: {}'.format(lr_scheduler.get_last_lr()),
            format_metrics(train_metrics, 'train'),
            'time: {:.4f}s'.format(time.time() - t)
        ]))
    
    with torch.no_grad():
        if (epoch + 1) % args.eval_freq == 0:
            model.eval()
            if args.model == 'HKPNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            elif args.model == 'BKNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            else:
                embeddings = model.encode(data['features'],
                                        data['adj_train_norm'])
            val_metrics = model.compute_metrics(embeddings, data, 'val')
            if (epoch + 1) % args.log_freq == 0:
                print(" ".join([
                    'Epoch: {:04d}'.format(epoch + 1),
                    format_metrics(val_metrics, 'val')
                ]))
            if model.has_improved(best_val_metrics, val_metrics):
                best_test_metrics = model.compute_metrics(
                    embeddings, data, 'test')
                best_emb = embeddings.cpu()
                if args.save:
                    np.save(os.path.join(save_dir, 'embeddings.npy'),
                            best_emb.detach().numpy())
                best_val_metrics = val_metrics
                counter = 0
            else:
                counter += 1
                if counter == args.patience and epoch > args.min_epochs:
                    print("Early stopping")
                    break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
if not best_test_metrics:
    model.eval()
    best_emb = model.encode(data['features'], data['adj_train_norm'])
    best_test_metrics = model.compute_metrics(best_emb, data, 'test')
print(" ".join(
    ["Val set results:",
    format_metrics(best_val_metrics, 'val')]))
print(" ".join(
    ["Test set results:",
    format_metrics(best_test_metrics, 'test')]))
if args.save:
    np.save(os.path.join(save_dir, 'embeddings.npy'),
            best_emb.cpu().detach().numpy())
    if hasattr(model.encoder, 'att_adj'):
        filename = os.path.join(save_dir, args.dataset + '_att_adj.p')
        pickle.dump(model.encoder.att_adj.cpu().to_dense(),
                    open(filename, 'wb'))
        print('Dumped attention adj: ' + filename)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
    json.dump(vars(args), open(os.path.join(save_dir, 'config.json'), 'w'))
    logging.info(f"Saved model in {save_dir}")


Grad of encoder.layers.0.linear.E_linear.weight: 3.191465245646017e-05
Grad of encoder.layers.0.linear.E_linear.bias: 0.03859303847131641
Grad of decoder.origin: None
Grad of decoder.cls: 0.0638054153155486
Grad of decoder.bias: 0.1329139918088913
Epoch 1 - Model Linear Weight:
encoder.layers.0.linear.E_linear.weight: tensor([[ 4.1756e-02, -1.8398e-02,  5.8131e-02,  ..., -7.5510e-02,
         -7.7321e-02,  6.1167e-02],
        [ 6.5736e-02, -4.8146e-02, -6.1437e-02,  ..., -9.7940e-03,
         -7.6801e-02, -5.0412e-05],
        [ 3.6771e-02, -2.2368e-02, -4.6759e-02,  ...,  5.6812e-02,
         -3.2709e-02,  3.4089e-02],
        ...,
        [ 3.2671e-02, -7.7446e-04,  1.9891e-02,  ..., -5.7526e-02,
         -5.3739e-02, -6.5666e-02],
        [-6.9515e-02,  3.3024e-02,  6.7455e-02,  ..., -6.1623e-02,
         -3.4409e-02, -2.0346e-02],
        [-7.1281e-02, -3.0633e-02,  4.4339e-02,  ...,  4.7282e-02,
         -5.6354e-02, -9.1399e-03]], device='cuda:0', dtype=torch.float64)
Grad of en

In [30]:
for epoch in range(args.epochs):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    if args.model == 'HKPNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    elif args.model == 'BKNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    else:
        embeddings = model.encode(data['features'], data['adj_train_norm'])

    #idx=data[f'idx_train']
    #output=model.decode(embeddings, data['adj_train_norm'], idx)
    train_metrics = model.compute_metrics(embeddings, data, 'train')
    
    # 检查 decoded_values 和 embeddings
    #embedded_values.append(embeddings)
    #idx = data[f'idx_train']
    #output = model.decode(embeddings, data['adj_train_norm'], idx)
    #decoded_values.append(output)
    #model_linear_weight.append(model.encoder.layers[0].linear.E_linear.weight.clone())
    
    # 检查梯度是否被正确计算
    train_metrics['loss'].backward()
    #loss = F.cross_entropy(output, data['labels'][idx])
    #loss.backward()

    #for name, param in model.named_parameters():
        #if param.grad is not None:
            #print(f"Grad of {name}: {param.grad.abs().mean()}")
        #else:
            #print(f"Grad of {name}: None")  # 检查梯度是否为 None
    
    if args.grad_clip is not None:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        
    optimizer.step()
    lr_scheduler.step()
    
    # 打印和检查权重是否被更新
    #check_weights(model, epoch)
    
    if (epoch + 1) % args.log_freq == 0:
        print(" ".join([
            'Epoch: {:04d}'.format(epoch + 1),
            'lr: {}'.format(lr_scheduler.get_last_lr()),
            format_metrics(train_metrics, 'train'),
            'time: {:.4f}s'.format(time.time() - t)
        ]))
    
    with torch.no_grad():
        if (epoch + 1) % args.eval_freq == 0:
            model.eval()
            if args.model == 'HKPNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            elif args.model == 'BKNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            else:
                embeddings = model.encode(data['features'],
                                        data['adj_train_norm'])
            val_metrics = model.compute_metrics(embeddings, data, 'val')
            if (epoch + 1) % args.log_freq == 0:
                print(" ".join([
                    'Epoch: {:04d}'.format(epoch + 1),
                    format_metrics(val_metrics, 'val')
                ]))
            if model.has_improved(best_val_metrics, val_metrics):
                best_test_metrics = model.compute_metrics(
                    embeddings, data, 'test')
                best_emb = embeddings.cpu()
                if args.save:
                    np.save(os.path.join(save_dir, 'embeddings.npy'),
                            best_emb.detach().numpy())
                best_val_metrics = val_metrics
                counter = 0
            else:
                counter += 1
                if counter == args.patience and epoch > args.min_epochs:
                    print("Early stopping")
                    break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
if not best_test_metrics:
    model.eval()
    best_emb = model.encode(data['features'], data['adj_train_norm'])
    best_test_metrics = model.compute_metrics(best_emb, data, 'test')
print(" ".join(
    ["Val set results:",
    format_metrics(best_val_metrics, 'val')]))
print(" ".join(
    ["Test set results:",
    format_metrics(best_test_metrics, 'test')]))
if args.save:
    np.save(os.path.join(save_dir, 'embeddings.npy'),
            best_emb.cpu().detach().numpy())
    if hasattr(model.encoder, 'att_adj'):
        filename = os.path.join(save_dir, args.dataset + '_att_adj.p')
        pickle.dump(model.encoder.att_adj.cpu().to_dense(),
                    open(filename, 'wb'))
        print('Dumped attention adj: ' + filename)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
    json.dump(vars(args), open(os.path.join(save_dir, 'config.json'), 'w'))
    logging.info(f"Saved model in {save_dir}")


Epoch: 0005 lr: [5e-05, 5e-05] train_loss: 1.067529 train_acc: 0.531847 train_f1: 0.531847 time: 0.0049s
Epoch: 0005 val_loss: 0.993680 val_acc: 0.592593 val_f1: 0.592593
Epoch: 0010 lr: [5e-05, 5e-05] train_loss: 1.060791 train_acc: 0.531847 train_f1: 0.531847 time: 0.0046s
Epoch: 0010 val_loss: 0.984395 val_acc: 0.592593 val_f1: 0.592593
Epoch: 0015 lr: [5e-05, 5e-05] train_loss: 1.063553 train_acc: 0.531847 train_f1: 0.531847 time: 0.0047s
Epoch: 0015 val_loss: 0.984270 val_acc: 0.592593 val_f1: 0.592593
Epoch: 0020 lr: [5e-05, 5e-05] train_loss: 1.054629 train_acc: 0.531847 train_f1: 0.531847 time: 0.0045s
Epoch: 0020 val_loss: 0.985793 val_acc: 0.592593 val_f1: 0.592593
Epoch: 0025 lr: [5e-05, 5e-05] train_loss: 1.057709 train_acc: 0.531847 train_f1: 0.531847 time: 0.0045s
Epoch: 0025 val_loss: 0.992272 val_acc: 0.592593 val_f1: 0.592593
Epoch: 0030 lr: [5e-05, 5e-05] train_loss: 1.055495 train_acc: 0.531847 train_f1: 0.531847 time: 0.0045s
Epoch: 0030 val_loss: 0.988252 val_acc: 

In [31]:
print(data['labels'][idx].shape)
data['labels'][idx]

torch.Size([314])


tensor([2, 4, 2, 1, 2, 2, 2, 4, 1, 1, 2, 4, 2, 3, 2, 2, 2, 3, 2, 2, 1, 3, 2, 4,
        2, 3, 2, 1, 3, 2, 3, 2, 2, 3, 2, 2, 2, 2, 1, 2, 1, 2, 3, 3, 4, 2, 2, 4,
        2, 3, 1, 1, 2, 2, 2, 1, 2, 2, 3, 1, 1, 4, 3, 1, 2, 2, 1, 2, 3, 4, 3, 4,
        3, 2, 2, 3, 1, 2, 2, 3, 2, 2, 4, 2, 2, 2, 2, 2, 4, 1, 2, 2, 1, 2, 4, 3,
        2, 1, 1, 2, 4, 4, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1,
        2, 2, 2, 2, 2, 1, 1, 2, 4, 2, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2,
        1, 3, 2, 4, 1, 2, 2, 1, 2, 2, 3, 1, 2, 1, 1, 1, 4, 4, 1, 2, 1, 2, 2, 1,
        2, 1, 3, 3, 1, 2, 3, 2, 2, 1, 4, 1, 3, 4, 1, 3, 3, 2, 1, 3, 3, 4, 2, 2,
        2, 2, 0, 2, 3, 3, 4, 2, 2, 4, 3, 3, 2, 4, 2, 0, 4, 3, 3, 4, 2, 2, 2, 4,
        2, 4, 2, 2, 2, 2, 2, 2, 2, 4, 2, 4, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 0,
        0, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 3, 2, 2, 2,
        2, 3, 3, 2, 2, 2, 2, 3, 0, 2, 2, 2, 2, 2, 0, 3, 2, 2, 3, 2, 2, 2, 3, 3,
        4, 2, 0, 4, 2, 2, 2, 2, 3, 3, 2,

In [32]:
print(output.shape)
output

torch.Size([314, 5])


tensor([[1.6154, 3.2373, 5.0736, 3.1639, 2.8448],
        [1.5120, 3.0960, 4.7351, 3.2128, 3.0585],
        [1.4549, 3.3854, 4.7049, 3.0955, 2.8074],
        ...,
        [1.4449, 3.1963, 4.7316, 3.2006, 2.8304],
        [1.4641, 3.3139, 4.7305, 3.0647, 2.8652],
        [1.6434, 3.2017, 5.0942, 3.1965, 2.9009]], device='cuda:0',
       dtype=torch.float64, grad_fn=<IndexBackward0>)

In [33]:
embedded_values

[tensor([[0.0000, 0.0030, 0.0067,  ..., 0.0001, 0.0052, 0.0009],
         [0.0000, 0.0032, 0.0000,  ..., 0.0000, 0.0005, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0021, 0.0026, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0102, 0.0000, 0.0021],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0025, 0.0042],
         [0.0000, 0.0043, 0.0082,  ..., 0.0000, 0.0000, 0.0072]],
        device='cuda:0', dtype=torch.float64, grad_fn=<WhereBackward0>),
 tensor([[0.0000e+00, 5.1564e-03, 1.0330e-03,  ..., 1.3714e-03, 1.1053e-02,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 5.4929e-03,
          0.0000e+00],
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.7568e-03, 4.2233e-03,
          0.0000e+00],
         ...,
         [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 8.7554e-03, 0.0000e+00,
          5.5732e-03],
         [0.0000e+00, 6.7576e-04, 0.0000e+00,  ..., 0.0000e+00, 9.5988e-05,
          4.0005e-03],
         [0.0

# Now let's use this linear transformation for BKNet

In [3]:
import argparse
from types import SimpleNamespace
import sys
sys.path.append('/data/lige/HKN')# Please change accordingly!

from __future__ import division
from __future__ import print_function

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

import datetime
import json
import logging
from optim import RiemannianAdam, RiemannianSGD
import os
import pickle
import time

import numpy as np
import torch
from config import parser
from models.base_models import NCModel, LPModel, GCModel
from utils.data_utils import load_data, get_nei, GCDataset, split_batch
from utils.train_utils import get_dir_name, format_metrics
from utils.eval_utils import acc_f1

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

#import torch.nn.functional as F


config_args = {
    'training_config': {
        'use_geoopt': (False, "which manifold class to use, if false then use basd.manifold"),
        'lr': (1e-4, 'learning rate'),
        'dropout': (0.25, 'dropout probability'),
        'cuda': (0, 'which cuda device to use (-1 for cpu training)'),
        'epochs': (1000, 'maximum number of epochs to train for'),
        'weight_decay': (1e-4, 'l2 regularization strength'),
        'optimizer': ('radam', 'which optimizer to use, can be any of [rsgd, radam]'),
        'momentum': (0.999, 'momentum in optimizer'),
        'patience': (15, 'patience for early stopping'),
        'seed': (18, 'seed for training'),
        'log_freq': (1, 'how often to compute print train/val metrics (in epochs)'),
        'eval_freq': (1, 'how often to compute val metrics (in epochs)'),
        'save': (0, '1 to save model and logs and 0 otherwise'),
        'save_dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
        'sweep_c': (0, ''),
        'lr_reduce_freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
        'gamma': (0.5, 'gamma for lr scheduler'),
        'print_epoch': (True, ''),
        'grad_clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
        'min_epochs': (300, 'do not early stop before min-epochs')
    },
    'model_config': {
        'task': ('nc', 'which tasks to train on, can be any of [lp, nc]'),
        'model': ('BKNet', 'which encoder to use, can be any of [Shallow, MLP, HNN, GCN, GAT, HyperGCN, HyboNet,BKNet,BMLP]'),
        'dim': (32, 'embedding dimension'),
        'manifold': ('PoincareBall', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, Lorentz]'),
        'c': (1.0, 'hyperbolic radius, set to None for trainable curvature'),
        'r': (2., 'fermi-dirac decoder parameter for lp'),
        't': (1., 'fermi-dirac decoder parameter for lp'),
        'margin': (2., 'margin of MarginLoss'),
        'pretrained_embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
        'pos_weight': (0, 'whether to upweight positive class in node classification tasks'),
        'num_layers': (2, 'number of hidden layers in encoder'),
        'bias': (1, 'whether to use bias (1) or not (0)'),
        'act': ('relu', 'which activation function to use (or None for no activation)'),
        'n_heads': (4, 'number of attention heads for graph attention networks, must be a divisor dim'),
        'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
        'double_precision': ('1', 'whether to use double precision'),
        'use_att': (0, 'whether to use hyperbolic attention or not'),
        'local_agg': (0, 'whether to local tangent space aggregation or not'),
        'kernel_size': (6, 'number of kernels'),
        'KP_extent': (0.66, 'influence radius of each kernel point'),
        'radius': (1, 'radius used for kernel point init'),
        'deformable': (False, 'deformable kernel'),
        'linear_before': (64, 'dim of linear before gcn')#64
    },
    'data_config': {
        'dataset': ('wisconsin', 'which dataset to use(cornell,wisconsin,squirrel,cora)'),
        'batch_size': (32, 'batch size for gc'),
        'val_prop': (0.05, 'proportion of validation edges for link prediction'),
        'test_prop': (0.1, 'proportion of test edges for link prediction'),
        'use_feats': (1, 'whether to use node features or not'),
        'normalize_feats': (1, 'whether to normalize input node features'),
        'normalize_adj': (1, 'whether to row-normalize the adjacency matrix'),
        'split_seed': (1234, 'seed for data splits (train/test/val)'),
        'split_graph': (False, 'whether to split the graph')
    }
}

# 将所有参数转换为 SimpleNamespace
args = SimpleNamespace(
    **{k: v[0] for config in config_args.values() for k, v in config.items()}
)

#choose which manifold class to follow 
if args.use_geoopt == False:
    ManifoldParameter = base_ManifoldParameter
else:
    ManifoldParameter = geoopt_ManifoldParameter
np.random.seed(args.seed)#args.seed
torch.manual_seed(args.seed)#args.seed
if int(args.cuda):#args.double_precision
    torch.set_default_dtype(torch.float64)
if int(args.cuda) >= 0:#args.cuda
    torch.cuda.manual_seed(args.seed)#args.seed
args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu' #args.device actually,<-args.cuda
args.patience = args.epochs if not args.patience else args.patience #args.patience<-args.epochs|args.patience

print(f'Using: {args.device}')
print("Using seed {}.".format(args.seed))
print(f"Dataset: {args.dataset}")

# Load data
data = load_data(args, os.path.join('data', args.dataset))
if args.task == 'gc':
    args.n_nodes, args.feat_dim = data['features'][0].shape
else:
    args.n_nodes, args.feat_dim = data['features'].shape
if args.task == 'nc':
    Model = NCModel
    args.n_classes = int(data['labels'].max() + 1)
    args.data = data
    print(f'Num classes: {args.n_classes}')
elif args.task == 'gc':
    Model = GCModel
    args.n_classes = int(data['labels'].max() + 1)
    print(f'Num classes: {args.n_classes}')
else:
    args.nb_false_edges = len(data['train_edges_false'])
    args.nb_edges = len(data['train_edges'])
    if args.task == 'lp':
        Model = LPModel
        args.n_classes = 2

if not args.lr_reduce_freq:
    args.lr_reduce_freq = args.epochs


###A simple check on data
print(data.keys())
print(data['adj_train'].todense().shape)
print(data['features'].shape)
###A simple check on data

# Model and optimizer
model = Model(args)
print(str(model))
no_decay = ['bias', 'scale']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if p.requires_grad and not any(
            nd in n
            for nd in no_decay) and not isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    args.weight_decay
}, {
    'params': [
        p for n, p in model.named_parameters() if p.requires_grad and any(
            nd in n
            for nd in no_decay) or isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    0.0
}]
if args.optimizer == 'radam':
    optimizer = RiemannianAdam(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
elif args.optimizer == 'rsgd':
    optimizer = RiemannianSGD(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=int(
                                                args.lr_reduce_freq),
                                                gamma=float(args.gamma))
tot_params = sum([np.prod(p.size()) for p in model.parameters()])
model = model.to(args.device)
for x, val in data.items():
    if torch.is_tensor(data[x]):
        data[x] = data[x].to(args.device)
print(f"Total number of parameters: {tot_params}")

# Train model for nc:
t_total = time.time()
counter = 0
best_val_metrics = model.init_metric_dict()
best_test_metrics = None
best_emb = None
if args.n_classes > 2:
    f1_average = 'micro'
else:
    f1_average = 'binary'

if args.model == 'HKPNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device)
elif args.model == 'BKNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device) #nei/nei_mask on cuda now


Using: cuda:0
Using seed 18.
Dataset: wisconsin
Num classes: 5
dict_keys(['adj_train', 'features', 'labels', 'idx_train', 'idx_val', 'idx_test', 'adj_train_norm'])
(251, 251)
torch.Size([251, 1703])
NCModel(
  (encoder): BKNet(
    (linear_before): BLinear(
      in_features=1703, out_features=64, c=tensor([1.], device='cuda:0'), use_bias=1, act=<function relu at 0x7f24c2f832e0>, dropout_rate=0.25
      (E_linear): Linear(in_features=1703, out_features=64, bias=True)
      (dropout): Dropout(p=0.25, inplace=False)
    )
    (layers): Sequential(
      (0): KPGraphConvolution(
        (net): KernelPointAggregation(
          (linears): ModuleList(
            (0): BLinear(
              in_features=64, out_features=32, c=tensor([1.], device='cuda:0'), use_bias=1, act=<function relu at 0x7f24c2f832e0>, dropout_rate=0.25
              (E_linear): Linear(in_features=64, out_features=32, bias=True)
              (dropout): Dropout(p=0.25, inplace=False)
            )
            (1): BLinea

In [4]:
if config_args['model_config']['model'][0] == 'BMLP':
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data)
    print(model.encode(data['features'], data['adj_train_norm']))#[2]
    for name, param in model.named_parameters():
        print(name, param.requires_grad)  # 确保所有参数的 requires_grad 都是 True
elif config_args['model_config']['model'][0] == 'BKNet':
    for name, param in model.named_parameters():
        if param.requires_grad:
            print(name, param.data)
    print(model.encode(data['features'], (nei, nei_mask)))#[2]
    for name, param in model.named_parameters():
        print(name, param.requires_grad)  # 确保所有参数的 requires_grad 都是 True

encoder.linear_before.E_linear.weight tensor([[ 0.0685, -0.0666, -0.0226,  ...,  0.0301, -0.0054, -0.0300],
        [ 0.0034, -0.0680,  0.0291,  ..., -0.0720, -0.0134,  0.0161],
        [-0.0485, -0.0098, -0.0064,  ..., -0.0677, -0.0774, -0.0189],
        ...,
        [-0.0503,  0.0233, -0.0401,  ..., -0.0612, -0.0225,  0.0024],
        [-0.0190,  0.0750, -0.0392,  ..., -0.0543,  0.0629, -0.0066],
        [-0.0325, -0.0337,  0.0090,  ...,  0.0140,  0.0550,  0.0002]],
       device='cuda:0', dtype=torch.float64)
encoder.linear_before.E_linear.bias tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       device='cuda:0', dtype=torch.float64)
encoder.layers.0.net.linears.0.E_linear.weight tensor([[ 0.0961, -0.1197, -0.0319,  ...,  0.0256, -0.0263,  0.2986],
    

In [5]:
for epoch in range(args.epochs):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    if args.model == 'HKPNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    elif args.model == 'BKNet':
        embeddings = model.encode(data['features'], (nei, nei_mask))
    else:
        embeddings = model.encode(data['features'], data['adj_train_norm'])

    #idx=data[f'idx_train']
    #output=model.decode(embeddings, data['adj_train_norm'], idx)
    train_metrics = model.compute_metrics(embeddings, data, 'train')
    
    # 检查 decoded_values 和 embeddings
    #embedded_values.append(embeddings)
    #idx = data[f'idx_train']
    #output = model.decode(embeddings, data['adj_train_norm'], idx)
    #decoded_values.append(output)
    #model_linear_weight.append(model.encoder.layers[0].linear.E_linear.weight.clone())
    
    # 检查梯度是否被正确计算
    train_metrics['loss'].backward()
    #loss = F.cross_entropy(output, data['labels'][idx])
    #loss.backward()

    #for name, param in model.named_parameters():
        #if param.grad is not None:
            #print(f"Grad of {name}: {param.grad.abs().mean()}")
        #else:
            #print(f"Grad of {name}: None")  # 检查梯度是否为 None
    
    if args.grad_clip is not None:
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        
    optimizer.step()
    lr_scheduler.step()
    
    # 打印和检查权重是否被更新
    #check_weights(model, epoch)
    
    if (epoch + 1) % args.log_freq == 0:
        print(" ".join([
            'Epoch: {:04d}'.format(epoch + 1),
            'lr: {}'.format(lr_scheduler.get_last_lr()),
            format_metrics(train_metrics, 'train'),
            'time: {:.4f}s'.format(time.time() - t)
        ]))
    
    with torch.no_grad():
        if (epoch + 1) % args.eval_freq == 0:
            model.eval()
            if args.model == 'HKPNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            elif args.model == 'BKNet':
                embeddings = model.encode(data['features'], (nei, nei_mask))
            else:
                embeddings = model.encode(data['features'],
                                        data['adj_train_norm'])
            val_metrics = model.compute_metrics(embeddings, data, 'val')
            if (epoch + 1) % args.log_freq == 0:
                print(" ".join([
                    'Epoch: {:04d}'.format(epoch + 1),
                    format_metrics(val_metrics, 'val')
                ]))
            if model.has_improved(best_val_metrics, val_metrics):
                best_test_metrics = model.compute_metrics(
                    embeddings, data, 'test')
                best_emb = embeddings.cpu()
                if args.save:
                    np.save(os.path.join(save_dir, 'embeddings.npy'),
                            best_emb.detach().numpy())
                best_val_metrics = val_metrics
                counter = 0
            else:
                counter += 1
                if counter == args.patience and epoch > args.min_epochs:
                    print("Early stopping")
                    break

print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
if not best_test_metrics:
    model.eval()
    best_emb = model.encode(data['features'], data['adj_train_norm'])
    best_test_metrics = model.compute_metrics(best_emb, data, 'test')
print(" ".join(
    ["Val set results:",
    format_metrics(best_val_metrics, 'val')]))
print(" ".join(
    ["Test set results:",
    format_metrics(best_test_metrics, 'test')]))
if args.save:
    np.save(os.path.join(save_dir, 'embeddings.npy'),
            best_emb.cpu().detach().numpy())
    if hasattr(model.encoder, 'att_adj'):
        filename = os.path.join(save_dir, args.dataset + '_att_adj.p')
        pickle.dump(model.encoder.att_adj.cpu().to_dense(),
                    open(filename, 'wb'))
        print('Dumped attention adj: ' + filename)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
    json.dump(vars(args), open(os.path.join(save_dir, 'config.json'), 'w'))
    logging.info(f"Saved model in {save_dir}")


Epoch: 0001 lr: [0.0001, 0.0001] train_loss: 1.609458 train_acc: 0.076433 train_f1: 0.076433 time: 0.0526s
Epoch: 0001 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch: 0002 lr: [0.0001, 0.0001] train_loss:  nan train_acc: 0.025478 train_f1: 0.025478 time: 0.0480s
Epoch: 0002 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch: 0003 lr: [0.0001, 0.0001] train_loss:  nan train_acc: 0.025478 train_f1: 0.025478 time: 0.0469s
Epoch: 0003 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch: 0004 lr: [0.0001, 0.0001] train_loss:  nan train_acc: 0.025478 train_f1: 0.025478 time: 0.0446s
Epoch: 0004 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch: 0005 lr: [0.0001, 0.0001] train_loss:  nan train_acc: 0.025478 train_f1: 0.025478 time: 0.0437s
Epoch: 0005 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch: 0006 lr: [0.0001, 0.0001] train_loss:  nan train_acc: 0.025478 train_f1: 0.025478 time: 0.0445s
Epoch: 0006 val_loss:  nan val_acc: 0.018519 val_f1: 0.018519
Epoch:

KeyboardInterrupt: 

We need to forward check what is causing NAN

In [6]:
import argparse
from types import SimpleNamespace
import sys
sys.path.append('/data/lige/HKN')# Please change accordingly!

from __future__ import division
from __future__ import print_function

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

import datetime
import json
import logging
from optim import RiemannianAdam, RiemannianSGD
import os
import pickle
import time

import numpy as np
import torch
from config import parser
from models.base_models import NCModel, LPModel, GCModel
from utils.data_utils import load_data, get_nei, GCDataset, split_batch
from utils.train_utils import get_dir_name, format_metrics
from utils.eval_utils import acc_f1

from geoopt import ManifoldParameter as geoopt_ManifoldParameter
from manifolds.base import ManifoldParameter as base_ManifoldParameter

#import torch.nn.functional as F


config_args = {
    'training_config': {
        'use_geoopt': (False, "which manifold class to use, if false then use basd.manifold"),
        'lr': (1e-4, 'learning rate'),
        'dropout': (0.25, 'dropout probability'),
        'cuda': (0, 'which cuda device to use (-1 for cpu training)'),
        'epochs': (1000, 'maximum number of epochs to train for'),
        'weight_decay': (1e-4, 'l2 regularization strength'),
        'optimizer': ('radam', 'which optimizer to use, can be any of [rsgd, radam]'),
        'momentum': (0.999, 'momentum in optimizer'),
        'patience': (15, 'patience for early stopping'),
        'seed': (18, 'seed for training'),
        'log_freq': (1, 'how often to compute print train/val metrics (in epochs)'),
        'eval_freq': (1, 'how often to compute val metrics (in epochs)'),
        'save': (0, '1 to save model and logs and 0 otherwise'),
        'save_dir': (None, 'path to save training logs and model weights (defaults to logs/task/date/run/)'),
        'sweep_c': (0, ''),
        'lr_reduce_freq': (None, 'reduce lr every lr-reduce-freq or None to keep lr constant'),
        'gamma': (0.5, 'gamma for lr scheduler'),
        'print_epoch': (True, ''),
        'grad_clip': (None, 'max norm for gradient clipping, or None for no gradient clipping'),
        'min_epochs': (300, 'do not early stop before min-epochs')
    },
    'model_config': {
        'task': ('nc', 'which tasks to train on, can be any of [lp, nc]'),
        'model': ('BKNet', 'which encoder to use, can be any of [Shallow, MLP, HNN, GCN, GAT, HyperGCN, HyboNet,BKNet,BMLP]'),
        'dim': (32, 'embedding dimension'),
        'manifold': ('PoincareBall', 'which manifold to use, can be any of [Euclidean, Hyperboloid, PoincareBall, Lorentz]'),
        'c': (1.0, 'hyperbolic radius, set to None for trainable curvature'),
        'r': (2., 'fermi-dirac decoder parameter for lp'),
        't': (1., 'fermi-dirac decoder parameter for lp'),
        'margin': (2., 'margin of MarginLoss'),
        'pretrained_embeddings': (None, 'path to pretrained embeddings (.npy file) for Shallow node classification'),
        'pos_weight': (0, 'whether to upweight positive class in node classification tasks'),
        'num_layers': (2, 'number of hidden layers in encoder'),
        'bias': (1, 'whether to use bias (1) or not (0)'),
        'act': ('relu', 'which activation function to use (or None for no activation)'),
        'n_heads': (4, 'number of attention heads for graph attention networks, must be a divisor dim'),
        'alpha': (0.2, 'alpha for leakyrelu in graph attention networks'),
        'double_precision': ('1', 'whether to use double precision'),
        'use_att': (0, 'whether to use hyperbolic attention or not'),
        'local_agg': (0, 'whether to local tangent space aggregation or not'),
        'kernel_size': (6, 'number of kernels'),
        'KP_extent': (0.66, 'influence radius of each kernel point'),
        'radius': (1, 'radius used for kernel point init'),
        'deformable': (False, 'deformable kernel'),
        'linear_before': (64, 'dim of linear before gcn')#64
    },
    'data_config': {
        'dataset': ('wisconsin', 'which dataset to use(cornell,wisconsin,squirrel,cora)'),
        'batch_size': (32, 'batch size for gc'),
        'val_prop': (0.05, 'proportion of validation edges for link prediction'),
        'test_prop': (0.1, 'proportion of test edges for link prediction'),
        'use_feats': (1, 'whether to use node features or not'),
        'normalize_feats': (1, 'whether to normalize input node features'),
        'normalize_adj': (1, 'whether to row-normalize the adjacency matrix'),
        'split_seed': (1234, 'seed for data splits (train/test/val)'),
        'split_graph': (False, 'whether to split the graph')
    }
}

# 将所有参数转换为 SimpleNamespace
args = SimpleNamespace(
    **{k: v[0] for config in config_args.values() for k, v in config.items()}
)

#choose which manifold class to follow 
if args.use_geoopt == False:
    ManifoldParameter = base_ManifoldParameter
else:
    ManifoldParameter = geoopt_ManifoldParameter
np.random.seed(args.seed)#args.seed
torch.manual_seed(args.seed)#args.seed
if int(args.cuda):#args.double_precision
    torch.set_default_dtype(torch.float64)
if int(args.cuda) >= 0:#args.cuda
    torch.cuda.manual_seed(args.seed)#args.seed
args.device = 'cuda:' + str(args.cuda) if int(args.cuda) >= 0 else 'cpu' #args.device actually,<-args.cuda
args.patience = args.epochs if not args.patience else args.patience #args.patience<-args.epochs|args.patience

print(f'Using: {args.device}')
print("Using seed {}.".format(args.seed))
print(f"Dataset: {args.dataset}")

# Load data
data = load_data(args, os.path.join('data', args.dataset))
if args.task == 'gc':
    args.n_nodes, args.feat_dim = data['features'][0].shape
else:
    args.n_nodes, args.feat_dim = data['features'].shape
if args.task == 'nc':
    Model = NCModel
    args.n_classes = int(data['labels'].max() + 1)
    args.data = data
    print(f'Num classes: {args.n_classes}')
elif args.task == 'gc':
    Model = GCModel
    args.n_classes = int(data['labels'].max() + 1)
    print(f'Num classes: {args.n_classes}')
else:
    args.nb_false_edges = len(data['train_edges_false'])
    args.nb_edges = len(data['train_edges'])
    if args.task == 'lp':
        Model = LPModel
        args.n_classes = 2

if not args.lr_reduce_freq:
    args.lr_reduce_freq = args.epochs


###A simple check on data
print(data.keys())
print(data['adj_train'].todense().shape)
print(data['features'].shape)
###A simple check on data

# Model and optimizer
model = Model(args)
print(str(model))
no_decay = ['bias', 'scale']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if p.requires_grad and not any(
            nd in n
            for nd in no_decay) and not isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    args.weight_decay
}, {
    'params': [
        p for n, p in model.named_parameters() if p.requires_grad and any(
            nd in n
            for nd in no_decay) or isinstance(p, ManifoldParameter)
    ],
    'weight_decay':
    0.0
}]
if args.optimizer == 'radam':
    optimizer = RiemannianAdam(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
elif args.optimizer == 'rsgd':
    optimizer = RiemannianSGD(params=optimizer_grouped_parameters,
                                lr=args.lr,
                                stabilize=10)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=int(
                                                args.lr_reduce_freq),
                                                gamma=float(args.gamma))
tot_params = sum([np.prod(p.size()) for p in model.parameters()])
model = model.to(args.device)
for x, val in data.items():
    if torch.is_tensor(data[x]):
        data[x] = data[x].to(args.device)
print(f"Total number of parameters: {tot_params}")

# Train model for nc:
t_total = time.time()
counter = 0
best_val_metrics = model.init_metric_dict()
best_test_metrics = None
best_emb = None
if args.n_classes > 2:
    f1_average = 'micro'
else:
    f1_average = 'binary'

if args.model == 'HKPNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device)
elif args.model == 'BKNet':
    nei, nei_mask = get_nei(data['adj_train'])
    nei = nei.to(args.device)
    nei_mask = nei_mask.to(args.device) #nei/nei_mask on cuda now

#Initialize an untrained Model

Using: cuda:0
Using seed 18.
Dataset: wisconsin
Num classes: 5
dict_keys(['adj_train', 'features', 'labels', 'idx_train', 'idx_val', 'idx_test', 'adj_train_norm'])
(251, 251)
torch.Size([251, 1703])
NCModel(
  (encoder): BKNet(
    (linear_before): BLinear(
      in_features=1703, out_features=64, c=tensor([1.], device='cuda:0'), use_bias=1, act=<function relu at 0x7f24c2f832e0>, dropout_rate=0.25
      (E_linear): Linear(in_features=1703, out_features=64, bias=True)
      (dropout): Dropout(p=0.25, inplace=False)
    )
    (layers): Sequential(
      (0): KPGraphConvolution(
        (net): KernelPointAggregation(
          (linears): ModuleList(
            (0): BLinear(
              in_features=64, out_features=32, c=tensor([1.], device='cuda:0'), use_bias=1, act=<function relu at 0x7f24c2f832e0>, dropout_rate=0.25
              (E_linear): Linear(in_features=64, out_features=32, bias=True)
              (dropout): Dropout(p=0.25, inplace=False)
            )
            (1): BLinea

  adj = nx.adjacency_matrix(G, sorted(G.nodes()))


In [8]:
print(data['features'].shape)
print(torch.norm(data['features'], p=2, dim=1))
encoded_value = model.encode(data['features'], (nei, nei_mask))
encoded_value

torch.Size([251, 1703])
tensor([0.1179, 0.1429, 0.1400, 0.1741, 0.1250, 0.0830, 0.0643, 0.1443, 0.1132,
        0.1085, 0.1459, 0.1195, 0.1291, 0.1162, 0.1543, 0.1260, 0.1361, 0.1562,
        0.1231, 0.1961, 0.1443, 0.0880, 0.1741, 0.1222, 0.1015, 0.0891, 0.1031,
        0.0842, 0.0839, 0.1204, 0.2673, 0.0962, 0.1162, 0.1667, 0.0698, 0.0634,
        0.1162, 0.0921, 0.0811, 0.1562, 0.1302, 0.1222, 0.1091, 0.1195, 0.0976,
        0.1443, 0.1140, 0.0917, 0.0833, 0.2000, 0.1104, 0.0704, 0.0867, 0.1508,
        0.0647, 0.1280, 0.1400, 0.1260, 0.0909, 0.1400, 0.1400, 0.1543, 0.2000,
        0.1031, 0.1280, 0.1543, 0.1147, 0.0725, 0.0967, 0.1195, 0.1581, 0.0995,
        0.0985, 0.1031, 0.1222, 0.0774, 0.1400, 0.0798, 0.1000, 0.1508, 0.1187,
        0.2085, 0.1231, 0.1313, 0.1260, 0.1581, 0.1429, 0.1118, 0.1400, 0.0995,
        0.1961, 0.1111, 0.1021, 0.0737, 0.1508, 0.1231, 0.1474, 0.0830, 0.1072,
        0.1562, 0.0925, 0.1491, 0.1195, 0.2132, 0.0786, 0.1125, 0.2041, 0.0657,
        0.0921, 

tensor([[2.4141e-04, 1.6168e-04, 5.1777e-05,  ..., 1.7272e-04, 1.6434e-04,
         9.6065e-05],
        [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [4.1340e-05, 1.3359e-05, 3.1609e-05,  ..., 6.6924e-05, 9.0057e-06,
         5.2397e-06],
        ...,
        [3.5302e-05, 1.2965e-05, 1.9350e-05,  ..., 4.6686e-05, 2.5231e-05,
         1.0390e-05],
        [2.4220e-05, 1.3229e-05, 2.0279e-05,  ..., 2.3395e-05, 3.0502e-05,
         5.8228e-05],
        [2.5113e-05, 1.4558e-05, 3.4037e-05,  ..., 1.5150e-06, 2.3621e-05,
         3.8177e-06]], device='cuda:0', dtype=torch.float64,
       grad_fn=<MulBackward0>)

In [32]:
print(encoded_value.shape)
print(torch.norm(encoded_value, p=2, dim=1).min(),'\n',torch.norm(encoded_value, p=2, dim=1).max())
idx=data[f'idx_train']
decoded_value=model.decode(encoded_value, data['adj_train_norm'], idx)

random_indices = torch.randperm( decoded_value.size(0))[:10]
print(decoded_value[random_indices])

torch.Size([251, 32])
tensor(0., device='cuda:0', dtype=torch.float64, grad_fn=<MinBackward1>) 
 tensor(0.0201, device='cuda:0', dtype=torch.float64, grad_fn=<MaxBackward1>)
tensor([[2.0001, 1.9999, 2.0000, 2.0000, 2.0004],
        [2.0000, 1.9999, 1.9999, 2.0000, 2.0001],
        [2.0000, 2.0000, 2.0000, 2.0001, 2.0001],
        [2.0002, 2.0001, 2.0001, 2.0001, 2.0001],
        [2.0002, 2.0001, 2.0002, 2.0001, 2.0002],
        [2.0000, 2.0000, 2.0000, 2.0000, 2.0000],
        [2.0000, 2.0000, 2.0001, 2.0001, 2.0002],
        [2.0001, 2.0000, 2.0000, 2.0001, 2.0001],
        [2.0002, 1.9999, 2.0000, 2.0000, 2.0003],
        [2.0000, 2.0000, 2.0000, 2.0000, 2.0000]], device='cuda:0',
       dtype=torch.float64, grad_fn=<IndexBackward0>)
