## reproduce of paper Graph Transformer Networks

#### unzip dataset (the dataset is provided by the author)

In [1]:
!unzip -o -d ./data data/data.zip 

Archive:  data/data.zip
  inflating: ./data/ACM/.ipynb_checkpoints/ACM_data-checkpoint.ipynb  
  inflating: ./data/ACM/.ipynb_checkpoints/Untitled-checkpoint.ipynb  
  inflating: ./data/ACM/edges.pkl    
  inflating: ./data/ACM/labels.pkl   
  inflating: ./data/ACM/node_features.pkl  
  inflating: ./data/DBLP/.ipynb_checkpoints/Untitled-checkpoint.ipynb  
  inflating: ./data/DBLP/.ipynb_checkpoints/author_label-checkpoint.txt  
  inflating: ./data/DBLP/.ipynb_checkpoints/paper_author-checkpoint.txt  
  inflating: ./data/DBLP/.ipynb_checkpoints/small_data-checkpoint.ipynb  
  inflating: ./data/DBLP/.ipynb_checkpoints/term-checkpoint.txt  
  inflating: ./data/DBLP/.ipynb_checkpoints/total_data-checkpoint.ipynb  
  inflating: ./data/DBLP/edges.pkl   
  inflating: ./data/DBLP/labels.pkl  
  inflating: ./data/DBLP/node_features.pkl  
  inflating: ./data/IMDB/edges.pkl   
  inflating: ./data/IMDB/labels.pkl  
  inflating: ./data/IMDB/node_features.pkl  


In [2]:
import torch
import numpy as np
import torch.nn as nn
from model import GTN
import pickle
from utils import f1_score,set_global_random_seed
from dataclasses import dataclass

In [4]:
@dataclass   # substitute for argparse
class Argument:
    dataset: str  # choice in ["ACM","DBLP","IMDB"]
    epoch: int = 40
    node_dim: int = 64
    num_channels: int = 2
    lr: float = 0.005
    weight_decay: float = 0.001
    num_layers: int = 2
    norm: str = 'true'
    adaptive_lr: str = 'true'
    seed:int =16


In [5]:
def train(args:Argument):
    device = torch.device("cuda:0") 
    epochs = args.epoch
    node_dim = args.node_dim
    num_channels = args.num_channels
    lr = args.lr
    weight_decay = args.weight_decay
    num_layers = args.num_layers
    norm = args.norm
    adaptive_lr = args.adaptive_lr
    with open('./data/'+args.dataset+'/node_features.pkl','rb') as f:
        node_features = pickle.load(f)
    with open('./data/'+args.dataset+'/edges.pkl','rb') as f:
        edges = pickle.load(f)
    with open('./data/'+args.dataset+'/labels.pkl','rb') as f:
        labels = pickle.load(f)
    num_nodes = edges[0].shape[0]

    for i,edge in enumerate(edges):
        if i ==0:
            A = torch.from_numpy(edge.todense()).type(torch.FloatTensor).unsqueeze(-1).to(device)
        else:
            A = torch.cat([A.to(device),torch.from_numpy(edge.todense()).type(torch.FloatTensor).unsqueeze(-1).to(device)], dim=-1)
    A = torch.cat([A,torch.eye(num_nodes).type(torch.FloatTensor).unsqueeze(-1).to(device)], dim=-1)

    node_features = torch.from_numpy(node_features).type(torch.FloatTensor).to(device)
    print("node_features.shape",node_features.shape)
    train_node = torch.from_numpy(np.array(labels[0])[:,0]).type(torch.LongTensor).to(device)
    train_target = torch.from_numpy(np.array(labels[0])[:,1]).type(torch.LongTensor).to(device)
    valid_node = torch.from_numpy(np.array(labels[1])[:,0]).type(torch.LongTensor).to(device)
    valid_target = torch.from_numpy(np.array(labels[1])[:,1]).type(torch.LongTensor).to(device)
    test_node = torch.from_numpy(np.array(labels[2])[:,0]).type(torch.LongTensor).to(device)
    test_target = torch.from_numpy(np.array(labels[2])[:,1]).type(torch.LongTensor).to(device)

    num_classes = torch.max(train_target).item()+1
    final_f1 = 0
    for l in range(1):
        model = GTN(num_edge=A.shape[-1],
                            num_channels=num_channels,
                            w_in = node_features.shape[1],
                            w_out = node_dim,
                            num_class=num_classes,
                            num_layers=num_layers,
                            norm=norm).to(device)
        if adaptive_lr == 'false':
            optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)
        else:
            optimizer = torch.optim.Adam([{'params':model.weight},
                                        {'params':model.linear1.parameters()},
                                        {'params':model.linear2.parameters()},
                                        {"params":model.layers.parameters(), "lr":0.5}
                                        ], lr=0.005, weight_decay=0.001)
        loss = nn.CrossEntropyLoss()
        # Train & Valid & Test
        best_val_loss = 10000
        best_test_loss = 10000
        best_train_loss = 10000
        best_train_f1 = 0
        best_val_f1 = 0
        best_test_f1 = 0
        print("ready to train")
        for i in range(epochs):
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 0.005:
                    param_group['lr'] = param_group['lr'] * 0.9
            print('Epoch:  ',i+1)
            model.zero_grad()
            model.train()
            loss,y_train,Ws = model(A, node_features, train_node, train_target)
            train_f1 = torch.mean(f1_score(torch.argmax(y_train.detach(),dim=1), train_target, num_classes=num_classes)).cpu().numpy()
            print('Train - Loss: {:.4f}, Macro_F1: {:.4f}'.format(loss.detach().cpu().numpy(), train_f1))
            loss.backward()
            optimizer.step()
            model.eval()
            # Valid
            with torch.no_grad():
                val_loss, y_valid,_ = model.forward(A, node_features, valid_node, valid_target)
                val_f1 = torch.mean(f1_score(torch.argmax(y_valid,dim=1), valid_target, num_classes=num_classes)).cpu().numpy()
                print('Valid - Loss: {:.4f}, Macro_F1: {:.4f}'.format(val_loss.detach().cpu().numpy(), val_f1))
                test_loss, y_test,W = model.forward(A, node_features, test_node, test_target)
                test_f1 = torch.mean(f1_score(torch.argmax(y_test,dim=1), test_target, num_classes=num_classes)).cpu().numpy()
                print('Test  - Loss: {:.4f}, Macro_F1: {:.4f}\n'.format(test_loss.detach().cpu().numpy(), test_f1))
            if val_f1 > best_val_f1:
                best_val_loss = val_loss.detach().cpu().numpy()
                best_test_loss = test_loss.detach().cpu().numpy()
                best_train_loss = loss.detach().cpu().numpy()
                best_train_f1 = train_f1
                best_val_f1 = val_f1
                best_test_f1 = test_f1 
        print('---------------Best Results--------------------')
        print('Train - Loss: {:.4f}, Macro_F1: {:.4f}'.format(best_train_loss, best_train_f1))
        print('Valid - Loss: {:.4f}, Macro_F1: {:.4f}'.format(best_val_loss, best_val_f1))
        print('Test  - Loss: {:.4f}, Macro_F1: {:.4f}'.format(best_test_loss, best_test_f1))
        final_f1 += best_test_f1

## Train
- restart the notebook after each experiment on a certain dataset

### Reproduce on dataset ACM

In [6]:
args = Argument(dataset="ACM",num_layers=2,epoch=40,adaptive_lr=True)
set_global_random_seed(args.seed) 
train(args)

  edges = pickle.load(f)
  edges = pickle.load(f)


node_features.shape torch.Size([8994, 1902])
ready to train
Epoch:   1
Train - Loss: 1.0982, Macro_F1: 0.1667
Valid - Loss: 1.0629, Macro_F1: 0.3044
Test  - Loss: 1.0679, Macro_F1: 0.3102

Epoch:   2
Train - Loss: 1.0535, Macro_F1: 0.3365
Valid - Loss: 1.0795, Macro_F1: 0.3638
Test  - Loss: 1.0709, Macro_F1: 0.3701

Epoch:   3
Train - Loss: 1.0659, Macro_F1: 0.4221
Valid - Loss: 0.9898, Macro_F1: 0.2195
Test  - Loss: 0.9834, Macro_F1: 0.2397

Epoch:   4
Train - Loss: 0.9708, Macro_F1: 0.2629
Valid - Loss: 0.9163, Macro_F1: 0.6245
Test  - Loss: 0.9183, Macro_F1: 0.6350

Epoch:   5
Train - Loss: 0.8883, Macro_F1: 0.6835
Valid - Loss: 0.8082, Macro_F1: 0.8952
Test  - Loss: 0.8170, Macro_F1: 0.8913

Epoch:   6
Train - Loss: 0.7707, Macro_F1: 0.9345
Valid - Loss: 0.6630, Macro_F1: 0.9301
Test  - Loss: 0.6740, Macro_F1: 0.9201

Epoch:   7
Train - Loss: 0.6146, Macro_F1: 0.9548
Valid - Loss: 0.5302, Macro_F1: 0.8924
Test  - Loss: 0.5424, Macro_F1: 0.8855

Epoch:   8
Train - Loss: 0.4725, Macr

### Reproduce on dataset IMDB


In [5]:
args = Argument(dataset="IMDB",num_layers=3,epoch=40,adaptive_lr=True)
set_global_random_seed(args.seed) 
train(args)

  edges = pickle.load(f)


node_features.shape torch.Size([12772, 1256])
ready to train
Epoch:   1
Train - Loss: 1.1035, Macro_F1: 0.1667
Valid - Loss: 1.0995, Macro_F1: 0.1667
Test  - Loss: 1.0643, Macro_F1: 0.2348

Epoch:   2
Train - Loss: 1.0951, Macro_F1: 0.1667
Valid - Loss: 1.0950, Macro_F1: 0.1667
Test  - Loss: 1.0705, Macro_F1: 0.2348

Epoch:   3
Train - Loss: 1.0856, Macro_F1: 0.1667
Valid - Loss: 1.0885, Macro_F1: 0.1940
Test  - Loss: 1.0735, Macro_F1: 0.2513

Epoch:   4
Train - Loss: 1.0711, Macro_F1: 0.2950
Valid - Loss: 1.0782, Macro_F1: 0.4525
Test  - Loss: 1.0715, Macro_F1: 0.4601

Epoch:   5
Train - Loss: 1.0490, Macro_F1: 0.7879
Valid - Loss: 1.0625, Macro_F1: 0.6485
Test  - Loss: 1.0625, Macro_F1: 0.5754

Epoch:   6
Train - Loss: 1.0164, Macro_F1: 0.9303
Valid - Loss: 1.0387, Macro_F1: 0.6744
Test  - Loss: 1.0450, Macro_F1: 0.5925

Epoch:   7
Train - Loss: 0.9697, Macro_F1: 0.9434
Valid - Loss: 1.0048, Macro_F1: 0.6833
Test  - Loss: 1.0196, Macro_F1: 0.5962

Epoch:   8
Train - Loss: 0.9051, Mac

### Reproduce on dataset DBLP

In [5]:
args = Argument(dataset="DBLP",num_layers=3,epoch=40,adaptive_lr=False)
set_global_random_seed(args.seed) 
train(args)

  edges = pickle.load(f)
  edges = pickle.load(f)


node_features.shape torch.Size([18405, 334])
ready to train
Epoch:   1
Train - Loss: 1.3866, Macro_F1: 0.1087
Valid - Loss: 1.3591, Macro_F1: 0.2508
Test  - Loss: 1.3665, Macro_F1: 0.1987

Epoch:   2
Train - Loss: 1.3591, Macro_F1: 0.2403
Valid - Loss: 1.3017, Macro_F1: 0.5891
Test  - Loss: 1.3104, Macro_F1: 0.5370

Epoch:   3
Train - Loss: 1.3002, Macro_F1: 0.5919
Valid - Loss: 1.1967, Macro_F1: 0.7573
Test  - Loss: 1.2067, Macro_F1: 0.7228

Epoch:   4
Train - Loss: 1.1942, Macro_F1: 0.7683
Valid - Loss: 1.0251, Macro_F1: 0.9208
Test  - Loss: 1.0305, Macro_F1: 0.8845

Epoch:   5
Train - Loss: 1.0254, Macro_F1: 0.8938
Valid - Loss: 0.8264, Macro_F1: 0.9400
Test  - Loss: 0.8319, Macro_F1: 0.9259

Epoch:   6
Train - Loss: 0.8320, Macro_F1: 0.9206
Valid - Loss: 0.7740, Macro_F1: 0.7475
Test  - Loss: 0.7951, Macro_F1: 0.6875

Epoch:   7
Train - Loss: 0.7865, Macro_F1: 0.7166
Valid - Loss: 0.7782, Macro_F1: 0.7261
Test  - Loss: 0.8349, Macro_F1: 0.6852

Epoch:   8
Train - Loss: 0.7628, Macr

Table 1: Evaluation results on the node classification task (F1 score)
|            | result in paper    |  reproduce result  |
| :-:   | :-:   | :-: |
| DBLP        | 94.18      |   94.02    |
| ACM        | 92.68      |   92.12    |
| IMDB        | 60.92      |   58.75    |