# Test of GAT
- use DGL
- predict `graphs`
- test: 0~99
- validation: 100~199
- train: 200~999
- larger lr with scheduler
- try the sklearn report

In [9]:
import os
import dgl
import json
import torch
import torch as th
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import classification_report


- check the GPU and assign the GPU by the best memory usage

In [10]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

# device = torch.device(f"cuda:1")
print(device)


cuda:2


## Fix the seed

In [11]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [12]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]

        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"]).to(self.device)

        g.ndata['feat'] = th.tensor(data["node_feat"]).to(self.device)
        g.edata['feat'] = th.tensor(data["edge_attr"]).to(self.device)  # Add edge features to graph

        return g, th.tensor(data["label"]).to(self.device)


def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)


In [13]:
datasets = ['train', 'valid', 'test']
# datasets = ['test']
dataset_data = {}

for dataset_name in tqdm(datasets):
#     file_path = f"../../data_processing/dgl/data/test_graph/repeated_{dataset_name}.jsonl"
    file_path = f"../../data_processing/dgl/data_new/training_data/transH_50/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_50/train.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_50/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_50/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [14]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

# dataloaders = create_dataloaders(4)
dataloaders = create_dataloaders(16)


if (len(dataloaders['test'].dataset) + len(dataloaders['valid'].dataset) + len(dataloaders['train'].dataset)) % 165 != 0: print("Error data!!")
else: print("OK!")


OK!


- Turn the print message to a log file

In [15]:
# print(dataloaders['test'][5])
sample = dataset_data['train'][5000]
print(sample)

print(len(dataloaders['test'].dataset))
print(len(dataloaders['valid'].dataset))
print(len(dataloaders['train'].dataset))
print(len(dataloaders['test'].dataset) + len(dataloaders['valid'].dataset) + len(dataloaders['train'].dataset))

(Graph(num_nodes=26, num_edges=67,
      ndata_schemes={'feat': Scheme(shape=(50,), dtype=torch.float32)}
      edata_schemes={'feat': Scheme(shape=(50,), dtype=torch.float32)}), tensor(7, device='cuda:2'))
16500
16500
132000
165000


In [16]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GAT_transH_50.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/0829_00:56_GAT_transH_50.log


### Model

In [17]:
class GAT(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, num_heads, dropout_prob=0.25):
        super(GAT, self).__init__()
        
        # do not check the zero in_degree since we have all the complete graph
        self.layer1 = GATConv(in_dim, hidden_dim, num_heads=num_heads, activation=F.relu, allow_zero_in_degree=True)
        self.layer2 = GATConv(hidden_dim * num_heads, out_dim, num_heads=num_heads, allow_zero_in_degree=True)
        
        # Adding Batch Normalization after each GAT layer
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim * num_heads)
        self.batchnorm2 = nn.BatchNorm1d(out_dim)
        
        # Adding Dropout for regularization
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, g, h):
        # Apply GAT layers
        h = self.layer1(g, h)
        h = h.view(h.shape[0], -1)
        h = F.relu(h)
        h = self.dropout(h)
        h = self.layer2(g, h).squeeze(1)
        
        # Store the output as a new node feature
        g.ndata['h_out'] = h

        # Use mean pooling to aggregate this new node feature
        h_agg = dgl.mean_nodes(g, feat='h_out')
        return h_agg

    

- Model Forward  

In [18]:
def model_fn(data, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
    batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = labels.to(device)
    logits = model(batched_g, batched_g.ndata['feat'].float()) # for GAT
    logits = logits.mean(dim=1)
    
    loss = criterion(logits, labels)

    # Get the class id with the highest probability
    preds = logits.argmax(1)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
    
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")
        
    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")
        
    if count % 5000 == 0: 
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")
        
    return loss, accuracy, preds

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [19]:
seed = 8787
same_seeds(seed)

model = GAT(in_dim=50, hidden_dim=16, out_dim=168, num_heads=8)
torch.save(model.state_dict(), 'model1_initial/initial_weight.pth')

In [20]:
model.layer1.fc.weight

Parameter containing:
tensor([[-0.1806, -0.0598,  0.0091,  ...,  0.0719,  0.2496,  0.0873],
        [ 0.1694, -0.0015, -0.0139,  ...,  0.0147,  0.0892,  0.0146],
        [ 0.0969, -0.0595, -0.0115,  ..., -0.0474,  0.0529, -0.0565],
        ...,
        [-0.0433, -0.2248,  0.3002,  ...,  0.0850,  0.1621,  0.0422],
        [ 0.2097, -0.2492,  0.0612,  ..., -0.0041,  0.0365, -0.1483],
        [ 0.0971, -0.2221,  0.1652,  ..., -0.1312, -0.2610,  0.0077]],
       requires_grad=True)

- Check if model really load the model_dict

In [21]:
model = GAT(in_dim=50, hidden_dim=16, out_dim=168, num_heads=8)
model.load_state_dict(torch.load('model1_initial/initial_weight.pth'))
model.layer1.fc.weight

Parameter containing:
tensor([[-0.1806, -0.0598,  0.0091,  ...,  0.0719,  0.2496,  0.0873],
        [ 0.1694, -0.0015, -0.0139,  ...,  0.0147,  0.0892,  0.0146],
        [ 0.0969, -0.0595, -0.0115,  ..., -0.0474,  0.0529, -0.0565],
        ...,
        [-0.0433, -0.2248,  0.3002,  ...,  0.0850,  0.1621,  0.0422],
        [ 0.2097, -0.2492,  0.0612,  ..., -0.0041,  0.0365, -0.1483],
        [ 0.0971, -0.2221,  0.1652,  ..., -0.1312, -0.2610,  0.0077]],
       requires_grad=True)

### test of valid and test part is ``graph``

- Batch size = 4
- use large lr and scheduler

In [None]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

model = GAT(in_dim=50, hidden_dim=16, out_dim=168, num_heads=8)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model1_initial/initial_weight.pth'))
best_model_path = "../checkpoint_GAT/best_model_GAT_transH_50.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)


criterion = nn.CrossEntropyLoss()
total_steps = 100

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for data in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

        
#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:08:09# labels of 5000: tensor([ 61, 119,  49, 116, 136, 157, 161,  16,  57,  79,  70, 144, 162,  59,
        165, 101], device='cuda:2') torch.Size([16])
08/29/2023, 01:08:09# predicted of 5000: tensor([ 17, 132, 154, 154,  17, 132, 154,  17, 154,   7, 154, 154, 132, 132,
         70, 132], device='cuda:2') torch.Size([16])
08/29/2023, 01:10:32# total batches: 8250
08/29/2023, 01:10:32# Epoch 0 | Train Loss: 4.5774 | Train Accuracy: 0.0619


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:10:32# labels of Validation: tensor([134,  42,  29,  24, 113, 156, 134,  42, 125, 138, 117,  47, 118,  60,
        125,  11], device='cuda:2') torch.Size([16])
08/29/2023, 01:10:32# predicted of Validation: tensor([134,   6, 165,   6,   6, 156, 134,   6,   6,  16,  35,   6, 118,   6,
          6,   6], device='cuda:2') torch.Size([16])
08/29/2023, 01:10:32# labels of 0: tensor([134,  42,  29,  24, 113, 156, 134,  42, 125, 138, 117,  47, 118,  60,
        125,  11], device='cuda:2') torch.Size([16])
08/29/2023, 01:10:32# predicted of 0: tensor([134,   6, 165,   6,   6, 156, 134,   6,   6,  16,  35,   6, 118,   6,
          6,   6], device='cuda:2') torch.Size([16])
08/29/2023, 01:11:10# labels of Validation: tensor([138,  48,  36, 134,  72,  71,  26,  75, 101,  43, 103, 146, 166,  88,
         59, 147], device='cuda:2') torch.Size([16])
08/29/2023, 01:11:10# predicted of Validation: tensor([ 16,   6,   6, 134,  35,   6,   6,   6,   6,   6,   6, 146,   6,   6,
          6,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:14:48# labels of 5000: tensor([ 83,  63,  34, 165, 135, 159, 103,  87,  41, 160,  97,  74, 132, 100,
         17, 112], device='cuda:2') torch.Size([16])
08/29/2023, 01:14:48# predicted of 5000: tensor([103, 132,  91, 165, 132, 159, 132, 132, 114, 132, 132, 103, 132, 132,
         17, 132], device='cuda:2') torch.Size([16])
08/29/2023, 01:17:24# total batches: 8250
08/29/2023, 01:17:24# Epoch 1 | Train Loss: 3.7687 | Train Accuracy: 0.2320


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:17:24# labels of Validation: tensor([ 84,  63,  62,  75, 105, 120, 151, 154, 149, 109, 144,  83,  58, 158,
         39,  36], device='cuda:2') torch.Size([16])
08/29/2023, 01:17:24# predicted of Validation: tensor([ 84,  77,  62,  77, 105, 120,  77, 154, 149,  77,  77,  77,  77,  77,
         45,  77], device='cuda:2') torch.Size([16])
08/29/2023, 01:17:24# labels of 0: tensor([ 84,  63,  62,  75, 105, 120, 151, 154, 149, 109, 144,  83,  58, 158,
         39,  36], device='cuda:2') torch.Size([16])
08/29/2023, 01:17:24# predicted of 0: tensor([ 84,  77,  62,  77, 105, 120,  77, 154, 149,  77,  77,  77,  77,  77,
         45,  77], device='cuda:2') torch.Size([16])
08/29/2023, 01:18:09# labels of Validation: tensor([ 61,  72,  58, 102,  43, 131,  54,  76, 117,  51,  45,  93,  27, 151,
         13, 118], device='cuda:2') torch.Size([16])
08/29/2023, 01:18:09# predicted of Validation: tensor([ 35,  73,  77,  77,  77,  28,  77,  77, 117,  28,  45,  28,  27,  77,
         13,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:22:27# labels of 5000: tensor([ 37, 120,  12,  10,  98, 147, 119, 129,  57,  17,  85, 111, 103, 143,
        124,  53], device='cuda:2') torch.Size([16])
08/29/2023, 01:22:27# predicted of 5000: tensor([132, 120,   8, 112,  44, 147,  24, 132,   8,  17,  85,  24, 132, 112,
        112,   6], device='cuda:2') torch.Size([16])
08/29/2023, 01:25:19# total batches: 8250
08/29/2023, 01:25:19# Epoch 2 | Train Loss: 3.2928 | Train Accuracy: 0.3295


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:25:19# labels of Validation: tensor([ 67,  80,  97,  36, 102,   3, 140, 107, 151, 113,  71, 139, 129,  72,
         41,  41], device='cuda:2') torch.Size([16])
08/29/2023, 01:25:19# predicted of Validation: tensor([ 78,  80, 101, 101, 101,   3, 140, 101, 101, 101, 101, 101, 101, 126,
        101, 101], device='cuda:2') torch.Size([16])
08/29/2023, 01:25:19# labels of 0: tensor([ 67,  80,  97,  36, 102,   3, 140, 107, 151, 113,  71, 139, 129,  72,
         41,  41], device='cuda:2') torch.Size([16])
08/29/2023, 01:25:19# predicted of 0: tensor([ 78,  80, 101, 101, 101,   3, 140, 101, 101, 101, 101, 101, 101, 126,
        101, 101], device='cuda:2') torch.Size([16])
08/29/2023, 01:26:00# labels of Validation: tensor([130,  42,  37, 137,  70,  83,  57, 165,  42,  41,  92, 117, 124,  37,
         46,  20], device='cuda:2') torch.Size([16])
08/29/2023, 01:26:00# predicted of Validation: tensor([130, 101, 101, 101,  70, 101, 101, 165, 101, 101, 101, 117, 101, 101,
        101,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:30:13# labels of 5000: tensor([ 37,   2,  86, 137,  30,  95,  31, 133, 146, 106, 157, 132, 128, 146,
        164,  93], device='cuda:2') torch.Size([16])
08/29/2023, 01:30:13# predicted of 5000: tensor([ 51,  33, 121, 155,  24,  95,  51, 155, 146, 106,  71, 132, 128, 146,
        155,   6], device='cuda:2') torch.Size([16])
08/29/2023, 01:32:57# total batches: 8250
08/29/2023, 01:32:57# Epoch 3 | Train Loss: 3.0339 | Train Accuracy: 0.3599


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:32:57# labels of Validation: tensor([102, 114,   1,  16, 126, 103, 104,  63, 142,  95, 148,  62,  75, 100,
         44,  36], device='cuda:2') torch.Size([16])
08/29/2023, 01:32:57# predicted of Validation: tensor([ 81, 132,  81,  16, 126, 132,  81,  81,  81,  95, 132,  62,  81, 132,
         81,  81], device='cuda:2') torch.Size([16])
08/29/2023, 01:32:57# labels of 0: tensor([102, 114,   1,  16, 126, 103, 104,  63, 142,  95, 148,  62,  75, 100,
         44,  36], device='cuda:2') torch.Size([16])
08/29/2023, 01:32:57# predicted of 0: tensor([ 81, 132,  81,  16, 126, 132,  81,  81,  81,  95, 132,  62,  81, 132,
         81,  81], device='cuda:2') torch.Size([16])
08/29/2023, 01:33:39# labels of Validation: tensor([107, 118,  74,  54,  93, 122, 121,  34, 150, 117, 134,  12,   6,   1,
         73, 112], device='cuda:2') torch.Size([16])
08/29/2023, 01:33:39# predicted of Validation: tensor([132, 118,  81,  81, 132, 122,  81,  81,  81, 117, 134,  81, 132,  81,
         73,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:37:35# labels of 5000: tensor([116, 163,  71,  40, 142, 101,  77, 116, 157,  95,  41,  38, 152, 110,
         23, 100], device='cuda:2') torch.Size([16])
08/29/2023, 01:37:35# predicted of 5000: tensor([ 20,  11,  49,  40,  37,  51, 155,  37,  11,  95,  49,  51, 113,  51,
         23, 132], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:08# total batches: 8250
08/29/2023, 01:40:08# Epoch 4 | Train Loss: 2.9101 | Train Accuracy: 0.3694


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:40:08# labels of Validation: tensor([  2, 132, 107, 111, 136,  45,   1, 108, 125, 165,  73,  49, 105,  45,
        126, 108], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:08# predicted of Validation: tensor([137, 161, 155, 137, 136,  45, 137, 161, 137, 165,  73, 137, 105,  45,
        126, 161], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:08# labels of 0: tensor([  2, 132, 107, 111, 136,  45,   1, 108, 125, 165,  73,  49, 105,  45,
        126, 108], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:08# predicted of 0: tensor([137, 161, 155, 137, 136,  45, 137, 161, 137, 165,  73, 137, 105,  45,
        126, 161], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:44# labels of Validation: tensor([160,   0, 115,  73, 113,  20,  30, 140,   2,  98, 161, 158,  21, 141,
        127, 118], device='cuda:2') torch.Size([16])
08/29/2023, 01:40:44# predicted of Validation: tensor([155,   0, 115,  73, 155, 137, 137, 140, 137, 137, 161, 137, 161, 155,
        127,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:44:17# labels of 5000: tensor([ 43, 158,  74,   4,  66, 115,  81,  63,  16,  48,  24,  78,  54,  85,
        104, 126], device='cuda:2') torch.Size([16])
08/29/2023, 01:44:17# predicted of 5000: tensor([ 21,  31,  20, 110, 132, 115,  20,  21,  16,  21,  31,  67,  21,  85,
         98, 126], device='cuda:2') torch.Size([16])
08/29/2023, 01:46:42# total batches: 8250
08/29/2023, 01:46:42# Epoch 5 | Train Loss: 2.8525 | Train Accuracy: 0.3721


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:46:43# labels of Validation: tensor([ 52, 159, 137,  77,  11,  84,  81, 110,   8,  74,  86, 115, 127, 112,
         25,   1], device='cuda:2') torch.Size([16])
08/29/2023, 01:46:43# predicted of Validation: tensor([ 52, 159,  48,  28,  48,  84,  48,  28,  28,  48,  48, 115, 127,  48,
         25,  48], device='cuda:2') torch.Size([16])
08/29/2023, 01:46:43# labels of 0: tensor([ 52, 159, 137,  77,  11,  84,  81, 110,   8,  74,  86, 115, 127, 112,
         25,   1], device='cuda:2') torch.Size([16])
08/29/2023, 01:46:43# predicted of 0: tensor([ 52, 159,  48,  28,  48,  84,  48,  28,  28,  48,  48, 115, 127,  48,
         25,  48], device='cuda:2') torch.Size([16])
08/29/2023, 01:47:15# labels of Validation: tensor([161, 138, 106, 119, 137,   8,  49, 150, 102,  49, 124,  41,  73,  84,
         42,  96], device='cuda:2') torch.Size([16])
08/29/2023, 01:47:15# predicted of Validation: tensor([ 28, 138, 106,  48,  48,  28,  48,  48,  48,  48,  48,  48,  73,  84,
         48,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:50:48# labels of 5000: tensor([103, 129, 128, 165, 109, 166,  75,   6,  84,  17, 154, 115,  84, 122,
        151,  58], device='cuda:2') torch.Size([16])
08/29/2023, 01:50:48# predicted of 5000: tensor([  6,   6, 128, 165,  37,  37, 148,   6,  84,  17, 154, 115,  84, 122,
         37,  34], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:10# total batches: 8250
08/29/2023, 01:53:10# Epoch 6 | Train Loss: 2.8233 | Train Accuracy: 0.3751


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:53:10# labels of Validation: tensor([156, 111,  35,  16,  68,  85,  51,   2,   5,  61, 102,  27,  13,  96,
         61,  74], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:10# predicted of Validation: tensor([156,  14,  35,  16, 108,  85, 132,  14,   5,  61,  14,  27,  13,  96,
         61,  14], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:10# labels of 0: tensor([156, 111,  35,  16,  68,  85,  51,   2,   5,  61, 102,  27,  13,  96,
         61,  74], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:10# predicted of 0: tensor([156,  14,  35,  16, 108,  85, 132,  14,   5,  61,  14,  27,  13,  96,
         61,  14], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:46# labels of Validation: tensor([ 41,  61,  11,  76,  43, 113,   5, 100,  83, 150,  84,  53,  16, 102,
         59,  92], device='cuda:2') torch.Size([16])
08/29/2023, 01:53:46# predicted of Validation: tensor([ 14,  61,  14,  14,  14, 108,   5, 108,  14,  14,  84,  14,  16,  14,
        132,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 01:57:17# labels of 5000: tensor([ 11,  26,  33,  57, 156, 113,  47,  78, 165,  89, 142,  21,  26,  77,
         86,  41], device='cuda:2') torch.Size([16])
08/29/2023, 01:57:17# predicted of 5000: tensor([112,  54,  30,  54, 156, 160,  54,  67, 165,  89, 137, 161,  54, 132,
        164, 160], device='cuda:2') torch.Size([16])
08/29/2023, 01:59:39# total batches: 8250
08/29/2023, 01:59:39# Epoch 7 | Train Loss: 2.8044 | Train Accuracy: 0.3772


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 01:59:39# labels of Validation: tensor([ 84, 122, 166, 109,  31,  87, 135, 162, 143,   5, 119, 163,  77,  14,
         89,  46], device='cuda:2') torch.Size([16])
08/29/2023, 01:59:39# predicted of Validation: tensor([ 84, 122, 161,  83,  83,  83,  83,  83,  83,   5,  83,  83, 161,  83,
         89,  83], device='cuda:2') torch.Size([16])
08/29/2023, 01:59:39# labels of 0: tensor([ 84, 122, 166, 109,  31,  87, 135, 162, 143,   5, 119, 163,  77,  14,
         89,  46], device='cuda:2') torch.Size([16])
08/29/2023, 01:59:39# predicted of 0: tensor([ 84, 122, 161,  83,  83,  83,  83,  83,  83,   5,  83,  83, 161,  83,
         89,  83], device='cuda:2') torch.Size([16])
08/29/2023, 02:00:15# labels of Validation: tensor([ 81, 159,  35,  77,  53, 102,  16, 110,  89, 143, 163, 166, 121,  87,
        125, 160], device='cuda:2') torch.Size([16])
08/29/2023, 02:00:15# predicted of Validation: tensor([ 83, 159,  35, 161,  83,  83,  16, 131,  89,  83,  83, 161,  83,  83,
         83,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:03:53# labels of 5000: tensor([ 75, 120,  25, 121,  99,  75, 131, 121,  29,  16,  53, 130,  53,  62,
         32,  27], device='cuda:2') torch.Size([16])
08/29/2023, 02:03:53# predicted of 5000: tensor([107, 120,  25, 125,  99,   1, 108, 116,  29,  16, 107, 130, 103,  62,
         32,  27], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:16# total batches: 8250
08/29/2023, 02:06:16# Epoch 8 | Train Loss: 2.7912 | Train Accuracy: 0.3782


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:06:16# labels of Validation: tensor([  8, 119,  29, 106,   4,  92,  96,  91, 100, 147,  47, 166,  47, 126,
          7,  67], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:16# predicted of Validation: tensor([  6,  49,  29, 106,  49,  49,  96, 132,   6, 147,  49, 132,  49, 126,
          7,  67], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:16# labels of 0: tensor([  8, 119,  29, 106,   4,  92,  96,  91, 100, 147,  47, 166,  47, 126,
          7,  67], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:16# predicted of 0: tensor([  6,  49,  29, 106,  49,  49,  96, 132,   6, 147,  49, 132,  49, 126,
          7,  67], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:52# labels of Validation: tensor([129,  12,  32,  96,  75,  32, 129, 107,  79, 129,  21,  38,  47,  82,
         12, 134], device='cuda:2') torch.Size([16])
08/29/2023, 02:06:52# predicted of Validation: tensor([  6,  49,  32,  96,  49,  32,   6,   6,  79,   6, 132,  49,  49,  82,
         49,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:10:20# labels of 5000: tensor([ 26, 166, 153,  48,  14,  26,  15,  57,  52,  69,  12, 137,  75,  74,
        111, 153], device='cuda:2') torch.Size([16])
08/29/2023, 02:10:20# predicted of 5000: tensor([112, 132, 150, 112,  11,  88,  15,  33,  52,  69,  10,  10, 125,  54,
        143, 112], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:31# total batches: 8250
08/29/2023, 02:12:31# Epoch 9 | Train Loss: 2.7805 | Train Accuracy: 0.3794


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:12:31# labels of Validation: tensor([ 12,  46, 153, 154, 112,  66,  20, 144, 125,  58, 102, 132,  17, 102,
        131, 164], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:31# predicted of Validation: tensor([  2,   2,   2, 154,   2,  28,   2,   2,   2,   2,   2, 132,  17,   2,
         28,   2], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:31# labels of 0: tensor([ 12,  46, 153, 154, 112,  66,  20, 144, 125,  58, 102, 132,  17, 102,
        131, 164], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:31# predicted of 0: tensor([  2,   2,   2, 154,   2,  28,   2,   2,   2,   2,   2, 132,  17,   2,
         28,   2], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:59# labels of Validation: tensor([  9,   3,  89,  29, 108,  70, 103, 100, 108,  32,  61,  15,  34, 110,
        126, 102], device='cuda:2') torch.Size([16])
08/29/2023, 02:12:59# predicted of Validation: tensor([  2,   3,  89,  29,  28,  70,  28,  28,  28,  32,  61,  15,   2,  28,
        126,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:16:15# labels of 5000: tensor([156,  68,  61,  75, 105,  56, 126,  66,  20,  87,   5,   0,  21,  35,
         43, 163], device='cuda:2') torch.Size([16])
08/29/2023, 02:16:15# predicted of 5000: tensor([156,   6,  61,  41, 105,  56, 126,   6,  71,  20,   5,   0,   6,  35,
         41, 107], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:26# total batches: 8250
08/29/2023, 02:18:26# Epoch 10 | Train Loss: 2.7718 | Train Accuracy: 0.3792


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:18:26# labels of Validation: tensor([ 76,  64, 106,  82, 118, 154, 165,  11, 161, 126,   6, 161, 117,  40,
          5, 153], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:26# predicted of Validation: tensor([ 74, 108, 106,  82, 118, 154, 165,  74, 132, 126, 132, 132, 117,  40,
          5,  74], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:26# labels of 0: tensor([ 76,  64, 106,  82, 118, 154, 165,  11, 161, 126,   6, 161, 117,  40,
          5, 153], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:26# predicted of 0: tensor([ 74, 108, 106,  82, 118, 154, 165,  74, 132, 126, 132, 132, 117,  40,
          5,  74], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:53# labels of Validation: tensor([ 13, 157,  11,  79,  21, 166, 130,   0, 128,  25,  60, 158,  24,  16,
         33,  70], device='cuda:2') torch.Size([16])
08/29/2023, 02:18:53# predicted of Validation: tensor([ 13,  74,  74,  79, 108, 108, 130,   0, 128,  25,  74,  74,  74,  16,
         74,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:22:21# labels of 5000: tensor([130, 139,  25,  74, 125,  46, 103,  68, 112, 165, 114,  69,  20, 106,
        118,  38], device='cuda:2') torch.Size([16])
08/29/2023, 02:22:21# predicted of 5000: tensor([130,  10,  25,  53,  10, 157,  66, 161,  20, 165,   6,  69,  47, 106,
        118,  10], device='cuda:2') torch.Size([16])
08/29/2023, 02:24:34# total batches: 8250
08/29/2023, 02:24:34# Epoch 11 | Train Loss: 2.7644 | Train Accuracy: 0.3808


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:24:34# labels of Validation: tensor([148, 154,   6,  96, 157,  69,  23,  88,  72, 139,  85,  38, 102,   2,
         12,  77], device='cuda:2') torch.Size([16])
08/29/2023, 02:24:34# predicted of Validation: tensor([108, 154, 161,  96, 109,  69,  23, 108,  72, 109,  85, 109, 109, 109,
        109, 108], device='cuda:2') torch.Size([16])
08/29/2023, 02:24:34# labels of 0: tensor([148, 154,   6,  96, 157,  69,  23,  88,  72, 139,  85,  38, 102,   2,
         12,  77], device='cuda:2') torch.Size([16])
08/29/2023, 02:24:34# predicted of 0: tensor([108, 154, 161,  96, 109,  69,  23, 108,  72, 109,  85, 109, 109, 109,
        109, 108], device='cuda:2') torch.Size([16])
08/29/2023, 02:25:02# labels of Validation: tensor([108,   8, 148,  68, 146, 127,   0, 148, 123, 143, 150,  64, 131,  16,
         63,  69], device='cuda:2') torch.Size([16])
08/29/2023, 02:25:02# predicted of Validation: tensor([  6, 108, 108, 108, 146, 127,   0, 108, 123, 109, 109, 108, 161,  16,
        109,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:28:33# labels of 5000: tensor([160,  10, 134, 124, 131, 130, 132, 133, 146, 151, 162,  30, 151, 105,
        166,  72], device='cuda:2') torch.Size([16])
08/29/2023, 02:28:33# predicted of 5000: tensor([129,  19, 134,  19,   6, 130, 161,  77, 146,  19,  19,  19,  19, 105,
        161,  72], device='cuda:2') torch.Size([16])
08/29/2023, 02:30:43# total batches: 8250
08/29/2023, 02:30:43# Epoch 12 | Train Loss: 2.7576 | Train Accuracy: 0.3821


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:30:43# labels of Validation: tensor([ 46, 127,  34, 129,  79,  64, 158,  42,  57,  10, 119,  75,   6, 115,
         47, 101], device='cuda:2') torch.Size([16])
08/29/2023, 02:30:43# predicted of Validation: tensor([ 55, 127,  55,  77,  79,  77,  55,  55,  55,  55,  55,  55, 132, 115,
         55, 132], device='cuda:2') torch.Size([16])
08/29/2023, 02:30:43# labels of 0: tensor([ 46, 127,  34, 129,  79,  64, 158,  42,  57,  10, 119,  75,   6, 115,
         47, 101], device='cuda:2') torch.Size([16])
08/29/2023, 02:30:43# predicted of 0: tensor([ 55, 127,  55,  77,  79,  77,  55,  55,  55,  55,  55,  55, 132, 115,
         55, 132], device='cuda:2') torch.Size([16])
08/29/2023, 02:31:10# labels of Validation: tensor([142,  80, 155,  36, 164,  48,  38, 121,  25,  28,  41, 126,  12,  93,
         88, 142], device='cuda:2') torch.Size([16])
08/29/2023, 02:31:10# predicted of Validation: tensor([ 55,  80, 132,  55,  55,  55,  55,  55,  25, 132,  55, 126,  55,  77,
         77,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:34:37# labels of 5000: tensor([150,  50,  94,   3, 147, 133, 123, 159,   2,  50,   4, 121,  86,  30,
         58,  36], device='cuda:2') torch.Size([16])
08/29/2023, 02:34:37# predicted of 5000: tensor([163,  50,  94,   3, 147,  51, 123, 159,  33,  50,  33,  54,  33,  33,
         42, 163], device='cuda:2') torch.Size([16])
08/29/2023, 02:36:43# total batches: 8250
08/29/2023, 02:36:43# Epoch 13 | Train Loss: 2.7519 | Train Accuracy: 0.3822


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:36:43# labels of Validation: tensor([ 72,  91, 124,   9,  69,  93,  56,   3, 148, 130,  73, 124,  98, 120,
        155, 113], device='cuda:2') torch.Size([16])
08/29/2023, 02:36:43# predicted of Validation: tensor([ 72,  28,  87,  87,  69,  77,  56,   3,  77, 130,  73,  87,  87, 120,
         28,  77], device='cuda:2') torch.Size([16])
08/29/2023, 02:36:43# labels of 0: tensor([ 72,  91, 124,   9,  69,  93,  56,   3, 148, 130,  73, 124,  98, 120,
        155, 113], device='cuda:2') torch.Size([16])
08/29/2023, 02:36:43# predicted of 0: tensor([ 72,  28,  87,  87,  69,  77,  56,   3,  77, 130,  73,  87,  87, 120,
         28,  77], device='cuda:2') torch.Size([16])
08/29/2023, 02:37:07# labels of Validation: tensor([139,  92,  14,  38,  48,  48,  30,  72, 104,  72, 164,  41,  51,  44,
         52,  94], device='cuda:2') torch.Size([16])
08/29/2023, 02:37:07# predicted of Validation: tensor([ 87,  87,  87,  87,  87,  87,  87,  72,  87,  72,  87,  87, 161,  87,
         52,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 02:40:25# labels of 5000: tensor([137,  92,  30,  37,  39,  90,  51, 166,  17, 151,  88, 150,  74, 106,
         67, 108], device='cuda:2') torch.Size([16])
08/29/2023, 02:40:25# predicted of 5000: tensor([  2,  18,  98, 108,  39, 131, 132, 132,  17,  53, 132, 113,  31, 106,
         78, 108], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:34# total batches: 8250
08/29/2023, 02:42:34# Epoch 14 | Train Loss: 2.7459 | Train Accuracy: 0.3831


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 02:42:34# labels of Validation: tensor([ 56,  70,  15,  31,  29, 163,  61,  75,  14, 117, 152,   3,  63,   1,
         59,  75], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:34# predicted of Validation: tensor([ 56,  70,  15,  43,  29,  43,  61,  43,  43, 117,  43,   3,  43,  43,
        132,  43], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:34# labels of 0: tensor([ 56,  70,  15,  31,  29, 163,  61,  75,  14, 117, 152,   3,  63,   1,
         59,  75], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:34# predicted of 0: tensor([ 56,  70,  15,  43,  29,  43,  61,  43,  43, 117,  43,   3,  43,  43,
        132,  43], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:58# labels of Validation: tensor([ 89, 128, 114,  68, 152,   0,  82, 139, 164, 158, 129, 132, 101,  90,
         73, 128], device='cuda:2') torch.Size([16])
08/29/2023, 02:42:58# predicted of Validation: tensor([ 89, 128, 101, 101,  43,   0,  82,  43,  43,  43, 101, 132, 132, 101,
         73,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

### Testing Part

In [None]:
# load the pretrained model
pretrained_model_path = '../checkpoint_GAT/best_model_GAT_transH_50.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for data in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):

        loss, accuracy, predicted = model_fn(data, model, criterion, device, count, which_type='test')
        labels = data[1].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')


# ======================================== handlig the output excel files ========================================
mapping_file = './new_mapping.txt'
label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        parts = line.strip().split(': ')
        label_mapping[int(parts[1])] = parts[0]
        
# 将映射后的标签应用到true和predicted标签列表
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

    
report_df.to_excel(report_path, index_label='Label')

mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"mapped_report:\n{mapped_report}")