# Test of GAT
- use DGL
- predict `graphs`
- test: 0~99
- validation: 100~199
- train: 200~999
- larger lr with scheduler
- try the sklearn report

In [1]:
import os
import dgl
import json
import torch
import torch as th
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW
from sklearn.metrics import classification_report


- Check whether a GPU is available and select the one with the most free memory

In [2]:
import subprocess
import torch

def get_free_gpu():
    """Return the index of the GPU that reports the most free memory.

    Runs ``nvidia-smi --query-gpu=memory.free`` and parses one integer per
    output line. The index returned is the position of the largest value in
    that listing (the first one on ties).

    Returns:
        int: Position of the selected GPU in the ``nvidia-smi`` listing, or
        ``0`` when ``nvidia-smi`` cannot be executed or its output cannot be
        parsed.
    """
    command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
    try:
        raw_output = subprocess.check_output(command.split())
        # One non-empty line per GPU; blank lines (e.g. the trailing one) are skipped.
        memory_free_values = [
            int(line) for line in raw_output.decode('ascii').splitlines() if line.strip()
        ]
        # max() raises ValueError on an empty list, which is handled below.
        return memory_free_values.index(max(memory_free_values))
    except (OSError, subprocess.SubprocessError, ValueError):
        # nvidia-smi missing / failing, or unexpected output: default to GPU 0.
        # Only these errors are caught so that e.g. KeyboardInterrupt still propagates.
        return 0

# Choose the compute device: CPU when CUDA is unavailable, otherwise the GPU
# picked by get_free_gpu().
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("there's no available GPU")
else:
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")

# To pin a specific GPU instead, assign e.g. torch.device("cuda:1") to `device` here.
print(device)


cuda:2


## Fix the seed

In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed=8787):
    """Seed the NumPy and PyTorch random generators and make cuDNN deterministic.

    Python's built-in ``random`` module is not seeded by this function.

    Args:
        seed: Integer seed applied to every generator touched here.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Seed the current CUDA device and then all CUDA devices, when CUDA is usable.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Force deterministic cuDNN kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

## Data Loader

In [4]:
class GraphDataset(Dataset):
    """Dataset that turns parsed graph records into DGL graphs on demand.

    Each record is a mapping that provides the keys ``edge_index``,
    ``num_nodes``, ``node_feat``, ``edge_attr`` and ``label``.
    """

    def __init__(self, data_list, device):
        """Keep a reference to the records and the device used for every sample."""
        self.device = device
        self.data_list = data_list

    def __len__(self):
        """Return the number of stored records."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Build the ``(graph, label)`` pair for record ``idx`` on ``self.device``."""
        record = self.data_list[idx]

        # edge_index[0] holds the source node ids, edge_index[1] the destination ids.
        src_ids = th.tensor(record["edge_index"][0])
        dst_ids = th.tensor(record["edge_index"][1])
        graph = dgl.graph((src_ids, dst_ids), num_nodes=record["num_nodes"])
        graph = graph.to(self.device)

        # Attach node and edge features under the 'feat' key.
        graph.ndata['feat'] = th.tensor(record["node_feat"]).to(self.device)
        graph.edata['feat'] = th.tensor(record["edge_attr"]).to(self.device)

        label = th.tensor(record["label"]).to(self.device)
        return graph, label


def collate(samples):
    """Merge a list of ``(graph, label)`` pairs into one batched graph and a label tensor.

    Args:
        samples: List of ``(graph, label)`` pairs as produced by the dataset.

    Returns:
        Tuple ``(batched_graph, labels)`` where ``labels`` is a new tensor
        built from the individual labels.
    """
    # Transpose the list of pairs into one list of graphs and one list of labels.
    graph_list, label_list = (list(column) for column in zip(*samples))
    merged_graph = dgl.batch(graph_list)
    return merged_graph, torch.tensor(label_list)


In [5]:
# Names of the dataset splits to load; each split is read from its own .jsonl file.
datasets = ['train', 'valid', 'test']
# datasets = ['test']
# Maps split name -> GraphDataset wrapping the records parsed from that split's file.
dataset_data = {}

for dataset_name in tqdm(datasets):
#     file_path = f"../../data_processing/dgl/data/test_graph/repeated_{dataset_name}.jsonl"
    file_path = f"../../data_processing/dgl/data_new/training_data/transH_150/{dataset_name}.jsonl"
    
    print(file_path)
    # Every line of the file is parsed as one JSON document; all records are kept in memory.
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    # Graph construction is deferred to GraphDataset.__getitem__; only the raw records are stored here.
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_150/train.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_150/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/transH_150/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [6]:
def create_dataloaders(batch_size, shuffle=True):
    """Create one DataLoader per split stored in ``dataset_data``.

    Args:
        batch_size: Number of graphs per batch for every split.
        shuffle: Whether to shuffle the splits other than ``"test"``; the
            test split is never shuffled.

    Returns:
        dict: Split name -> DataLoader using ``collate`` as its collate function.
    """
    return {
        split_name: DataLoader(
            split_dataset,
            batch_size=batch_size,
            # Keep the test split in file order regardless of `shuffle`.
            shuffle=False if split_name == "test" else shuffle,
            collate_fn=collate,
        )
        for split_name, split_dataset in dataset_data.items()
    }

# dataloaders = create_dataloaders(4)
# Build the train/valid/test loaders with 16 graphs per batch.
dataloaders = create_dataloaders(16)


# Sanity check: the combined number of samples over the three splits must be a multiple of 165.
if (len(dataloaders['test'].dataset) + len(dataloaders['valid'].dataset) + len(dataloaders['train'].dataset)) % 165 != 0: print("Error data!!")
else: print("OK!")


OK!


- Turn the print message to a log file

In [7]:
# print(dataloaders['test'][5])
# Show one (graph, label) pair built by GraphDataset.__getitem__ for a training record.
sample = dataset_data['train'][5000]
print(sample)

# Print the size of each split, followed by the combined size.
print(len(dataloaders['test'].dataset))
print(len(dataloaders['valid'].dataset))
print(len(dataloaders['train'].dataset))
print(len(dataloaders['test'].dataset) + len(dataloaders['valid'].dataset) + len(dataloaders['train'].dataset))

(Graph(num_nodes=26, num_edges=67,
      ndata_schemes={'feat': Scheme(shape=(150,), dtype=torch.float32)}
      edata_schemes={'feat': Scheme(shape=(150,), dtype=torch.float32)}), tensor(7, device='cuda:2'))
16500
16500
132000
165000


In [9]:
import datetime

# Timestamp taken once when this cell runs; it names the log file used by default.
now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

# NOTE(review): the ':' in the file name is not a valid path character on Windows.
log_file_path = f"../log_message/{formatted_time}_GAT_transH_150.log"

def add_log_msg(msg, log_file_path=log_file_path):
    """Append a timestamped message to the log file and echo it to stdout.

    Args:
        msg: Text (or any object usable in an f-string) to record.
        log_file_path: File to append to. The default is the path computed
            above, bound when this function is defined.
    """
    # Format the timestamp once so the file entry and the console line always match.
    timestamp = datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")
    line = f'{timestamp}# {msg}'

    # Create the log directory on first use instead of failing with FileNotFoundError.
    log_dir = os.path.dirname(log_file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    with open(log_file_path, 'a') as f:
        f.write(f'{line}\n')
    print(line)

print(log_file_path)

../log_message/0829_10:29_GAT_transH_150.log


### Model

In [10]:
class GAT(nn.Module):
    """Two-layer graph attention network with mean-node readout per graph.

    Args:
        in_dim: Size of each input node feature vector.
        hidden_dim: Per-head output size of the first GAT layer.
        out_dim: Per-head output size of the second GAT layer.
        num_heads: Number of attention heads used by both layers.
        dropout_prob: Dropout probability applied between the two layers.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, num_heads, dropout_prob=0.25):
        super().__init__()

        # Nodes with zero in-degree are accepted instead of raising an error.
        self.layer1 = GATConv(in_dim, hidden_dim, num_heads=num_heads, activation=F.relu, allow_zero_in_degree=True)
        self.layer2 = GATConv(hidden_dim * num_heads, out_dim, num_heads=num_heads, allow_zero_in_degree=True)

        # NOTE(review): these BatchNorm layers are registered but never applied in
        # forward(). They are kept so the state_dict keys stay compatible with
        # weights saved from this class.
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim * num_heads)
        self.batchnorm2 = nn.BatchNorm1d(out_dim)

        # Dropout for regularization between the two GAT layers.
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, g, h):
        """Run both GAT layers and average the resulting node features per graph.

        Args:
            g: DGL graph (possibly batched) to run message passing on.
            h: Input node features for ``g``.

        Returns:
            The per-graph mean of the second layer's node outputs. The head
            axis of that layer is only removed when it has size 1 (see
            ``squeeze(1)`` below), so callers must handle a remaining head axis.
        """
        h = self.layer1(g, h)
        # Flatten the per-head outputs into one feature vector per node.
        h = h.view(h.shape[0], -1)
        # layer1 already applies ReLU through its `activation` argument, so a
        # second ReLU here would be a no-op and is omitted.
        h = self.dropout(h)
        # squeeze(1) only drops axis 1 when its size is 1 (i.e. num_heads == 1).
        h = self.layer2(g, h).squeeze(1)

        # Use a local scope so the temporary 'h_out' feature does not remain on
        # the caller's graph after the readout.
        with g.local_scope():
            g.ndata['h_out'] = h
            # Mean pooling over the nodes of each graph.
            return dgl.mean_nodes(g, feat='h_out')

    

- Model Forward  

In [11]:
def model_fn(data, model, criterion, device, count=1, which_type='train'):
    """Forward one batch through the model and compute loss, accuracy and predictions.

    Args:
        data: Pair ``(batched_graph, labels)`` as produced by the data loader.
        model: Callable invoked as ``model(batched_graph, node_features)``.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        device: Device the graph and labels are moved to before the forward pass.
        count: Batch counter used only to decide when to log labels/predictions.
        which_type: ``'train'``, ``'validation'`` or ``'test'``; selects the
            wording and frequency of the periodic log messages.

    Returns:
        Tuple ``(loss, accuracy, preds)``: the criterion output, the mean of
        ``preds == labels`` as a float tensor, and the argmax class ids.
    """
    batched_g, labels = data
    batched_g = batched_g.to(device)

    labels = labels.to(device)
    logits = model(batched_g, batched_g.ndata['feat'].float()) # for GAT
    # Average over axis 1 (the attention-head axis for GAT) only when it exists.
    # A model that already returns (batch, classes) must not have its class
    # axis averaged away.
    if logits.dim() > 2:
        logits = logits.mean(dim=1)

    loss = criterion(logits, labels)

    # Class id with the highest score for each graph.
    preds = logits.argmax(1)

    # Fraction of correct predictions in this batch.
    accuracy = torch.mean((preds == labels).float())

    # Periodically log a batch of labels/predictions for validation and test.
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")

    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")

    # Independent of which_type: log every 5000th count (including count == 0).
    if count % 5000 == 0:
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")

    return loss, accuracy, preds

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [12]:
# Seed the random generators before building the model so its initial weights are reproducible.
seed = 8787
same_seeds(seed)

model = GAT(in_dim=150, hidden_dim=16, out_dim=168, num_heads=8)
# Persist the freshly initialised weights so later runs can start from the same state.
torch.save(model.state_dict(), 'model1_initial/initial_weight.pth')

In [13]:
# Display the first layer's projection weights of the model whose state was just saved.
model.layer1.fc.weight

Parameter containing:
tensor([[-0.1088, -0.0760,  0.1288,  ..., -0.1192, -0.0175,  0.0021],
        [ 0.0325,  0.0195, -0.1396,  ...,  0.0864, -0.0855,  0.1670],
        [-0.0041, -0.0149, -0.0657,  ..., -0.0180, -0.0733,  0.1429],
        ...,
        [ 0.2402, -0.1547, -0.0973,  ...,  0.0845, -0.2335, -0.0765],
        [ 0.1358,  0.1819, -0.1881,  ..., -0.0790, -0.0578, -0.2301],
        [ 0.0297, -0.0572, -0.1424,  ...,  0.2224, -0.1195,  0.0005]],
       requires_grad=True)

- Check that the model really loads the saved state_dict

In [14]:
# Build a new model, load the saved initial weights into it, and display the same
# weight tensor so it can be compared with the values shown before saving.
model = GAT(in_dim=150, hidden_dim=16, out_dim=168, num_heads=8)
model.load_state_dict(torch.load('model1_initial/initial_weight.pth'))
model.layer1.fc.weight

Parameter containing:
tensor([[-0.1088, -0.0760,  0.1288,  ..., -0.1192, -0.0175,  0.0021],
        [ 0.0325,  0.0195, -0.1396,  ...,  0.0864, -0.0855,  0.1670],
        [-0.0041, -0.0149, -0.0657,  ..., -0.0180, -0.0733,  0.1429],
        ...,
        [ 0.2402, -0.1547, -0.0973,  ...,  0.0845, -0.2335, -0.0765],
        [ 0.1358,  0.1819, -0.1881,  ..., -0.0790, -0.0578, -0.2301],
        [ 0.0297, -0.0572, -0.1424,  ...,  0.2224, -0.1195,  0.0005]],
       requires_grad=True)

### test of valid and test part is ``graph``

- Batch size = 16
- use large lr and scheduler

In [None]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

# Re-seed so this training run starts from the same random state every time.
seed = 8787
same_seeds(seed)

model = GAT(in_dim=150, hidden_dim=16, out_dim=168, num_heads=8)
# in_dim is the dimension of the node features (150 here)
# out_dim is the number of categories -> 168 for our task
model.load_state_dict(torch.load('model1_initial/initial_weight.pth'))
# Path where the weights with the lowest validation loss are stored.
best_model_path = "../checkpoint_GAT/best_model_GAT_transH_150.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max controls the period of the learning-rate change
# NOTE(review): this scheduler is created but never stepped (the scheduler.step()
# call below is commented out), so the learning rate stays at its initial value.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)


criterion = nn.CrossEntropyLoss()
# Used as the maximum number of epochs by the loop below.
total_steps = 30

# State for keeping the best model and for early stopping
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        # Incremented before the forward pass, so model_fn receives a 1-based batch count.
        num_batches += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    # Per-epoch metrics are unweighted means over the batches.
    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for data in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            # NOTE(review): num_batches is passed before being incremented here (0-based),
            # unlike the training loop above (1-based), so model_fn's modulo-based logging
            # fires on the very first validation batch.
            loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    # Keep the weights whenever the validation loss improves; otherwise count towards early stopping.
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        # The message below is only logged when a previous checkpoint file existed and was removed.
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

        
#         print(best_model_path)

    else:
        waiting += 1
        # Stop once `patience` consecutive epochs have passed without improvement.
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 10:34:13# labels of 5000: tensor([ 45, 142, 133, 139,  37, 134, 113,  50,  81,  57,  70,  87, 140,  85,
        150, 156], device='cuda:2') torch.Size([16])
08/29/2023, 10:34:13# predicted of 5000: tensor([146, 132, 132,  21, 132, 134, 132, 136, 132, 132, 154, 132, 154, 136,
        132, 156], device='cuda:2') torch.Size([16])
08/29/2023, 10:37:29# total batches: 8250
08/29/2023, 10:37:29# Epoch 0 | Train Loss: 4.6445 | Train Accuracy: 0.0469


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 10:37:30# labels of Validation: tensor([ 95,  44,  55,  99,  97,  79, 143, 145, 113,  28, 153,  96, 137,  92,
         64, 142], device='cuda:2') torch.Size([16])
08/29/2023, 10:37:30# predicted of Validation: tensor([ 62, 131, 131,  99, 131, 128, 131,  80, 132, 132, 131, 128, 131, 131,
        132, 131], device='cuda:2') torch.Size([16])
08/29/2023, 10:37:30# labels of 0: tensor([ 95,  44,  55,  99,  97,  79, 143, 145, 113,  28, 153,  96, 137,  92,
         64, 142], device='cuda:2') torch.Size([16])
08/29/2023, 10:37:30# predicted of 0: tensor([ 62, 131, 131,  99, 131, 128, 131,  80, 132, 132, 131, 128, 131, 131,
        132, 131], device='cuda:2') torch.Size([16])
08/29/2023, 10:38:21# labels of Validation: tensor([ 73, 142, 151,  87,   2, 103,  69, 152, 158, 108,  18, 133, 135, 109,
         68,  72], device='cuda:2') torch.Size([16])
08/29/2023, 10:38:21# predicted of Validation: tensor([127, 131, 131, 131, 131, 132, 127, 131, 131, 132, 131, 132, 131, 131,
        132,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 10:43:36# labels of 5000: tensor([ 44, 104, 141, 117, 137,  57, 161,  47,  84,  34,  55, 100,  45,  42,
        116,   6], device='cuda:2') torch.Size([16])
08/29/2023, 10:43:36# predicted of 5000: tensor([ 37, 132, 161, 122,  37,  66, 161, 132,  32,  37, 132, 161, 105, 161,
         51, 132], device='cuda:2') torch.Size([16])
08/29/2023, 10:46:59# total batches: 8250
08/29/2023, 10:46:59# Epoch 1 | Train Loss: 3.9339 | Train Accuracy: 0.1569


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 10:46:59# labels of Validation: tensor([107,  72, 164, 128,   2, 133,  96, 139,  63, 132,  71, 123, 127,  19,
         13, 145], device='cuda:2') torch.Size([16])
08/29/2023, 10:46:59# predicted of Validation: tensor([108, 122, 107, 128, 107, 108,  96, 107, 107, 132, 107, 123, 127, 107,
         13, 146], device='cuda:2') torch.Size([16])
08/29/2023, 10:46:59# labels of 0: tensor([107,  72, 164, 128,   2, 133,  96, 139,  63, 132,  71, 123, 127,  19,
         13, 145], device='cuda:2') torch.Size([16])
08/29/2023, 10:46:59# predicted of 0: tensor([108, 122, 107, 128, 107, 108,  96, 107, 107, 132, 107, 123, 127, 107,
         13, 146], device='cuda:2') torch.Size([16])
08/29/2023, 10:47:54# labels of Validation: tensor([ 94,  97,  80, 135,  30, 156, 126,   0, 163, 145,  73, 153, 141,  40,
          7,  98], device='cuda:2') torch.Size([16])
08/29/2023, 10:47:54# predicted of Validation: tensor([ 40, 107, 146, 107, 107, 156,  95,  32, 107,  32, 146, 107, 108, 146,
          7,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 10:53:13# labels of 5000: tensor([ 20, 136,   3,  86,  17, 156,  42, 123, 141, 155,  31, 160,  90,  97,
         80,  13], device='cuda:2') torch.Size([16])
08/29/2023, 10:53:13# predicted of 5000: tensor([ 63, 147,   3, 143,  17, 156,  37, 123,  51, 132,  37, 132,  37, 144,
         99,  56], device='cuda:2') torch.Size([16])
08/29/2023, 10:56:54# total batches: 8250
08/29/2023, 10:56:54# Epoch 2 | Train Loss: 3.6141 | Train Accuracy: 0.2118


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 10:56:54# labels of Validation: tensor([ 54, 146,  53,  57,   3,  90,  34,  82,  64,  45,  27, 114,  55, 145,
          9, 157], device='cuda:2') torch.Size([16])
08/29/2023, 10:56:54# predicted of Validation: tensor([103, 146, 103, 103,   3, 132, 103,  82, 132,  67,  62, 132, 103, 145,
        103, 103], device='cuda:2') torch.Size([16])
08/29/2023, 10:56:54# labels of 0: tensor([ 54, 146,  53,  57,   3,  90,  34,  82,  64,  45,  27, 114,  55, 145,
          9, 157], device='cuda:2') torch.Size([16])
08/29/2023, 10:56:54# predicted of 0: tensor([103, 146, 103, 103,   3, 132, 103,  82, 132,  67,  62, 132, 103, 145,
        103, 103], device='cuda:2') torch.Size([16])
08/29/2023, 10:57:56# labels of Validation: tensor([103,   7,  37,  19,  46,  85, 106,   8,  56, 136, 156,  61, 107, 109,
         52, 162], device='cuda:2') torch.Size([16])
08/29/2023, 10:57:56# predicted of Validation: tensor([132,   7, 132, 103, 103,  85, 106, 132,  56, 136, 156,  61, 132, 103,
         52,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 11:03:16# labels of 5000: tensor([154, 141,  15, 112, 141,   8,  11,  32,  89, 107,  70,   6, 160, 153,
        149,  48], device='cuda:2') torch.Size([16])
08/29/2023, 11:03:16# predicted of 5000: tensor([154, 132,  15, 158, 132, 132,  71, 159,  56, 158,  70, 132, 132, 108,
        149, 132], device='cuda:2') torch.Size([16])
08/29/2023, 11:07:18# total batches: 8250
08/29/2023, 11:07:18# Epoch 3 | Train Loss: 3.4417 | Train Accuracy: 0.2483


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 11:07:18# labels of Validation: tensor([ 35, 117, 131, 113,  79,  58,  93, 136, 128,  63,  21, 143, 151,  26,
        156, 160], device='cuda:2') torch.Size([16])
08/29/2023, 11:07:18# predicted of Validation: tensor([ 35, 117, 161, 161,  94, 129, 161, 136, 128, 129, 161, 129, 129, 129,
        156, 161], device='cuda:2') torch.Size([16])
08/29/2023, 11:07:18# labels of 0: tensor([ 35, 117, 131, 113,  79,  58,  93, 136, 128,  63,  21, 143, 151,  26,
        156, 160], device='cuda:2') torch.Size([16])
08/29/2023, 11:07:18# predicted of 0: tensor([ 35, 117, 161, 161,  94, 129, 161, 136, 128, 129, 161, 129, 129, 129,
        156, 161], device='cuda:2') torch.Size([16])
08/29/2023, 11:08:33# labels of Validation: tensor([162, 128,  78, 156,  38,  10,  45, 135,  43,  63, 152, 147, 126,  60,
        162,  95], device='cuda:2') torch.Size([16])
08/29/2023, 11:08:33# predicted of Validation: tensor([129, 128,  67, 156, 129, 129,   5, 129, 129, 129, 129, 147, 117, 129,
        129,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 11:15:54# labels of 5000: tensor([ 55,  93,  18, 127, 139, 119, 156, 123, 130,  72, 111, 115, 166,  57,
        121, 126], device='cuda:2') torch.Size([16])
08/29/2023, 11:15:54# predicted of 5000: tensor([ 31,  31, 132, 127, 166, 166, 156, 123, 130,  78, 114, 145, 132, 114,
        157,  56], device='cuda:2') torch.Size([16])
08/29/2023, 11:20:34# total batches: 8250
08/29/2023, 11:20:34# Epoch 4 | Train Loss: 3.3231 | Train Accuracy: 0.2720


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 11:20:34# labels of Validation: tensor([ 80,  89,  55, 147,  40,  57,  17,  18,  71, 131,  49, 105, 133,  15,
        149,  85], device='cuda:2') torch.Size([16])
08/29/2023, 11:20:34# predicted of Validation: tensor([ 80, 115, 158, 147, 115, 158,  17, 158, 158,  28, 158, 105,   6,  94,
        149,  85], device='cuda:2') torch.Size([16])
08/29/2023, 11:20:34# labels of 0: tensor([ 80,  89,  55, 147,  40,  57,  17,  18,  71, 131,  49, 105, 133,  15,
        149,  85], device='cuda:2') torch.Size([16])
08/29/2023, 11:20:34# predicted of 0: tensor([ 80, 115, 158, 147, 115, 158,  17, 158, 158,  28, 158, 105,   6,  94,
        149,  85], device='cuda:2') torch.Size([16])
08/29/2023, 11:21:50# labels of Validation: tensor([ 92,  25,  34,  10, 115,  66,  27,  88,  41,  29,  36, 114, 115,  97,
         64,  58], device='cuda:2') torch.Size([16])
08/29/2023, 11:21:50# predicted of Validation: tensor([158,  25, 158, 158, 115,   6,  27,   6, 158,  29, 158,   6, 115, 158,
          6,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 11:29:06# labels of 5000: tensor([159,  74, 137,  37,  98,  17,  37,  14,  86, 136,  19,   5,  56, 112,
         31,  67], device='cuda:2') torch.Size([16])
08/29/2023, 11:29:06# predicted of 5000: tensor([159,  11,  64,  11,  74,  17,   6,  55, 101, 147, 101,   5,  56,  98,
         11, 130], device='cuda:2') torch.Size([16])
08/29/2023, 11:33:43# total batches: 8250
08/29/2023, 11:33:43# Epoch 5 | Train Loss: 3.2360 | Train Accuracy: 0.2870


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 11:33:43# labels of Validation: tensor([ 90, 122,  47, 132,  42,  40,  27, 155,   5,  67,  60,  85, 152,   9,
        154, 132], device='cuda:2') torch.Size([16])
08/29/2023, 11:33:43# predicted of Validation: tensor([ 51, 122,  26, 161,  26,  62,  40, 161,  50,  78,  26,  85,  26,  26,
        154, 161], device='cuda:2') torch.Size([16])
08/29/2023, 11:33:43# labels of 0: tensor([ 90, 122,  47, 132,  42,  40,  27, 155,   5,  67,  60,  85, 152,   9,
        154, 132], device='cuda:2') torch.Size([16])
08/29/2023, 11:33:43# predicted of 0: tensor([ 51, 122,  26, 161,  26,  62,  40, 161,  50,  78,  26,  85,  26,  26,
        154, 161], device='cuda:2') torch.Size([16])
08/29/2023, 11:35:04# labels of Validation: tensor([127,  73,  84,  74,  64,  40, 124, 127,  66, 121,  47, 147,  91, 149,
        118, 120], device='cuda:2') torch.Size([16])
08/29/2023, 11:35:04# predicted of Validation: tensor([127,  40,  84,  26,  51,  40,  26, 127, 161,  26,  26, 147, 161, 149,
        118,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 11:42:08# labels of 5000: tensor([149,  97, 131,  62, 160,  27, 101,  87,  28,  97,  61, 109,  94,  19,
         14, 124], device='cuda:2') torch.Size([16])
08/29/2023, 11:42:08# predicted of 5000: tensor([149,  10, 132,  62, 132,  72,  68,  68, 132,  10,   5,  68,  39,  10,
         68,  10], device='cuda:2') torch.Size([16])
08/29/2023, 11:46:43# total batches: 8250
08/29/2023, 11:46:43# Epoch 6 | Train Loss: 3.1709 | Train Accuracy: 0.2992


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 11:46:44# labels of Validation: tensor([ 90,   5,  67,  93, 132, 106,  64,  37,  86,  49,  91, 150,  62,  32,
         90, 137], device='cuda:2') torch.Size([16])
08/29/2023, 11:46:44# predicted of Validation: tensor([ 66,   5,  67,  66, 161, 106,  66,  66,  20,  20,  66,  20,  62,  32,
         66,  20], device='cuda:2') torch.Size([16])
08/29/2023, 11:46:44# labels of 0: tensor([ 90,   5,  67,  93, 132, 106,  64,  37,  86,  49,  91, 150,  62,  32,
         90, 137], device='cuda:2') torch.Size([16])
08/29/2023, 11:46:44# predicted of 0: tensor([ 66,   5,  67,  66, 161, 106,  66,  66,  20,  20,  66,  20,  62,  32,
         66,  20], device='cuda:2') torch.Size([16])
08/29/2023, 11:48:04# labels of Validation: tensor([111, 145,  37, 146,  60,  77, 166, 116,  39, 126,  39,  37,  93,  69,
        107,  55], device='cuda:2') torch.Size([16])
08/29/2023, 11:48:04# predicted of Validation: tensor([ 20, 145,  66, 146,  20,  66,  66,  20, 115, 126,   0,  66,  66,  69,
         66,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 11:56:32# labels of 5000: tensor([  9,  74, 129,  40,  14, 102, 111, 129, 158, 137, 134, 160, 129,  98,
        102,  96], device='cuda:2') torch.Size([16])
08/29/2023, 11:56:32# predicted of 5000: tensor([  4,   4,   4,  40,   4,   4,  21,  21,   4,   4, 134,  21, 161,   4,
          4, 146], device='cuda:2') torch.Size([16])
08/29/2023, 12:01:50# total batches: 8250
08/29/2023, 12:01:50# Epoch 7 | Train Loss: 3.1191 | Train Accuracy: 0.3088


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 12:01:51# labels of Validation: tensor([ 40, 111,  23,  77,  37,   6,  15, 163, 126, 124, 102, 149,  53,  68,
        135, 128], device='cuda:2') torch.Size([16])
08/29/2023, 12:01:51# predicted of Validation: tensor([ 40, 163,  23, 132,  66, 132,  39, 163, 126, 163, 163, 149, 163,  66,
        163, 128], device='cuda:2') torch.Size([16])
08/29/2023, 12:01:51# labels of 0: tensor([ 40, 111,  23,  77,  37,   6,  15, 163, 126, 124, 102, 149,  53,  68,
        135, 128], device='cuda:2') torch.Size([16])
08/29/2023, 12:01:51# predicted of 0: tensor([ 40, 163,  23, 132,  66, 132,  39, 163, 126, 163, 163, 149, 163,  66,
        163, 128], device='cuda:2') torch.Size([16])
08/29/2023, 12:03:16# labels of Validation: tensor([ 82, 154,  82,  75,  81, 139, 142, 124, 103,  48,  49,  31,  94,  69,
         67, 162], device='cuda:2') torch.Size([16])
08/29/2023, 12:03:16# predicted of Validation: tensor([ 82, 154,  82, 163, 163, 163, 163, 163,  66, 163, 163, 163,  79,  69,
         78,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 12:10:39# labels of 5000: tensor([145, 155,  58,  28,  31,  81, 114,  19, 147, 119, 110, 106,  29,   1,
         69,  80], device='cuda:2') torch.Size([16])
08/29/2023, 12:10:39# predicted of 5000: tensor([145, 161, 104, 132, 104, 104, 161,  91, 147,  33, 132, 106,  29,  24,
         69,  80], device='cuda:2') torch.Size([16])
08/29/2023, 12:15:23# total batches: 8250
08/29/2023, 12:15:23# Epoch 8 | Train Loss: 3.0766 | Train Accuracy: 0.3167


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 12:15:23# labels of Validation: tensor([  3,  88, 156,  62,  94,  94,  94, 142, 118,  16,  37, 119,  79,  94,
         96, 100], device='cuda:2') torch.Size([16])
08/29/2023, 12:15:23# predicted of Validation: tensor([  3, 108, 156,  62,  95,  78,   5, 152, 118,  16, 108, 152,  72,  89,
         96, 108], device='cuda:2') torch.Size([16])
08/29/2023, 12:15:23# labels of 0: tensor([  3,  88, 156,  62,  94,  94,  94, 142, 118,  16,  37, 119,  79,  94,
         96, 100], device='cuda:2') torch.Size([16])
08/29/2023, 12:15:23# predicted of 0: tensor([  3, 108, 156,  62,  95,  78,   5, 152, 118,  16, 108, 152,  72,  89,
         96, 108], device='cuda:2') torch.Size([16])
08/29/2023, 12:16:43# labels of Validation: tensor([158,  61, 119,  11, 140,  81, 100, 161, 164, 143, 103, 109,  60,  90,
         95,  16], device='cuda:2') torch.Size([16])
08/29/2023, 12:16:43# predicted of Validation: tensor([152,  61, 152, 152, 140, 152, 108, 132, 152, 152, 108, 152, 152, 108,
         95,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 12:23:53# labels of 5000: tensor([126,  31, 118,  69,  17,  74,  83,  26,  50,  49, 143, 104, 102,   6,
         74,  18], device='cuda:2') torch.Size([16])
08/29/2023, 12:23:53# predicted of 5000: tensor([126, 141, 118,  69,  17,  43, 141, 113,  39,  11,  60,  60,   1, 132,
         60, 141], device='cuda:2') torch.Size([16])
08/29/2023, 12:28:35# total batches: 8250
08/29/2023, 12:28:35# Epoch 9 | Train Loss: 3.0433 | Train Accuracy: 0.3234


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 12:28:35# labels of Validation: tensor([  3,   1,  30,  79,  66, 151,   4,  71,  50, 127,  11, 129, 102,  38,
          7,  89], device='cuda:2') torch.Size([16])
08/29/2023, 12:28:35# predicted of Validation: tensor([  3,  36,  36,  79, 132,  36,  36,  36, 126, 127,  36, 108,  36,  36,
          7,  89], device='cuda:2') torch.Size([16])
08/29/2023, 12:28:35# labels of 0: tensor([  3,   1,  30,  79,  66, 151,   4,  71,  50, 127,  11, 129, 102,  38,
          7,  89], device='cuda:2') torch.Size([16])
08/29/2023, 12:28:35# predicted of 0: tensor([  3,  36,  36,  79, 132,  36,  36,  36, 126, 127,  36, 108,  36,  36,
          7,  89], device='cuda:2') torch.Size([16])
08/29/2023, 12:29:54# labels of Validation: tensor([126,  71,  74, 133, 115,  27,  93, 144,  35,  28, 100, 151,  94, 106,
         99, 166], device='cuda:2') torch.Size([16])
08/29/2023, 12:29:54# predicted of Validation: tensor([126,  36,  36, 108, 115,  27, 108,  36,  35, 132, 108,  36,   5, 106,
         99,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 12:36:57# labels of 5000: tensor([161,  98,  23, 148,  28,  73,  18, 131,  63, 140,  72, 116,  49, 101,
        112,   1], device='cuda:2') torch.Size([16])
08/29/2023, 12:36:57# predicted of 5000: tensor([132, 148,  23, 132, 132,  73,  74, 132,  74, 140,  79,  74,  74, 132,
         63,  63], device='cuda:2') torch.Size([16])
08/29/2023, 12:41:32# total batches: 8250
08/29/2023, 12:41:32# Epoch 10 | Train Loss: 3.0164 | Train Accuracy: 0.3282


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 12:41:32# labels of Validation: tensor([145,   7,  19,  54, 152, 114, 117,  31, 106, 135, 151,  69, 103,  28,
        138, 152], device='cuda:2') torch.Size([16])
08/29/2023, 12:41:32# predicted of Validation: tensor([145,   7,  92,  92,  92,   6, 117,  92, 106,  92,  92,  69,   6, 132,
        138,  92], device='cuda:2') torch.Size([16])
08/29/2023, 12:41:32# labels of 0: tensor([145,   7,  19,  54, 152, 114, 117,  31, 106, 135, 151,  69, 103,  28,
        138, 152], device='cuda:2') torch.Size([16])
08/29/2023, 12:41:32# predicted of 0: tensor([145,   7,  92,  92,  92,   6, 117,  92, 106,  92,  92,  69,   6, 132,
        138,  92], device='cuda:2') torch.Size([16])
08/29/2023, 12:42:49# labels of Validation: tensor([158, 156, 114,   0, 166,  92, 153,  89, 162, 115,  38,  35,  95, 150,
         12, 139], device='cuda:2') torch.Size([16])
08/29/2023, 12:42:49# predicted of Validation: tensor([ 92, 156,   6,   0, 132,  92,  92,  89,  92, 115,  92,  35,  73,  92,
         92,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 12:49:48# labels of 5000: tensor([111,  17, 133, 160, 140,  29,  84, 103,  60, 156,  29, 153, 121,  84,
         20, 142], device='cuda:2') torch.Size([16])
08/29/2023, 12:49:48# predicted of 5000: tensor([ 97,  17, 131,  97, 140,  29,  84, 141,  97, 156,  29,  10,  97,   3,
        155,  18], device='cuda:2') torch.Size([16])
08/29/2023, 12:54:16# total batches: 8250
08/29/2023, 12:54:16# Epoch 11 | Train Loss: 2.9932 | Train Accuracy: 0.3322


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 12:54:16# labels of Validation: tensor([ 54, 147,  96,  73, 139, 112, 109, 100,   8, 137,  29, 137,  49,  94,
         74, 133], device='cuda:2') torch.Size([16])
08/29/2023, 12:54:16# predicted of Validation: tensor([137, 147,  96,  73, 137, 137, 137,   6,   6, 137,  29, 137, 137,  94,
        137,   6], device='cuda:2') torch.Size([16])
08/29/2023, 12:54:16# labels of 0: tensor([ 54, 147,  96,  73, 139, 112, 109, 100,   8, 137,  29, 137,  49,  94,
         74, 133], device='cuda:2') torch.Size([16])
08/29/2023, 12:54:16# predicted of 0: tensor([137, 147,  96,  73, 137, 137, 137,   6,   6, 137,  29, 137, 137,  94,
        137,   6], device='cuda:2') torch.Size([16])
08/29/2023, 12:55:31# labels of Validation: tensor([165, 101, 149,  68,  52,  32, 138, 118,  19,  80,   2,  93, 102,  84,
         19,  59], device='cuda:2') torch.Size([16])
08/29/2023, 12:55:31# predicted of Validation: tensor([165,   6, 149,   6,  52,  32, 138, 118, 137,  80, 137,   6, 137,  84,
        137,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 13:02:21# labels of 5000: tensor([110, 143, 130, 138,  26,  17,  17,  97, 148,  87,  95,  54,  77, 164,
         90,  81], device='cuda:2') torch.Size([16])
08/29/2023, 13:02:21# predicted of 5000: tensor([ 21, 143, 130, 138,   2,  17,  17,  74, 155,  74, 117,  58, 155, 121,
        155, 121], device='cuda:2') torch.Size([16])
08/29/2023, 13:06:53# total batches: 8250
08/29/2023, 13:06:53# Epoch 12 | Train Loss: 2.9731 | Train Accuracy: 0.3348


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 13:06:53# labels of Validation: tensor([ 80,  11,  75,  31,  99,  83, 116,  16,  76,  38,  72, 108,  61,  92,
        132, 107], device='cuda:2') torch.Size([16])
08/29/2023, 13:06:53# predicted of Validation: tensor([ 80, 109, 109, 109,  99, 109, 109,  16, 109, 109,  15, 161,  61, 109,
        161, 131], device='cuda:2') torch.Size([16])
08/29/2023, 13:06:53# labels of 0: tensor([ 80,  11,  75,  31,  99,  83, 116,  16,  76,  38,  72, 108,  61,  92,
        132, 107], device='cuda:2') torch.Size([16])
08/29/2023, 13:06:53# predicted of 0: tensor([ 80, 109, 109, 109,  99, 109, 109,  16, 109, 109,  15, 161,  61, 109,
        161, 131], device='cuda:2') torch.Size([16])
08/29/2023, 13:08:08# labels of Validation: tensor([107,   1,  47,  83, 127, 129, 101,  82, 118,  86, 110,  97,  12, 121,
        142, 139], device='cuda:2') torch.Size([16])
08/29/2023, 13:08:08# predicted of Validation: tensor([131, 109, 109, 109, 127, 131, 131,  82, 118, 109, 131, 109, 109, 109,
        109,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 13:15:09# labels of 5000: tensor([129,  94, 162,   4, 142,  34,  75,  25,  15, 127,   5, 125,  15,  38,
        155,  20], device='cuda:2') torch.Size([16])
08/29/2023, 13:15:09# predicted of 5000: tensor([108, 145, 109,  55,  34, 108, 107,  25,  15, 127,   5, 135,  15, 164,
        161, 164], device='cuda:2') torch.Size([16])
08/29/2023, 13:19:43# total batches: 8250
08/29/2023, 13:19:43# Epoch 13 | Train Loss: 2.9557 | Train Accuracy: 0.3389


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 13:19:43# labels of Validation: tensor([159,   8,  73,  36,  18,  28,  26,  82,  24, 136, 126, 146, 104, 142,
        112,  82], device='cuda:2') torch.Size([16])
08/29/2023, 13:19:43# predicted of Validation: tensor([159, 131,  73,  87,  87, 132,  87,  82,  87, 136, 117, 146,  87,  87,
         87,  82], device='cuda:2') torch.Size([16])
08/29/2023, 13:19:43# labels of 0: tensor([159,   8,  73,  36,  18,  28,  26,  82,  24, 136, 126, 146, 104, 142,
        112,  82], device='cuda:2') torch.Size([16])
08/29/2023, 13:19:43# predicted of 0: tensor([159, 131,  73,  87,  87, 132,  87,  82,  87, 136, 117, 146,  87,  87,
         87,  82], device='cuda:2') torch.Size([16])
08/29/2023, 13:20:57# labels of Validation: tensor([ 49,  53, 164, 114,  66,  98,  97,  52,  73, 149, 163,  61,  46, 107,
         92,  29], device='cuda:2') torch.Size([16])
08/29/2023, 13:20:57# predicted of Validation: tensor([ 87,  87,  87, 131, 131,  87,  87,  52,  73, 149,  87,  61,  87, 131,
         87,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 13:27:58# labels of 5000: tensor([ 16,  63,  57, 154,  85,  71,  54,  45,  60,  99, 153,   7,  68,  13,
        146, 159], device='cuda:2') torch.Size([16])
08/29/2023, 13:27:58# predicted of 5000: tensor([ 16, 116, 162, 154,  85,  19,  19,  45,  63,  99, 162,   7, 129,  13,
        146, 159], device='cuda:2') torch.Size([16])
08/29/2023, 13:31:31# total batches: 8250
08/29/2023, 13:31:31# Epoch 14 | Train Loss: 2.9411 | Train Accuracy: 0.3410


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

08/29/2023, 13:31:31# labels of Validation: tensor([158, 138, 150, 130,  76, 166, 134, 160, 143,  41,  41, 111, 120, 164,
         83,  95], device='cuda:2') torch.Size([16])
08/29/2023, 13:31:31# predicted of Validation: tensor([ 86, 138,  86, 130,  86,   6, 134,   6,  86,  86,  86,  86, 120,  86,
         86,  95], device='cuda:2') torch.Size([16])
08/29/2023, 13:31:31# labels of 0: tensor([158, 138, 150, 130,  76, 166, 134, 160, 143,  41,  41, 111, 120, 164,
         83,  95], device='cuda:2') torch.Size([16])
08/29/2023, 13:31:31# predicted of 0: tensor([ 86, 138,  86, 130,  86,   6, 134,   6,  86,  86,  86,  86, 120,  86,
         86,  95], device='cuda:2') torch.Size([16])
08/29/2023, 13:32:28# labels of Validation: tensor([ 37, 145,   9, 109, 143, 161, 102,  11, 147,  15, 114,  15, 127,  50,
        116, 103], device='cuda:2') torch.Size([16])
08/29/2023, 13:32:28# predicted of Validation: tensor([  6, 145,  86,  86,  86,   6,  86,  86, 147,  15,   6,  94, 127,  50,
         86,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

08/29/2023, 13:38:24# labels of 5000: tensor([ 18, 136, 150, 156, 114, 158,  67, 158,  17, 163,  63, 132, 127,  81,
        128,  84], device='cuda:2') torch.Size([16])
08/29/2023, 13:38:24# predicted of 5000: tensor([ 43, 136,  19, 156, 131,  57,  78,  57,  17,  97, 164, 161, 127,  57,
        128,  84], device='cuda:2') torch.Size([16])


### Testing Part

In [None]:
# Load the pretrained checkpoint and evaluate it on the test split.
pretrained_model_path = '../checkpoint_GAT/best_model_GAT_transH_150.pt'
# map_location remaps tensors that were saved on another device (e.g. a
# different GPU index) onto the device selected for this run, so loading does
# not fail when the original GPU is unavailable.
model.load_state_dict(torch.load(pretrained_model_path, map_location=device))

model.to(device)
model.eval()

total = 0      # number of test samples seen
correct = 0    # number of samples whose prediction equals the label
count = 0      # batch counter, passed to model_fn and used to throttle logging

# Per-sample labels/predictions collected over the whole test split.
true_labels = []
predicted_labels = []

with torch.no_grad():
    for data in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):

        loss, accuracy, predicted = model_fn(data, model, criterion, device, count, which_type='test')
        labels = data[1].to(device)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

        # Log one sample batch every 5000 batches (including the first one).
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")

        count += 1

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Guard against an empty test loader, which would otherwise raise ZeroDivisionError.
test_accuracy = 100 * correct / total if total else float('nan')
add_log_msg(f'Test Accuracy: {test_accuracy} %\n\n\n')


# ======================================== handling the output excel files ========================================
# Imported here so this cell runs on its own; `pd` is needed for the Excel export below.
import pandas as pd

# Build the index -> class-name lookup from lines of the form "<name>: <index>".
mapping_file = './new_mapping.txt'
label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file).
            continue
        # Split on the LAST ': ' so class names that themselves contain ': ' still parse.
        name, sep, index = line.rpartition(': ')
        if not sep:
            raise ValueError(f"Malformed line in {mapping_file}: {line!r}")
        label_mapping[int(index)] = name

# Translate the numeric true/predicted labels into their class names.
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# Build a DataFrame from the scikit-learn classification report.
# zero_division=0 yields the same values as the default for classes that are
# never predicted, but without emitting an UndefinedMetricWarning for each one.
report_data = classification_report(
    mapped_true_labels, mapped_predicted_labels, output_dict=True, zero_division=0
)
report_df = pd.DataFrame(report_data).transpose()

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

# Pick the first numeric suffix for which neither output file exists yet,
# so earlier reports are never overwritten.
count = 0
while True:
    report_filename = f'classification_report-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-{count}.xlsx'

    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)

    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1


report_df.to_excel(report_path, index_label='Label')

# Per-sample true/predicted class names, one row per test sample.
mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

# Also log the plain-text version of the report.
mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels, zero_division=0)
add_log_msg(f"mapped_report:\n{mapped_report}")