# Test of GraphSAGE
- use DGL
- predict `graphs`
- valid, test data are in the training dataset

In [1]:
import os
import dgl
import json
import torch
import torch as th
import dgl.nn as dglnn
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv, SAGEConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup

- check the GPU and assign the GPU by the best memory usage

In [2]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

# device = torch.device(f"cuda:{1}")
print(device)


cuda:1


## Fix the seed

In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [4]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]

        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"]).to(self.device)

        g.ndata['feat'] = th.tensor(data["node_feat"]).to(self.device)
        g.edata['feat'] = th.tensor(data["edge_attr"]).to(self.device)  # Add edge features to graph

        return g, th.tensor(data["label"]).to(self.device)


def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)


In [5]:
datasets = ['train', 'valid', 'test']
dataset_data = {}

for dataset_name in tqdm(datasets):
    file_path = f"../../data_processing/dgl/data_new/exp1-2/training_data/exp_1/transH_50/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_1/transH_50/train.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_1/transH_50/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_1/transH_50/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [6]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

# dataloaders = create_dataloaders(4)
dataloaders = create_dataloaders(16)

- Turn the print message to a log file

In [7]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GraphSAGE.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/0911_18:54_GraphSAGE.log


### Model

In [8]:
class GraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(GraphSAGE, self).__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, 'mean')
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, 'mean')  # Output layer

    def forward(self, g, inputs):
        h = self.layer1(g, inputs)
        h = torch.relu(h)
        h = self.layer2(g, h)  # You can apply another non-linearity here if needed
        
        g.ndata['h'] = h
        hg = dgl.mean_nodes(g, 'h')
        return hg


- Model Forward  

In [9]:
def model_fn(data, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
    batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = labels.to(device)
#     labels = labels.float()

    logits = model(batched_g, batched_g.ndata['feat'].float()) # for GAT
#     logits = logits.mean(dim=1)
#     print(labels)
#     print(labels.shape)
#     print(logits)
#     print(logits.shape)

    
    loss = criterion(logits, labels)

    # Get the class id with the highest probability
    preds = logits.argmax(1) # for GAT
#     print(preds)
#     preds = logits.argmax() # for graphSAGE
    
    '''
    问题出在GAT和GraphSAGE模型的输出形状上。
    在你的 GAT 模型中，最后一层的输出是一个一维的张量，因此 logits 是一个形状为 (batch_size,) 的张量。这是因为 GAT 模型最后一层的输出被处理成了一个节点的预测，而不是整个图的预测。
    在 GraphSAGE 模型中，最后一层的输出形状是 (num_nodes, out_dim)，也就是说，它返回了整个图的节点级别的预测。因此，logits 是一个二维的张量，形状为 (num_nodes, out_dim)。
    当你尝试在一个一维张量上调用 logits.argmax(1) 时，会产生维度错误，因为它期望一个二维张量来执行操作。
    所以，在 GraphSAGE 模型中，你应该将以下行：
    '''
    
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
    
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")
        
    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")
        
    if count % 5000 == 0: 
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")
        
    return loss, accuracy, preds

In [None]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE/best_model_GraphSAGE_transH_50.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)


criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 50

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for data in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/50 [00:00<?, ?it/s]

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 18:56:40# labels of 5000: tensor([ 49,  52,  31,  91,  46,  87, 163,  54,  93, 150, 143, 162, 146,  14,
        102,  24], device='cuda:1') torch.Size([16])
09/11/2023, 18:56:40# predicted of 5000: tensor([ 76,  52,  92, 131,  98,  92,  31, 144, 131, 131, 140,  76, 146,  31,
        102, 131], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:27# total batches: 8250
09/11/2023, 18:58:27# Epoch 0 | Train Loss: 3.9895 | Train Accuracy: 0.3968


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 18:58:27# labels of Validation: tensor([ 23, 118,   1,  90, 147,  92,  86, 109,  48,  67,  80, 146,  50,  71,
        103, 144], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:27# predicted of Validation: tensor([ 23, 118, 158, 108, 147, 158,  86, 158, 158,  67,  80, 146,  50,  71,
        108, 158], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:27# labels of 0: tensor([ 23, 118,   1,  90, 147,  92,  86, 109,  48,  67,  80, 146,  50,  71,
        103, 144], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:27# predicted of 0: tensor([ 23, 118, 158, 108, 147, 158,  86, 158, 158,  67,  80, 146,  50,  71,
        108, 158], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:56# labels of Validation: tensor([117,  78,   3,  73,  80,  67,   6,   3,  43, 104, 146, 161, 143,  39,
        160,  49], device='cuda:1') torch.Size([16])
09/11/2023, 18:58:56# predicted of Validation: tensor([117,  67,   3,   3,  80,  67,   6,   3,  43, 158, 146, 161, 158,  39,
        160,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:01:49# labels of 5000: tensor([110,  89, 121,  82,  20, 128, 131,  13,  40,  58,  46, 135,  71, 117,
         19, 133], device='cuda:1') torch.Size([16])
09/11/2023, 19:01:49# predicted of 5000: tensor([  8,  89,  38,  82,  20, 128, 131,  13,  40,  58,  46, 135,  71, 117,
         19, 133], device='cuda:1') torch.Size([16])
09/11/2023, 19:03:44# total batches: 8250
09/11/2023, 19:03:44# Epoch 1 | Train Loss: 2.1523 | Train Accuracy: 0.5882


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:03:44# labels of Validation: tensor([151,  59,  32,  81, 139, 113,  78,  31,  16, 127, 117,  10, 125,   5,
         32, 125], device='cuda:1') torch.Size([16])
09/11/2023, 19:03:44# predicted of Validation: tensor([104, 108,  32, 104, 139,  88,  67, 104,  16, 127, 117,  10, 104,   5,
         32, 104], device='cuda:1') torch.Size([16])
09/11/2023, 19:03:44# labels of 0: tensor([151,  59,  32,  81, 139, 113,  78,  31,  16, 127, 117,  10, 125,   5,
         32, 125], device='cuda:1') torch.Size([16])
09/11/2023, 19:03:44# predicted of 0: tensor([104, 108,  32, 104, 139,  88,  67, 104,  16, 127, 117,  10, 104,   5,
         32, 104], device='cuda:1') torch.Size([16])
09/11/2023, 19:04:15# labels of Validation: tensor([ 81,  17, 131,  27,   1, 137,  13, 138, 108,  20,   3,  99,  45,  43,
          2,  27], device='cuda:1') torch.Size([16])
09/11/2023, 19:04:15# predicted of Validation: tensor([104,  17, 131,  27, 104, 137,  13, 138, 108,  20,   3,  99,  45,  43,
        104,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:07:16# labels of 5000: tensor([  8,  74, 137, 125,   8, 163,  55,  57,  51,  63,   9, 143, 151, 165,
        128,  19], device='cuda:1') torch.Size([16])
09/11/2023, 19:07:16# predicted of 5000: tensor([ 59,  14, 137,  81,  59,  81,  87,  14,  51,  63,  81,  81,  81, 165,
        128,  19], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:11# total batches: 8250
09/11/2023, 19:09:11# Epoch 2 | Train Loss: 1.7043 | Train Accuracy: 0.5959


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:09:11# labels of Validation: tensor([ 62,  36,  81,  28, 125, 105,  56,  88, 163,  60, 131, 121,  21,  76,
         48,  42], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:11# predicted of Validation: tensor([ 62,  60,  60,  28,  60, 105,  56,  72,  60,  60, 131,  60,  21,  60,
         60,  60], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:11# labels of 0: tensor([ 62,  36,  81,  28, 125, 105,  56,  88, 163,  60, 131, 121,  21,  76,
         48,  42], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:11# predicted of 0: tensor([ 62,  60,  60,  28,  60, 105,  56,  72,  60,  60, 131,  60,  21,  60,
         60,  60], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:42# labels of Validation: tensor([131,  92,  54,  81,  69, 155, 111, 163,  75,  25, 123, 165,  26,  40,
        128, 110], device='cuda:1') torch.Size([16])
09/11/2023, 19:09:42# predicted of Validation: tensor([131,  60,  60,  60,  69, 155,   1,  60,  60,  25, 123, 165,  26,  40,
        128,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:12:43# labels of 5000: tensor([ 90,  72,  94, 152,  86,  14,  52, 130,  87,   0, 155, 164,   1, 133,
         89,  49], device='cuda:1') torch.Size([16])
09/11/2023, 19:12:43# predicted of 5000: tensor([114,  72,  94,  60,  86,  60,  52, 130, 151,   0, 155, 104, 104, 133,
         89, 151], device='cuda:1') torch.Size([16])
09/11/2023, 19:14:37# total batches: 8250
09/11/2023, 19:14:37# Epoch 3 | Train Loss: 1.6072 | Train Accuracy: 0.6000


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:14:38# labels of Validation: tensor([ 21,  67,  92, 122,  23,  35,  43,   5,  79, 122,  55,  52, 149,  31,
         62, 116], device='cuda:1') torch.Size([16])
09/11/2023, 19:14:38# predicted of Validation: tensor([ 21,  78, 163, 122,  23,  35,  43,   5,  72, 122, 163,  52, 149, 163,
         62, 163], device='cuda:1') torch.Size([16])
09/11/2023, 19:14:38# labels of 0: tensor([ 21,  67,  92, 122,  23,  35,  43,   5,  79, 122,  55,  52, 149,  31,
         62, 116], device='cuda:1') torch.Size([16])
09/11/2023, 19:14:38# predicted of 0: tensor([ 21,  78, 163, 122,  23,  35,  43,   5,  72, 122, 163,  52, 149, 163,
         62, 163], device='cuda:1') torch.Size([16])
09/11/2023, 19:15:06# labels of Validation: tensor([ 87,  52,  45,  61,  62, 135,  93, 135,  20, 155, 164,   3,   5,  25,
         35,  90], device='cuda:1') torch.Size([16])
09/11/2023, 19:15:06# predicted of Validation: tensor([163,  52,  45,  61,  62, 135,  93, 135,  20, 155, 163,   3,   5,  25,
         35,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:18:06# labels of 5000: tensor([ 44, 163,   5,  35,  96,  94,  66, 115,  99,  29, 135,  16, 103,  19,
        115,  69], device='cuda:1') torch.Size([16])
09/11/2023, 19:18:06# predicted of 5000: tensor([ 97,  49,   5,  35,  96,  94,  66, 115,  99,  29, 135,  16, 110,  19,
        115,  69], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:02# total batches: 8250
09/11/2023, 19:20:02# Epoch 4 | Train Loss: 1.5722 | Train Accuracy: 0.6024


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:20:02# labels of Validation: tensor([147, 102,  48, 120, 100,   0, 127, 166, 144, 150, 144,  73,  47,  27,
        111,  14], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:02# predicted of Validation: tensor([147, 102,  48, 120, 100,   0, 127, 166,  48,  48,  48,  73,  48,  27,
         48,  48], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:02# labels of 0: tensor([147, 102,  48, 120, 100,   0, 127, 166, 144, 150, 144,  73,  47,  27,
        111,  14], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:02# predicted of 0: tensor([147, 102,  48, 120, 100,   0, 127, 166,  48,  48,  48,  73,  48,  27,
         48,  48], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:31# labels of Validation: tensor([107,  94,   3,   2, 114,  10,  27,  74,  78, 137, 146,  71, 108,  69,
        157,  77], device='cuda:1') torch.Size([16])
09/11/2023, 19:20:31# predicted of Validation: tensor([107,  94,   3,  48,  72,  10,  27,  48,  67, 137, 146,  71, 131,  69,
         48,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:23:27# labels of 5000: tensor([165,  60,  44, 119,  84, 144,  44,   0,  89,  38,  45, 143,  93,  57,
         77,  39], device='cuda:1') torch.Size([16])
09/11/2023, 19:23:27# predicted of 5000: tensor([165,  30, 124, 124,  84, 124, 124,   0,  89, 124,  45, 124, 110, 124,
         77,  39], device='cuda:1') torch.Size([16])
09/11/2023, 19:24:54# total batches: 8250
09/11/2023, 19:24:54# Epoch 5 | Train Loss: 1.5534 | Train Accuracy: 0.6039


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:24:54# labels of Validation: tensor([134, 157, 153,  70,  88, 145,  17,  99,  78, 146,  82,  97, 143,  71,
         12, 149], device='cuda:1') torch.Size([16])
09/11/2023, 19:24:54# predicted of Validation: tensor([134,  34, 153,  70,  90, 145,  17,  99,  78, 146,  82,  76,  76,  71,
         76, 149], device='cuda:1') torch.Size([16])
09/11/2023, 19:24:54# labels of 0: tensor([134, 157, 153,  70,  88, 145,  17,  99,  78, 146,  82,  97, 143,  71,
         12, 149], device='cuda:1') torch.Size([16])
09/11/2023, 19:24:54# predicted of 0: tensor([134,  34, 153,  70,  90, 145,  17,  99,  78, 146,  82,  76,  76,  71,
         76, 149], device='cuda:1') torch.Size([16])
09/11/2023, 19:25:13# labels of Validation: tensor([141, 100,  77,  83,  13, 154,  77, 140,  39,  73, 166,  81,  59, 142,
         96, 147], device='cuda:1') torch.Size([16])
09/11/2023, 19:25:13# predicted of Validation: tensor([141, 100,  77,  76,  13, 154,  77, 140,  39,  73, 166,  76,  91,  76,
         96,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:27:13# labels of 5000: tensor([158, 140,  71,  96,  80, 111, 134,  15,  78, 142,   4, 155, 107, 164,
         89,  31], device='cuda:1') torch.Size([16])
09/11/2023, 19:27:13# predicted of 5000: tensor([ 36, 140,  71,  96,  80, 158, 134,  15,  78,  87,  57, 155, 107,  57,
         89,  57], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:30# total batches: 8250
09/11/2023, 19:28:30# Epoch 6 | Train Loss: 1.5408 | Train Accuracy: 0.6048


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:28:31# labels of Validation: tensor([ 95, 138, 129, 160,  97, 109, 109, 128, 126, 114,  41, 129,  24,  23,
        115,  20], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:31# predicted of Validation: tensor([ 95, 138, 129, 160,  55,  55,  55, 128, 126,  90,  41, 129,  55,  23,
        115,  20], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:31# labels of 0: tensor([ 95, 138, 129, 160,  97, 109, 109, 128, 126, 114,  41, 129,  24,  23,
        115,  20], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:31# predicted of 0: tensor([ 95, 138, 129, 160,  55,  55,  55, 128, 126,  90,  41, 129,  55,  23,
        115,  20], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:50# labels of Validation: tensor([ 84, 110, 142,  16,  23,  31,  74,  99, 124, 116,  97,  45,  71,  46,
        155,  41], device='cuda:1') torch.Size([16])
09/11/2023, 19:28:50# predicted of Validation: tensor([ 84,  90,  55,  16,  23,  55,  55,  99, 104, 104,  55,  45,  71,  46,
        155,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:30:48# labels of 5000: tensor([135,  98, 158, 103,  67,  25,  73,  73,  29,  12,  43, 125,  63,  75,
         66,  67], device='cuda:1') torch.Size([16])
09/11/2023, 19:30:48# predicted of 5000: tensor([135,  98,   9,  91,  67,  25,  73,  73,  29,   9,  43,   4,  63,   9,
         66,  67], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:04# total batches: 8250
09/11/2023, 19:32:04# Epoch 7 | Train Loss: 1.5315 | Train Accuracy: 0.6057


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:32:04# labels of Validation: tensor([164, 108,   3,  72,  86, 160, 113, 154,  59, 110,  45,  57,  84, 110,
         50,  93], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:04# predicted of Validation: tensor([ 31, 131,   3,  72,  86, 160,  91, 154, 131,  68,  45,  31,  84,  68,
         50,  91], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:04# labels of 0: tensor([164, 108,   3,  72,  86, 160, 113, 154,  59, 110,  45,  57,  84, 110,
         50,  93], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:04# predicted of 0: tensor([ 31, 131,   3,  72,  86, 160,  91, 154, 131,  68,  45,  31,  84,  68,
         50,  91], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:24# labels of Validation: tensor([ 24,  72,  21,  40, 166, 145,  94,  92, 152,  60,  59, 125,   2,  21,
        144,  29], device='cuda:1') torch.Size([16])
09/11/2023, 19:32:24# predicted of Validation: tensor([ 31,  72,  21,  40, 166, 145,  94,  76, 151, 116,  91, 158,  31,  21,
         31,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:34:23# labels of 5000: tensor([159, 139,  61,  20, 148, 126,  38,  55,  93,  31,  34,  26, 129, 164,
        144,  89], device='cuda:1') torch.Size([16])
09/11/2023, 19:34:23# predicted of 5000: tensor([159, 139,  61,  20, 148, 126,  74,  74, 110,  74,  74,  26, 129,  74,
         74,  89], device='cuda:1') torch.Size([16])
09/11/2023, 19:35:42# total batches: 8250
09/11/2023, 19:35:42# Epoch 8 | Train Loss: 1.5238 | Train Accuracy: 0.6082


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:35:42# labels of Validation: tensor([ 26, 136, 111, 166, 144,  92,  43,  78,  64,  54,  77, 124,  79, 100,
         46, 164], device='cuda:1') torch.Size([16])
09/11/2023, 19:35:42# predicted of Validation: tensor([ 26, 136,  47, 166,  47,  47,  43,  67,   8,  47,  77,  47,  79, 100,
         46,  47], device='cuda:1') torch.Size([16])
09/11/2023, 19:35:42# labels of 0: tensor([ 26, 136, 111, 166, 144,  92,  43,  78,  64,  54,  77, 124,  79, 100,
         46, 164], device='cuda:1') torch.Size([16])
09/11/2023, 19:35:42# predicted of 0: tensor([ 26, 136,  47, 166,  47,  47,  43,  67,   8,  47,  77,  47,  79, 100,
         46,  47], device='cuda:1') torch.Size([16])
09/11/2023, 19:36:02# labels of Validation: tensor([ 61, 152,  53,  58,  66,  90,  62,  14,  25,  11,  56,  75,  60,  94,
        130,   4], device='cuda:1') torch.Size([16])
09/11/2023, 19:36:02# predicted of Validation: tensor([ 61,  47,  47,  58,  66,  68,  62,  47,  25,  47,  56,  47,  47,  94,
        130,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:38:04# labels of 5000: tensor([  3,  13, 137, 145,  85, 158, 145,  70,  14,  91, 138,  70, 162,  53,
         26,  78], device='cuda:1') torch.Size([16])
09/11/2023, 19:38:04# predicted of 5000: tensor([  3,  13, 137, 145,  85,  31, 145,  70,  31, 108, 138,  70,  31,  31,
         26,  78], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:22# total batches: 8250
09/11/2023, 19:39:22# Epoch 9 | Train Loss: 1.5179 | Train Accuracy: 0.6086


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:39:22# labels of Validation: tensor([ 44, 101, 120, 148,  71,  94, 145, 166,  99, 103,  93, 152,  33, 164,
        136, 129], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:22# predicted of Validation: tensor([125, 101, 120, 148,  71,  94, 145, 166,  99, 114, 114, 125, 125, 125,
        136, 129], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:22# labels of 0: tensor([ 44, 101, 120, 148,  71,  94, 145, 166,  99, 103,  93, 152,  33, 164,
        136, 129], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:22# predicted of 0: tensor([125, 101, 120, 148,  71,  94, 145, 166,  99, 114, 114, 125, 125, 125,
        136, 129], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:44# labels of Validation: tensor([ 97,  43, 108, 134,  67,  67,  99,  78,  36, 116, 152,  31, 102,   7,
         62, 120], device='cuda:1') torch.Size([16])
09/11/2023, 19:39:44# predicted of Validation: tensor([125,  43, 108, 134,  67,  67,  99,  67, 125, 125, 125, 125, 102,   7,
         62,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:41:43# labels of 5000: tensor([ 46, 151,  89,  83, 157,  58,  30,  68, 106,  78,  17, 121, 151, 144,
         78, 147], device='cuda:1') torch.Size([16])
09/11/2023, 19:41:43# predicted of 5000: tensor([ 46,   4,  89,   4,   4,  58, 104,   8, 106,  67,  17,   4,   4,   4,
         67, 147], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:00# total batches: 8250
09/11/2023, 19:43:00# Epoch 10 | Train Loss: 1.5132 | Train Accuracy: 0.6096


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:43:00# labels of Validation: tensor([ 63,   9,  35,  32,  43,  38, 142,  19, 148,   2,  52,  59,  51,  35,
         70, 158], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:00# predicted of Validation: tensor([ 63, 104,  35,  32,  43,  92,  92,  19, 148,  92,  52,  91,  51,  35,
         70,  92], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:00# labels of 0: tensor([ 63,   9,  35,  32,  43,  38, 142,  19, 148,   2,  52,  59,  51,  35,
         70, 158], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:00# predicted of 0: tensor([ 63, 104,  35,  32,  43,  92,  92,  19, 148,  92,  52,  91,  51,  35,
         70,  92], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:20# labels of Validation: tensor([111, 105,  36, 135, 162,  80, 108,  45,  63,  33, 151,  89,  16,  48,
        139,  26], device='cuda:1') torch.Size([16])
09/11/2023, 19:43:20# predicted of Validation: tensor([ 92, 105,  92, 135, 152,  80, 108,  45,  63,  92,  92,  89,  16,  92,
        139,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:45:14# labels of 5000: tensor([161,  91,   0, 152,  42, 159,  26, 146,   0, 113, 103,  37,  13,  14,
        100, 137], device='cuda:1') torch.Size([16])
09/11/2023, 19:45:14# predicted of 5000: tensor([161,  59,   0,  30,  30, 159,  26, 146,   0,  88,  64,  37,  13,  30,
        100, 137], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:09# total batches: 8250
09/11/2023, 19:46:09# Epoch 11 | Train Loss: 1.5093 | Train Accuracy: 0.6094


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:46:09# labels of Validation: tensor([ 64, 162, 156,   8,  21,  13,   6, 133,  44,  30,  41,  29,  89, 140,
         83,  16], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:09# predicted of Validation: tensor([ 93,  38, 156,  93,  21,  13,   6, 133,  38,  38,  41,  29,  89, 140,
         38,  16], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:09# labels of 0: tensor([ 64, 162, 156,   8,  21,  13,   6, 133,  44,  30,  41,  29,  89, 140,
         83,  16], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:09# predicted of 0: tensor([ 93,  38, 156,  93,  21,  13,   6, 133,  38,  38,  41,  29,  89, 140,
         38,  16], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:22# labels of Validation: tensor([152,  54, 115, 140, 111,  53, 149,  45, 158,  79,  74, 151,   8, 114,
         21,  75], device='cuda:1') torch.Size([16])
09/11/2023, 19:46:22# predicted of Validation: tensor([ 38,  38, 115, 140,  38,  38, 149,  45,  38,  79,  38,  38,  93,  93,
         21,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:47:45# labels of 5000: tensor([ 52,  62,  49,  70, 153, 142,  85, 135,  96,  52,  44, 161,   8,  74,
        153,  64], device='cuda:1') torch.Size([16])
09/11/2023, 19:47:45# predicted of 5000: tensor([ 52,  62, 144,  70, 153, 144,  85, 135,  96,  52, 144, 161,  93, 144,
        153, 103], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:36# total batches: 8250
09/11/2023, 19:48:36# Epoch 12 | Train Loss: 1.5059 | Train Accuracy: 0.6102


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:48:37# labels of Validation: tensor([124, 122, 163,  53, 121,  80,  14,  47,  69, 154,  78, 153,  24,   6,
         36, 165], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:37# predicted of Validation: tensor([158, 122, 158, 158, 158,  80, 158, 158,  69, 154,  67, 153, 158,   6,
        158, 165], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:37# labels of 0: tensor([124, 122, 163,  53, 121,  80,  14,  47,  69, 154,  78, 153,  24,   6,
         36, 165], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:37# predicted of 0: tensor([158, 122, 158, 158, 158,  80, 158, 158,  69, 154,  67, 153, 158,   6,
        158, 165], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:49# labels of Validation: tensor([ 30,  27,  72, 110,  91, 126,  28, 134,  74, 138,   6,  27, 159, 115,
        136,  74], device='cuda:1') torch.Size([16])
09/11/2023, 19:48:49# predicted of Validation: tensor([158,  27,  72,   8,  59, 126,  28, 134, 164, 138,   6,  27, 159, 115,
        136,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:50:12# labels of 5000: tensor([124,  92,  73,  14,  31, 149,  93,  35, 131, 108, 150, 108, 126,  11,
         51,   5], device='cuda:1') torch.Size([16])
09/11/2023, 19:50:12# predicted of 5000: tensor([ 54,  54,  73,  54,  54, 149, 113,  35, 131, 108,  54, 108, 126,  54,
         51,   5], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:04# total batches: 8250
09/11/2023, 19:51:04# Epoch 13 | Train Loss: 1.5036 | Train Accuracy: 0.6110


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:51:04# labels of Validation: tensor([ 61,  24, 125, 123, 101,  13,  91,  40, 119,  88,   7, 124,  48, 115,
         93,   1], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:04# predicted of Validation: tensor([ 61,  38,  38, 123, 101,  13,  59,  40,  38,   8,   7,  38,  38, 115,
          8,  55], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:04# labels of 0: tensor([ 61,  24, 125, 123, 101,  13,  91,  40, 119,  88,   7, 124,  48, 115,
         93,   1], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:04# predicted of 0: tensor([ 61,  38,  38, 123, 101,  13,  59,  40,  38,   8,   7,  38,  38, 115,
          8,  55], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:17# labels of Validation: tensor([ 50, 108, 125,  21,  82,  69, 117,  47,  15,  76,  38, 162,   7,  28,
        101, 154], device='cuda:1') torch.Size([16])
09/11/2023, 19:51:17# predicted of Validation: tensor([ 50, 131,  38,  21,  82,  69, 117,  38,  15,  38,  55,  38,   7,  28,
        101,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:52:39# labels of 5000: tensor([  6,  40,  73,  95,  99, 119,  20,  47,  77,  23, 120,  53,  33,  72,
        148, 134], device='cuda:1') torch.Size([16])
09/11/2023, 19:52:39# predicted of 5000: tensor([  6,  40,  73,  95,  99,  87,  20,  87,  77,  23, 120,  87,  87,  72,
        148, 134], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:29# total batches: 8250
09/11/2023, 19:53:29# Epoch 14 | Train Loss: 1.5012 | Train Accuracy: 0.6128


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:53:29# labels of Validation: tensor([163, 162,  56, 115,   4,  55,  82, 135,  89, 126,  18,  73, 122, 138,
         75,  71], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:29# predicted of Validation: tensor([ 31,  31,  56, 115,  31,  31,  82, 135,  89, 126,  31,  73, 122, 138,
         31,  71], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:29# labels of 0: tensor([163, 162,  56, 115,   4,  55,  82, 135,  89, 126,  18,  73, 122, 138,
         75,  71], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:29# predicted of 0: tensor([ 31,  31,  56, 115,  31,  31,  82, 135,  89, 126,  31,  73, 122, 138,
         31,  71], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:42# labels of Validation: tensor([ 81,   6, 106, 155, 102,  15,   7, 101, 138,  48,  91, 109, 118,   2,
         66, 124], device='cuda:1') torch.Size([16])
09/11/2023, 19:53:42# predicted of Validation: tensor([ 31,   6, 106, 155, 102,  15,   7, 101, 138,  31, 108,  31, 118,  31,
         66,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:54:59# labels of 5000: tensor([162, 138,  99,  29,  74, 108,  56, 115,  15, 141, 119,  20,  36,  37,
        117, 160], device='cuda:1') torch.Size([16])
09/11/2023, 19:54:59# predicted of 5000: tensor([ 54, 138,  99,  29,  54, 108,  56, 115,  15, 141,  54,  20,  54,  37,
        117, 160], device='cuda:1') torch.Size([16])
09/11/2023, 19:55:50# total batches: 8250
09/11/2023, 19:55:50# Epoch 15 | Train Loss: 1.5001 | Train Accuracy: 0.6112


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:55:50# labels of Validation: tensor([ 30, 138,  99, 110,   7,  15,  11, 150, 145, 160,  58,  57, 160,  43,
        149,   2], device='cuda:1') torch.Size([16])
09/11/2023, 19:55:50# predicted of Validation: tensor([124, 138,  99,  90,   7,  15,  12,  12, 145, 160,  58, 124, 160,  43,
        149, 124], device='cuda:1') torch.Size([16])
09/11/2023, 19:55:50# labels of 0: tensor([ 30, 138,  99, 110,   7,  15,  11, 150, 145, 160,  58,  57, 160,  43,
        149,   2], device='cuda:1') torch.Size([16])
09/11/2023, 19:55:50# predicted of 0: tensor([124, 138,  99,  90,   7,  15,  12,  12, 145, 160,  58, 124, 160,  43,
        149, 124], device='cuda:1') torch.Size([16])
09/11/2023, 19:56:03# labels of Validation: tensor([ 94, 122, 127,   6, 132,  49, 112,  58,  37,  74, 136, 155,  80,  37,
         21,  11], device='cuda:1') torch.Size([16])
09/11/2023, 19:56:03# predicted of Validation: tensor([ 94, 122, 127,   6, 132, 124, 124,  58,  37,  12, 136, 155,  80,  37,
         21,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:57:24# labels of 5000: tensor([111,  48,  28, 147, 150, 140, 115,  17, 116,  61, 156,  70, 135, 140,
         19, 117], device='cuda:1') torch.Size([16])
09/11/2023, 19:57:24# predicted of 5000: tensor([ 31,  31,  28, 147,  31, 140, 115,  17,  31,  61, 156,  70, 135, 140,
         19, 117], device='cuda:1') torch.Size([16])
09/11/2023, 19:58:16# total batches: 8250
09/11/2023, 19:58:16# Epoch 16 | Train Loss: 1.4984 | Train Accuracy: 0.6116


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 19:58:16# labels of Validation: tensor([151, 109,  14,   7, 104,   6,  78,  52,  25,  98,  72,   9, 152,  86,
         31,  19], device='cuda:1') torch.Size([16])
09/11/2023, 19:58:16# predicted of Validation: tensor([36,  9, 36,  7, 36,  6, 67, 52, 25, 98, 72, 36, 36, 86, 36, 19],
       device='cuda:1') torch.Size([16])
09/11/2023, 19:58:16# labels of 0: tensor([151, 109,  14,   7, 104,   6,  78,  52,  25,  98,  72,   9, 152,  86,
         31,  19], device='cuda:1') torch.Size([16])
09/11/2023, 19:58:16# predicted of 0: tensor([36,  9, 36,  7, 36,  6, 67, 52, 25, 98, 72, 36, 36, 86, 36, 19],
       device='cuda:1') torch.Size([16])
09/11/2023, 19:58:29# labels of Validation: tensor([ 75,  28,  17, 100, 137, 112,  38, 147,  15, 148,  36, 147, 110, 103,
        122, 130], device='cuda:1') torch.Size([16])
09/11/2023, 19:58:29# predicted of Validation: tensor([  9,  28,  17, 100, 137,   9,   9, 147,  15, 148,  36, 147,   8,   8,
        122, 130], device='cuda:1') torch.Size

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 19:59:50# labels of 5000: tensor([ 23,  49,  36,  87,  25,  53,  89, 138, 103, 162,  74,   6, 107,  81,
        149, 115], device='cuda:1') torch.Size([16])
09/11/2023, 19:59:50# predicted of 5000: tensor([ 23,  42,  42,  42,  25,  42,  89, 138, 113,  42,  42,   6, 107,  42,
        149, 115], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:42# total batches: 8250
09/11/2023, 20:00:42# Epoch 17 | Train Loss: 1.4971 | Train Accuracy: 0.6116


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 20:00:42# labels of Validation: tensor([150,  61,  33,   5,   1, 120, 166, 120,  33,  20, 129,  50,  41, 111,
        150,  27], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:42# predicted of Validation: tensor([ 87,  61,  87,   5,  87, 120, 166, 120,  87,  20, 129,  50,  41,  87,
         87,  27], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:42# labels of 0: tensor([150,  61,  33,   5,   1, 120, 166, 120,  33,  20, 129,  50,  41, 111,
        150,  27], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:42# predicted of 0: tensor([ 87,  61,  87,   5,  87, 120, 166, 120,  87,  20, 129,  50,  41,  87,
         87,  27], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:54# labels of Validation: tensor([132, 156,  60,  43, 150,  13,  59, 123,  18,  45, 116,  35, 164, 103,
        142, 111], device='cuda:1') torch.Size([16])
09/11/2023, 20:00:54# predicted of Validation: tensor([132, 156,  81,  43,  81,  13,  91, 123,  87,  45,  87,  35,   9,  93,
         87,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 20:02:18# labels of 5000: tensor([  4, 102, 142,  54, 106, 116,  48,  77,  68, 116, 111,  27,  64,  52,
         36, 111], device='cuda:1') torch.Size([16])
09/11/2023, 20:02:18# predicted of 5000: tensor([ 38, 102,  38,  38, 106,  38, 125,  77,  64,  38,  38,  27,  64,  52,
        125, 125], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:11# total batches: 8250
09/11/2023, 20:03:11# Epoch 18 | Train Loss: 1.4958 | Train Accuracy: 0.6134


Validation:   0%|          | 0/1032 [00:00<?, ?it/s]

09/11/2023, 20:03:11# labels of Validation: tensor([ 36,  49, 123,  97, 103,  91,  79, 104,  15,  67,  50,  96,  21, 120,
        114, 136], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:11# predicted of Validation: tensor([ 30,  30, 123,  30, 113,  91,  79,  30,  15,  67,  50,  96,  21, 120,
        113, 136], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:11# labels of 0: tensor([ 36,  49, 123,  97, 103,  91,  79, 104,  15,  67,  50,  96,  21, 120,
        114, 136], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:11# predicted of 0: tensor([ 30,  30, 123,  30, 113,  91,  79,  30,  15,  67,  50,  96,  21, 120,
        113, 136], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:24# labels of Validation: tensor([100,  28,  21,  43,  75, 136,  41, 128, 127,  80,  33,  10,  39, 114,
        124,  18], device='cuda:1') torch.Size([16])
09/11/2023, 20:03:24# predicted of Validation: tensor([100,  28,  21,  43,  30, 136,  41, 128, 127,  80,  30,  10,  39, 113,
         30,

Training:   0%|          | 0/8250 [00:00<?, ?it/s]

09/11/2023, 20:04:45# labels of 5000: tensor([109,  10, 145,  49,  85,  13,  48, 161,   3, 150,  83,  15,  34, 132,
         29,  41], device='cuda:1') torch.Size([16])
09/11/2023, 20:04:45# predicted of 5000: tensor([151,  10, 145, 151,  85,  13, 151, 161,   3, 151, 151,  15, 151, 132,
         29,  41], device='cuda:1') torch.Size([16])


In [None]:
# load the pretrained model
pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transH_50.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for data in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
#         print(f"data:{data[1]}")
        loss, accuracy, predicted = model_fn(data, model, criterion, device, count, which_type='test')
        labels = data[1].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')


# ======================================== handlig the output excel files ========================================
mapping_file = './new_mapping.txt'
label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        parts = line.strip().split(': ')
        label_mapping[int(parts[1])] = parts[0]
        
# 将映射后的标签应用到true和predicted标签列表
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

# mapped_true_labels_np = np.array(mapped_true_labels)
# mapped_predicted_labels_np = np.array(mapped_predicted_labels)

# print("mapped_true_labels 的形状:", mapped_true_labels_np.shape)
# print("mapped_predicted_labels 的形状:", mapped_predicted_labels_np.shape)

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-transH_50-graphSAGE-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-transH_50-graphSAGE-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

    
report_df.to_excel(report_path, index_label='Label')

mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"mapped_report:\n{mapped_report}")

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [None]:
seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
torch.save(model.state_dict(), 'model3_initial(graphsage)/initial_weight.pth')

In [None]:
model.layer1.fc_self.weight

- Check if model really load the model_dict

In [None]:
model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
model.layer1.fc_self.weight

### test of valid and test part is ``graph``

- 60 APs in training x 10000times
- 5 APs in validation x 4 times
- 3 APs in test x 4 times
- Batch size = 4

In [None]:
seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))

model = model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=total_steps)

criterion = nn.CrossEntropyLoss()
total_steps = 180

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    count = 0 
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        
        count += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, count, type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()
        num_batches += 1
        
#     scheduler.step()
    add_log_msg(f"total count: {count}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batched_g in dataloaders['valid']:
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, type=='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
#         torch.save(model.state_dict(), 'best_model.pth')
        torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, f"../checkpoint_GAT/best_model_{epoch}.pt")
    
    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("Early stopping")
            break

            
# Testing Part
model.eval()
total = 0
correct = 0

with torch.no_grad():
    for data in dataloaders['test']:
        loss, accuracy, predicted = model_fn(data, model, criterion, device, type=='test')
        labels = data[1].to(device)  # Assuming labels are the second element in the tuple
        
        add_log_msg(f"labels: {labels} {labels.shape}")
        add_log_msg(f"predicted: {predicted} {predicted.shape}")
        
        total += labels.size(0) # label.size(0) is the batch size
        correct += (predicted == labels).sum().item() 
        # (predicted == labels).sum() would return how many of them are equal; 
        # .item() would make the tensor to the regular value
        
#     print('Test Accuracy: %d %%' % (100 * correct / total))
add_log_msg(f'Test Accuracy: {100 * correct / total} %%')