# Test of GraphSAGE
- use DGL
- predict `graphs`
- valid, test data are in the training dataset

In [1]:
import os
import dgl
import json
import torch
import torch as th
import dgl.nn as dglnn
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv, SAGEConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup

- check the GPU and assign the GPU by the best memory usage

In [2]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

# device = torch.device(f"cuda:{1}")
print(device)


cuda:1


## Fix the seed

In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [4]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]

        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"]).to(self.device)

        g.ndata['feat'] = th.tensor(data["node_feat"]).to(self.device)
        g.edata['feat'] = th.tensor(data["edge_attr"]).to(self.device)  # Add edge features to graph

        return g, th.tensor(data["label"]).to(self.device)


def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)


In [5]:
datasets = ['train', 'valid', 'test']
dataset_data = {}

for dataset_name in tqdm(datasets):
    file_path = f"../../data_processing/dgl/data_new/exp1-2/training_data/exp_2/transH_50/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_2/transH_50/train.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_2/transH_50/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp1-2/training_data/exp_2/transH_50/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [6]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

# dataloaders = create_dataloaders(4)
dataloaders = create_dataloaders(16)

- Turn the print message to a log file

In [7]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GraphSAGE.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/0918_14:34_GraphSAGE.log


### Model

In [8]:
class GraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(GraphSAGE, self).__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, 'pool')
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, 'pool')  # Output layer

    def forward(self, g, inputs):
        h = self.layer1(g, inputs)
        h = torch.relu(h)
        h = self.layer2(g, h)  # You can apply another non-linearity here if needed
        
        g.ndata['h'] = h
        hg = dgl.mean_nodes(g, 'h')
        return hg


- Model Forward  

In [9]:
def model_fn(data, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
    batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = labels.to(device)
#     labels = labels.float()

    logits = model(batched_g, batched_g.ndata['feat'].float()) # for GAT
#     logits = logits.mean(dim=1)
#     print(labels)
#     print(labels.shape)
#     print(logits)
#     print(logits.shape)

    
    loss = criterion(logits, labels)

    # Get the class id with the highest probability
    preds = logits.argmax(1) # for GAT
#     print(preds)
#     preds = logits.argmax() # for graphSAGE
    
    '''
    问题出在GAT和GraphSAGE模型的输出形状上。
    在你的 GAT 模型中，最后一层的输出是一个一维的张量，因此 logits 是一个形状为 (batch_size,) 的张量。这是因为 GAT 模型最后一层的输出被处理成了一个节点的预测，而不是整个图的预测。
    在 GraphSAGE 模型中，最后一层的输出形状是 (num_nodes, out_dim)，也就是说，它返回了整个图的节点级别的预测。因此，logits 是一个二维的张量，形状为 (num_nodes, out_dim)。
    当你尝试在一个一维张量上调用 logits.argmax(1) 时，会产生维度错误，因为它期望一个二维张量来执行操作。
    所以，在 GraphSAGE 模型中，你应该将以下行：
    '''
    
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
    
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")
        
    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")
        
    if count % 5000 == 0: 
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")
        
    return loss, accuracy, preds

In [10]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE/best_model_GraphSAGE_transH_50.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)


criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 25

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for data in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/25 [00:00<?, ?it/s]

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 14:36:18# labels of 5000: tensor([126,   8,  63,  92, 158,  76, 130,  85, 166,  99, 101,  45, 137,   8,
        151,  33], device='cuda:1') torch.Size([16])
09/18/2023, 14:36:18# predicted of 5000: tensor([ 39, 131,  63,  93,  21, 107,  56,  85, 166,  99, 161,  95,  20,  93,
        131, 131], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:12# total batches: 8300
09/18/2023, 14:38:12# Epoch 0 | Train Loss: 3.7201 | Train Accuracy: 0.2360


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 14:38:12# labels of Validation: tensor([ 34, 124,  14,   9,  96,  64,  21,  38,  97,  80, 151,  67,  93,   7,
         71,  67], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:12# predicted of Validation: tensor([ 59,  71,  41, 110,  25, 148,  66,  41, 135,  80, 121,  78, 131,   7,
         71,  78], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:12# labels of 0: tensor([ 34, 124,  14,   9,  96,  64,  21,  38,  97,  80, 151,  67,  93,   7,
         71,  67], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:12# predicted of 0: tensor([ 59,  71,  41, 110,  25, 148,  66,  41, 135,  80, 121,  78, 131,   7,
         71,  78], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:41# labels of Validation: tensor([ 34,  61, 107,  39, 153, 107, 164, 143,  39,  39,  51,   6,  19, 126,
        102, 133], device='cuda:1') torch.Size([16])
09/18/2023, 14:38:41# predicted of Validation: tensor([121,  50, 107,  39, 153, 107, 121, 121,  39,  39,  51,   6,  19,  32,
         98,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 14:41:42# labels of 5000: tensor([ 26,  32,  49, 156, 145, 136,  38, 158,   1, 123, 111,  92, 115, 114,
         56,   2], device='cuda:1') torch.Size([16])
09/18/2023, 14:41:42# predicted of 5000: tensor([ 26,  32, 158, 156, 145, 136, 158, 125, 155, 123,  57, 158, 115, 131,
         56,  42], device='cuda:1') torch.Size([16])
09/18/2023, 14:43:38# total batches: 8300
09/18/2023, 14:43:38# Epoch 1 | Train Loss: 2.1285 | Train Accuracy: 0.5600


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 14:43:39# labels of Validation: tensor([ 74,   0, 120,  25, 150,  26,  82,  88, 143,   0, 109, 158,   4,  48,
         28,  66], device='cuda:1') torch.Size([16])
09/18/2023, 14:43:39# predicted of Validation: tensor([ 36,   0, 120,  25, 124,  26,  82,  91,   8,   0, 116,  36, 144,  36,
         28,  66], device='cuda:1') torch.Size([16])
09/18/2023, 14:43:39# labels of 0: tensor([ 74,   0, 120,  25, 150,  26,  82,  88, 143,   0, 109, 158,   4,  48,
         28,  66], device='cuda:1') torch.Size([16])
09/18/2023, 14:43:39# predicted of 0: tensor([ 36,   0, 120,  25, 124,  26,  82,  91,   8,   0, 116,  36, 144,  36,
         28,  66], device='cuda:1') torch.Size([16])
09/18/2023, 14:44:10# labels of Validation: tensor([110,  83, 145, 111,  27,  40, 105,  98, 111,  47, 124,  35,  45, 156,
        144,  76], device='cuda:1') torch.Size([16])
09/18/2023, 14:44:10# predicted of Validation: tensor([ 91,  44, 145,  44,  27,  40, 105,  98,  44,  44,  44,  35,  45, 156,
         37,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 14:47:10# labels of 5000: tensor([ 58,  16, 159, 117, 135, 117,  48, 153,  10,  40,  44,  87,  51,  59,
        152, 140], device='cuda:1') torch.Size([16])
09/18/2023, 14:47:10# predicted of 5000: tensor([ 58,  16, 159, 117, 135, 117,  11, 153,  10,  40,  37,  55,  51, 131,
         11, 140], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:10# total batches: 8300
09/18/2023, 14:49:10# Epoch 2 | Train Loss: 1.7046 | Train Accuracy: 0.5981


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 14:49:11# labels of Validation: tensor([ 43,  78,  58,  53, 141,  35,  43,  87,  24, 138,  39, 165,  61,  15,
         66,  24], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:11# predicted of Validation: tensor([ 43,  67,  58,   4, 141,  35,  43,   4, 116, 138,  39, 165,  61,  15,
         66,   4], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:11# labels of 0: tensor([ 43,  78,  58,  53, 141,  35,  43,  87,  24, 138,  39, 165,  61,  15,
         66,  24], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:11# predicted of 0: tensor([ 43,  67,  58,   4, 141,  35,  43,   4, 116, 138,  39, 165,  61,  15,
         66,   4], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:41# labels of Validation: tensor([ 44, 164,  14,  35,  16, 153, 119,  49, 102,  68,  87, 141,   4, 145,
         58, 140], device='cuda:1') torch.Size([16])
09/18/2023, 14:49:41# predicted of Validation: tensor([  4,  31, 150,  35,  16, 153, 116,  42, 102, 108, 116, 141, 116, 145,
         58,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 14:52:42# labels of 5000: tensor([ 28,  19,  49,  68,  25,  50, 112,  92, 115,  73,  62,  46, 148, 122,
        130,  54], device='cuda:1') torch.Size([16])
09/18/2023, 14:52:42# predicted of 5000: tensor([ 28,  19,  76, 103,  25,  50, 152, 152, 115,  73,  62,  46, 148, 122,
        130, 152], device='cuda:1') torch.Size([16])
09/18/2023, 14:54:41# total batches: 8300
09/18/2023, 14:54:41# Epoch 3 | Train Loss: 1.6005 | Train Accuracy: 0.6041


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 14:54:41# labels of Validation: tensor([ 58,  37,  53, 125, 150,   4,  76, 137,   2,  47,  46,  63,  50,  39,
         47,  96], device='cuda:1') torch.Size([16])
09/18/2023, 14:54:41# predicted of Validation: tensor([ 58,  37, 114,   1, 151, 151,   1, 137, 151,   1,  46,  63,  50,  39,
        151,  96], device='cuda:1') torch.Size([16])
09/18/2023, 14:54:41# labels of 0: tensor([ 58,  37,  53, 125, 150,   4,  76, 137,   2,  47,  46,  63,  50,  39,
         47,  96], device='cuda:1') torch.Size([16])
09/18/2023, 14:54:41# predicted of 0: tensor([ 58,  37, 114,   1, 151, 151,   1, 137, 151,   1,  46,  63,  50,  39,
        151,  96], device='cuda:1') torch.Size([16])
09/18/2023, 14:55:12# labels of Validation: tensor([ 51,  34, 150,  91,  15,  20, 152, 164,  46, 119, 125,  29, 153, 134,
        148, 150], device='cuda:1') torch.Size([16])
09/18/2023, 14:55:12# predicted of Validation: tensor([ 51,   1, 151, 114,  15,  20,   1, 151,  46, 151, 151,  29, 153, 134,
        148,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 14:58:14# labels of 5000: tensor([162, 108,  27, 111, 155,  56, 128,  96,  34, 137, 151, 162, 143, 117,
        161,  81], device='cuda:1') torch.Size([16])
09/18/2023, 14:58:14# predicted of 5000: tensor([ 87, 108,  27,  31, 155,  56, 128,  96,   4, 137,  87,  31,  87, 117,
        161,  87], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:14# total batches: 8300
09/18/2023, 15:00:14# Epoch 4 | Train Loss: 1.5566 | Train Accuracy: 0.6068


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:00:14# labels of Validation: tensor([118, 115,   8,  29, 158, 161,  98, 156,  30, 124,  23,  13,  74,  71,
        134,  83], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:14# predicted of Validation: tensor([118, 115,  90,  29, 111, 161,  98, 156, 111,  30,  23,  13, 111,  71,
        134, 111], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:14# labels of 0: tensor([118, 115,   8,  29, 158, 161,  98, 156,  30, 124,  23,  13,  74,  71,
        134,  83], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:14# predicted of 0: tensor([118, 115,  90,  29, 111, 161,  98, 156, 111,  30,  23,  13, 111,  71,
        134, 111], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:48# labels of Validation: tensor([ 65, 101,  26, 145,  16,  79,  92, 104, 115,  27, 122,  85,  77,  34,
         19, 131], device='cuda:1') torch.Size([16])
09/18/2023, 15:00:48# predicted of Validation: tensor([ 65, 101,  26, 145,  16,  79, 111, 111, 115,  27, 122,  85,  77, 111,
         19,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:03:49# labels of 5000: tensor([151, 147,  98, 141,   6, 104,  34,   6, 139,  73, 154, 155,  97,   0,
         27,  51], device='cuda:1') torch.Size([16])
09/18/2023, 15:03:49# predicted of 5000: tensor([ 60, 147,  98, 141,   6,  60,  55,   6, 139,  73, 154, 155,  38,   0,
         27,  51], device='cuda:1') torch.Size([16])
09/18/2023, 15:05:45# total batches: 8300
09/18/2023, 15:05:45# Epoch 5 | Train Loss: 1.5306 | Train Accuracy: 0.6092


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:05:45# labels of Validation: tensor([ 43, 155,  61,  48, 127,  97,  88,  52, 161, 148,  91,  51,  35,  95,
         64, 101], device='cuda:1') torch.Size([16])
09/18/2023, 15:05:45# predicted of Validation: tensor([ 43, 155,  61, 111, 127,  33,  59,  52, 161, 148,  59,  51,  35,  95,
         90, 101], device='cuda:1') torch.Size([16])
09/18/2023, 15:05:45# labels of 0: tensor([ 43, 155,  61,  48, 127,  97,  88,  52, 161, 148,  91,  51,  35,  95,
         64, 101], device='cuda:1') torch.Size([16])
09/18/2023, 15:05:45# predicted of 0: tensor([ 43, 155,  61, 111, 127,  33,  59,  52, 161, 148,  59,  51,  35,  95,
         90, 101], device='cuda:1') torch.Size([16])
09/18/2023, 15:06:16# labels of Validation: tensor([ 83, 120, 127,  71,  29,  39, 108, 129,  19, 100, 160,  68,  67,  10,
         60,  38], device='cuda:1') torch.Size([16])
09/18/2023, 15:06:16# predicted of Validation: tensor([111, 120, 127,  71,  29,  39, 108, 129,  19, 100, 160,  59,  78,  10,
        111,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:09:16# labels of 5000: tensor([139, 163, 126,  27, 159, 112,  41,  72,  82, 162,  23, 143,  25, 165,
         82, 166], device='cuda:1') torch.Size([16])
09/18/2023, 15:09:16# predicted of 5000: tensor([139,  30, 126,  27, 159,  30,  41,  72,  82, 104,  23, 151,  25, 165,
         82, 166], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:08# total batches: 8300
09/18/2023, 15:11:08# Epoch 6 | Train Loss: 1.5134 | Train Accuracy: 0.6109


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:11:08# labels of Validation: tensor([102, 130,  62,  19, 114,  95,  43,  86, 120,  49, 135, 152, 133,  81,
         52,  84], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:08# predicted of Validation: tensor([102, 130,  62,  19, 113,  95,  43,  86, 120, 152, 135, 150, 133, 125,
         52,  84], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:08# labels of 0: tensor([102, 130,  62,  19, 114,  95,  43,  86, 120,  49, 135, 152, 133,  81,
         52,  84], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:08# predicted of 0: tensor([102, 130,  62,  19, 113,  95,  43,  86, 120, 152, 135, 150, 133, 125,
         52,  84], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:36# labels of Validation: tensor([112, 133, 160,  15, 126,   5, 110, 110,  88,  76, 141,  83,  69,  71,
         50,  16], device='cuda:1') torch.Size([16])
09/18/2023, 15:11:36# predicted of Validation: tensor([125, 133, 160,  15, 126,   5, 113, 113, 113, 150, 141, 150,  69,  71,
         50,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:14:29# labels of 5000: tensor([ 29,  67, 134, 119,  75, 133, 121,  27, 148, 139,  30,  97,  33, 123,
         71, 144], device='cuda:1') torch.Size([16])
09/18/2023, 15:14:29# predicted of 5000: tensor([ 29,  67, 134, 164, 143, 133, 164,  27, 148, 139, 143, 143, 143, 123,
         71, 143], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:25# total batches: 8300
09/18/2023, 15:16:25# Epoch 7 | Train Loss: 1.5010 | Train Accuracy: 0.6113


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:16:25# labels of Validation: tensor([149, 150,  86, 119,  29, 140,  19,   5,  18,  11, 123, 158,  16,  28,
         24, 126], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:25# predicted of Validation: tensor([149, 162,  86, 162,  29, 140,  19,   5, 162, 162, 123, 162,  16,  28,
        162, 126], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:25# labels of 0: tensor([149, 150,  86, 119,  29, 140,  19,   5,  18,  11, 123, 158,  16,  28,
         24, 126], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:25# predicted of 0: tensor([149, 162,  86, 162,  29, 140,  19,   5, 162, 162, 123, 162,  16,  28,
        162, 126], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:53# labels of Validation: tensor([143,  11, 110,  29,  82,  10,  61, 158,  10,  87,  90,  94,  69,  66,
         32, 120], device='cuda:1') torch.Size([16])
09/18/2023, 15:16:53# predicted of Validation: tensor([162, 162, 114,  29,  82,  10,  61, 162,  10, 162, 114,  94,  69,  66,
         32,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:19:46# labels of 5000: tensor([131,  65,  35,   5, 145, 159,  29, 165, 156,  95,  46,  90,  93, 150,
        100,  45], device='cuda:1') torch.Size([16])
09/18/2023, 15:19:46# predicted of 5000: tensor([131,  65,  35,   5, 145, 159,  29, 165, 156,  95,  46, 114, 114, 150,
        100,  45], device='cuda:1') torch.Size([16])
09/18/2023, 15:21:37# total batches: 8300
09/18/2023, 15:21:37# Epoch 8 | Train Loss: 1.4926 | Train Accuracy: 0.6127


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:21:37# labels of Validation: tensor([107,   2, 112,  62,  85,  63, 113, 153, 115,  48,  23,  36,  58, 131,
         55,   5], device='cuda:1') torch.Size([16])
09/18/2023, 15:21:37# predicted of Validation: tensor([107, 116, 116,  62,  85,  63, 114, 153, 115, 150,  23, 116,  58, 131,
        116,   5], device='cuda:1') torch.Size([16])
09/18/2023, 15:21:37# labels of 0: tensor([107,   2, 112,  62,  85,  63, 113, 153, 115,  48,  23,  36,  58, 131,
         55,   5], device='cuda:1') torch.Size([16])
09/18/2023, 15:21:37# predicted of 0: tensor([107, 116, 116,  62,  85,  63, 114, 153, 115, 150,  23, 116,  58, 131,
        116,   5], device='cuda:1') torch.Size([16])
09/18/2023, 15:22:04# labels of Validation: tensor([166, 148,   3,   2, 152,  25,  48, 158, 140, 122,  13,   0,  59,   8,
         62, 136], device='cuda:1') torch.Size([16])
09/18/2023, 15:22:04# predicted of Validation: tensor([166, 148,   3, 116, 116,  25, 116, 116, 140, 122,  13,   0,  59,  90,
         62,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:25:00# labels of 5000: tensor([139, 166, 133,  11, 132,   0,  48,  86,  87, 155,   4, 135,  16, 166,
         74,  29], device='cuda:1') torch.Size([16])
09/18/2023, 15:25:00# predicted of 5000: tensor([139, 166, 133,   2, 132,   0,   2,  86,   2, 155,   2, 135,  16, 166,
          2,  29], device='cuda:1') torch.Size([16])
09/18/2023, 15:26:58# total batches: 8300
09/18/2023, 15:26:58# Epoch 9 | Train Loss: 1.4854 | Train Accuracy: 0.6140


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:26:58# labels of Validation: tensor([142,  10, 126,  97, 129,  69, 156,  38,  59, 104, 130,  97,  50,  30,
          1,  61], device='cuda:1') torch.Size([16])
09/18/2023, 15:26:58# predicted of Validation: tensor([ 38,  10, 126, 104, 129,  69, 156,  55,  91,  54, 130,  30,  50,  38,
        104,  61], device='cuda:1') torch.Size([16])
09/18/2023, 15:26:58# labels of 0: tensor([142,  10, 126,  97, 129,  69, 156,  38,  59, 104, 130,  97,  50,  30,
          1,  61], device='cuda:1') torch.Size([16])
09/18/2023, 15:26:58# predicted of 0: tensor([ 38,  10, 126, 104, 129,  69, 156,  55,  91,  54, 130,  30,  50,  38,
        104,  61], device='cuda:1') torch.Size([16])
09/18/2023, 15:27:29# labels of Validation: tensor([154,  40, 103,  62,  89, 143,  23, 115,  54, 114,  26,  20,  56,  17,
          5, 111], device='cuda:1') torch.Size([16])
09/18/2023, 15:27:29# predicted of Validation: tensor([154,  40,  68,  62,  89,  54,  23, 115,  54,  68,  26,  20,  56,  17,
          5,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:30:22# labels of 5000: tensor([ 31,  97, 111, 164,  77,  26,  93, 108,  70,  94, 101,  65,  33, 129,
        126,  40], device='cuda:1') torch.Size([16])
09/18/2023, 15:30:22# predicted of 5000: tensor([ 83,  83, 163, 121,  77,  26,  93, 131,  70,  94, 101,  65, 125, 129,
        126,  40], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:15# total batches: 8300
09/18/2023, 15:32:15# Epoch 10 | Train Loss: 1.4798 | Train Accuracy: 0.6144


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:32:15# labels of Validation: tensor([158, 140,  45,  82,  35,  17,  19,  95,  30,  32, 157,  12, 157, 157,
         72,  10], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:15# predicted of Validation: tensor([121, 140,  45,  82,  35,  17,  19,  95,  49,  32,  49,  49,  49, 109,
         72,  10], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:15# labels of 0: tensor([158, 140,  45,  82,  35,  17,  19,  95,  30,  32, 157,  12, 157, 157,
         72,  10], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:15# predicted of 0: tensor([121, 140,  45,  82,  35,  17,  19,  95,  49,  32,  49,  49,  49, 109,
         72,  10], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:43# labels of Validation: tensor([ 32,  71, 104, 131,  55, 141, 151, 110,  50,  25,  61,  44,  10, 160,
        132, 156], device='cuda:1') torch.Size([16])
09/18/2023, 15:32:43# predicted of Validation: tensor([ 32,  71, 121, 131,  49, 141,  49, 113,  50,  25,  61,  49,  10, 160,
        132,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:35:37# labels of 5000: tensor([ 73, 143,  65,  99, 119, 118, 165, 128,  86,  57,  53,   3,  56, 123,
        102,  11], device='cuda:1') torch.Size([16])
09/18/2023, 15:35:37# predicted of 5000: tensor([ 73,  76,  65,  99, 152, 118, 165, 128,  86,  76,  76,   3,  56, 123,
        102,  76], device='cuda:1') torch.Size([16])
09/18/2023, 15:37:34# total batches: 8300
09/18/2023, 15:37:34# Epoch 11 | Train Loss: 1.4752 | Train Accuracy: 0.6158


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:37:34# labels of Validation: tensor([ 28,  93,  63,  38,  69, 121, 103, 157,  93, 119, 140, 163,  29,  28,
         62, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:37:34# predicted of Validation: tensor([ 28,  93,  63, 124,  69, 116,  93, 124,  93, 124, 140, 124,  29,  28,
         62, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:37:34# labels of 0: tensor([ 28,  93,  63,  38,  69, 121, 103, 157,  93, 119, 140, 163,  29,  28,
         62, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:37:34# predicted of 0: tensor([ 28,  93,  63, 124,  69, 116,  93, 124,  93, 124, 140, 124,  29,  28,
         62, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:38:03# labels of Validation: tensor([ 60, 157,  54,   0,   8,  59,  31,  46,  21,  40,  65,   8,  15,  86,
        118,  19], device='cuda:1') torch.Size([16])
09/18/2023, 15:38:03# predicted of Validation: tensor([124, 124, 124,   0,  93, 108, 151,  46,  21,  40,  65,  93,  15,  86,
        118,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:41:00# labels of 5000: tensor([ 60, 118, 158,  31,   2, 117,  51,  98, 156, 101, 163,  95, 140,  54,
         45, 143], device='cuda:1') torch.Size([16])
09/18/2023, 15:41:00# predicted of 5000: tensor([ 57, 118,  34, 121,  75, 117,  51,  98, 156, 101,  34,  95, 140, 121,
         45,  75], device='cuda:1') torch.Size([16])
09/18/2023, 15:42:56# total batches: 8300
09/18/2023, 15:42:56# Epoch 12 | Train Loss: 1.4722 | Train Accuracy: 0.6155


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:42:56# labels of Validation: tensor([112,  47,  30, 155, 145,  72,   5, 127,  17, 138, 132,  94, 131, 124,
        157,  54], device='cuda:1') torch.Size([16])
09/18/2023, 15:42:56# predicted of Validation: tensor([162, 162, 152, 155, 145,  72,   5, 127,  17, 138, 132,  94, 131,  54,
        162, 162], device='cuda:1') torch.Size([16])
09/18/2023, 15:42:56# labels of 0: tensor([112,  47,  30, 155, 145,  72,   5, 127,  17, 138, 132,  94, 131, 124,
        157,  54], device='cuda:1') torch.Size([16])
09/18/2023, 15:42:56# predicted of 0: tensor([162, 162, 152, 155, 145,  72,   5, 127,  17, 138, 132,  94, 131,  54,
        162, 162], device='cuda:1') torch.Size([16])
09/18/2023, 15:43:25# labels of Validation: tensor([ 90,  38,  54, 156, 120, 132,  83, 123,  30,  94,  67,  62,  49,  20,
         29,   1], device='cuda:1') torch.Size([16])
09/18/2023, 15:43:25# predicted of Validation: tensor([113, 152,  76, 156, 120, 132,  76, 123, 152,  94,  67,  62, 152,  20,
         29,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:46:20# labels of 5000: tensor([  3,   0,  40, 148, 130,  47,  37,  98,   1, 109, 127, 105, 145, 120,
         28,  55], device='cuda:1') torch.Size([16])
09/18/2023, 15:46:20# predicted of 5000: tensor([  3,   0,  40, 148, 130,  92,  37,  98,  83,  83, 127, 105, 145, 120,
         28,  83], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:16# total batches: 8300
09/18/2023, 15:48:16# Epoch 13 | Train Loss: 1.4689 | Train Accuracy: 0.6171


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:48:16# labels of Validation: tensor([  9,  87, 112, 115,  71, 115,   0, 106, 162,  85,  40,  43,  17, 143,
         32, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:16# predicted of Validation: tensor([ 36,  36,  36, 115,  71, 115,   0, 106,  36,  85,  40,  43,  17,  36,
         32, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:16# labels of 0: tensor([  9,  87, 112, 115,  71, 115,   0, 106, 162,  85,  40,  43,  17, 143,
         32, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:16# predicted of 0: tensor([ 36,  36,  36, 115,  71, 115,   0, 106,  36,  85,  40,  43,  17,  36,
         32, 160], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:44# labels of Validation: tensor([123, 138,  53, 124,  48,  65,  32,  31,  89,  37, 110,  75,  40, 101,
         21,  29], device='cuda:1') torch.Size([16])
09/18/2023, 15:48:44# predicted of Validation: tensor([123, 138,  24,  36,  36,  65,  32,  36,  89,  37,  64,  36,  40, 101,
         21,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:51:39# labels of 5000: tensor([102,  99,  12,  83,  84, 149,  15, 141, 156,  53,  41, 163,  87,  56,
         99,  63], device='cuda:1') torch.Size([16])
09/18/2023, 15:51:39# predicted of 5000: tensor([102,  99, 151, 151,  84, 149,  15, 141, 156, 151,  41, 162, 162,  56,
         99,  63], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:31# total batches: 8300
09/18/2023, 15:53:31# Epoch 14 | Train Loss: 1.4668 | Train Accuracy: 0.6171


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:53:31# labels of Validation: tensor([165, 120, 141, 139, 132,  51,  52,  18, 146, 161, 154, 126, 155, 121,
         70,  35], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:31# predicted of Validation: tensor([165, 120, 141, 139, 132,  51,  52,  97, 146, 161, 154, 126, 155,  97,
         70,  35], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:31# labels of 0: tensor([165, 120, 141, 139, 132,  51,  52,  18, 146, 161, 154, 126, 155, 121,
         70,  35], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:31# predicted of 0: tensor([165, 120, 141, 139, 132,  51,  52,  97, 146, 161, 154, 126, 155,  97,
         70,  35], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:59# labels of Validation: tensor([ 81, 150,  11,  48, 149,   8, 146, 152,  30, 108, 159,   7,  66,  10,
        144,  98], device='cuda:1') torch.Size([16])
09/18/2023, 15:53:59# predicted of Validation: tensor([ 42,  97,  76,  97, 149,  68, 146,  42,  97, 131, 159,   7,  66,  10,
         97,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 15:56:46# labels of 5000: tensor([106,  44, 127,   4, 120, 127,   4, 160,  56,  72,   6,  17,  10,  31,
        106,  91], device='cuda:1') torch.Size([16])
09/18/2023, 15:56:46# predicted of 5000: tensor([106,  97, 127,  83, 120, 127,  83, 160,  56,  72,   6,  17,  10,  83,
        106,  59], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:01# total batches: 8300
09/18/2023, 15:58:01# Epoch 15 | Train Loss: 1.4646 | Train Accuracy: 0.6177


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 15:58:02# labels of Validation: tensor([120, 125, 152,  90,  55,  56,  33, 109,  96, 124,  21, 117,   1, 124,
         35,  76], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:02# predicted of Validation: tensor([120,  54, 158, 110,  54,  56,  54,  54,  96, 162,  21, 117,  54,  54,
         35,  54], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:02# labels of 0: tensor([120, 125, 152,  90,  55,  56,  33, 109,  96, 124,  21, 117,   1, 124,
         35,  76], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:02# predicted of 0: tensor([120,  54, 158, 110,  54,  56,  54,  54,  96, 162,  21, 117,  54,  54,
         35,  54], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:19# labels of Validation: tensor([121,  73,   8,  60, 144,   4, 105, 155,  34, 123,  58, 143,  97, 117,
        109,   2], device='cuda:1') torch.Size([16])
09/18/2023, 15:58:19# predicted of Validation: tensor([ 54,  73, 110,  14,  54,  54, 105, 155, 111, 123,  58,  54, 158, 117,
         54,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 16:00:09# labels of 5000: tensor([112, 131,  41, 118, 100,  71, 134, 116,  62,  18, 138, 136,  13,  77,
         98,  19], device='cuda:1') torch.Size([16])
09/18/2023, 16:00:09# predicted of 5000: tensor([150, 131,  41, 118, 100,  71, 134, 150,  62,  49, 138, 136,  13,  77,
         98,  19], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:06# total batches: 8300
09/18/2023, 16:01:06# Epoch 16 | Train Loss: 1.4627 | Train Accuracy: 0.6183


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 16:01:06# labels of Validation: tensor([ 99, 148, 121,  84,  96, 105,  99, 163,  92, 133, 163, 116, 162,  51,
         64,  16], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:06# predicted of Validation: tensor([ 99, 148, 143,  84,  96, 105,  99, 150, 150, 133, 124, 150, 150,  51,
         88,  16], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:06# labels of 0: tensor([ 99, 148, 121,  84,  96, 105,  99, 163,  92, 133, 163, 116, 162,  51,
         64,  16], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:06# predicted of 0: tensor([ 99, 148, 143,  84,  96, 105,  99, 150, 150, 133, 124, 150, 150,  51,
         88,  16], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:20# labels of Validation: tensor([120,  70, 105,  71, 146,  78,  71,  39, 119,  87,  60,   8,  40, 104,
         86, 116], device='cuda:1') torch.Size([16])
09/18/2023, 16:01:20# predicted of Validation: tensor([120,  70, 105,  71, 146,  78,  71,  39, 143, 150, 143,  88,  40, 150,
         86,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 16:02:48# labels of 5000: tensor([147, 114,  53,   3, 162, 143, 113, 117, 130, 120, 148,   1, 145,  63,
        143,  62], device='cuda:1') torch.Size([16])
09/18/2023, 16:02:48# predicted of 5000: tensor([147, 114, 163,   3, 152, 163, 113, 117, 130, 120, 148, 163, 145,  63,
        163,  62], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:45# total batches: 8300
09/18/2023, 16:03:45# Epoch 17 | Train Loss: 1.4615 | Train Accuracy: 0.6181


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 16:03:45# labels of Validation: tensor([125,  14, 128, 106,  62,  80, 108,  41, 159, 163,  80, 104, 135,  91,
         83, 107], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:45# predicted of Validation: tensor([111, 111, 128, 106,  62,  80,  91,  41, 159, 111,  80, 111, 135,  59,
        111, 107], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:45# labels of 0: tensor([125,  14, 128, 106,  62,  80, 108,  41, 159, 163,  80, 104, 135,  91,
         83, 107], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:45# predicted of 0: tensor([111, 111, 128, 106,  62,  80,  91,  41, 159, 111,  80, 111, 135,  59,
        111, 107], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:58# labels of Validation: tensor([155,  14,  18, 114,  72,   4,  39,  44,  75,  41,  41, 121,  18,  18,
        113, 164], device='cuda:1') torch.Size([16])
09/18/2023, 16:03:58# predicted of Validation: tensor([155, 111, 111,  64,  72, 111,  39, 111, 111,  41,  41, 111, 111, 111,
          8,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

09/18/2023, 16:05:26# labels of 5000: tensor([114,  42,  89, 152,  97, 153, 132,  98,  15,  56, 112, 114, 103,  34,
         33,   5], device='cuda:1') torch.Size([16])
09/18/2023, 16:05:27# predicted of 5000: tensor([ 64,  75,  89,  75,  75, 153, 132,  98,  15,  56,  75,  64,  64,  75,
         75,   5], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:24# total batches: 8300
09/18/2023, 16:06:24# Epoch 18 | Train Loss: 1.4603 | Train Accuracy: 0.6172


Validation:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 16:06:24# labels of Validation: tensor([119,  36,   5,  79, 130,  90,  85,  39, 100,  16,  87, 150, 133, 119,
        154, 104], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:24# predicted of Validation: tensor([  2,   2,   5,  79, 130, 114,  85,  39, 100,  16,   2,   2, 133,   2,
        154,   2], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:24# labels of 0: tensor([119,  36,   5,  79, 130,  90,  85,  39, 100,  16,  87, 150, 133, 119,
        154, 104], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:24# predicted of 0: tensor([  2,   2,   5,  79, 130, 114,  85,  39, 100,  16,   2,   2, 133,   2,
        154,   2], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:40# labels of Validation: tensor([146,  97,  62,  28, 113,  74, 134,   9,   8, 128,  58, 152,  94, 153,
        152, 135], device='cuda:1') torch.Size([16])
09/18/2023, 16:06:40# predicted of Validation: tensor([146,   2,  62,  28, 114,   2, 134,  12, 114, 128,  58,   2,  94, 153,
          2,

Training:   0%|          | 0/8300 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [11]:
# load the pretrained model
pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transH_50.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for data in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
#         print(f"data:{data[1]}")
        loss, accuracy, predicted = model_fn(data, model, criterion, device, count, which_type='test')
        labels = data[1].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')


# ======================================== handlig the output excel files ========================================
mapping_file = './new_mapping.txt'
label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        parts = line.strip().split(': ')
        label_mapping[int(parts[1])] = parts[0]
        
# 将映射后的标签应用到true和predicted标签列表
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

# mapped_true_labels_np = np.array(mapped_true_labels)
# mapped_predicted_labels_np = np.array(mapped_predicted_labels)

# print("mapped_true_labels 的形状:", mapped_true_labels_np.shape)
# print("mapped_predicted_labels 的形状:", mapped_predicted_labels_np.shape)

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-transH_50-graphSAGE-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-transH_50-graphSAGE-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

    
report_df.to_excel(report_path, index_label='Label')

mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"mapped_report:\n{mapped_report}")

Testing:   0%|          | 0/1038 [00:00<?, ?it/s]

09/18/2023, 16:07:29# labels of Test: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:29# predicted of Test: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:29# labels of 0: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:29# predicted of 0: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:29# labels: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:29# predicted: tensor([65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65],
       device='cuda:1') torch.Size([16])
09/18/2023, 16:07:43# labels of Test: tensor([162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


09/18/2023, 16:07:45# report path: classification_report/classification_report-transH_50-graphSAGE-0.xlsx
09/18/2023, 16:07:45# label path: classification_report/mapped_true_predicted_labels-transH_50-graphSAGE-0.xlsx


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


09/18/2023, 16:07:46# mapped_report:
                                                precision    recall  f1-score   support

T1003.001_0ef4cc7b-611c-4237-b20b-db36b6906554       1.00      1.00      1.00       100
    T1003.001_35d92515122effdd73801c6ac3021da7       1.00      1.00      1.00       100
    T1003.002_5a484b65c247675e3b7ada4ba648d376       1.00      1.00      1.00       100
    T1003.002_7fa4ea18694f2552547b65e23952cabb       1.00      1.00      1.00       100
    T1003.003_9f73269695e54311dd61dc68940fb3e1       0.00      0.00      0.00       100
    T1003.003_f049b89533298c2d6cd37a940248b219       0.00      0.00      0.00       100
        T1003_18f31c311ac208802e88ab8d5af8603e       1.00      1.00      1.00       100
        T1007_9d03c91bdae5a80f17f89c987942b5a8       1.00      1.00      1.00       100
    T1007_c6607391-d02c-44b5-9b13-d3492ca58599       0.00      0.00      0.00       100
        T1007_d6bb2a19da7246731ed9c44831b135f8       0.49      0.65      0.56     

  _warn_prf(average, modifier, msg_start, len(result))


### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [None]:
seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
torch.save(model.state_dict(), 'model3_initial(graphsage)/initial_weight.pth')

In [None]:
model.layer1.fc_self.weight

- Check if model really load the model_dict

In [None]:
model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
model.layer1.fc_self.weight

### test of valid and test part is ``graph``

- 60 APs in training x 10000times
- 5 APs in validation x 4 times
- 3 APs in test x 4 times
- Batch size = 4

In [None]:
seed = 8787
same_seeds(seed)

model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))

model = model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=total_steps)

criterion = nn.CrossEntropyLoss()
total_steps = 180

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    count = 0 
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        
        count += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, count, type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()
        num_batches += 1
        
#     scheduler.step()
    add_log_msg(f"total count: {count}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batched_g in dataloaders['valid']:
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, type=='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
#         torch.save(model.state_dict(), 'best_model.pth')
        torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, f"../checkpoint_GAT/best_model_{epoch}.pt")
    
    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("Early stopping")
            break

            
# Testing Part
model.eval()
total = 0
correct = 0

with torch.no_grad():
    for data in dataloaders['test']:
        loss, accuracy, predicted = model_fn(data, model, criterion, device, type=='test')
        labels = data[1].to(device)  # Assuming labels are the second element in the tuple
        
        add_log_msg(f"labels: {labels} {labels.shape}")
        add_log_msg(f"predicted: {predicted} {predicted.shape}")
        
        total += labels.size(0) # label.size(0) is the batch size
        correct += (predicted == labels).sum().item() 
        # (predicted == labels).sum() would return how many of them are equal; 
        # .item() would make the tensor to the regular value
        
#     print('Test Accuracy: %d %%' % (100 * correct / total))
add_log_msg(f'Test Accuracy: {100 * correct / total} %%')