# Test of GraphSAGE
- the version of larger hidden dimension
- use DGL
- predict `graphs`
- valid, test data are in the training dataset

In [1]:
import os
import dgl
import json
import torch
import torch as th
import dgl.nn as dglnn
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv, SAGEConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup

- check the GPU and assign the GPU by the best memory usage

In [2]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

# device = torch.device(f"cuda:{1}")
print(device)


cuda:2


## Fix the seed

In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [4]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]
        return data

def collate(samples):
    data_list = samples
    batched_graphs = []
    for data in data_list:
        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"])

        g.ndata['feat'] = th.tensor(data["node_feat"])
        g.edata['feat'] = th.tensor(data["edge_attr"])
        g.edata['label'] = th.tensor(data["labels"])  # Add edge labels to graph

        batched_graphs.append(g)
    
    return dgl.batch(batched_graphs)

In [36]:
datasets = ['repeat_train_320', 'valid', 'test']
# datasets = ['valid']
dataset_data = {}

for dataset_name in tqdm(datasets):
    file_path = f"../../data_processing/dgl/data_new/exp3/training_data_repeat/transR_50/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/exp3/training_data_repeat/transR_50/repeat_train_320.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp3/training_data_repeat/transR_50/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/exp3/training_data_repeat/transR_50/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [38]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

dataloaders = create_dataloaders(64)

- Turn the print message to a log file

In [39]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GraphSAGE_transE_50.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/1017_02:33_GraphSAGE_transE_50.log


### Model

In [10]:
class GraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(GraphSAGE, self).__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, 'pool')
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, 'pool')
        self.dropout = nn.Dropout(0.25)

    def forward(self, g, inputs):
        h = self.layer1(g, inputs)
        h = torch.relu(h)
#         h = self.dropout(h)
        h = self.layer2(g, h)
        return h

In [11]:
class MLPPredictor(nn.Module):
    def __init__(self, out_feats, out_classes):
        super().__init__()
        self.W = nn.Linear(out_feats*2, out_classes)

    def apply_edges(self, edges):
        h_u = edges.src['h']
        h_v = edges.dst['h']
        score = self.W(torch.cat([h_u, h_v], 1))
        return {'score': score}

    def forward(self, graph, h):
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']

In [12]:
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_classes):
        super().__init__()
        self.sage = GraphSAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes)
      
    def forward(self, g, node_feat, return_logits=False):
        h = self.sage(g, node_feat)
        logits = self.pred(g, h)
        
        return logits

- Model Forward  

In [13]:
def model_fn(batched_g, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
#     batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = batched_g.edata['label'].to(device)
    
    logits = model(batched_g, batched_g.ndata['feat'].float())

    loss = criterion(logits, labels)

    output = torch.softmax(logits, dim=1)
    preds = output.argmax(1)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
    
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")
        
    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")
        
    if count % 50000 == 0: 
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")
        
    return loss, accuracy, preds

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [14]:
seed = 8787
same_seeds(seed)

model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
torch.save(model.state_dict(), 'model3_initial(graphsage)/initial_weight.pth')

In [15]:
# model.layer1.fc_self.weight
model.sage.layer1.fc_self.weight

Parameter containing:
tensor([[-0.0682,  0.0153, -0.1769,  ...,  0.0375,  0.2321, -0.2812],
        [-0.2271,  0.2290, -0.1997,  ..., -0.0095,  0.1509,  0.2686],
        [-0.2743,  0.0406, -0.1222,  ...,  0.1036, -0.1590, -0.2555],
        ...,
        [-0.0758,  0.0461,  0.1273,  ...,  0.1367,  0.0671, -0.2605],
        [-0.2425, -0.1362,  0.2474,  ..., -0.3221, -0.0595,  0.3141],
        [ 0.0234, -0.2783,  0.2146,  ..., -0.3020, -0.1751,  0.0528]],
       requires_grad=True)

- Check if model really load the model_dict

In [17]:
model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
model.sage.layer1.fc_self.weight

Parameter containing:
tensor([[-0.0682,  0.0153, -0.1769,  ...,  0.0375,  0.2321, -0.2812],
        [-0.2271,  0.2290, -0.1997,  ..., -0.0095,  0.1509,  0.2686],
        [-0.2743,  0.0406, -0.1222,  ...,  0.1036, -0.1590, -0.2555],
        ...,
        [-0.0758,  0.0461,  0.1273,  ...,  0.1367,  0.0671, -0.2605],
        [-0.2425, -0.1362,  0.2474,  ..., -0.3221, -0.0595,  0.3141],
        [ 0.0234, -0.2783,  0.2146,  ..., -0.3020, -0.1751,  0.0528]],
       requires_grad=True)

In [18]:
mapping_file = './new_mapping.txt'
label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        parts = line.strip().split(': ')
        label_mapping[int(parts[1])] = parts[0]

In [24]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

# model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 5

# save the best model
best_val_loss = float('inf')
patience = 4  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.
early_stop = False

# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
        
    for batched_g in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            early_stop = True
#             add_log_msg("============================== Early stopping ==================================")
#             break
            
    if epoch == total_steps - 1 or early_stop == True:  # Last epoch
        pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt'
        model.load_state_dict(torch.load(pretrained_model_path))
        model.to(device)
        
        true_labels = []
        predicted_labels = []

        with torch.no_grad():
            for batched_g in tqdm(dataloaders['train'], desc="Generating Train Report", position=0, leave=True):
                loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, which_type='train')
                labels = batched_g.edata['label'].to(device)
                
                true_labels.extend(labels.cpu().numpy())
                predicted_labels.extend(predicted.cpu().numpy())

        # Assuming that the mapping and other things are defined earlier or globally
        mapped_true_labels = [label_mapping[label] for label in true_labels]
        mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

        # Generate classification report
        train_report = classification_report(mapped_true_labels, mapped_predicted_labels)
        add_log_msg(f"Train Classification Report at Epoch {epoch}:\n{train_report}")

    
    if early_stop:
        add_log_msg("============================== Early stopping ==================================")
        break
        

  0%|          | 0/1 [00:00<?, ?it/s]

Training:   0%|          | 0/2063 [00:00<?, ?it/s]

10/17/2023, 01:13:09# total batches: 2063
10/17/2023, 01:13:09# Epoch 0 | Train Loss: 1.3894 | Train Accuracy: 0.7114


Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 01:13:09# labels of Validation: tensor([ 43,  43,  36,  97,  31,   9, 155,  65,  65,  65, 155, 155, 155, 136,
        136, 136, 136, 136, 136, 136,  59,  59,  59,  49,  24,  29,  29,  29,
         29,  29,  29,  29,  29,  29,  29,   8,   8, 121, 130, 130, 130, 130,
        130, 142, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
        118, 118, 118, 118, 118, 118, 118, 118, 118,  30,  63,  63,  63,  63,
         19,  19,  19,  19,  19, 107, 107, 107, 107, 107, 107, 107, 107,  55,
        116,  91,  91,  91, 122, 122, 122, 122, 122, 122, 122,  69,  69,  69,
         69,  69,  69,  84,  84,  84,  84,  84,  84,  84,  84,  84,  28,  28,
         28,  28,  28,  28,  28,  28,  28,  28,  28,  28,  97, 162, 152, 121,
        163, 130, 130, 130, 130, 130,  78,  78,  78,  78,  78, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127,  77,  77,  77,  77,  77,  77,  77,  77,  40,  40,  40,
         40,  40,  4

10/17/2023, 01:13:17# Validation Loss: 0.3372 | Validation Accuracy: 0.9406



Generating Train Report:   0%|          | 0/2063 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


10/17/2023, 01:16:16# Train Classification Report at Epoch 0:
                                                precision    recall  f1-score   support

T1003.001_0ef4cc7b-611c-4237-b20b-db36b6906554       1.00      1.00      1.00     51200
    T1003.001_35d92515122effdd73801c6ac3021da7       0.94      0.92      0.93      4800
    T1003.002_5a484b65c247675e3b7ada4ba648d376       1.00      1.00      1.00      4000
    T1003.002_7fa4ea18694f2552547b65e23952cabb       0.88      0.90      0.89     12000
    T1003.003_9f73269695e54311dd61dc68940fb3e1       0.00      0.00      0.00       800
    T1003.003_f049b89533298c2d6cd37a940248b219       0.00      0.00      0.00       800
        T1003_18f31c311ac208802e88ab8d5af8603e       1.00      0.83      0.91      4800
        T1007_9d03c91bdae5a80f17f89c987942b5a8       0.99      1.00      1.00      4800
    T1007_c6607391-d02c-44b5-9b13-d3492ca58599       0.00      0.00      0.00       800
        T1007_d6bb2a19da7246731ed9c44831b135f8       0.00

  _warn_prf(average, modifier, msg_start, len(result))


In [28]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

# model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model = Model(in_features=50, hidden_features=64, out_features=256, num_classes=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 10

# save the best model
best_val_loss = float('inf')
patience = 4  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.
early_stop = False

# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0

    for batched_g in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            early_stop = True
#             add_log_msg("============================== Early stopping ==================================")
#             break
            
    if epoch == total_steps - 1 or early_stop == True:  # Last epoch
        pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt'
        model.load_state_dict(torch.load(pretrained_model_path))
        model.to(device)
        
        true_labels = []
        predicted_labels = []

        with torch.no_grad():
            for batched_g in tqdm(dataloaders['train'], desc="Generating Train Report", position=0, leave=True):
                loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, which_type='train')
                labels = batched_g.edata['label'].to(device)
                
                true_labels.extend(labels.cpu().numpy())
                predicted_labels.extend(predicted.cpu().numpy())

        # Assuming that the mapping and other things are defined earlier or globally
        mapped_true_labels = [label_mapping[label] for label in true_labels]
        mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

        # Generate classification report
        train_report = classification_report(mapped_true_labels, mapped_predicted_labels)
        add_log_msg(f"Train Classification Report at Epoch {epoch}:\n{train_report}")

    
    if early_stop:
        add_log_msg("============================== Early stopping ==================================")
        break
        

  0%|          | 0/10 [00:00<?, ?it/s]

Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:38:12# total batches: 4125
10/17/2023, 01:38:12# Epoch 0 | Train Loss: 0.9295 | Train Accuracy: 0.8099


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:38:12# labels of Validation: tensor([118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
        118, 118, 118, 118, 118, 118, 118,  23,  23,  23,  23,  23, 140, 140,
        140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,  36,  42, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161,
        161, 161, 16

10/17/2023, 01:38:20# Validation Loss: 0.2553 | Validation Accuracy: 0.9471

10/17/2023, 01:38:20# Find a better model!!


Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:39:39# total batches: 4125
10/17/2023, 01:39:39# Epoch 1 | Train Loss: 0.2188 | Train Accuracy: 0.9496


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:39:39# labels of Validation: tensor([166, 166, 166, 166, 166, 166,  85,  85,  85,  85,  85,  85,  85,  85,
         85,  85,  85, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
        140, 140,  98,  80,  80,  80,  80,  80,  80, 166, 166, 166, 166, 166,
        166,  92, 157,  86,  86, 101, 101, 101, 101, 101, 101, 101, 101, 101,
        101, 101, 101, 101, 101, 101, 101, 130, 130, 130, 130, 130,   3,   3,
          3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 13

10/17/2023, 01:39:47# Validation Loss: 0.2015 | Validation Accuracy: 0.9511

10/17/2023, 01:39:47# Find a better model!!


Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:41:04# total batches: 4125
10/17/2023, 01:41:04# Epoch 2 | Train Loss: 0.1954 | Train Accuracy: 0.9515


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:41:04# labels of Validation: tensor([122, 122, 122, 122, 122, 122, 122,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82, 145, 145, 145, 145, 145, 145, 166, 166,
        166, 166, 166, 166,  72,  72, 152,  85,  85,  85,  85,  85,  85,  85,
         85,  85,  85,  85,  10,  10,  48,  53,  50,  50,  50,  47, 116, 100,
        100, 100, 100, 100, 100, 162,  85,  85,  85,  85,  85,  85,  85,  85,
         85,  85,  85, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105,
        105, 105,  97, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 13

10/17/2023, 01:41:12# Validation Loss: 0.1944 | Validation Accuracy: 0.9513

10/17/2023, 01:41:12# Find a better model!!


Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:42:31# total batches: 4125
10/17/2023, 01:42:31# Epoch 3 | Train Loss: 0.1882 | Train Accuracy: 0.9522


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:42:31# labels of Validation: tensor([ 14,  65,  65,  ...,  81,  49, 104], device='cuda:2') torch.Size([4176])
10/17/2023, 01:42:31# predicted of Validation: tensor([ 47,  65,  65,  ..., 110,  47, 110], device='cuda:2') torch.Size([4176])
10/17/2023, 01:42:31# labels of 0: tensor([ 14,  65,  65,  ...,  81,  49, 104], device='cuda:2') torch.Size([4176])
10/17/2023, 01:42:31# predicted of 0: tensor([ 47,  65,  65,  ..., 110,  47, 110], device='cuda:2') torch.Size([4176])
10/17/2023, 01:42:39# Validation Loss: 0.2002 | Validation Accuracy: 0.9491



Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:44:02# total batches: 4125
10/17/2023, 01:44:02# Epoch 4 | Train Loss: 0.1858 | Train Accuracy: 0.9523


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:44:03# labels of Validation: tensor([153, 153, 153, 159, 159, 159, 159, 159, 159, 159, 159, 159, 159,  80,
         80,  80,  80,  80,  80, 120, 120, 120, 120, 120, 120,  65,  65,  65,
         65,  65,  65, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 149,
        149, 149, 149, 149, 149, 102, 102,   0,   0,   0,   0,   0,   0, 147,
         65, 147, 147, 147, 147,  37,  37,  37,  37,  37, 163,  71,  71,  71,
         71,  71,  71,  65,  46,  46,  62,  62,  62,  62,  62,  62,  11,  58,
         58,  58,  58,  58,  58,  58,  77,  77,  77,  77,  77,  77,  77,  77,
        129, 129, 129, 129, 129, 129, 129,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82,  42, 137, 137, 137, 137, 137,  74,   4,
          0,   0,   0,   0,   0,   0,  48, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 13

10/17/2023, 01:44:11# Validation Loss: 0.1882 | Validation Accuracy: 0.9517

10/17/2023, 01:44:11# Find a better model!!


Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:45:38# total batches: 4125
10/17/2023, 01:45:38# Epoch 5 | Train Loss: 0.1844 | Train Accuracy: 0.9527


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:45:38# labels of Validation: tensor([ 56,  56,  56,  56,  56,  56,  39,  39,  39,  68,  68, 111,  65,  65,
         65,  65,  65, 135, 135,   2, 157,  12,  59,  59,  59,  64,  64,  95,
         95,  95,  36,  65,  65,  65,  65,  70,  70,  70,  70,  70,  70,  70,
         70,  70,  70,  70,  70,  70,  70,  70,  70,  70,  70,  70,  70,  70,
         70,  70,  48, 133, 133, 133, 133, 133, 133, 133, 133,  80,  80,  80,
         80,  80,  80,  30,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7, 137, 137, 137, 137, 137, 144,  23,  23,  23,  23,  23, 108, 108,
        108, 108,  88,  88, 100, 100, 100, 100, 100, 100,  67,  67,  67,  67,
         67,  59,  5

Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:47:06# total batches: 4125
10/17/2023, 01:47:06# Epoch 6 | Train Loss: 0.1840 | Train Accuracy: 0.9527


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:47:06# labels of Validation: tensor([ 87,  50,  50,  50,  95,  95,  95, 121,  32,  32,  32,  32,  32,  32,
         32,  32,  32,  32,  32,  32,  32,  32,  32,  99,  99,  99,  99,  99,
         99,  99,  99,  99,  65,  99,  99,  99,  99,  99,  99,  99,  99,  99,
         99,  99,  99,  99,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
          3,   3,   3,   3,   3, 153, 153, 153,  13,  13,  13,  13,  13, 160,
        160, 160, 160, 160, 160, 160, 160, 129, 129, 129, 129, 129, 129, 129,
         65,  65, 135, 135,  69,  69,  69,  69,  69,  69,  37,  37,  37,  37,
         37,  63,  63,  63,  63,  44, 150, 125,  54, 105, 105, 105, 105, 105,
        105, 105, 105, 105, 105, 105, 105, 105,  38,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  6

10/17/2023, 01:47:14# Validation Loss: 0.1798 | Validation Accuracy: 0.9533

10/17/2023, 01:47:14# Find a better model!!


Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:48:31# total batches: 4125
10/17/2023, 01:48:31# Epoch 7 | Train Loss: 0.1806 | Train Accuracy: 0.9535


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:48:31# labels of Validation: tensor([ 31,  65, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123,
        123, 123, 123,  72,  72,  13,  13,  13,  13,  13,  87,  26,  26,  26,
         26,  26,  26,  98,  80,  80,  80,  80,  80,  80, 146, 146, 146, 146,
        146, 146,  87, 103, 103, 121,  41,  41,  41,  41,  41,  48, 122, 122,
        122, 122, 122, 122, 122, 143, 103, 103, 162,  65, 123, 123, 123, 123,
        123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 123, 104,  64,  64,
        126, 126, 126, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
        132, 132, 13

10/17/2023, 01:48:39# Validation Loss: 0.1884 | Validation Accuracy: 0.9518



Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:49:56# total batches: 4125
10/17/2023, 01:49:56# Epoch 8 | Train Loss: 0.1810 | Train Accuracy: 0.9532


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:49:56# labels of Validation: tensor([162, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 147,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65, 147, 147, 147, 147,  30,
         12,  77,  77,  77,  77,  77,  77,  77,  77,   6,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,   6,   6,   6,   6,   6,   6,
          6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   6,   2, 159,
        159, 159, 159, 159, 159, 159, 159, 159, 159,  29,  29,  29,  29,  29,
         29,  29,  29,  29,  29,  10,  10, 160, 160, 160, 160, 160, 160, 160,
        160,  59,  59,  59, 111,  89,  89,  31,  10,  10,  63,  63,  63,  63,
         49,  53,  58,  58,  58,  58,  58,  58,  58,  63,  63,  63,  63, 113,
        113,  90,  90,  41,  41,  41,  41,  41,  58,  58,  58,  58,  58,  58,
         58,  88,  88, 100, 100, 100, 100, 100, 100,  51,  65,  65,  51,  51,
         51,  51, 12

Training:   0%|          | 0/4125 [00:00<?, ?it/s]

10/17/2023, 01:51:29# total batches: 4125
10/17/2023, 01:51:29# Epoch 9 | Train Loss: 0.1795 | Train Accuracy: 0.9533


Validation:   0%|          | 0/516 [00:00<?, ?it/s]

10/17/2023, 01:51:29# labels of Validation: tensor([ 51,  65,  51,  51,  51,  51,  93,  93,  29,  29,  29,  29,  29,  29,
         29,  29,  29,  29, 114, 114,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65, 149, 149, 149, 149, 149, 149,
        149, 149, 149, 149, 149, 149, 149, 149, 149, 149, 138, 138, 138, 138,
        138, 138, 138, 138, 138, 138, 125, 144, 126, 126, 126, 153, 153, 153,
        122, 122, 122, 122, 122, 122, 122,   4,  61,  61,  61,  58,  58,  58,
         58,  58,  58,  58,  66,  66,  66,  66,  66,  66,  66,  66,  66,  66,
        118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
        118, 118, 118, 118, 118, 118, 118,  99,  99,  99,  99,  99,  99,  99,
         99,  99,  65,  99,  99,  99,  99,  99,  99,  99,  99,  99,  99,  99,
         99,  99,   5,   5,   5, 103, 103, 116, 101, 101, 101, 101, 101, 101,
        101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 138, 138, 138, 138,
        138, 138, 13

10/17/2023, 01:51:37# Validation Loss: 0.1784 | Validation Accuracy: 0.9537

10/17/2023, 01:51:37# Find a better model!!


Generating Train Report:   0%|          | 0/4125 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


10/17/2023, 01:54:32# Train Classification Report at Epoch 9:
                                                precision    recall  f1-score   support

T1003.001_0ef4cc7b-611c-4237-b20b-db36b6906554       1.00      1.00      1.00     51200
    T1003.001_35d92515122effdd73801c6ac3021da7       1.00      1.00      1.00      4800
    T1003.002_5a484b65c247675e3b7ada4ba648d376       1.00      1.00      1.00      4000
    T1003.002_7fa4ea18694f2552547b65e23952cabb       1.00      1.00      1.00     12000
    T1003.003_9f73269695e54311dd61dc68940fb3e1       0.02      0.03      0.02       800
    T1003.003_f049b89533298c2d6cd37a940248b219       0.02      0.02      0.02       800
        T1003_18f31c311ac208802e88ab8d5af8603e       1.00      1.00      1.00      4800
        T1007_9d03c91bdae5a80f17f89c987942b5a8       1.00      1.00      1.00      4800
    T1007_c6607391-d02c-44b5-9b13-d3492ca58599       0.02      0.01      0.02       800
        T1007_d6bb2a19da7246731ed9c44831b135f8       0.11

  _warn_prf(average, modifier, msg_start, len(result))


In [42]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

# model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model = Model(in_features=50, hidden_features=128, out_features=256, num_classes=167)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 7

# save the best model
best_val_loss = float('inf')
patience = 4  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.
early_stop = False

# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0

    for batched_g in tqdm(dataloaders['repeat_train_320'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            early_stop = True
#             add_log_msg("============================== Early stopping ==================================")
#             break
            
    if epoch == total_steps - 1 or early_stop == True:  # Last epoch
        pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt'
        model.load_state_dict(torch.load(pretrained_model_path))
        model.to(device)
        
        true_labels = []
        predicted_labels = []

        with torch.no_grad():
            for batched_g in tqdm(dataloaders['train'], desc="Generating Train Report", position=0, leave=True):
                loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, which_type='train')
                labels = batched_g.edata['label'].to(device)
                
                true_labels.extend(labels.cpu().numpy())
                predicted_labels.extend(predicted.cpu().numpy())

        # Assuming that the mapping and other things are defined earlier or globally
        mapped_true_labels = [label_mapping[label] for label in true_labels]
        mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

        # Generate classification report
        train_report = classification_report(mapped_true_labels, mapped_predicted_labels)
        add_log_msg(f"Train Classification Report at Epoch {epoch}:\n{train_report}")

    
    if early_stop:
        add_log_msg("============================== Early stopping ==================================")
        break
        

  0%|          | 0/7 [00:00<?, ?it/s]

Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 02:39:33# labels of 5000: tensor([ 53, 152, 157, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
        118, 118, 118, 118, 118, 118, 118, 118, 118, 118,  74, 119, 121,  60,
        125,  11,  75, 121, 162,  57,  54,  75,  74,  11, 152, 104,  76,  57,
         74, 125, 109, 111, 163,  87,  14,  49, 151,   9, 143,  18, 143,  38,
        111,  36,   2,  33, 151, 111,  11,  57,  53,  31,   9,  18,  18,  31,
         97,  83,  24,  76, 111,   9,  92,  34,  74,  87,  92,  92,  24, 151],
       device='cuda:2') torch.Size([84])
10/17/2023, 02:39:33# predicted of 5000: tensor([14, 14, 14, 52, 52, 17, 17, 17, 52, 52, 52, 52, 52, 52, 17, 52, 17, 17,
        17, 17, 17, 17, 17, 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 76,
        76, 14, 76, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 76, 14, 14, 14, 14, 14, 14, 14, 76, 14, 14, 14, 14, 14, 14, 14, 76,
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14], device='cuda:2') torch.Size([84])


10/17/2023, 03:02:39# labels of 50000: tensor([ 75, 163,  74,  38,  75, 119,  49,   2, 164, 112,  36, 162,  83,  47,
        125,  44,  24,   1,  74,   9, 151, 109, 104,  33, 151, 142,  49,  12,
        143,  76,  76,  87, 142,  31,   4,  49,  57,  38, 119, 125,  36, 164,
        112,  14, 151, 104,   4,  55,  36,  12,  44,  53,  34, 121,  76,  53,
         48,  87,  53,  42, 142,  34,  11, 111], device='cuda:2') torch.Size([64])
10/17/2023, 03:02:39# predicted of 50000: tensor([ 83,  53, 162,  49,  47,  42,  57,  49,  75, 152, 104,  47, 125, 144,
        164,  57,  54,  42,  55,  42,  60,  49, 142,  42,  49, 125,  49,  31,
         31, 158, 125,  36,  42,  31,  54, 124, 116, 164, 109, 125,  57,  83,
         36,  60, 116,  42, 152,  60,   4,  47,  57,  53, 151,  47,   4, 158,
         49, 142, 125,  49,   4,  54,  48, 164], device='cuda:2') torch.Size([64])
10/17/2023, 03:05:03# labels of 55000: tensor([162,  81,  53, 143,  60, 143,   9, 116, 121,  83,  36,  97, 142,  34,
          9,

10/17/2023, 03:27:15# labels of 100000: tensor([ 47, 150, 150,  75,   2,  81, 143, 143,  14, 125,  48,  31,  54,  57,
        109,  55,  33, 151,  53,  55,  75,  74,  92,   9, 121,  18,   1, 152,
        158, 152,  48,   2,  38, 150, 162, 119,  53,  49,  47,  31, 142, 112,
         18,  81, 143,  30, 125, 157, 163,  36,  54, 117, 117, 117,  44, 116,
         47,  57,  87,  76,  97,  33, 116,  97,   2, 119], device='cuda:2') torch.Size([66])
10/17/2023, 03:27:15# predicted of 100000: tensor([ 14,  76,  92,  75,   4, 158, 151, 109, 162,  57,   2, 124,  57, 112,
        164,  81,  55,  74,  53,  55,  38,  57, 109,   4, 162,  24,  57, 152,
        119,  81, 125, 152,  75,  92, 162,  53,  75,  49,  48,  34, 142, 121,
         49,  74,  55, 125,  24, 157, 164,  49,  12, 117, 117, 117,   1,  87,
        162,  49,  36,  12, 111,  18,  49,  31,   4,  57], device='cuda:2') torch.Size([66])
10/17/2023, 03:29:40# labels of 105000: tensor([ 48,   4, 111,  31,  47,   4,  75,  42,  33,  33,  55, 125,

10/17/2023, 03:54:05# labels of 150000: tensor([ 36,   9, 144,  47,  55,  30,   9, 152,  24,   1, 152, 144, 158, 116,
        164,  18,  57,  14, 158,  55, 112,   9,  12,  97,  14, 116, 112,  33,
         57,  97,  44, 109,  14, 152,  11,  31,  30, 157,  33, 144, 111,   4,
         83, 119, 119,  49, 144,  36,   2,  42,  76, 142, 116,  83,  44, 162,
         14,  33,  47,   1, 163,  30,  12, 111], device='cuda:2') torch.Size([64])
10/17/2023, 03:54:05# predicted of 150000: tensor([142,  75,  92,  53,  55,  87, 152, 163, 158,  92,   2,  87, 109, 121,
         49,  60,  87, 109, 142,  97, 112,   9, 125,  75,   9,   4,  42,  53,
         12,  97, 142,  60,  14,  33,  11,   4,   9,  38, 144,  75, 111, 158,
        163, 119,  74, 121, 121, 142,  33, 158, 104,  60, 116,  87,  83, 109,
         97, 152, 144,  34,  60, 144, 150, 152], device='cuda:2') torch.Size([64])
10/17/2023, 03:56:49# labels of 155000: tensor([ 55, 116,  97,  54, 143, 164,  33,  53, 116,  75, 124,   2,  75, 158,
         

10/17/2023, 04:19:16# labels of 195000: tensor([  1,  92,  97,  55, 142,  60,  31, 151, 109, 111,  92,  12,  60,  74,
         54, 111,  11,  11,  81,  11, 111,  81,  11,   2, 125, 111,  38,  36,
        144,  14, 112,   2,  44,  92,  87, 152,  11,  97,  55,  87,  83,  49,
        121, 158,  24,  53,  97, 150, 111,  48,  57, 164, 121,   1,  44, 144,
        152,  87, 104,  12,  33,  87, 164,  83], device='cuda:2') torch.Size([64])
10/17/2023, 04:19:16# predicted of 195000: tensor([ 44,  57,  97,  55, 142,  38,  87,  60, 104,  48,  44,  60,  11, 157,
         76, 109,  49,  47,  11,  53, 111,  83, 142,  36, 158,  33,  53, 163,
        152,  74, 112,  53, 119,  30,  44,  53, 116,  83, 119,  74,  75,  49,
         12, 150,  48, 144,   1, 150,  53, 163,  57,  53, 121,  76,  36,  92,
          2,  31,   4,  12,  30,  30,  34, 116], device='cuda:2') torch.Size([64])
10/17/2023, 04:21:56# labels of 200000: tensor([143, 112, 124,  55,  49, 162, 119,  55,  74,  42,   9, 111,  92,  33,
        1

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 04:28:43# labels of Validation: tensor([110, 110,  36,  56,  56,  56,  56,  56,  56,  28,  28,  28,  28,  28,
         65,  28,  28,  28,  28,  28,  28,  28,  67,  67,  67,  67,  67, 163,
        104,  68,  68, 136, 136, 136, 136, 136, 136, 136, 120, 120, 120, 120,
        120, 120,  67,  67,  67,  67,  67,  91,  91,  91, 102, 102,  47,  29,
         29,  29,  29,  29,  29,  29,  29,  29,  29,  61,  61,  61,   4, 124,
         46,  46, 153, 153, 153,  82,  82,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82,  82,  82,  82,  65,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  56,  56,  56,  56,  56,  56,  41,  41,  41,
         41,  41,  88,  88,  50,  50,  50, 162,   8,   8, 157,  85,  85,  85,
         85,  85,  85,  85,  85,  85,  85,  85, 157, 146, 146, 146, 146, 146,
        146, 100, 100, 100, 100, 100, 100,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,   5,
          5,   5, 11

10/17/2023, 04:28:52# Validation Loss: 0.2582 | Validation Accuracy: 0.9557

10/17/2023, 04:28:52# Find a better model!!


Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 04:31:23# labels of 5000: tensor([ 42, 152, 162,  48, 150,  11,  33,  34,  47,  97,  12, 142,  92, 121,
         60, 163,  75,  55,  87, 158, 158, 116,  18,   2,  38,  24,  54,  36,
        164,  49, 164, 158,   4,  47, 162,  14, 144, 116,  18,  74,  60, 112,
         42,   2,  54,  48,  48,  18, 125, 152,  92,  31, 121,  74, 151, 163,
        125, 142,  75,  31,  74,  53, 162, 109], device='cuda:2') torch.Size([64])
10/17/2023, 04:31:23# predicted of 5000: tensor([158,   2,  54, 109,  38, 121,  24, 124,  24, 109, 121,  57,  53,  14,
         57,  24,  12,  57, 119, 116, 158,  55,  49,  31, 142,  38,   1,  11,
         38,  48,  87, 121,  53, 124,  74,  14, 164, 116,  33,   9,  60,  54,
         18,  31,  54,  48,  30, 158,  92, 104,  55,  31,  74,  47, 124,  53,
         34, 104,  47,  47, 158,  48, 158,  76], device='cuda:2') torch.Size([64])
10/17/2023, 04:33:52# labels of 10000: tensor([ 30, 116, 109,   4, 151,  75, 162,  34, 162, 104,  14,  97,  83, 157,
         83, 1

10/17/2023, 04:53:54# labels of 50000: tensor([111,  87,  92,  53,  24,  54, 111, 142,  36,  74, 104,  74, 164,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  16,  16,  34,  54,  49,   1,  54,  12, 116,  60,  48,
         54, 121, 121, 158, 121, 124, 164, 109, 144,  48, 112, 150,  49, 121,
        109,  11, 121,  42,  30,  92,  48,  74,  14,  75, 142, 142,   2, 150,
         60,   4, 111,  14,  76,  87,  38,  75,   4,  57,  81,  33, 150],
       device='cuda:2') torch.Size([83])
10/17/2023, 04:53:54# predicted of 50000: tensor([ 75, 143, 158,  44,  34,   1,  31, 124,  36,  60, 104, 150, 111,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  16,  16,  34,  54, 163, 164,  42,  12, 163,  18,   1,
        163, 111, 121,  48,  60, 125,  55, 111,  44,  55,  11,  48,  49,  30,
          1,  47, 121,  54, 125, 125, 163, 162, 142,  54,  92,  55,  24, 157,
         42, 111, 111,  

10/17/2023, 05:19:02# labels of 95000: tensor([  4,   4, 150,  87,  30,  76,  18,  81,  92, 116,  81, 143, 109, 143,
         42, 112,  12,  54, 152, 116, 143,  14,  48,   4, 157, 125,   1, 151,
        157, 119,   1, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 157,  33, 143,   2, 157,
          2, 164,  31,  87,  83,  81,  18, 151,  92,  38,  97,  57, 151, 111,
         54, 112, 109,  48, 144,  42, 119,  36,  55,  36, 158,  11, 116],
       device='cuda:2') torch.Size([83])
10/17/2023, 05:19:02# predicted of 95000: tensor([  4,  76,  12,  87,   4,  81, 116,   2, 152,   1,  81,  48,  54, 143,
        152, 112, 124, 104, 152,  74, 157,  49, 163,  57, 157, 152,   1,   4,
        157, 111,  60, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127,  55,  47, 143,  75,  54,
         48,   4,  75, 116, 144, 112,  18,  75,  49, 116,  97,  81,  83, 157,
        162, 142,  12,  

10/17/2023, 05:42:07# labels of 140000: tensor([ 30, 109, 119,  18,  34,  36,  48,  42,  92, 104,  97,   1,   1,  31,
         24, 119,  49,  12, 116, 112,   2,  48,   4,  34, 112, 111, 109,  42,
          4, 143,   2, 104,  53,  44,  87, 162, 142,  44,  76,  42,  92,  36,
        162,  36,  74,  12,  12, 150,  97, 159, 159, 159, 159, 159, 159, 159,
        159, 159, 159,  47,  38,  92,  14, 150, 121,  53, 150,  33,  55,  75,
        162,  36,   4], device='cuda:2') torch.Size([73])
10/17/2023, 05:42:07# predicted of 140000: tensor([ 87, 121, 142, 151, 143,  87, 125,  42,  60,  83,  75, 158,  92, 104,
        152,  47,  49, 164, 142,  18,  42,  48, 121,  34, 152,  44, 125,  87,
         60, 143, 112,  31,  12,  34,  12,   1,  49, 164, 121,   1,   2,  34,
          9,  38, 150,  38,  12,  24,  76, 159, 159, 159, 159, 159, 159, 159,
        159, 159, 159,  47,  38, 163,  42, 111,  34,   9, 152,  42,  75,  44,
        158,  83,  87], device='cuda:2') torch.Size([73])
10/17/2023, 05:44:39#

10/17/2023, 06:07:33# labels of 185000: tensor([ 55,  75,  76,  38,  24, 150,  60,  57,   4,  54, 121,  57,  74, 121,
         47,  47, 121,  81, 163, 124, 152,  42, 104,  12,   2,   9,  11,  47,
        157,  24,   9,  38, 112, 111, 124, 151, 144,  57,  48, 119, 151, 111,
        151,   9,  60,  34,  42,  18,  92,  55,  74,   4,  81, 158,  60, 150,
        144, 142,  57, 162,  75, 163,   4,   9], device='cuda:2') torch.Size([64])
10/17/2023, 06:07:33# predicted of 185000: tensor([ 30,  34, 143, 121, 158,  42,  76,  24,  30,  34, 162,  18,  24, 119,
         42, 164, 121,  11,  11, 124,  53,  49,  75,  60,   2,  49,   2,  34,
          2, 124,  38, 162,  38,   2,  33,  54,  87,  57,  57,  87,  14, 142,
        144,   9,  60,  34, 150, 112,  55,  34,  48, 152,  38, 158,  60, 163,
        124, 116,  34,  34,  42, 125, 150, 104], device='cuda:2') torch.Size([64])
10/17/2023, 06:10:05# labels of 190000: tensor([ 55, 162,  12,  18,  74,  14,  53,  97,   2,  44,   1, 157, 152,  44,
        1

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 06:22:08# labels of Validation: tensor([ 11,  92, 113,  ..., 122, 122, 122], device='cuda:2') torch.Size([2882])
10/17/2023, 06:22:08# predicted of Validation: tensor([109, 104, 158,  ..., 122, 122, 122], device='cuda:2') torch.Size([2882])
10/17/2023, 06:22:08# labels of 0: tensor([ 11,  92, 113,  ..., 122, 122, 122], device='cuda:2') torch.Size([2882])
10/17/2023, 06:22:08# predicted of 0: tensor([109, 104, 158,  ..., 122, 122, 122], device='cuda:2') torch.Size([2882])
10/17/2023, 06:22:17# Validation Loss: 0.2597 | Validation Accuracy: 0.9550



Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 06:24:52# labels of 5000: tensor([ 34,  75, 150,  49, 152,  49, 116,   1,  14,  76,  87, 124,   4,  55,
        158,  11,  38, 162,  74,  36,   9,  75,  54, 124, 124, 119, 112, 111,
         53,   2,  57, 111,  97,  75, 104, 125,  30,  18, 143, 150,  92, 151,
        124, 162, 162,  24, 151,  42,  74,  33, 151,  83,  30, 109, 162, 119,
        109,  42, 158,  81,  83, 157,  54,  83], device='cuda:2') torch.Size([64])
10/17/2023, 06:24:52# predicted of 5000: tensor([ 34, 125, 142,  18,   4,  75,   2,  31,  36, 142,  36,  18,  57,  49,
        158, 109, 124, 150,  74,   1, 162,   9, 125, 116,  48,  81,  11,   9,
         12,  42, 162, 111,  42, 125, 158,  54, 104,  60, 157,  14,  92,  31,
         81,  97,  18,  57,  33, 164,  14,  14,  83,  76, 163, 109,  18, 152,
        111,  42, 158, 158,  83,  30,  49,  31], device='cuda:2') torch.Size([64])
10/17/2023, 06:27:31# labels of 10000: tensor([144,  42,  33,  30,  18, 111, 150, 142,  60, 150, 124, 162, 164, 152,
         75,  

10/17/2023, 06:48:25# labels of 50000: tensor([ 83, 144,  31, 111, 119,  74,   2,  87,  38,  34,  97, 121,  55,  74,
         60,  87,   9,  12,  53,  87,  75,  38,  11,  48, 152,   2, 124, 111,
        143,   2, 144,   9,  87, 125,  31, 158,   2,  60, 164, 119,  38,  75,
        119,  44,  53, 157,  30, 104,  31, 162,  33,  48, 162,  48, 116, 121,
         14,  24,  55,  30, 158,  24,   4, 124], device='cuda:2') torch.Size([64])
10/17/2023, 06:48:25# predicted of 50000: tensor([ 53,  87, 116,  74,  18, 104,   2,  87,   4,   9,  54,  76, 111, 158,
         60,  87,   1,  12, 164,   1, 116, 121,  11,  48, 152,  87, 124,  55,
         55,  83, 111,   9, 112,  36, 116,  48, 124, 143,  75, 151,  38,  55,
         47,  74,  92, 152,  83, 116, 119,  54,  24,  49,  87,  34,  33, 157,
         74,  24, 109,  97, 157, 121,  30, 124], device='cuda:2') torch.Size([64])
10/17/2023, 06:51:04# labels of 55000: tensor([158,  54, 157, 111, 112,  81, 119,  14,  55,  83,  12,  42,  48, 152,
        164,

10/17/2023, 07:13:33# labels of 95000: tensor([ 14, 119, 151,  33,  18,  31,  44,  54, 116,  47,   9,  24, 119, 150,
         14,  47,  11, 112,   9, 109,  48, 111, 151,  97, 109,  24,  12,  76,
         14,  11,  81, 119, 109, 142, 116, 124,  75, 116,  83, 152,  87, 142,
        152,  28,  28,  28,  28,  28,  65,  28,  28,  28,  28,  28,  28,  28,
         54, 142,   4,  30,   4,  53,  48,  87, 158,  44,  54,   2,  33, 151,
        150,  92,  44,  74,  97, 104], device='cuda:2') torch.Size([76])
10/17/2023, 07:13:33# predicted of 95000: tensor([ 49, 119, 143,  83,  76,  55, 163, 163, 116,  81,  36,  24, 119, 150,
         49, 162, 162, 112, 116,  81, 109,  75, 162, 144, 104, 162,  34, 162,
        144,  47,  38,  54, 144, 142,  87, 111,  49,   9,  74, 152, 144, 152,
         53,  28,  28,  28,  28,  28,  65,  28,  28,  28,  28,  28,  28,  28,
        109,   2,  54,  30,   4,  87,  74,  74, 158,  81,  75,   2,  81,  30,
        150,  75,  44, 150,  11, 104], device='cuda:2') torch.Size

10/17/2023, 07:38:39# labels of 140000: tensor([157, 111,   2,  14,  92, 112,  54,   1,  92,   9,   2,  81,  34, 150,
         75,  48,  38,  97,  36,  76, 111, 112,   4,  97,  49, 144,  76, 158,
         33, 116,  30, 104,  11, 163,  54, 124,  47,  31,  60,   4,  44, 124,
          9, 125,  55,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65, 141,
        141, 141, 141, 141, 141, 141, 141, 141, 141,  76,  44,  47, 151,  81,
         30, 104,  18, 116, 142,  38,   4, 162,  11,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65, 141, 141, 141, 141, 141,
        141, 141, 141, 141, 141,  31, 151, 116], device='cuda:2') torch.Size([148])
10/17/2023, 07:38:39# predicted of 140000: tensor([ 38, 111,  12,  81, 124,  81,  54,  97,  31, 

10/17/2023, 08:01:59# labels of 185000: tensor([112, 116,  30,  31, 150, 152,   1,  30, 121,  54, 125, 143,  48,  42,
         38,   2,  47, 151,  47, 112, 144, 143, 143, 158,  60, 164,  81,   2,
        142, 119, 112,  48,  12,  76,   2,  11,  36, 142, 121,   1,  44,   2,
         81,  55,  47, 157,  75,  30, 150,  48,  83, 164,  92, 112, 157,  76,
         74,  97,  11, 124, 157,  30, 116,  49], device='cuda:2') torch.Size([64])
10/17/2023, 08:01:59# predicted of 185000: tensor([163, 116,  30, 143, 121,  53,   1,  30, 142, 109, 164,   4,  48,  42,
         38,  31,  60, 150,  34, 104,  47,  92,  47, 125,  44, 125,  33, 163,
         34, 109,  75, 111,  30, 111,  48,  74, 163,  55, 157,   1, 109, 119,
        111,  55,  47, 143,  47,  33,  75, 151,  33,  30, 164,  18, 163, 121,
        158,  14,  12, 124, 157, 142,  54,  81], device='cuda:2') torch.Size([64])
10/17/2023, 08:04:32# labels of 190000: tensor([ 36,  74,  49,   4,  53,  55,  11,  55,   2,  31,  55, 143,  36,  57,
        1

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 08:16:42# labels of Validation: tensor([112, 145, 145, 145, 145, 145, 145,  34,  53,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,   7,
          7,   7,   7,   7,   7,   7,  94,  94, 130, 130, 130, 130, 130,  92,
        129, 129, 129, 129, 129, 129, 129, 104,  20,  20,   3,   3,   3,   3,
          3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,  64,  64, 104,
          1,  89,  89,  29,  29,  29,  29,  29,  29,  29,  29,  29,  29,  15,
         15,  15, 136, 136, 136, 136, 136, 136, 136, 124,  59,  59,  59, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 164,  32,  32,  32,  32,  32,  32,  32,  32,
         32,  32,  3

10/17/2023, 08:16:50# Validation Loss: 0.2555 | Validation Accuracy: 0.9571

10/17/2023, 08:16:50# Find a better model!!


Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 08:19:41# labels of 5000: tensor([ 75, 116,   1, 119,  55,  97,  87,  74,   1, 157,  60, 163,  53, 116,
         47, 163,  33,   1, 104, 150, 142,  54, 162,  81,  97,  36,  14, 143,
        143, 142, 142,  54,  74, 124, 121, 112,  33,  30,   4, 163,  38,  54,
        158,  42,  38, 142, 112, 143,  83,  49, 121,  30,  36,   1,  57,  60,
        109,  60, 143,  36, 158,  18,  18, 142], device='cuda:2') torch.Size([64])
10/17/2023, 08:19:41# predicted of 5000: tensor([151,   2,  75, 151,  55,  49,  42,  12,  47, 152,  76, 121,  53, 116,
         53,  54,  33,   1,  42,  48,   1, 144, 104,  49, 151,  97, 144, 143,
        119,  48,  31, 158,  38,  92,  38,  14,  54,  60,   4,  54,  38,  54,
        158,  42,  38,  48, 112, 143,  83,  49, 121,  24,  36,   1,  87,  60,
         75,  24, 143,  31, 158,  83,  14, 142], device='cuda:2') torch.Size([64])
10/17/2023, 08:22:28# labels of 10000: tensor([143,  76, 163, 116,  49, 151, 163,  83,  74,  83,  18, 116, 109,  87,
        152, 1

10/17/2023, 08:48:07# labels of 55000: tensor([ 33, 157,  34, 143,  57, 121,  44, 158,  24,   2, 162, 142, 109, 109,
        142,  14, 143, 143,   2,  38, 125,  74,  44, 119, 119,   9, 119, 121,
         18,  34, 144,  54,  11, 144, 164,  83,  48, 164,  76,  18,  24,  75,
        124,  87,  55,  49,  42,  53,  34,  92,  31, 151, 158, 157,  75,  57,
        144,  83,   1,  87,  24,  54, 151,  74], device='cuda:2') torch.Size([64])
10/17/2023, 08:48:07# predicted of 55000: tensor([ 33,  75,  34,  34, 119, 121,   4, 158, 144, 116,  38,  11, 125, 109,
        142,  14, 143, 143,  11,  38, 158, 162,  81,  36,   1, 121,  87,  92,
         49,  34,  49,  33,  11,  81, 143,  75, 152, 104,  76, 158, 158,  75,
         14,  87,  55,  49, 116, 121,  92, 151,  36, 152, 164,  47,  34,  57,
        144,  83,  75,  75, 144,  47,  42,  53], device='cuda:2') torch.Size([64])
10/17/2023, 08:50:57# labels of 60000: tensor([142,  87,  92,  31, 100, 100, 100, 100, 100, 100,  31,  12, 158, 144,
         98,

10/17/2023, 09:12:36# labels of 100000: tensor([ 55,   2, 158,  81,  12,  42, 157, 158,  42,  34, 158,  76, 112, 124,
         49, 152,  38,  36,   1, 121, 150,  60,  60,  44,  31,   4,  53, 144,
        144, 121,  47,  44,  83,  36, 157,   4,   4, 111, 104, 162,  74, 151,
         87,  47,   4,  14,  31,  53,  57,  12, 116,  57,  49, 144, 163,  33,
         34, 152, 121,  30,   9,  54,  30,  49], device='cuda:2') torch.Size([64])
10/17/2023, 09:12:36# predicted of 100000: tensor([ 92, 143,  18,  44,  12,  42, 157, 158,  92,   1,  75, 124, 112,  12,
         75,  53, 164,  30,  36,  74,  49,  75,  42,  74,  42, 124,   9, 144,
        143,  76, 111,  44,  57,  31,  47,  53,  12, 111,  47, 152,   2, 151,
        119,  74, 143, 164, 124,  11,  57, 152,  34, 144,  55, 119, 158,  33,
         60, 152, 150,  38, 116, 162, 158,  49], device='cuda:2') torch.Size([64])
10/17/2023, 09:15:30# labels of 105000: tensor([150,  36,  49, 143, 124,  30, 119, 157, 124, 144,   4,  74,  42,  34,
         

10/17/2023, 09:40:43# labels of 150000: tensor([ 42,  57, 142, 164,  57, 116,  97,  54,   2, 150, 125,  24,  47, 143,
         11, 150,   2, 150, 104,   4, 125,  31,   4, 124,  49,  24, 104,  24,
         44, 109,  76, 142,  53, 109,  92,  14,  33,   4, 162,  87,  18,  14,
         30, 157,   9,   1,  76, 162, 152,  60,  44,  48,  44,   4,  44, 142,
         12,  92,  49, 143, 112,  30, 125, 116], device='cuda:2') torch.Size([64])
10/17/2023, 09:40:43# predicted of 150000: tensor([162,  57,  53,   1,  57, 158,  42,  60,  36, 150,  74,  11,  47, 143,
         11,  48,  33, 150, 104,  14,  47, 119,   4, 124,  74,  24,  49,  55,
        158,  34,  76, 142,  30,  49,  30,  14,  49,  30, 162,  87,  18,  14,
        158,  97,  38,  74,  76,  18,  60,  60,   4,  47, 121,   4,   4, 142,
         60,  18,  81, 143, 164,  30,   4, 157], device='cuda:2') torch.Size([64])
10/17/2023, 09:43:16# labels of 155000: tensor([  9,  33,  60,  57, 112,  87,  81,  87, 125,   1,  30, 144,  14,  76,
        1

10/17/2023, 10:01:30# labels of 190000: tensor([104,  92, 162, 109,  55,  12,  44, 111, 121,  49,  38, 144, 125, 124,
          1,  47, 142,  48,  60,  30,  47,  75, 162,  81,  14, 109,  60, 158,
         97, 152, 150, 142,  31,  81,  75,  49, 142,  36, 143, 162,  14,  38,
          4, 119,  38, 163,  14, 116,  47,  24,  55, 142,  57, 158,   9, 109,
         60,  55,  49,  38, 111,  18,  47, 164], device='cuda:2') torch.Size([64])
10/17/2023, 10:01:30# predicted of 190000: tensor([112,  92,  87,  33,  47,  92,  44, 151,  74,  49,  38,  74, 125,  97,
         55, 116, 142,  49, 158,   4,  83, 116,   4,  74,  38,  54,  60,  60,
         34, 164,  34,  97,  48,  47, 116,  18, 142, 151, 143, 162,  11,  38,
        112, 119, 157,  24, 112,  34, 112, 150,  55,  36,  57, 158,   1, 150,
         60,  47, 121, 162, 152,  54, 150,  24], device='cuda:2') torch.Size([64])
10/17/2023, 10:04:06# labels of 195000: tensor([ 38,  83, 112,  38,  49,  60,  55,  83,  38,  31,  42, 124,  76,   1,
         

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 10:14:16# labels of Validation: tensor([ 31,  31, 126,  ..., 101,  83, 119], device='cuda:2') torch.Size([2026])
10/17/2023, 10:14:16# predicted of Validation: tensor([ 31,  87, 126,  ..., 101,  31,   9], device='cuda:2') torch.Size([2026])
10/17/2023, 10:14:16# labels of 0: tensor([ 31,  31, 126,  ..., 101,  83, 119], device='cuda:2') torch.Size([2026])
10/17/2023, 10:14:16# predicted of 0: tensor([ 31,  87, 126,  ..., 101,  31,   9], device='cuda:2') torch.Size([2026])
10/17/2023, 10:14:24# Validation Loss: 0.2535 | Validation Accuracy: 0.9569

10/17/2023, 10:14:24# Find a better model!!


Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 10:17:06# labels of 5000: tensor([ 47, 104,  44, 109,  60, 104,  87, 157,  47,  92,  11, 144,  54,  47,
        152,  14, 116,  92, 144,  42, 144,  38, 121,  44,  49,  42, 151, 143,
         49, 150,  34, 119,  83, 163,  14, 124,   9,  57,   4,  83,  42,  83,
         57,   2, 158, 125,  83,  49,  34,  11, 119, 143,  92, 157,   1, 151,
         54,   4,  31, 158,  18,  49,  11,  92], device='cuda:2') torch.Size([64])
10/17/2023, 10:17:06# predicted of 5000: tensor([ 48, 164, 125, 124, 112, 104,  53, 158,  47,   1,   1,   1,  54,  75,
        152,   2, 116, 125,  54,  31,   1, 121, 121,  47, 152, 151,   1,  11,
         81, 150,  34, 112,   1,   1,  14,  12, 111,  12, 158,  83, 111, 112,
         42, 151,  18, 111, 109,  55,  38, 163,   2,  92, 164,  81, 142, 116,
        163,  24,  92,  44, 142, 109,  83,  38], device='cuda:2') torch.Size([64])
10/17/2023, 10:19:40# labels of 10000: tensor([116, 121,   4,  44, 144, 150,  11, 112, 116,  31,  11, 164,  92,  38,
        142, 1

10/17/2023, 10:45:01# labels of 50000: tensor([125,  76,  92,  44, 158,  87, 151, 111, 152,  12, 109, 152, 151,  11,
         30,  47,  30, 142,  44, 163,  18,  75, 162, 111,  49,  49, 162,  42,
         44,  83,  24,  14,  75, 164,  54,  92, 152, 163, 157,  87,  49, 109,
         48, 144,  44, 152,  87, 121,   2,  42,  33, 116,   1, 109, 112, 137,
        137, 137, 137, 137, 163,  33,  81,  74, 151,  97,  31, 119],
       device='cuda:2') torch.Size([68])
10/17/2023, 10:45:01# predicted of 50000: tensor([125,   4,  47,   2, 158,  87,   2, 111, 152,  83, 151, 152, 143, 151,
         30,  47, 125,  18,  81,  92,  48,  75,  76,  48,  49,  55,  18,  42,
         33,  74, 116,  14,  34,  74,  12,  47, 152, 163,  31,  75,  44,  14,
         48, 144,  44, 152,  87, 158,   2,  42,  48,  87,  55, 150,  57, 137,
        137, 137, 137, 137,   1, 164,  49,  47, 162,  54,  42,   4],
       device='cuda:2') torch.Size([68])
10/17/2023, 10:48:09# labels of 55000: tensor([ 34,  87,  11, 124, 112, 164

10/17/2023, 11:13:38# labels of 100000: tensor([ 18, 164, 121, 121, 121, 112,  18,  48, 124, 116, 151, 104,   2, 116,
         75,  33,  30,  57,  36,  11,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  49,  57,
         33,  47,  24,  18,  92, 162, 164, 111,  92, 124,  36, 158, 104,  36,
         75,  60, 143, 151, 111,  47,  97, 143,  47,   4, 119,  31, 116,  53,
         48,  31, 125,  54, 125,  42,   9,  74,   9,  97,  60,  24, 142],
       device='cuda:2') torch.Size([83])
10/17/2023, 11:13:38# predicted of 100000: tensor([ 12,   9, 121, 119,  34, 112, 116,  24, 124, 116, 116,   2,   2, 125,
        157, 109,  30,  54, 158,  11,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16, 112, 162,
          2, 163,  12,  18,  92,   1, 109, 143,  38, 124, 116,  83, 125,  36,
        162, 116, 144,  87,  11,  49,  97, 143,  47,   4,  34,  11,  31,  53,
        164, 125, 125,

10/17/2023, 11:40:51# labels of 145000: tensor([ 48, 144, 151, 125, 144,  36,  76, 112,  12,  47,  33, 142, 157,   1,
        152, 104,  42, 125,  57,  87,  53,  55,  36, 144,  11, 112,  92,  48,
         53, 163,  60, 112, 109, 142,  53,  83,  49,  81,  14,   9, 162,  60,
         83,  42,  24, 152, 112, 109,  54,  33,  92, 142,  76, 152,  11,  38,
        164,  48,   2, 143,  92,   9, 152,  53], device='cuda:2') torch.Size([64])
10/17/2023, 11:40:51# predicted of 145000: tensor([ 92,  47,  48, 164,  55,   4,  76, 112, 143, 121,  55, 150,  30, 157,
        152, 104,  42, 125,  24,  97,  53,  55,  36, 143, 112,  34, 144,  48,
          1, 150,  55,  55, 109,  42,   9,  57, 163,  57,  14, 150, 162,  49,
         83,  14,  55, 152,  57, 152,  54,  33,  75, 164,  76, 163,  11, 104,
        164,  83,  55,   1,  92,   1,  38,  54], device='cuda:2') torch.Size([64])
10/17/2023, 11:43:42# labels of 150000: tensor([  4,  54, 158,   4,  75, 104,  47,  87,  38,  18, 124,  57, 111,  49,
         

10/17/2023, 12:03:17# labels of 185000: tensor([164, 164,  38, 142, 119,  18, 144,  55, 143,  55, 164, 157, 157,   2,
         55,  76,  81,  30,  53,  87, 162,  76,  76,  92,  31, 154, 154, 154,
        154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
        154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,
        154, 154, 154, 154, 154, 154, 154, 154, 154, 154, 154,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65, 154, 154, 154, 154, 154, 154, 154,
        154, 154, 154, 154, 154, 154, 154, 154, 104,   9,  75, 157,  38,  75,
        104,  33,  11,  

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 12:19:09# labels of Validation: tensor([ 69,  69,  69,  69,  69,  69,  43,  43,  69,  69,  69,  69,  69,  69,
        108, 108, 108, 108, 108, 108, 108, 108,  86,  86, 105, 105, 105, 105,
        105, 105, 105, 105, 105, 105, 105, 105, 105, 101, 101, 101, 101, 101,
        101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101,  76,  92,  65,
         65,  65,  65,  65,  65, 135, 135,  30, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
         50,  50,  50,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,  82,
         82,  82,  82,  82,  82,  82,  82,  65,  82,  82,  82,  82,  82,  82,
         82,  82,  82, 144,  89,  89, 139, 139, 139, 139, 139, 164, 150,  32,
         32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
         86,  86, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140, 140,
        140,   0,   0,   0,   0,   0,   0,  65,  65,  65,  65,  65,  65,  70,
         70,  70,  7

10/17/2023, 12:19:18# Validation Loss: 0.2654 | Validation Accuracy: 0.9553



Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 12:22:07# labels of 5000: tensor([150,  30,  30,  33, 152, 157,   4,  76,  44, 121, 112,  92, 162, 143,
         11, 143,   1,  36,  60,  18,  11,  31,  83,  76,  42, 116, 152,  60,
        116,  31, 121, 116,  76, 121,   9,  47, 150, 121,  30,  24,   9,  48,
         34, 143, 121, 144,  18, 164,   9,  44,  11, 163,  74,  30,  54, 152,
         87,  48,  97, 150, 143, 112,  75, 164], device='cuda:2') torch.Size([64])
10/17/2023, 12:22:07# predicted of 5000: tensor([150,  33,  44, 121,  34, 163,  38, 150, 163,   9, 150,  92,  44, 152,
         11,  44,   1,  30,  42, 151,  49,  31,  92,  74,  42, 163,  83,  97,
        116, 125, 121, 116,  57, 121, 164,  24, 163, 121, 143,   1,  76,  49,
         47,   9,  24, 144,  18, 164, 124,  47, 163, 163, 163, 111,  54,  18,
         49,   4,  97, 112, 143, 109,  54, 163], device='cuda:2') torch.Size([64])
10/17/2023, 12:24:54# labels of 10000: tensor([ 44, 163, 112,  60,   9,   4, 166, 166, 166, 166, 166, 166,  74,  97,
         18,  

10/17/2023, 12:49:19# labels of 50000: tensor([124,  53,  36,  76,   1,  92, 117, 117, 117, 151, 151,  97, 125,  12,
         33,  54,   1, 144,  44,  81,  34,  97, 119,  34,  12,  74,  36, 111,
         36, 162,  74,  44,  53,  24, 158,   9, 119, 163, 164,  38,  57, 144,
         24,  60,  97, 111,  24,  92,  44,  55,  97,  81,  92,  49,  30,  47,
         49,  31,  57,  42,  36, 143,  14, 143, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
          2], device='cuda:2') torch.Size([85])
10/17/2023, 12:49:19# predicted of 50000: tensor([ 55, 125, 142,  74,  18,  48, 117, 117, 117, 151, 111,  33, 125,  12,
         83,  55, 164,  47, 112,  53,  12,   4,  48, 150,  42,  14,  48,   9,
         49,  18,  74,  97,  92, 121, 109,  47,  75,   1,  42,  38,  57,  11,
         53,  34,  60,  87, 158, 143,  44,  55,  42,  49,  87,  36,  75,  97,
         49,  11,  92,  42, 158,  38,  14,  11, 127, 127, 127, 127, 127, 127,
        127, 

10/17/2023, 13:14:17# labels of 95000: tensor([ 33,  75, 162,  49,  14, 144, 164,  34,  47,  87,  74, 142,  36, 109,
         34,  92,  11,  75, 125,  42,   2,  87,  48,  92,  12,  33,  14, 121,
        124, 164, 104,  42, 119,   4,  11, 157,  74, 143,  34,  49,  30,  48,
         55,  83,  81,  34,  97,   4,  44, 112, 112,  87,  81, 163,  11,  81,
        144,  44,  97, 125,  97,   4, 162,  76], device='cuda:2') torch.Size([64])
10/17/2023, 13:14:17# predicted of 95000: tensor([157, 158,  75,  11,  38,  54, 143,  11,  47,  83,   4,  55, 144,  12,
         34,  92, 158, 116,  33,  54,  34,  34,  54,  14, 121,   2,  14, 121,
         42, 164, 104,  42,  75,   4, 111, 144,   9, 143, 121, 112, 142,  75,
         14,  83,  30, 162,  97, 121,  83, 112,  83, 151,  75,  76,  30,  55,
        144,  44,  76, 125,  54,   4, 144,  76], device='cuda:2') torch.Size([64])
10/17/2023, 13:17:05# labels of 100000: tensor([ 74, 119,  83, 142,  42,  18, 121, 116,  74, 109, 150,  14, 104, 157,
         92

10/17/2023, 13:44:00# labels of 145000: tensor([  2, 158,  11,  89,  89, 158,  34,  55,  55,  11,  55,  57,  55, 143,
         14,  12,  38, 109,  11, 162,  14,  48, 109,  81, 162, 104, 144, 163,
         11,  81,  60,  74,  18,  48, 142, 109,  30,  47,  57,  30,  75,  53,
         83,  34,  47,   4,  57, 143,  76, 157,  30,  47,  30,  76, 144,  38,
        112,   1,   1, 125,  47,  92,  75, 109,  92], device='cuda:2') torch.Size([65])
10/17/2023, 13:44:00# predicted of 145000: tensor([163,  44, 164,  89,  89, 158,  92,  11,   1,  83, 164,  30,  55, 143,
        144,  54,  38, 157, 144,  57, 158,  48,  81,  60,  74,  47, 144, 163,
        142,  38,  44,  74, 116,  48, 142,  30,  30,  97,  57,   2,  75,  53,
        119,  34,  47,  44, 111,  92,   1,   1, 164,   1, 164, 162, 119, 111,
        164, 143,  48,  31, 112,  38,  14,  31,   9], device='cuda:2') torch.Size([65])
10/17/2023, 13:46:46# labels of 150000: tensor([ 36, 152,   1,  14,  75,   4,  60,  54,  44,  60, 143, 150,  24, 164,

10/17/2023, 14:11:46# labels of 195000: tensor([121,  31,  30, 162,  81,  36, 162,  47,  75,  12, 152,   1, 121,  18,
         33, 150, 144, 144, 163,  36, 121, 164, 158,  33,  81,  12,  48, 125,
        104,  30,  81, 112,  31,  60, 164,   9, 121,  42,  12, 157,  54,  97,
         76,  53, 124, 104,  44,  54, 142, 151,  54, 142, 158,  16,  16,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,  11, 162,   9,  76, 109, 163,  12,  14,  55,  36],
       device='cuda:2') torch.Size([83])
10/17/2023, 14:11:46# predicted of 195000: tensor([ 74,   4,  30, 143, 124, 143, 162, 109,  12, 163,   4,   1,  76,  97,
         33, 150,  55, 144, 163, 125, 157,  92,  31,  31, 111,  14,  48, 144,
         24,  30,  49,  30,  31,  75, 164, 158, 163,  42,  54,  49,   1,  31,
         14,  57, 124,   4,  57, 121,  60, 157,  33,  49,  57,  16,  16,  16,
         16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
         16,  16,  16,

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 14:21:58# labels of Validation: tensor([ 54, 143, 155,  65,  65,  65,  65,  65,  65, 155, 155, 155,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
         65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,  65,
        141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 107, 107, 107, 107,
        107, 107, 107, 107,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
          3,   3,   3,   3,   3,  75, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165, 165,
        165, 165, 16

10/17/2023, 14:22:07# Validation Loss: 0.3007 | Validation Accuracy: 0.9556



Training:   0%|          | 0/213400 [00:00<?, ?it/s]

10/17/2023, 14:24:55# labels of 5000: tensor([ 48,  75,  42,  83,  55,  14, 125, 143, 112,  18,   1, 121, 158,  42,
         54,  54, 158, 124,   9, 143,   4,   1,  57,  31,  24, 143,  33,  30,
        151,  54, 116,  31,  36,  44,  47, 142,  30, 142, 143, 158,  30,  36,
         14,   4, 152, 112,  34, 124,   2, 151, 158,  55, 119, 125, 150,  75,
         81, 112,  49,  18,   9,  49,  53,  57], device='cuda:2') torch.Size([64])
10/17/2023, 14:24:55# predicted of 5000: tensor([ 48, 162, 119,   9,  55,  12, 125,  55,  36,  76, 150,  38,  36, 164,
          9, 111, 158, 158,   9,  11,  36,  47,  57, 119,  18,  83,  76,  30,
         74, 109, 112,  11,  36,  97,  76, 142,  33,  47, 143, 158, 163,  36,
         49,  48, 112, 144,  34,  18,  54,  34,  54,  55, 109,  60,  24,  53,
         44, 150,  49,  38, 116,   9,  54,   9], device='cuda:2') torch.Size([64])
10/17/2023, 14:27:41# labels of 10000: tensor([ 49,  11,  54, 125,  97, 116, 116, 158,  38, 144, 144, 162,  11, 112,
        157, 1

10/17/2023, 14:52:36# labels of 55000: tensor([111,  60,  44, 119,  48,  24,  34, 122, 122, 122, 122, 122, 122, 122,
        111, 111, 121, 143,  57,  74, 121, 164, 104, 116, 143, 119,  47,  53,
         60,  57,  60, 111,  24,  55, 124,   2, 162, 158, 144,  47,   9,  14,
         31,  57, 116,  12,  83,  11, 164, 152, 158, 162, 121,  87,  11,  55,
         31, 151, 151, 163,  34,  34,  11,  83,  87,  34, 158, 162,  12,   4],
       device='cuda:2') torch.Size([70])
10/17/2023, 14:52:36# predicted of 55000: tensor([144,  60,  44, 119, 111,  24, 124, 122, 122, 122, 122, 122, 122, 122,
         60, 111, 121, 143,  57, 119,  14, 158,   4,  97,  53,  11,  14,  18,
         34,  49,  60, 111, 121,   2, 158, 163, 162, 109, 162,  47, 158,  53,
         31,  57,   2,  30, 144,  18,  12, 152,  38, 112,  30,  60,   4,  11,
        162, 151, 111, 163,  34,  24,  87, 121,  87, 164,  49, 162, 125,  83],
       device='cuda:2') torch.Size([70])
10/17/2023, 14:55:26# labels of 60000: tensor([ 42,  83

10/17/2023, 15:16:47# labels of 95000: tensor([ 57,  33,  81,  74, 162, 157,   4,  57, 162,  11, 142,  11, 104,  54,
         30, 125,  57,  24,   4,  57, 162, 119,  75, 121, 144, 150,   4,  11,
        163,  74, 124,  74,  97,  74,  97,  14, 109,  42, 152, 144,  76,  60,
         54, 150, 112, 109,  97, 143, 112,  12, 144, 157,  60, 116,  54,  87,
        158, 163,  12, 124, 112,  60,  12,  48], device='cuda:2') torch.Size([64])
10/17/2023, 15:16:47# predicted of 95000: tensor([ 57,  31, 125, 121, 143, 163,  44, 125,  87,  11,  74,  87, 151,  97,
          9,  42,  34,  24,  75,  57,   1,  81,  74,  36, 109,  49,  81, 163,
        163, 104,  31,  74, 109,  74, 104, 125,  31,  48,   1,  81, 142, 116,
         34,   1, 163, 109, 124,  14, 125,  57, 121, 157,  60, 116,  33,  42,
        158, 163,  12,  30,  92, 152,  12,   1], device='cuda:2') torch.Size([64])
10/17/2023, 15:19:34# labels of 100000: tensor([ 18,  48,  55, 109, 152,  54,  49,  75, 111, 111,  33,  74,  74, 143,
          2

10/17/2023, 15:44:27# labels of 145000: tensor([144,  36, 111, 124,   2,  30, 150, 111,  24, 111,  57,  11, 164,  60,
         44, 109,   4,  81, 151,   4,  18,  83,  87,  60, 116,  83, 112,  75,
         81,  92,  53,  57, 125, 116,  60,  24,  44, 144,   1,  24,  53, 109,
         74,  75,  34,  74, 144, 124,   2, 111, 142, 109,  97,  11,  54,   2,
        119, 144,  92,  18,  92, 125,  31, 116], device='cuda:2') torch.Size([64])
10/17/2023, 15:44:27# predicted of 145000: tensor([ 57, 116, 111, 124,  75,  30,  12,  83,   2,  81,  55,  53, 158,  55,
        157, 109,  18,  12,  44, 125,  18, 157,  87,  33, 116,  12,   4,  76,
        121,  92,  54, 109, 125, 116,  31,  24,  31,  55, 116,  24,  36,  53,
          1,  55,  36,  57, 152,  24,   4,   9, 150, 163,  97,  11, 109,   2,
         36,  74,  30,  49,  11, 125, 152, 116], device='cuda:2') torch.Size([64])
10/17/2023, 15:47:12# labels of 150000: tensor([ 47, 109,  83,  49,  12,  24, 164,  60, 104,  49,  18, 116, 116, 142,
         

10/17/2023, 16:13:42# labels of 195000: tensor([151, 111,  44,  33, 144,  18,  92, 116, 151,  24, 158,   9, 151,  49,
        119,  57, 124, 109,  92,  87,  33,  53,  83, 163,  81,  47,  60,  92,
         92,  49,  31,  57, 121, 152, 142, 163, 162,  75,  48, 109,  55,  24,
        142,  60, 109,   1, 163,   2, 104,  60,  38,  34,  14, 163, 125,  36,
         74, 157, 111, 143,  92, 158,  60,  47], device='cuda:2') torch.Size([64])
10/17/2023, 16:13:42# predicted of 195000: tensor([151,  83, 109, 143,  53,  18, 104, 119, 121,  24, 158,  55,  54, 116,
         42, 111,  53,   2, 164, 142,  11, 158,  81, 163, 121, 157, 104, 104,
        116,  53, 158,  60, 121, 152,  33, 163, 109,  75, 143,  34,  55,  24,
        142,  18, 109,   1, 163, 142, 142,  42, 162,  81,  14,  83,   1,  31,
        119,  48,  87,   2,  92,  14,  60,  47], device='cuda:2') torch.Size([64])
10/17/2023, 16:16:26# labels of 200000: tensor([  2,  76,  87,  11,  76,  11, 162,  54, 158, 109, 151,  31,  87,  57,
         

Validation:   0%|          | 0/258 [00:00<?, ?it/s]

10/17/2023, 16:23:50# labels of Validation: tensor([ 86,  86,  50,  ..., 137, 137, 137], device='cuda:2') torch.Size([1567])
10/17/2023, 16:23:50# predicted of Validation: tensor([ 86,  86,  50,  ..., 137, 137, 137], device='cuda:2') torch.Size([1567])
10/17/2023, 16:23:50# labels of 0: tensor([ 86,  86,  50,  ..., 137, 137, 137], device='cuda:2') torch.Size([1567])
10/17/2023, 16:23:50# predicted of 0: tensor([ 86,  86,  50,  ..., 137, 137, 137], device='cuda:2') torch.Size([1567])
10/17/2023, 16:23:59# Validation Loss: 0.2532 | Validation Accuracy: 0.9575

10/17/2023, 16:23:59# Find a better model!!


KeyError: 'train'

In [44]:
pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt'
model.load_state_dict(torch.load(pretrained_model_path))
model.to(device)

true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['repeat_train_320'], desc="Generating Train Report", position=0, leave=True):
        loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, which_type='train')
        labels = batched_g.edata['label'].to(device)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

# Assuming that the mapping and other things are defined earlier or globally
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# Generate classification report
train_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"Train Classification Report at Epoch {epoch}:\n{train_report}")

RuntimeError: Error(s) in loading state_dict for Model:
	size mismatch for sage.layer1.fc_neigh.weight: copying a param with shape torch.Size([256, 50]) from checkpoint, the shape in current model is torch.Size([128, 50]).
	size mismatch for sage.layer1.fc_self.weight: copying a param with shape torch.Size([256, 50]) from checkpoint, the shape in current model is torch.Size([128, 50]).
	size mismatch for sage.layer1.fc_self.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for sage.layer2.fc_pool.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([128, 128]).
	size mismatch for sage.layer2.fc_pool.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for sage.layer2.fc_neigh.weight: copying a param with shape torch.Size([512, 256]) from checkpoint, the shape in current model is torch.Size([256, 128]).
	size mismatch for sage.layer2.fc_self.weight: copying a param with shape torch.Size([512, 256]) from checkpoint, the shape in current model is torch.Size([256, 128]).
	size mismatch for sage.layer2.fc_self.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([256]).
	size mismatch for pred.W.weight: copying a param with shape torch.Size([167, 1024]) from checkpoint, the shape in current model is torch.Size([167, 512]).

### test of valid and test part is ``graph``

- 60 APs in training x 10000times
- 5 APs in validation x 4 times
- 3 APs in test x 4 times
- Batch size = 4

In [None]:
# load the pretrained model
pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transR_50_hidden_dim.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
#         print(f"data:{data[1]}")
        loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, count, which_type='test')
        labels = batched_g.edata['label'].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')


# ======================================== handlig the output excel files ========================================
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-transE_50-graphSAGE-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-transE_50-graphSAGE-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

    
report_df.to_excel(report_path, index_label='Label')

mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"mapped_report:\n{mapped_report}")