# Test of GAT
- use DGL
- predict `graphs`
- test: 0~99
- validation: 100~199
- train: 200~999
- larger lr with scheduler
- more epochs
- `model2` (3 layers)
- `more data`(duplicated)

In [1]:
import os
import dgl
import json
import torch
import torch as th
# from tqdm import tqdm
from tqdm.notebook import tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup
from torch.optim import AdamW

- check the GPU and assign the GPU by the best memory usage

In [2]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

print(device)


cuda:1


## Fix the seed

In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [4]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]

        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"]).to(self.device)

        g.ndata['feat'] = th.tensor(data["node_feat"]).to(self.device)
        g.edata['feat'] = th.tensor(data["edge_attr"]).to(self.device)  # Add edge features to graph

        return g, th.tensor(data["label"]).to(self.device)


def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)


In [5]:
datasets = ['train', 'valid', 'test']
dataset_data = {}

for dataset_name in tqdm(datasets):
#     file_path = f"../../data_processing/dgl/data/test_graph/repeated_{dataset_name}.jsonl"
    file_path = f"../../data_processing/dgl/data_new/training_data/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/3 [00:00<?, ?it/s]

../../data_processing/dgl/data_new/training_data/train.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/valid.jsonl


0it [00:00, ?it/s]

../../data_processing/dgl/data_new/training_data/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [6]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

dataloaders = create_dataloaders(8)
# dataloaders = create_dataloaders(16)

- Turn the print message to a log file

In [7]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GAT_model.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/0825_09:33_GAT_model.log


### Model

In [8]:
import dgl
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import GATConv

class GAT(nn.Module):
    def __init__(self, in_dim, hidden_dim1, hidden_dim2, out_dim, num_heads, dropout_prob=0.25):
        super(GAT, self).__init__()

        # First GAT layer
        self.layer1 = GATConv(in_dim, hidden_dim1, num_heads=num_heads, activation=F.relu, allow_zero_in_degree=True)
        self.batchnorm1 = nn.BatchNorm1d(hidden_dim1 * num_heads)

        # Second GAT layer
        self.layer2 = GATConv(hidden_dim1 * num_heads, hidden_dim2, num_heads=num_heads, activation=F.relu, allow_zero_in_degree=True)
        self.batchnorm2 = nn.BatchNorm1d(hidden_dim2 * num_heads)

        # Third GAT layer
        self.layer3 = GATConv(hidden_dim2 * num_heads, out_dim, num_heads=num_heads, allow_zero_in_degree=True)
        self.batchnorm3 = nn.BatchNorm1d(out_dim)

        # Dropout for regularization
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, g, h):
        # First GAT layer
        h = self.layer1(g, h)
        h = h.view(h.shape[0], -1)
        h = F.relu(self.batchnorm1(h))
        h = self.dropout(h)

        # Second GAT layer
        h = self.layer2(g, h)
        h = h.view(h.shape[0], -1)
        h = F.relu(self.batchnorm2(h))
        h = self.dropout(h)

        # Third GAT layer
        h = self.layer3(g, h).squeeze(1)

        # Store the output as a new node feature
        g.ndata['h_out'] = h

        # Use mean pooling to aggregate this new node feature
        h_agg = dgl.mean_nodes(g, feat='h_out')

        return h_agg


- Model Forward  

In [9]:
def model_fn(data, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
    batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = labels.to(device)
    logits = model(batched_g, batched_g.ndata['feat'].float()) # for GAT
    logits = logits.mean(dim=1)
    
    loss = criterion(logits, labels)

    # Get the class id with the highest probability
    preds = logits.argmax(1)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
    
    if which_type == 'validation' and count % 1000 == 0:
        add_log_msg(f"labels of Validation: {labels} {labels.shape}")
        add_log_msg(f"predicted of Validation: {preds} {preds.shape}")
        
    elif which_type == 'test'  and count % 1000 == 0:
        add_log_msg(f"labels of Test: {labels} {labels.shape}")
        add_log_msg(f"predicted of Test: {preds} {preds.shape}")
        
    if count % 5000 == 0: 
        add_log_msg(f"labels of {count}: {labels} {labels.shape}")
        add_log_msg(f"predicted of {count}: {preds} {preds.shape}")
        
    return loss, accuracy, preds

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [10]:
seed = 8787
same_seeds(seed)

model = GAT(in_dim=50, hidden_dim1=8, hidden_dim2=128, out_dim=168, num_heads=8)
torch.save(model.state_dict(), 'model2_initial/initial_weight.pth')

In [11]:
model.layer2.fc.weight

Parameter containing:
tensor([[-0.0173,  0.0305, -0.1151,  ...,  0.1396,  0.0871, -0.0046],
        [-0.0276,  0.0435, -0.0844,  ..., -0.0037, -0.0386, -0.0648],
        [ 0.0583,  0.1108, -0.0515,  ..., -0.0669, -0.0645,  0.0809],
        ...,
        [ 0.1049, -0.0281,  0.0213,  ...,  0.0550, -0.0606, -0.0602],
        [-0.0168,  0.0097,  0.1041,  ...,  0.0753,  0.0272,  0.0322],
        [-0.0035,  0.0883, -0.0423,  ...,  0.0232,  0.1120, -0.0347]],
       requires_grad=True)

- Check if model really load the model_dict

In [12]:
model = GAT(in_dim=50, hidden_dim1=8, hidden_dim2=128, out_dim=168, num_heads=8)
model.load_state_dict(torch.load('model2_initial/initial_weight.pth'))
model.layer2.fc.weight

Parameter containing:
tensor([[-0.0173,  0.0305, -0.1151,  ...,  0.1396,  0.0871, -0.0046],
        [-0.0276,  0.0435, -0.0844,  ..., -0.0037, -0.0386, -0.0648],
        [ 0.0583,  0.1108, -0.0515,  ..., -0.0669, -0.0645,  0.0809],
        ...,
        [ 0.1049, -0.0281,  0.0213,  ...,  0.0550, -0.0606, -0.0602],
        [-0.0168,  0.0097,  0.1041,  ...,  0.0753,  0.0272,  0.0322],
        [-0.0035,  0.0883, -0.0423,  ...,  0.0232,  0.1120, -0.0347]],
       requires_grad=True)

### test of valid and test part is ``graph``

- model 2
- Batch size = 8
- use larger lr and scheduler

In [None]:
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

model = GAT(in_dim=50, hidden_dim1=8, hidden_dim2=128, out_dim=168, num_heads=8)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
model.load_state_dict(torch.load('model2_initial/initial_weight.pth'))
best_model_path = "../checkpoint_GAT/best_model_model.pt"

model = model.to(device)

optimizer = AdamW(model.parameters(), lr=2e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=0, last_epoch=- 1, verbose=False)


criterion = nn.CrossEntropyLoss()
total_steps = 1000

# save the best model
best_val_loss = float('inf')
patience = 50  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    count = 0 
    
    for data in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        
        count += 1
        loss, accuracy, _ = model_fn(data, model, criterion, device, count, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()
        num_batches += 1
        
    scheduler.step()
    add_log_msg(f"total count: {count}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for data in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(data, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
    
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)

        torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,
                }, best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("Early stopping")
            break
            
            
# Testing Part
model.eval()
total = 0
correct = 0
count = 0

with torch.no_grad():
    for data in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):

        loss, accuracy, predicted = model_fn(data, model, criterion, device, count, which_type='test')
        labels = data[1].to(device)  # Assuming labels are the second element in the tuple
        
        if count % 5000 == 0:
            add_log_msg(f"labels of {count}: {labels} {labels.shape}")
            add_log_msg(f"predicted of {count}: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0) # label.size(0) is the batch size
        correct += (predicted == labels).sum().item() 
        # (predicted == labels).sum() would return how many of them are equal;
        # .item() would make the tensor to the regular value
        
#     print('Test Accuracy: %d %%' % (100 * correct / total))
add_log_msg(f'Test Accuracy: {100 * correct / total} %%')

  0%|          | 0/1000 [00:00<?, ?it/s]

Training:   0%|          | 0/16500 [00:00<?, ?it/s]

08/25/2023, 09:35:50# labels of 5000: tensor([134, 156,  17,   9, 100,  31, 119,  31], device='cuda:1') torch.Size([8])
08/25/2023, 09:35:50# predicted of 5000: tensor([134, 118,  17, 141, 141, 114,  37, 107], device='cuda:1') torch.Size([8])
08/25/2023, 09:37:53# labels of 10000: tensor([ 10,  57, 130, 134,   3, 150, 118, 148], device='cuda:1') torch.Size([8])
08/25/2023, 09:37:53# predicted of 10000: tensor([ 64,  64,  78, 134,   3,  64, 118,  59], device='cuda:1') torch.Size([8])
08/25/2023, 09:39:59# labels of 15000: tensor([ 15,  92, 152, 107,  44,  14,  51,  79], device='cuda:1') torch.Size([8])
08/25/2023, 09:39:59# predicted of 15000: tensor([ 15, 141, 107, 107,  37,  21,  21,  15], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:36# total count: 16500
08/25/2023, 09:40:36# Epoch 0 | Train Loss: 3.6864 | Train Accuracy: 0.2013


Validation:   0%|          | 0/2063 [00:00<?, ?it/s]

08/25/2023, 09:40:36# labels of Validation: tensor([136,  81, 123,  58, 163, 119,  43, 140], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:36# predicted of Validation: tensor([21,  4, 85,  4,  4,  4,  4, 21], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:36# labels of 0: tensor([136,  81, 123,  58, 163, 119,  43, 140], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:36# predicted of 0: tensor([21,  4, 85,  4,  4,  4,  4, 21], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:53# labels of Validation: tensor([ 60, 118, 153,  35,  79,  70, 110, 159], device='cuda:1') torch.Size([8])
08/25/2023, 09:40:53# predicted of Validation: tensor([ 4, 21,  4, 21, 21, 21,  4, 21], device='cuda:1') torch.Size([8])
08/25/2023, 09:41:08# labels of Validation: tensor([ 97, 162, 151, 118,  38,  29,  17,   9], device='cuda:1') torch.Size([8])
08/25/2023, 09:41:08# predicted of Validation: tensor([  4,   4,   4,  21,   4, 107,  17,   4], device='cuda:1') torch.Size([8])
08/25/2023, 09:41:09# 

Training:   0%|          | 0/16500 [00:00<?, ?it/s]

08/25/2023, 09:43:12# labels of 5000: tensor([ 76,  76,  90, 153,  57, 134,  62,  38], device='cuda:1') torch.Size([8])
08/25/2023, 09:43:12# predicted of 5000: tensor([107, 107, 107, 107, 160, 134,  62, 160], device='cuda:1') torch.Size([8])
08/25/2023, 09:45:17# labels of 10000: tensor([ 79,  24,   7,   5, 147, 111, 162, 106], device='cuda:1') torch.Size([8])
08/25/2023, 09:45:17# predicted of 10000: tensor([  5, 110,   7,   5, 147, 108,  41, 106], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:23# labels of 15000: tensor([ 45,  81,   4,   3,  84, 147, 159, 100], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:23# predicted of 15000: tensor([ 45, 100, 135,  84,  84,  84, 159, 100], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:59# total count: 16500
08/25/2023, 09:47:59# Epoch 1 | Train Loss: 3.1575 | Train Accuracy: 0.2898


Validation:   0%|          | 0/2063 [00:00<?, ?it/s]

08/25/2023, 09:47:59# labels of Validation: tensor([59, 16, 30, 37, 69, 79, 66, 74], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:59# predicted of Validation: tensor([ 88,  70, 111,  88,  69,  21,  88, 111], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:59# labels of 0: tensor([59, 16, 30, 37, 69, 79, 66, 74], device='cuda:1') torch.Size([8])
08/25/2023, 09:47:59# predicted of 0: tensor([ 88,  70, 111,  88,  69,  21,  88, 111], device='cuda:1') torch.Size([8])
08/25/2023, 09:48:15# labels of Validation: tensor([132,  81,  37,  81, 125, 116,  39, 160], device='cuda:1') torch.Size([8])
08/25/2023, 09:48:15# predicted of Validation: tensor([ 88, 111,  88, 111, 111, 111,  21,  88], device='cuda:1') torch.Size([8])
08/25/2023, 09:48:31# labels of Validation: tensor([ 63,  96, 104, 165, 128, 131,  97, 164], device='cuda:1') torch.Size([8])
08/25/2023, 09:48:31# predicted of Validation: tensor([111, 156, 111,  21,  21,  88, 111, 111], device='cuda:1') torch.Size([8])
08/25/2023, 09

Training:   0%|          | 0/16500 [00:00<?, ?it/s]

08/25/2023, 09:50:37# labels of 5000: tensor([ 74,  73, 122,  39, 140,  79, 107, 133], device='cuda:1') torch.Size([8])
08/25/2023, 09:50:37# predicted of 5000: tensor([122,  39, 122,  73, 140,  40, 122,  21], device='cuda:1') torch.Size([8])
08/25/2023, 09:52:41# labels of 10000: tensor([153,   3,  30, 117, 148, 106, 129,  68], device='cuda:1') torch.Size([8])
08/25/2023, 09:52:41# predicted of 10000: tensor([129,   3, 129, 117, 133, 106, 133, 129], device='cuda:1') torch.Size([8])
08/25/2023, 09:54:45# labels of 15000: tensor([ 64, 163, 159,  94, 105, 127, 134,  26], device='cuda:1') torch.Size([8])
08/25/2023, 09:54:45# predicted of 15000: tensor([ 37,  37, 159, 105, 127, 159, 134,  37], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:22# total count: 16500
08/25/2023, 09:55:22# Epoch 2 | Train Loss: 2.9763 | Train Accuracy: 0.3201


Validation:   0%|          | 0/2063 [00:00<?, ?it/s]

08/25/2023, 09:55:22# labels of Validation: tensor([104, 109, 157, 163,  26,  86, 109, 121], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:22# predicted of Validation: tensor([87, 87, 87, 87, 87, 87, 87, 87], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:22# labels of 0: tensor([104, 109, 157, 163,  26,  86, 109, 121], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:22# predicted of 0: tensor([87, 87, 87, 87, 87, 87, 87, 87], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:38# labels of Validation: tensor([ 66, 137,  29, 132, 148, 118,   5,  17], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:38# predicted of Validation: tensor([ 87,  87, 141, 161,  87, 141, 141,  17], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:54# labels of Validation: tensor([126, 143, 150,  35,  12,  19,  67, 161], device='cuda:1') torch.Size([8])
08/25/2023, 09:55:54# predicted of Validation: tensor([141,  87,  87, 141,  87,  87, 141, 161], device='cuda:1') torch.Size([8])
08/25/2023, 09

Training:   0%|          | 0/16500 [00:00<?, ?it/s]

08/25/2023, 09:57:58# labels of 5000: tensor([ 48,  37,  82, 157, 103,  73,   1, 136], device='cuda:1') torch.Size([8])
08/25/2023, 09:57:58# predicted of 5000: tensor([ 37,  37,  73,  37,  37,  73,  37, 136], device='cuda:1') torch.Size([8])
08/25/2023, 10:00:03# labels of 10000: tensor([103,  12, 166,  37, 132, 130,  97, 164], device='cuda:1') torch.Size([8])
08/25/2023, 10:00:03# predicted of 10000: tensor([132, 132, 132, 132, 132,  72, 132, 132], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:09# labels of 15000: tensor([ 87, 109,  14, 164, 122,  53,  41,  40], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:09# predicted of 15000: tensor([ 41, 160,  41, 160,  40, 160,  41,  40], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:47# total count: 16500
08/25/2023, 10:02:47# Epoch 3 | Train Loss: 2.8776 | Train Accuracy: 0.3383


Validation:   0%|          | 0/2063 [00:00<?, ?it/s]

08/25/2023, 10:02:47# labels of Validation: tensor([ 14, 115, 157,   8,  28,  14,  74,  13], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:47# predicted of Validation: tensor([121, 165, 121, 121, 121, 121, 121, 165], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:47# labels of 0: tensor([ 14, 115, 157,   8,  28,  14,  74,  13], device='cuda:1') torch.Size([8])
08/25/2023, 10:02:47# predicted of 0: tensor([121, 165, 121, 121, 121, 121, 121, 165], device='cuda:1') torch.Size([8])
08/25/2023, 10:03:03# labels of Validation: tensor([ 17, 128,  61, 145,  57, 163,  20,  78], device='cuda:1') torch.Size([8])
08/25/2023, 10:03:03# predicted of Validation: tensor([ 17, 165, 165,  72, 121, 121, 121, 165], device='cuda:1') torch.Size([8])
08/25/2023, 10:03:18# labels of Validation: tensor([ 44, 121,  13,  84,  64, 138,  31, 154], device='cuda:1') torch.Size([8])
08/25/2023, 10:03:18# predicted of Validation: tensor([121, 121, 165,  56, 121,  16, 121, 149], device='cuda:1') torch.Size([8]

Training:   0%|          | 0/16500 [00:00<?, ?it/s]

08/25/2023, 10:05:24# labels of 5000: tensor([ 27,  95,   0,  70, 131, 116,  35,  41], device='cuda:1') torch.Size([8])
08/25/2023, 10:05:24# predicted of 5000: tensor([ 0, 27,  0, 70, 41, 41,  0, 41], device='cuda:1') torch.Size([8])
08/25/2023, 10:07:26# labels of 10000: tensor([  8,   8,  76, 156,  89, 119, 122,  23], device='cuda:1') torch.Size([8])
08/25/2023, 10:07:26# predicted of 10000: tensor([ 46, 131, 108,  89,  89, 131, 122,  23], device='cuda:1') torch.Size([8])
08/25/2023, 10:09:31# labels of 15000: tensor([ 67, 161,  71,   8,  99,  23, 150, 120], device='cuda:1') torch.Size([8])
08/25/2023, 10:09:31# predicted of 15000: tensor([120, 161, 161, 161,  99, 120, 161, 120], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:08# total count: 16500
08/25/2023, 10:10:08# Epoch 4 | Train Loss: 2.8096 | Train Accuracy: 0.3487


Validation:   0%|          | 0/2063 [00:00<?, ?it/s]

08/25/2023, 10:10:08# labels of Validation: tensor([ 86,  11, 110, 154, 105,  89,  92, 135], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:08# predicted of Validation: tensor([ 26,  26,  26, 156, 135, 135,  26,  26], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:08# labels of 0: tensor([ 86,  11, 110, 154, 105,  89,  92, 135], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:08# predicted of 0: tensor([ 26,  26,  26, 156, 135, 135,  26,  26], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:24# labels of Validation: tensor([  7,  89, 101,   9,  19, 106,  26,  83], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:24# predicted of Validation: tensor([  7, 135,  26,  26,  26, 106,  26,  26], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:40# labels of Validation: tensor([156,  78, 129,   6,  60,  84, 150,  60], device='cuda:1') torch.Size([8])
08/25/2023, 10:10:40# predicted of Validation: tensor([105, 135,  26,  26,  26, 156,  26,  26], device='cuda:1') torch.Size([8]

Training:   0%|          | 0/16500 [00:00<?, ?it/s]