# Test of GraphSAGE
- use DGL
- predict `graphs`
- the validation and test sets are split off from the same dataset that is used for training

In [1]:
import os
import dgl
import csv
import json
import torch
import random
import subprocess
import torch as th
import numpy as np
import pandas as pd
import torch.nn as nn
import dgl.nn as dglnn
import torch.nn.functional as F

from tqdm.notebook import tqdm
from sklearn.decomposition import PCA
from torch.optim import AdamW, lr_scheduler
from dgl.nn import GraphConv, GATConv, SAGEConv
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from transformers import get_linear_schedule_with_warmup

- check the available GPUs and pick the one with the most free memory (this selection code is currently commented out)
- on the A100 server we use `cuda:0`

In [2]:
# def get_free_gpu():
#     try:
#         # Run nvidia-smi command to get GPU details
#         _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
#         command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
#         memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
#         memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
#         # Get the GPU with the maximum free memory
#         best_gpu_id = memory_free_values.index(max(memory_free_values))
#         return best_gpu_id
#     except:
#         # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
#         return 0

# if torch.cuda.is_available():
#     # Get the best GPU ID based on free memory and set it
#     best_gpu_id = get_free_gpu()
#     device = torch.device(f"cuda:{best_gpu_id}")
# else:
#     device = torch.device("cpu")
#     print("there's no available GPU")

# Use the first CUDA device when one is visible (cuda:0 on the A100 server);
# otherwise fall back to the CPU so the notebook can still run end to end.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Fix the seed

In [3]:
#fix seed
def same_seeds(seed = 8787):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus every CUDA device when CUDA is available), then puts cuDNN
    into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python's own generator was previously left unseeded, so anything using
    # `random` differed between runs even with a fixed seed.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    # Disable cuDNN auto-tuning and request deterministic kernels so repeated
    # runs pick the same algorithms.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Load the embedding

In [4]:
# Embedding configuration: the vector size and the embedding method together
# form the name of the file to load (and later the log / checkpoint names).
DIM = 50
embedding = f"transE_{DIM}"

with open(f"../../data/4_embedding/synthesize/{embedding}.vec.json", "r") as f:
    tmp = json.load(f)

# Position in each weight list -> embedding stored at that position.
index2entemb = dict(enumerate(tmp["ent_embeddings.weight"]))
index2relemb = dict(enumerate(tmp["rel_embeddings.weight"]))

In [5]:
len(index2entemb)

26868

In [6]:
len(index2relemb)

23

In [107]:
# Optional cap on how many graphs to read from the file; None reads every line.
# It is defined here so the cell runs on a fresh kernel (it used to rely on a
# `wanted_data` left over from an earlier session).
wanted_data = None

with open("../../data/source_data/before_embedding/3.10/all_graph_data-1hop(10:1).jsonl", "r") as f:
    print("Loading the data...")

    # One JSON object per line; each object describes one graph.
    input_data = []
    for idx, line in tqdm(enumerate(f), total=wanted_data, desc="Loading"):
        if wanted_data is not None and idx >= wanted_data:
            break
        input_data.append(json.loads(line))

    print("FINISH...")

Loading the data...


Loading:   0%|          | 0/400000 [00:00<?, ?it/s]

FINISH...


In [108]:
len(input_data)

43895

In [83]:
input_data[0]

{'labels': [0, 0, 0, 0, 0, 0, 0],
 'num_nodes': 6,
 'node_feat': [9089, 8784, 16471, 12216, 4826, 8575],
 'edge_attr': [1, 12, 1, 1, 16, 1, 7],
 'edge_index': [[3, 3, 3, 3, 3, 3, 3], [4, 5, 2, 5, 0, 1, 5]]}

- Convert `node_feat` and `edge_attr` from integer ids to embedding vectors
    - the conversion takes about 45 minutes
    - with the original method it would take about 60 hours

In [84]:
# ============ If type(input_data[0]) == dict ============
def _ids_to_embeddings(ids, table):
    """Replace each integer id in ``ids`` with ``table[id]``.

    A non-empty list whose first element is not an ``int`` is treated as
    already converted and returned unchanged. This keeps the cell safe to
    re-run: looking up an embedding (a list) in ``table`` would otherwise
    raise ``TypeError: unhashable type``.
    """
    if ids and not isinstance(ids[0], int):
        return ids
    return [table[i] for i in ids]


# The records are updated in place; later cells read `input_data` directly.
for data_point in tqdm(input_data):
    data_point['node_feat'] = _ids_to_embeddings(data_point['node_feat'], index2entemb)
    data_point['edge_attr'] = _ids_to_embeddings(data_point['edge_attr'], index2relemb)


# ============ If type(input_data[0] == str) ============
# for idx, data in tqdm(enumerate(input_data)):
    
#     # make the data from string to int
#     data_point = json.loads(data)

#     data_point['node_feat'] = [index2entemb[node_id] for node_id in data_point['node_feat']]
#     data_point['edge_attr'] = [index2relemb[edge_id] for edge_id in data_point['edge_attr']]

#     input_data[idx] = data_point

  0%|          | 0/400000 [00:00<?, ?it/s]

In [85]:
type(input_data[0])

dict

In [86]:
input_data[1]['node_feat']

[[-0.01974446140229702,
  0.040913552045822144,
  -0.007980378344655037,
  -0.02847393788397312,
  0.000316029938403517,
  -0.02667837403714657,
  0.04442673176527023,
  -0.015228618867695332,
  0.04259283095598221,
  -0.022810133174061775,
  0.01043322216719389,
  0.001925037708133459,
  0.0007568973232991993,
  -0.018916746601462364,
  -0.03676433488726616,
  0.017077064141631126,
  0.01814185455441475,
  -0.020504726096987724,
  -0.024615883827209473,
  -0.03734942525625229,
  0.025559449568390846,
  -0.039223603904247284,
  0.015173343010246754,
  0.030808748677372932,
  -0.00513201579451561,
  0.016123853623867035,
  0.02677449956536293,
  -0.011095662601292133,
  0.0225190632045269,
  0.028384124860167503,
  -0.03429616987705231,
  0.01600845530629158,
  -0.021545961499214172,
  -0.023546697571873665,
  0.016315115615725517,
  0.04067566245794296,
  -0.017607757821679115,
  0.046072471886873245,
  -0.02772834338247776,
  -0.022649917751550674,
  -0.026763107627630234,
  0.0233130

## Data Loader

In [87]:
class GraphDataset(Dataset):
    """Thin ``Dataset`` wrapper around an in-memory list of graph records."""

    def __init__(self, data_list, device):
        # Both arguments are stored as given; nothing is copied or moved here.
        self.device = device
        self.data_list = data_list

    def __len__(self):
        """Return how many records the wrapped list holds."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return the record at position ``idx`` exactly as it is stored."""
        return self.data_list[idx]

def collate(samples):
    """Build one DGL graph per record and merge them into a single batch.

    Each record is read through its ``edge_index`` (source list, destination
    list), ``num_nodes``, ``node_feat``, ``edge_attr`` and ``labels`` keys.
    Node features are stored under ``ndata['feat']``; edge features and edge
    labels under ``edata['feat']`` and ``edata['label']``.

    Args:
        samples: List of records as returned by the dataset.

    Returns:
        The result of ``dgl.batch`` over the per-record graphs.
    """
    def build_graph(record):
        edge_index = record["edge_index"]
        graph = dgl.graph(
            (th.tensor(edge_index[0]), th.tensor(edge_index[1])),
            num_nodes=record["num_nodes"],
        )
        graph.ndata['feat'] = th.tensor(record["node_feat"])
        graph.edata['feat'] = th.tensor(record["edge_attr"])
        graph.edata['label'] = th.tensor(record['labels'])
        return graph

    return dgl.batch([build_graph(record) for record in samples])

In [88]:
# Hold out the last 10% of the records (in their loaded order) as the test set.
total_data = len(input_data)
test_size = int(total_data * 0.1)
train_valid_size = total_data - test_size

train_valid_data, test_data = input_data[:train_valid_size], input_data[train_valid_size:]

# Of the remaining records, a seeded random 25% becomes the validation set.
train_data, valid_data = train_test_split(train_valid_data, test_size=0.25, random_state=42)

# creating GraphDataset
dataset_data = {
    split_name: GraphDataset(split_records, device)
    for split_name, split_records in (
        ('train', train_data),
        ('valid', valid_data),
        ('test', test_data),
    )
}

print("Datasets loaded and ready for training!")

Datasets loaded and ready for training!


In [89]:
# # split 8:1:1 (train, valid, test)
# train_data, remaining_data = train_test_split(input_data, test_size=0.2, random_state=42)
# valid_data, test_data = train_test_split(remaining_data, test_size=0.5, random_state=42)


# dataset_data = {
#     'train': GraphDataset(train_data, device),
#     'valid': GraphDataset(valid_data, device),
#     'test': GraphDataset(test_data, device)
# }

# print("Datasets loaded and ready for training!")

In [90]:
dataset_data['train'][10]['labels']

[0, 0, 0, 0, 0, 0]

In [91]:
len(dataset_data['test'])

40000

- choose batch size

In [94]:
def create_dataloaders(batch_size, shuffle=True):
    """Create one ``DataLoader`` per entry of the global ``dataset_data``.

    Args:
        batch_size: Number of graph records per batch.
        shuffle: Whether to shuffle every split except ``"test"``.

    Returns:
        Dict mapping each split name to its ``DataLoader``; all loaders use
        ``collate`` as their ``collate_fn``.
    """
    return {
        dataset_name: DataLoader(
            dataset,
            batch_size=batch_size,
            # do not shuffle the testing dataset
            shuffle=False if dataset_name == "test" else shuffle,
            collate_fn=collate,
        )
        for dataset_name, dataset in dataset_data.items()
    }

dataloaders = create_dataloaders(200)

- Write every printed message to a log file as well

In [95]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

# One log file per notebook run, named after the start time and the embedding.
log_file_path = f"./log_message/{formatted_time}_GraphSAGE_{embedding}.log"

def add_log_msg(msg, log_file_path=log_file_path):
    """Append a timestamped message to the log file and echo it to stdout.

    Args:
        msg: Text to record.
        log_file_path: File to append to; defaults to this run's log file.
    """
    # Build the line once so the file and the console show the same timestamp.
    line = f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}'

    # Create the target directory on demand; opening in append mode does not
    # create missing parent directories.
    log_dir = os.path.dirname(log_file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    with open(log_file_path, 'a') as f:
        f.write(f'{line}\n')
    print(line)

print(log_file_path)

./log_message/0201_12:32_GraphSAGE_transE_50.log


### Model

In [96]:
class GraphSAGE(nn.Module):
    """Two-layer GraphSAGE encoder with ReLU and dropout between the layers.

    Args:
        in_dim: Width of the input node features.
        hidden_dim: Output width of the first ``SAGEConv`` layer.
        out_dim: Output width of the second ``SAGEConv`` layer.
        dropout: Dropout probability applied after the first layer
            (default 0.25, the previously hard-coded value).
        aggregator_type: Aggregator passed to both ``SAGEConv`` layers
            (default ``'pool'``, the previously hard-coded value).
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=0.25, aggregator_type='pool'):
        super().__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, aggregator_type)
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, aggregator_type)
        self.dropout = nn.Dropout(dropout)

    def forward(self, g, inputs):
        """Return the second layer's output for graph ``g`` and node features ``inputs``."""
        h = self.layer1(g, inputs)
        h = torch.relu(h)
        h = self.dropout(h)
        h = self.layer2(g, h)
        return h

In [97]:
class MLPPredictor(nn.Module):
    """Edge scorer: one linear layer over the concatenated endpoint states."""

    def __init__(self, out_feats, out_classes):
        super().__init__()
        # The input width is doubled because the source and destination
        # vectors are concatenated before the linear layer.
        self.W = nn.Linear(2 * out_feats, out_classes)

    def apply_edges(self, edges):
        """Score a batch of edges from their endpoints' ``'h'`` features.

        Returns a dict with a single ``'score'`` entry.
        """
        endpoints = torch.cat((edges.src['h'], edges.dst['h']), dim=1)
        return {'score': self.W(endpoints)}

    def forward(self, graph, h):
        """Attach ``h`` as node data ``'h'`` and return the per-edge scores."""
        # local_scope keeps the temporary 'h' / 'score' entries off the
        # caller's graph once the block exits.
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']

In [98]:
class Model(nn.Module):
    """GraphSAGE node encoder followed by the per-edge MLP predictor."""

    def __init__(self, in_features, hidden_features, out_features, num_classes):
        super().__init__()
        self.sage = GraphSAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes)

    def forward(self, g, node_feat, return_logits=False):
        """Encode the nodes of ``g`` and return the predictor's edge logits.

        ``return_logits`` is accepted but not consulted; logits are always
        returned.
        """
        node_states = self.sage(g, node_feat)
        return self.pred(g, node_states)

- Model Forward  

In [99]:
def model_fn(batched_g, model, criterion, device, count=1, which_type='train'):
    """Run one batched graph through ``model`` and score the result.

    Args:
        batched_g: Batched graph exposing ``ndata['feat']`` and ``edata['label']``.
        model: Callable taking ``(graph, node_features)`` and returning logits.
        criterion: Loss callable taking ``(logits, labels)``.
        device: Device the graph and labels are moved to.
        count: Accepted for the callers' convenience; not used here.
        which_type: Accepted for the callers' convenience; not used here.

    Returns:
        Tuple ``(loss, accuracy, preds)``: the criterion value, the fraction
        of predictions equal to the labels, and the predicted class indices.
    """
    graph = batched_g.to(device)
    targets = graph.edata['label'].to(device)
    node_inputs = graph.ndata['feat'].float()

    logits = model(graph, node_inputs)
    loss = criterion(logits, targets)

    # Predicted class = the most probable class under the softmax.
    preds = torch.softmax(logits, dim=1).argmax(1)
    accuracy = (preds == targets).float().mean()

    return loss, accuracy, preds

### Training Loop

In [100]:
# # For release the GPU memory
# # No need to restart the kernel

# import gc
# gc.collect()
# torch.cuda.empty_cache()

In [133]:
seed = 5269  # passed to same_seeds() at the top of the training cell
in_dim = DIM # dimension of the node feature
hidden_dim = 256  # output width of the first SAGEConv layer
out_dim = 512  # output width of the second SAGEConv layer (input to the edge predictor)
num_classes = 2 # for DARPA

lr = 5e-4  # learning rate handed to AdamW

total_steps = 100  # upper bound on training epochs (the loop iterates range(total_steps))
patience = 5  # early stopping: training stops once `waiting` reaches this value
waiting = 0  # early stopping: epochs in a row without a new best validation loss

In [134]:
# Seed before building the model so the initial weights are reproducible;
# previously the seed was only applied after the model had been created.
same_seeds(seed)
model = Model(in_dim, hidden_dim, out_dim, num_classes)

best_model_path = f"./checkpoint_graphSAGE/best_model_GraphSAGE_{embedding}.pt"
# torch.save does not create missing directories, so create it up front.
os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
print(best_model_path)

optimizer = AdamW(model.parameters(), lr)

# eta_min=0 and last_epoch=-1 are the defaults; the deprecated `verbose`
# argument is no longer passed.
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

./checkpoint_graphSAGE/best_model_GraphSAGE_transE_50.pt


In [135]:
same_seeds(seed)
model = model.to(device)
best_val_loss = float('inf')
# Reset the early-stopping counter here so re-running this cell does not
# inherit a stale count from an earlier run.
waiting = 0

# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0

    for batched_g in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

    # The learning-rate schedule advances once per epoch.
    scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    # Reported metrics are means of the per-batch values; max(..., 1) avoids
    # a ZeroDivisionError when a loader yields no batches.
    avg_loss = total_loss / max(num_batches, 1)
    avg_accuracy = total_accuracy / max(num_batches, 1)

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / max(num_batches, 1)
    current_loss = total_loss / max(num_batches, 1)

    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')

    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0

        # Log every improvement, not only when an older checkpoint file
        # already exists. torch.save overwrites the previous checkpoint, so
        # no explicit removal is needed.
        add_log_msg("Find a better model!!")
        torch.save(model.state_dict(), best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:54:07# total batches: 149
02/01/2024, 14:54:07# Epoch 0 | Train Loss: 0.4355 | Train Accuracy: 0.8124


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:54:11# Validation Loss: 0.3732 | Validation Accuracy: 0.8277

02/01/2024, 14:54:11# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:54:24# total batches: 149
02/01/2024, 14:54:24# Epoch 1 | Train Loss: 0.3554 | Train Accuracy: 0.8367


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:54:28# Validation Loss: 0.3367 | Validation Accuracy: 0.8403

02/01/2024, 14:54:28# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:54:40# total batches: 149
02/01/2024, 14:54:40# Epoch 2 | Train Loss: 0.3341 | Train Accuracy: 0.8403


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:54:43# Validation Loss: 0.3250 | Validation Accuracy: 0.8425

02/01/2024, 14:54:43# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:54:55# total batches: 149
02/01/2024, 14:54:55# Epoch 3 | Train Loss: 0.3243 | Train Accuracy: 0.8435


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:54:59# Validation Loss: 0.3197 | Validation Accuracy: 0.8445

02/01/2024, 14:54:59# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:55:11# total batches: 149
02/01/2024, 14:55:11# Epoch 4 | Train Loss: 0.3201 | Train Accuracy: 0.8448


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:55:15# Validation Loss: 0.3154 | Validation Accuracy: 0.8472

02/01/2024, 14:55:15# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:55:27# total batches: 149
02/01/2024, 14:55:27# Epoch 5 | Train Loss: 0.3167 | Train Accuracy: 0.8457


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:55:31# Validation Loss: 0.3130 | Validation Accuracy: 0.8474

02/01/2024, 14:55:31# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:55:43# total batches: 149
02/01/2024, 14:55:43# Epoch 6 | Train Loss: 0.3152 | Train Accuracy: 0.8459


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:55:47# Validation Loss: 0.3128 | Validation Accuracy: 0.8473

02/01/2024, 14:55:47# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:55:59# total batches: 149
02/01/2024, 14:55:59# Epoch 7 | Train Loss: 0.3138 | Train Accuracy: 0.8464


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:56:03# Validation Loss: 0.3116 | Validation Accuracy: 0.8474

02/01/2024, 14:56:03# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:56:14# total batches: 149
02/01/2024, 14:56:14# Epoch 8 | Train Loss: 0.3133 | Train Accuracy: 0.8466


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:56:18# Validation Loss: 0.3109 | Validation Accuracy: 0.8477

02/01/2024, 14:56:18# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:56:29# total batches: 149
02/01/2024, 14:56:29# Epoch 9 | Train Loss: 0.3133 | Train Accuracy: 0.8467


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:56:33# Validation Loss: 0.3103 | Validation Accuracy: 0.8481

02/01/2024, 14:56:33# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:56:45# total batches: 149
02/01/2024, 14:56:45# Epoch 10 | Train Loss: 0.3131 | Train Accuracy: 0.8466


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:56:48# Validation Loss: 0.3116 | Validation Accuracy: 0.8476



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:57:00# total batches: 149
02/01/2024, 14:57:00# Epoch 11 | Train Loss: 0.3132 | Train Accuracy: 0.8467


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:57:04# Validation Loss: 0.3110 | Validation Accuracy: 0.8478



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:57:16# total batches: 149
02/01/2024, 14:57:16# Epoch 12 | Train Loss: 0.3129 | Train Accuracy: 0.8466


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:57:20# Validation Loss: 0.3113 | Validation Accuracy: 0.8477



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:57:31# total batches: 149
02/01/2024, 14:57:31# Epoch 13 | Train Loss: 0.3123 | Train Accuracy: 0.8469


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:57:35# Validation Loss: 0.3099 | Validation Accuracy: 0.8482

02/01/2024, 14:57:35# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:57:47# total batches: 149
02/01/2024, 14:57:47# Epoch 14 | Train Loss: 0.3118 | Train Accuracy: 0.8469


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:57:52# Validation Loss: 0.3105 | Validation Accuracy: 0.8476



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:58:04# total batches: 149
02/01/2024, 14:58:04# Epoch 15 | Train Loss: 0.3115 | Train Accuracy: 0.8470


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:58:08# Validation Loss: 0.3105 | Validation Accuracy: 0.8482



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:58:20# total batches: 149
02/01/2024, 14:58:20# Epoch 16 | Train Loss: 0.3110 | Train Accuracy: 0.8471


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:58:24# Validation Loss: 0.3090 | Validation Accuracy: 0.8482

02/01/2024, 14:58:24# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:58:36# total batches: 149
02/01/2024, 14:58:36# Epoch 17 | Train Loss: 0.3098 | Train Accuracy: 0.8473


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:58:40# Validation Loss: 0.3080 | Validation Accuracy: 0.8477

02/01/2024, 14:58:40# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:58:52# total batches: 149
02/01/2024, 14:58:52# Epoch 18 | Train Loss: 0.3093 | Train Accuracy: 0.8473


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:58:56# Validation Loss: 0.3078 | Validation Accuracy: 0.8481

02/01/2024, 14:58:56# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:59:07# total batches: 149
02/01/2024, 14:59:07# Epoch 19 | Train Loss: 0.3080 | Train Accuracy: 0.8477


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:59:10# Validation Loss: 0.3073 | Validation Accuracy: 0.8478

02/01/2024, 14:59:10# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:59:22# total batches: 149
02/01/2024, 14:59:22# Epoch 20 | Train Loss: 0.3081 | Train Accuracy: 0.8475


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:59:26# Validation Loss: 0.3083 | Validation Accuracy: 0.8475



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:59:38# total batches: 149
02/01/2024, 14:59:38# Epoch 21 | Train Loss: 0.3073 | Train Accuracy: 0.8477


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:59:42# Validation Loss: 0.3081 | Validation Accuracy: 0.8465



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 14:59:53# total batches: 149
02/01/2024, 14:59:53# Epoch 22 | Train Loss: 0.3068 | Train Accuracy: 0.8480


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 14:59:57# Validation Loss: 0.3064 | Validation Accuracy: 0.8483

02/01/2024, 14:59:57# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:00:10# total batches: 149
02/01/2024, 15:00:10# Epoch 23 | Train Loss: 0.3060 | Train Accuracy: 0.8480


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:00:14# Validation Loss: 0.3061 | Validation Accuracy: 0.8483

02/01/2024, 15:00:14# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:00:27# total batches: 149
02/01/2024, 15:00:27# Epoch 24 | Train Loss: 0.3051 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:00:31# Validation Loss: 0.3058 | Validation Accuracy: 0.8482

02/01/2024, 15:00:31# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:00:45# total batches: 149
02/01/2024, 15:00:45# Epoch 25 | Train Loss: 0.3050 | Train Accuracy: 0.8482


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:00:50# Validation Loss: 0.3049 | Validation Accuracy: 0.8485

02/01/2024, 15:00:50# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:01:06# total batches: 149
02/01/2024, 15:01:06# Epoch 26 | Train Loss: 0.3048 | Train Accuracy: 0.8484


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:01:10# Validation Loss: 0.3047 | Validation Accuracy: 0.8485

02/01/2024, 15:01:10# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:01:23# total batches: 149
02/01/2024, 15:01:23# Epoch 27 | Train Loss: 0.3046 | Train Accuracy: 0.8483


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:01:27# Validation Loss: 0.3048 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:01:39# total batches: 149
02/01/2024, 15:01:39# Epoch 28 | Train Loss: 0.3041 | Train Accuracy: 0.8486


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:01:43# Validation Loss: 0.3048 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:01:55# total batches: 149
02/01/2024, 15:01:55# Epoch 29 | Train Loss: 0.3042 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:01:58# Validation Loss: 0.3046 | Validation Accuracy: 0.8485

02/01/2024, 15:01:58# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:02:11# total batches: 149
02/01/2024, 15:02:11# Epoch 30 | Train Loss: 0.3041 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:02:14# Validation Loss: 0.3050 | Validation Accuracy: 0.8483



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:02:27# total batches: 149
02/01/2024, 15:02:27# Epoch 31 | Train Loss: 0.3040 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:02:30# Validation Loss: 0.3045 | Validation Accuracy: 0.8486

02/01/2024, 15:02:30# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:02:42# total batches: 149
02/01/2024, 15:02:42# Epoch 32 | Train Loss: 0.3041 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:02:46# Validation Loss: 0.3051 | Validation Accuracy: 0.8482



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:02:59# total batches: 149
02/01/2024, 15:02:59# Epoch 33 | Train Loss: 0.3043 | Train Accuracy: 0.8486


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:03:03# Validation Loss: 0.3046 | Validation Accuracy: 0.8483



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:03:16# total batches: 149
02/01/2024, 15:03:16# Epoch 34 | Train Loss: 0.3045 | Train Accuracy: 0.8483


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:03:19# Validation Loss: 0.3044 | Validation Accuracy: 0.8486

02/01/2024, 15:03:19# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:03:31# total batches: 149
02/01/2024, 15:03:31# Epoch 35 | Train Loss: 0.3044 | Train Accuracy: 0.8483


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:03:35# Validation Loss: 0.3049 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:03:48# total batches: 149
02/01/2024, 15:03:48# Epoch 36 | Train Loss: 0.3048 | Train Accuracy: 0.8482


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:03:52# Validation Loss: 0.3052 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:04:04# total batches: 149
02/01/2024, 15:04:04# Epoch 37 | Train Loss: 0.3045 | Train Accuracy: 0.8482


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:04:08# Validation Loss: 0.3048 | Validation Accuracy: 0.8481



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:04:20# total batches: 149
02/01/2024, 15:04:20# Epoch 38 | Train Loss: 0.3044 | Train Accuracy: 0.8481


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:04:24# Validation Loss: 0.3050 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:04:36# total batches: 149
02/01/2024, 15:04:36# Epoch 39 | Train Loss: 0.3048 | Train Accuracy: 0.8481


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:04:40# Validation Loss: 0.3038 | Validation Accuracy: 0.8486

02/01/2024, 15:04:40# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:04:53# total batches: 149
02/01/2024, 15:04:53# Epoch 40 | Train Loss: 0.3041 | Train Accuracy: 0.8483


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:04:57# Validation Loss: 0.3046 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:05:10# total batches: 149
02/01/2024, 15:05:10# Epoch 41 | Train Loss: 0.3039 | Train Accuracy: 0.8484


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:05:14# Validation Loss: 0.3053 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:05:26# total batches: 149
02/01/2024, 15:05:26# Epoch 42 | Train Loss: 0.3039 | Train Accuracy: 0.8482


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:05:30# Validation Loss: 0.3037 | Validation Accuracy: 0.8487

02/01/2024, 15:05:30# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:05:42# total batches: 149
02/01/2024, 15:05:42# Epoch 43 | Train Loss: 0.3031 | Train Accuracy: 0.8486


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:05:46# Validation Loss: 0.3040 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:05:59# total batches: 149
02/01/2024, 15:05:59# Epoch 44 | Train Loss: 0.3031 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:06:03# Validation Loss: 0.3044 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:06:15# total batches: 149
02/01/2024, 15:06:15# Epoch 45 | Train Loss: 0.3028 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:06:19# Validation Loss: 0.3037 | Validation Accuracy: 0.8486

02/01/2024, 15:06:19# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:06:31# total batches: 149
02/01/2024, 15:06:31# Epoch 46 | Train Loss: 0.3023 | Train Accuracy: 0.8488


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:06:35# Validation Loss: 0.3033 | Validation Accuracy: 0.8487

02/01/2024, 15:06:35# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:06:47# total batches: 149
02/01/2024, 15:06:47# Epoch 47 | Train Loss: 0.3025 | Train Accuracy: 0.8485


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:06:51# Validation Loss: 0.3031 | Validation Accuracy: 0.8488

02/01/2024, 15:06:51# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:07:04# total batches: 149
02/01/2024, 15:07:04# Epoch 48 | Train Loss: 0.3020 | Train Accuracy: 0.8488


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:07:08# Validation Loss: 0.3033 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:07:20# total batches: 149
02/01/2024, 15:07:20# Epoch 49 | Train Loss: 0.3022 | Train Accuracy: 0.8488


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:07:24# Validation Loss: 0.3029 | Validation Accuracy: 0.8488

02/01/2024, 15:07:24# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:07:36# total batches: 149
02/01/2024, 15:07:36# Epoch 50 | Train Loss: 0.3023 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:07:40# Validation Loss: 0.3036 | Validation Accuracy: 0.8484



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:07:52# total batches: 149
02/01/2024, 15:07:52# Epoch 51 | Train Loss: 0.3020 | Train Accuracy: 0.8489


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:07:56# Validation Loss: 0.3034 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:08:09# total batches: 149
02/01/2024, 15:08:09# Epoch 52 | Train Loss: 0.3021 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:08:13# Validation Loss: 0.3028 | Validation Accuracy: 0.8489

02/01/2024, 15:08:13# Find a better model!!


Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:08:26# total batches: 149
02/01/2024, 15:08:26# Epoch 53 | Train Loss: 0.3020 | Train Accuracy: 0.8489


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:08:29# Validation Loss: 0.3034 | Validation Accuracy: 0.8486



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:08:42# total batches: 149
02/01/2024, 15:08:42# Epoch 54 | Train Loss: 0.3025 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:08:46# Validation Loss: 0.3031 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:08:58# total batches: 149
02/01/2024, 15:08:58# Epoch 55 | Train Loss: 0.3024 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:09:02# Validation Loss: 0.3029 | Validation Accuracy: 0.8488



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:09:14# total batches: 149
02/01/2024, 15:09:14# Epoch 56 | Train Loss: 0.3023 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:09:18# Validation Loss: 0.3038 | Validation Accuracy: 0.8485



Training:   0%|          | 0/149 [00:00<?, ?it/s]

02/01/2024, 15:09:30# total batches: 149
02/01/2024, 15:09:30# Epoch 57 | Train Loss: 0.3022 | Train Accuracy: 0.8487


Validation:   0%|          | 0/50 [00:00<?, ?it/s]

02/01/2024, 15:09:34# Validation Loss: 0.3030 | Validation Accuracy: 0.8487



### Testing Loop

In [136]:
# Evaluate the checkpoint stored at `best_model_path` on the test split.
#
# map_location=device remaps the saved tensors onto the device used in this
# session, so the load also works when the checkpoint was written from a
# different device than the one currently selected.
model.load_state_dict(torch.load(best_model_path, map_location=device))

model.to(device)
model.eval()

total = 0    # number of labels compared so far
correct = 0  # number of predictions equal to their label
count = 0    # running batch index, forwarded to model_fn

# Flat per-label lists; the classification-report cell below reads them.
true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
        # Only the predictions are used here; the per-batch loss/accuracy
        # returned by model_fn are ignored.
        _batch_loss, _batch_accuracy, predicted = model_fn(
            batched_g, model, criterion, device, count, which_type='test'
        )
        # Labels are moved to `device` so they can be compared with `predicted`.
        labels = batched_g.edata['label'].to(device)

        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())

        count += 1

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Fail with an explicit message instead of a bare ZeroDivisionError when the
# test dataloader yielded nothing to score.
if total == 0:
    raise ValueError("dataloaders['test'] produced no labels; cannot compute test accuracy")

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')

Testing:   0%|          | 0/22 [00:00<?, ?it/s]

02/01/2024, 15:09:36# Test Accuracy: 84.75887392900857 %





In [137]:
# Summarise the test predictions: scikit-learn returns the report as a nested
# dict (output_dict=True), which is turned into a DataFrame and transposed.
report_data = classification_report(
    true_labels,
    predicted_labels,
    output_dict=True,
)
report_df = pd.DataFrame(report_data).T

report_df

Unnamed: 0,precision,recall,f1-score,support
0,0.853069,0.980516,0.912363,49579.0
1,0.774773,0.284114,0.415765,11696.0
accuracy,0.847589,0.847589,0.847589,0.847589
macro avg,0.813921,0.632315,0.664064,61275.0
weighted avg,0.838124,0.847589,0.817574,61275.0


### Training

- Fix the seed and save the model's `state_dict`, which contains the initial weights

In [67]:
# seed = 8787
# same_seeds(seed)

# model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
# torch.save(model.state_dict(), 'model3_initial(graphsage)/initial_weight.pth')

In [33]:
# # model.layer1.fc_self.weight
# model.sage.layer1.fc_self.weight

- Check that the model really loads the saved `state_dict`

In [34]:
# model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
# model.sage.layer1.fc_self.weight

- Release the GPU memory
    - no need to restart the kernel

In [35]:
# # For release the GPU memory
# # No need to restart the kernel

# import gc
# gc.collect()
# torch.cuda.empty_cache()

In [None]:
# # ======================================== handling the output excel files ========================================
# mapping_file = './new_mapping.txt'
# label_mapping = {}
# with open(mapping_file, 'r') as f:
#     for line in f:
#         parts = line.strip().split(': ')
#         label_mapping[int(parts[1])] = parts[0]
        
# # Apply the label mapping to the true and predicted label lists
# mapped_true_labels = [label_mapping[label] for label in true_labels]
# mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# # Build a DataFrame from the scikit-learn classification report
# report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
# report_df = pd.DataFrame(report_data).transpose()

# # mapped_true_labels_np = np.array(mapped_true_labels)
# # mapped_predicted_labels_np = np.array(mapped_predicted_labels)

# # print("mapped_true_labels 的形状:", mapped_true_labels_np.shape)
# # print("mapped_predicted_labels 的形状:", mapped_predicted_labels_np.shape)

# report_folder = 'classification_report'
# os.makedirs(report_folder, exist_ok=True)

# count = 0
# while True:
#     report_filename = f'classification_report-transE_50-graphSAGE-{count}.xlsx'
#     labels_filename = f'mapped_true_predicted_labels-transE_50-graphSAGE-{count}.xlsx'
    
#     report_path = os.path.join(report_folder, report_filename)
#     labels_path = os.path.join(report_folder, labels_filename)
    
#     if not os.path.exists(report_path) and not os.path.exists(labels_path):
#         break
#     count += 1

    
# report_df.to_excel(report_path, index_label='Label')

# mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
# mapped_labels_df.to_excel(labels_path, index=False)

# add_log_msg(f"report path: {report_path}")
# add_log_msg(f"label path: {labels_path}")

# mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
# add_log_msg(f"mapped_report:\n{mapped_report}")