# Test of GraphSAGE
- consider the edge embedding with secureBERT_150
- use DGL
- predict `graphs`
- valid, test data are in the training dataset

In [22]:
import os
import dgl
import json
import torch
import torch as th
import dgl.nn as dglnn
# from tqdm import tqdm
from tqdm.notebook import tqdm  # 使用 notebook 版本的 tqdm
import torch.nn as nn
from dgl.nn import GraphConv, GATConv, SAGEConv
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from transformers import get_linear_schedule_with_warmup

- check the GPU and assign the GPU by the best memory usage

In [2]:
import subprocess
import torch

def get_free_gpu():
    try:
        # Run nvidia-smi command to get GPU details
        _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
        command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
        memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
        memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
        # Get the GPU with the maximum free memory
        best_gpu_id = memory_free_values.index(max(memory_free_values))
        return best_gpu_id
    except:
        # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
        return 0

if torch.cuda.is_available():
    # Get the best GPU ID based on free memory and set it
    best_gpu_id = get_free_gpu()
    device = torch.device(f"cuda:{best_gpu_id}")
else:
    device = torch.device("cpu")
    print("there's no available GPU")

# device = torch.device(f"cuda:{1}")
print(device)


cuda:0


## Fix the seed

In [25]:
device = f"cuda:{1}"
print(device)

cuda:1


In [3]:
import numpy as np
import torch
import random

#fix seed
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

## Data Loader

In [4]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]
        return data

def collate(samples):
    data_list = samples
    batched_graphs = []
    for data in data_list:
        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"])

        g.ndata['feat'] = th.tensor(data["node_feat"])
        g.edata['feat'] = th.tensor(data["edge_attr"])
        g.edata['label'] = th.tensor(data["labels"])  # Add edge labels to graph

        batched_graphs.append(g)
    
    return dgl.batch(batched_graphs)

In [26]:
# datasets = ['train', 'valid', 'test']
datasets = ['test'] 
dataset_data = {}

for dataset_name in tqdm(datasets):
    file_path = f"/workdir/home/bai/Euni_HO_modified/data/training_data/secureBERT_150/{dataset_name}.jsonl"
    
    print(file_path)
    with open(file_path) as f:
        data_list = [json.loads(line) for line in tqdm(f, position=0, leave=True)]
    
    dataset_data[dataset_name] = GraphDataset(data_list, device)

print("Datasets loaded!")

  0%|          | 0/1 [00:00<?, ?it/s]

/workdir/home/bai/Euni_HO_modified/data/training_data/secureBERT_150/test.jsonl


0it [00:00, ?it/s]

Datasets loaded!


- choose batch size

In [27]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

dataloaders = create_dataloaders(32)

- check the dimension of the node feature

In [7]:
dataset_data['train'][16]['labels']

[0, 0, 0, 0, 0, 0, 0, 0, 0]

In [8]:
dataset_data['train'][15]['labels']

[0, 0, 0]

In [9]:
len(dataset_data['train'][15]['node_feat'])

3

In [10]:
len(dataset_data['train'][15]['node_feat'][1])

150

- check the dimension of the edge feature

In [11]:
edge_embedding_dim = 0

# Assuming dataloaders is a dictionary with 'test' as one of the keys
for batch in dataloaders['train']:
    # Your batch processing code here
    print(batch, "\n")
#     print("edata:", batch.edata, '\n')
    print("edata['feat'] size:", batch.edata['feat'].shape, '\n')
    print("edata['label']:", batch.edata['label'])

    edge_embedding_dim = batch.edata['feat'].shape[1]

    break  # To break out after the first batch if needed

print("\n\nedge embedding dimension: ", edge_embedding_dim)

Graph(num_nodes=15228, num_edges=47224,
      ndata_schemes={'feat': Scheme(shape=(150,), dtype=torch.float32)}
      edata_schemes={'feat': Scheme(shape=(768,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}) 

edata['feat'] size: torch.Size([47224, 768]) 

edata['label']: tensor([0, 0, 0,  ..., 0, 0, 0])


edge embedding dimension:  768


- Turn the print message to a log file

In [12]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"../log_message/{formatted_time}_GraphSAGE_secureBERT_150-withedge.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

../log_message/0108_14:57_GraphSAGE_secureBERT_150-withedge.log


### Model

In [13]:
class GraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(GraphSAGE, self).__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, 'pool')
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, 'pool')
#         self.dropout = nn.Dropout(0.25)

    def forward(self, g, inputs):
        h = self.layer1(g, inputs)
        h = torch.relu(h)
#         h = self.dropout(h)
        new_node_feat = self.layer2(g, h)
    
        return new_node_feat

In [14]:
class MLPPredictor(nn.Module):
    def __init__(self, out_feats, out_classes, edge_embedding_dim):
        super().__init__()
        self.W = nn.Linear(out_feats*2 + edge_embedding_dim, out_classes)

    def apply_edges(self, edges, edge_feat):
#     def apply_edges(self, edges):

        h_u = edges.src['new_node_feat']
        h_v = edges.dst['new_node_feat']
        
        num_edges, edge_feat_dim = edge_feat.shape
#         print(num_edges, edge_feat_dim)
        
        h_e = edge_feat
        
        # concat 3 features
#         test = torch.cat([h_u, h_v, h_e],1)
#         print("with edge: ", test.shape)
        
#         test = torch.cat([h_u, h_v],1)
#         print("without edge: ", test.shape)
        
        score = self.W(torch.cat([h_u, h_v, h_e], 1))
#         score = self.W(torch.cat([h_u, h_v], 1))

        return {'score': score}


    def forward(self, graph, new_node_feat, edge_feat):
        with graph.local_scope():
            graph.ndata['new_node_feat'] = new_node_feat
#             graph.apply_edges(self.apply_edges)

            # 在 apply_edges 时传递 edge_feat
            graph.apply_edges(lambda edges: self.apply_edges(edges, edge_feat))
            return graph.edata['score']

In [15]:
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_classes, edge_embedding_dim):
        super().__init__()
        self.sage = GraphSAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes, edge_embedding_dim)
      
    def forward(self, g, node_feat, edge_feat, return_logits=False):
        new_node_feat = self.sage(g, node_feat)
        logits = self.pred(g, new_node_feat, edge_feat)
        
        return logits

- Model Forward  

In [56]:
def model_fn(batched_g, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
#     batched_g, labels = data
    batched_g = batched_g.to(device)
    
    labels = batched_g.edata['label'].to(device)
    
#     logits = model(batched_g, batched_g.ndata['feat'].float())
    logits = model(batched_g, batched_g.ndata['feat'].float(), batched_g.edata['feat'].float())

#     print(labels.shape)
#     print(logits.shape)
    
#     print(0/0)
    loss = criterion(logits, labels)

    output = torch.softmax(logits, dim=1)
    preds = output.argmax(1)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
        
    return loss, accuracy, preds

### Training

- Fix the seed and save the model.state_dict that contains the initial weight

In [None]:
seed = 8787
same_seeds(seed)

model = Model(in_features=150, hidden_features=64, out_features=128, num_classes=167)
torch.save(model.state_dict(), 'model3_initial(graphsage)/initial_weight.pth')

In [None]:
# model.layer1.fc_self.weight
model.sage.layer1.fc_self.weight

- Check if model really load the model_dict

In [None]:
model = Model(in_features=150, hidden_features=64, out_features=128, num_classes=167)
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
model.sage.layer1.fc_self.weight

In [57]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

# model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model = Model(in_features=150, hidden_features=64, out_features=128, num_classes=2, edge_embedding_dim = edge_embedding_dim)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE-DARPA/best_model_GraphSAGE_secureBERT_150.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 20

# save the best model
best_val_loss = float('inf')
patience = 4  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for batched_g in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/20 [00:00<?, ?it/s]

Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:35:34# total batches: 11
01/08/2024, 13:35:34# Epoch 0 | Train Loss: 0.0321 | Train Accuracy: 0.9919


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:35:35# Validation Loss: 0.0012 | Validation Accuracy: 0.9999

01/08/2024, 13:35:35# Find a better model!!


Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:36:23# total batches: 11
01/08/2024, 13:36:23# Epoch 1 | Train Loss: 0.0076 | Train Accuracy: 0.9991


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:36:24# Validation Loss: 0.0006 | Validation Accuracy: 0.9999

01/08/2024, 13:36:24# Find a better model!!


Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:37:07# total batches: 11
01/08/2024, 13:37:07# Epoch 2 | Train Loss: 0.0065 | Train Accuracy: 0.9994


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:37:08# Validation Loss: 0.0007 | Validation Accuracy: 0.9999



Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:37:53# total batches: 11
01/08/2024, 13:37:53# Epoch 3 | Train Loss: 0.0048 | Train Accuracy: 0.9995


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:37:54# Validation Loss: 0.0008 | Validation Accuracy: 0.9998



Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:38:37# total batches: 11
01/08/2024, 13:38:37# Epoch 4 | Train Loss: 0.0047 | Train Accuracy: 0.9995


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:38:38# Validation Loss: 0.0004 | Validation Accuracy: 0.9999

01/08/2024, 13:38:38# Find a better model!!


Training:   0%|          | 0/11 [00:00<?, ?it/s]

01/08/2024, 13:39:22# total batches: 11
01/08/2024, 13:39:22# Epoch 5 | Train Loss: 0.0051 | Train Accuracy: 0.9994


Validation:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 13:39:23# Validation Loss: 0.0008 | Validation Accuracy: 0.9998



Training:   0%|          | 0/11 [00:00<?, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 3.78 GiB (GPU 0; 10.76 GiB total capacity; 4.60 GiB already allocated; 3.70 GiB free; 5.80 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### test of valid and test part is ``graph``

- 60 APs in training x 10000times
- 5 APs in validation x 4 times
- 3 APs in test x 4 times
- Batch size = 4

In [21]:


# load the pretrained model
pretrained_model_path = '../checkpoint_graphSAGE-DARPA/best_model_GraphSAGE_secureBERT_150.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
        loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, count, which_type='test')
        labels = batched_g.edata['label'].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')

Testing:   0%|          | 0/2 [00:00<?, ?it/s]

01/08/2024, 12:47:28# labels: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0') torch.Size([538051])
01/08/2024, 12:47:28# predicted: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0') torch.Size([538051])
01/08/2024, 12:47:28# Test Accuracy: 99.96302015680197 %





- observe the prediction

In [22]:
from collections import Counter


# Count occurrences of 0s and 1s in predicted_labels
true_counter = Counter(true_labels)
# Print the counts
add_log_msg(f"Count of true 0s: {true_counter[0]}")
add_log_msg(f"Count of true 1s: {true_counter[1]}\n")


# Count occurrences of 0s and 1s in predicted_labels
predicted_counter = Counter(predicted_labels)

# Print the counts
add_log_msg(f"Count of predicted 0s: {predicted_counter[0]}")
add_log_msg(f"Count of predicted 1s: {predicted_counter[1]}\n")

01/08/2024, 12:51:11# Count of true 0s: 537932
01/08/2024, 12:51:11# Count of true 1s: 199

01/08/2024, 12:51:11# Count of predicted 0s: 538131
01/08/2024, 12:51:11# Count of predicted 1s: 0



- the classification report

In [None]:
# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(true_labels, predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-secureBERT_150-graphSAGE-{count}.xlsx'
    labels_filename = f'true_predicted_labels-secureBERT_150-graphSAGE-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

report_df.to_excel(report_path, index_label='Label')

labels_df = pd.DataFrame({'true_label': true_labels, 'predicted_label': predicted_labels})
labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

plain_report = classification_report(true_labels, predicted_labels)
add_log_msg(f"plain_report:\n{plain_report}")


### For binary class classification

- turn the labels to one-hot encoding format

In [29]:
import torch

def to_one_hot(labels, num_classes=2):
    return torch.nn.functional.one_hot(labels, num_classes=num_classes)

# 示例
labels = torch.tensor([0, 1, 0, 1])
one_hot_labels = to_one_hot(labels)
print(one_hot_labels)


tensor([[1, 0],
        [0, 1],
        [1, 0],
        [0, 1]])


In [42]:
import torch
import torch.nn as nn

def model_fn(batched_g, model, criterion, device, count=1, which_type='train'):
    batched_g = batched_g.to(device)
    labels = batched_g.edata['label'].to(device)
    logits = model(batched_g, batched_g.ndata['feat'].float(), batched_g.edata['feat'].float())

    # 计算类别权重
    pos_weight = compute_class_weight(labels)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    
#     print(labels.shape)
#     print(logits.shape)
    
    labels = to_one_hot(labels).float()
#     print(labels.shape)

#     print(0/0)
    
    loss = criterion(logits, labels)

    output = torch.sigmoid(logits)  # for BCEWithLogitsLoss, use sigmoid
    preds = (output > 0.5).float() 
#     print(preds)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())

    return loss, accuracy, preds

def compute_class_weight(labels):
    # 计算权重
    # 计算标签中 1 的频率
    pos = labels.sum(dim=0)
    print("pos: ", pos)
    
    # 计算标签中 0 的频率
    neg = (1 - labels).sum(dim=0)
    print("neg: ", neg)
    
    # 计算权重
    weight = neg / (pos + 1e-5)
    
    print("weight: ", weight, "\n")
    return weight


In [20]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

# model = GraphSAGE(in_dim=50, hidden_dim=16, out_dim=167)
model = Model(in_features=150, hidden_features=64, out_features=128, num_classes=2, edge_embedding_dim = edge_embedding_dim)
# in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
# out_dim means the # of the categories -> 168 for out tasks
# model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
best_model_path = "../checkpoint_graphSAGE-DARPA/best_model_GraphSAGE_secureBERT_150.pt"

model = model.to(device)

# optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = AdamW(model.parameters(), lr=5e-4)
# scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=18, num_training_steps=total_steps)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 20

# save the best model
best_val_loss = float('inf')
patience = 4  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for batched_g in tqdm(dataloaders['train'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

#         print(best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/20 [00:00<?, ?it/s]

Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [0., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 0.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 0.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 0.]], device='cuda:0')
01/08/2024, 15:03:59# total batches: 6
01/08/2024, 15:03:59# Epoch 0 | Train Loss: 54.0265 | Train Accuracy: 0.8798


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 0.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:04:00# Validation Loss: 12.7715 | Validation Accuracy: 0.9336

01/08/2024, 15:04:00# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
01/08/2024, 15:04:45# total batches: 6
01/08/2024, 15:04:45# Epoch 1 | Train Loss: 3.9182 | Train Accuracy: 0.6358


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:04:45# Validation Loss: 3.5695 | Validation Accuracy: 0.9210

01/08/2024, 15:04:45# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 1.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 0.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
01/08/2024, 15:05:31# total batches: 6
01/08/2024, 15:05:31# Epoch 2 | Train Loss: 1.4767 | Train Accuracy: 0.8475


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:05:31# Validation Loss: 1.6738 | Validation Accuracy: 0.9653

01/08/2024, 15:05:31# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:06:19# total batches: 6
01/08/2024, 15:06:19# Epoch 3 | Train Loss: 19738.2321 | Train Accuracy: 0.9116


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:06:20# Validation Loss: 0.7937 | Validation Accuracy: 0.9625

01/08/2024, 15:06:20# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 1.],
        [1., 0.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
01/08/2024, 15:07:03# total batches: 6
01/08/2024, 15:07:03# Epoch 4 | Train Loss: 589.0427 | Train Accuracy: 0.9260


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:07:04# Validation Loss: 0.2079 | Validation Accuracy: 0.9636

01/08/2024, 15:07:04# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
tensor([[1., 0.],
        [1., 1.],
        [1., 1.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 1.]], device='cuda:0')
tensor([[1., 1.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 1.],
        [1., 0.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 1.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
01/08/2024, 15:07:49# total batches: 6
01/08/2024, 15:07:49# Epoch 5 | Train Loss: 95.4370 | Train Accuracy: 0.9010


Validation:   0%|          | 0/1 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:0')
01/08/2024, 15:07:50# Validation Loss: 0.1206 | Validation Accuracy: 0.9660

01/08/2024, 15:07:50# Find a better model!!


Training:   0%|          | 0/6 [00:00<?, ?it/s]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]], device='cuda:0')
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 1.]], device='cuda:0')


RuntimeError: CUDA out of memory. Tried to allocate 3.79 GiB (GPU 0; 10.76 GiB total capacity; 4.09 GiB already allocated; 1.36 GiB free; 8.12 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [43]:
# load the pretrained model
pretrained_model_path = '../checkpoint_graphSAGE-DARPA/best_model_GraphSAGE_secureBERT_150.pt'
model.load_state_dict(torch.load(pretrained_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
        loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, count, which_type='test')
        
        print("one-hot predicted: ", predicted, "\n")
        predicted = torch.argmax(predicted, dim=1)
        print("transformed predicted:", predicted, "\n")
        
        
        labels = batched_g.edata['label'].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')

Testing:   0%|          | 0/1 [00:00<?, ?it/s]

pos:  tensor(199, device='cuda:1')
neg:  tensor(537932, device='cuda:1')
weight:  tensor(2703.1758, device='cuda:1')
one-hot predicted:  tensor([[1., 0.],
        [1., 1.],
        [1., 0.],
        ...,
        [1., 1.],
        [1., 1.],
        [1., 1.]], device='cuda:1') 

transformed predicted: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:1') 

01/08/2024, 21:45:52# labels: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:1') torch.Size([538131])
01/08/2024, 21:45:52# predicted: tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:1') torch.Size([538131])
01/08/2024, 21:45:52# Test Accuracy: 99.96302015680197 %





In [38]:
from collections import Counter


# # 将 One-Hot 编码转换为类别标签
# true_labels = np.argmax(true_labels, axis=1)
# predicted_labels = np.argmax(predicted_labels, axis=1)


# Count occurrences of 0s and 1s in predicted_labels
true_counter = Counter(true_labels)
# Print the counts
add_log_msg(f"Count of true 0s: {true_counter[0]}")
add_log_msg(f"Count of true 1s: {true_counter[1]}\n")


# Count occurrences of 0s and 1s in predicted_labels
predicted_counter = Counter(predicted_labels)

# Print the counts
add_log_msg(f"Count of predicted 0s: {predicted_counter[0]}")
add_log_msg(f"Count of predicted 1s: {predicted_counter[1]}\n")

01/08/2024, 21:42:03# Count of true 0s: 537932
01/08/2024, 21:42:03# Count of true 1s: 199

01/08/2024, 21:42:03# Count of predicted 0s: 538131
01/08/2024, 21:42:03# Count of predicted 1s: 0

