# Test of GraphSAGE
- use DGL
- predict `graphs`
- valid, test data are in the training dataset

In [11]:
import os
import dgl
import csv
import json
import torch
import random
import pickle
import subprocess
import torch as th
import numpy as np
import pandas as pd
import torch.nn as nn
import dgl.nn as dglnn
import torch.nn.functional as F

from tqdm.notebook import tqdm
from sklearn.decomposition import PCA
from torch.optim import AdamW, lr_scheduler
from dgl.nn import GraphConv, GATConv, SAGEConv
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from transformers import get_linear_schedule_with_warmup

- check the GPU and assign the GPU by the best memory usage

In [12]:
# def get_free_gpu():
#     try:
#         # Run nvidia-smi command to get GPU details
#         _output_to_list = lambda x: x.decode('ascii').split('\n')[:-1]
#         command = "nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader"
#         memory_free_info = _output_to_list(subprocess.check_output(command.split())) 
#         memory_free_values = [int(x) for i, x in enumerate(memory_free_info)]
        
#         # Get the GPU with the maximum free memory
#         best_gpu_id = memory_free_values.index(max(memory_free_values))
#         return best_gpu_id
#     except:
#         # If any exception occurs, default to GPU 0 (this handles cases where nvidia-smi isn't installed)
#         return 0

# if torch.cuda.is_available():
#     # Get the best GPU ID based on free memory and set it
#     best_gpu_id = get_free_gpu()
#     device = torch.device(f"cuda:{best_gpu_id}")
# else:
#     device = torch.device("cpu")
#     print("there's no available GPU")

device = torch.device(f"cuda:{0}")
print(device)

cuda:0


## Fix the seed

In [13]:
def same_seeds(seed = 8787):
    torch.manual_seed(seed)
    # random.seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  
    np.random.seed(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

### Loading the dataset

In [29]:
# with open("../../data/exp3/before_embedding/all_graph_data.jsonl", "r") as f:
with open("../../code_senior/all_graph_data.jsonl", "r") as f:
    print("Loading the data...")
#     input_data = list(f)
#     input_data = [json.loads(line) for idx, line in tqdm(f, desc="Loading")]

    input_data = []
    for idx, line in tqdm(enumerate(f), total=16700, desc="Loading"):
        input_data.append(json.loads(line))
        
    print("FINISH...")

Loading the data...


Loading:   0%|          | 0/16700 [00:00<?, ?it/s]

FINISH...


In [16]:
len(input_data)

16700

### Loading the embedding
- change to list, originally is ndarray

In [17]:
with open(f'../../data/4_embedding/secureBERT_YR/nodes_ent2emb_256.pkl', 'rb') as fp:
    node_ent2emb = pickle.load(fp)
with open(f'../../data/4_embedding/secureBERT_YR/edges_ent2emb_16.pkl', 'rb') as fp:
    edge_ent2emb = pickle.load(fp)

In [18]:
len(node_ent2emb[0])

256

In [19]:
len(edge_ent2emb[0])

16

In [20]:
type(edge_ent2emb[0])

numpy.ndarray

In [21]:
node_ent2emb_list = [data.tolist() for data in tqdm(node_ent2emb)]
edge_ent2emb_list = [data.tolist() for data in tqdm(edge_ent2emb)]

  0%|          | 0/1609759 [00:00<?, ?it/s]

  0%|          | 0/27 [00:00<?, ?it/s]

In [56]:
type(edge_ent2emb_list[0])

list

- add the embedding

In [30]:
for data_point in tqdm(input_data):
    print(data_point)
    # break
    # for node_id in data_point['node_feat']:
    #     print(node_id)
        
    data_point['node_feat'] = [node_ent2emb_list[node_id] for node_id in data_point['node_feat']]
    data_point['edge_attr'] = [edge_ent2emb_list[edge_id] for edge_id in data_point['edge_attr']]
    print()
    print(data_point)
    break

  0%|          | 0/16700 [00:00<?, ?it/s]

{'labels': [268, 268, 268, 268, 268, 204, 67, 143, 268, 268, 268, 268, 268], 'num_nodes': 12, 'node_feat': [409697, 719938, 142790, 195977, 314154, 803851, 690541, 803374, 506231, 274169, 677435, 418815], 'edge_attr': [24, 26, 26, 12, 24, 0, 0, 0, 2, 26, 12, 24, 2], 'edge_index': [[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], [9, 4, 4, 2, 4, 3, 10, 7, 5, 0, 11, 1, 8]]}

{'labels': [268, 268, 268, 268, 268, 204, 67, 143, 268, 268, 268, 268, 268], 'num_nodes': 12, 'node_feat': [[0.09539127349853516, 0.001768483780324459, -0.1486150473356247, -0.1503634750843048, -0.050150543451309204, 0.1135689988732338, 0.021546917036175728, 0.019741453230381012, 0.01511671207845211, -0.09023817628622055, -0.04694007709622383, 0.012567132711410522, 0.027866646647453308, -0.020929891616106033, 0.046746622771024704, 0.09262032806873322, -0.06226949393749237, 0.08454105257987976, 0.005372991785407066, 0.0018597445450723171, -0.08250919729471207, 0.004222130868583918, 0.0761239156126976, -0.019437553361058235, 0

In [31]:
len(input_data[0]['node_feat'][0])

256

In [32]:
input_data[0]['node_feat']

[[0.09539127349853516,
  0.001768483780324459,
  -0.1486150473356247,
  -0.1503634750843048,
  -0.050150543451309204,
  0.1135689988732338,
  0.021546917036175728,
  0.019741453230381012,
  0.01511671207845211,
  -0.09023817628622055,
  -0.04694007709622383,
  0.012567132711410522,
  0.027866646647453308,
  -0.020929891616106033,
  0.046746622771024704,
  0.09262032806873322,
  -0.06226949393749237,
  0.08454105257987976,
  0.005372991785407066,
  0.0018597445450723171,
  -0.08250919729471207,
  0.004222130868583918,
  0.0761239156126976,
  -0.019437553361058235,
  0.008016685955226421,
  -0.040946852415800095,
  0.008640502579510212,
  -0.01907811313867569,
  -0.006141497753560543,
  0.09424134343862534,
  -0.05527239292860031,
  -0.07346320152282715,
  0.0037471819669008255,
  -0.008071722462773323,
  0.015962086617946625,
  0.0549222007393837,
  0.017916418612003326,
  0.006070434115827084,
  0.008581655099987984,
  -0.03107302635908127,
  -0.013984883204102516,
  0.0410872064530849

## Data Loader

In [60]:
class GraphDataset(Dataset):
    def __init__(self, data_list, device):
        self.data_list = data_list
        self.device = device

    def __len__(self):
        return len(self.data_list)
    
    def __getitem__(self, idx):
        data = self.data_list[idx]
        return data

def collate(samples):
    data_list = samples
    batched_graphs = []
    for data in data_list:
        g = dgl.graph((th.tensor(data["edge_index"][0]), th.tensor(data["edge_index"][1])), num_nodes=data["num_nodes"])
        
        g.ndata['feat'] = th.tensor(data["node_feat"])
        g.edata['feat'] = th.tensor(data["edge_attr"])
        g.edata['label'] = th.tensor(data["labels"])  # Add edge labels to graph

        
        batched_graphs.append(g)
    
    return dgl.batch(batched_graphs)

In [61]:
total_data = len(input_data)

test_size = int(total_data * 0.1)
train_valid_size = total_data - test_size

train_valid_data = input_data[:train_valid_size]
test_data = input_data[train_valid_size:]

train_data, valid_data = train_test_split(train_valid_data, test_size=0.25, random_state=42)

# creating GraphDataset
dataset_data = {
    'train': GraphDataset(train_data, device),
    'valid': GraphDataset(valid_data, device),
    'test': GraphDataset(test_data, device)
}

print("Datasets loaded and ready for training!")

Datasets loaded and ready for training!


- choose batch size

In [62]:
def create_dataloaders(batch_size, shuffle=True):
    dataloaders = {}
    for dataset_name, dataset in dataset_data.items():
        # do not shuffle the testing dataset
        if dataset_name == "test":
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate)    
        else:
            dataloaders[dataset_name] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate)
    return dataloaders

dataloaders = create_dataloaders(126)

- Turn the print message to a log file

In [63]:
import datetime

now = datetime.datetime.now()

formatted_time = now.strftime("%m%d_%H:%M")

log_file_path = f"./log_message/{formatted_time}_GraphSAGE_secureBERT_50.log"

def add_log_msg(msg, log_file_path=log_file_path):
    with open(log_file_path, 'a') as f:
        f.write(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}\n')
    print(f'{datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")}# {msg}')

print(log_file_path)

./log_message/0125_21:39_GraphSAGE_secureBERT_50.log


### Model

In [64]:
class GraphSAGE(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim):
        super(GraphSAGE, self).__init__()
        self.layer1 = dglnn.SAGEConv(in_dim, hidden_dim, 'pool')
        self.layer2 = dglnn.SAGEConv(hidden_dim, out_dim, 'pool')
        # self.dropout = nn.Dropout(0.25)

    def forward(self, g, inputs):
        h = self.layer1(g, inputs)
        h = torch.relu(h)
        # h = self.dropout(h)
        h = self.layer2(g, h)
        return h

In [65]:
class MLPPredictor(nn.Module):
    def __init__(self, out_feats, out_classes):
        super().__init__()
        self.W = nn.Linear(out_feats*2, out_classes)

    def apply_edges(self, edges):
        h_u = edges.src['h']
        h_v = edges.dst['h']
        score = self.W(torch.cat([h_u, h_v], 1))
        return {'score': score}

    def forward(self, graph, h):
        with graph.local_scope():
            graph.ndata['h'] = h
            graph.apply_edges(self.apply_edges)
            return graph.edata['score']

In [66]:
class Model(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_classes):
        super().__init__()
        self.sage = GraphSAGE(in_features, hidden_features, out_features)
        self.pred = MLPPredictor(out_features, num_classes)
      
    def forward(self, g, node_feat, return_logits=False):
        h = self.sage(g, node_feat)
        logits = self.pred(g, h)
        
        return logits

- Model Forward  

In [67]:
def model_fn(batched_g, model, criterion, device, count=1, which_type='train'):
    """Forward a batch through the model."""
#     batched_g, labels = data
    batched_g = batched_g.to(device)
#     print("Input feature shape:", batched_g.ndata['feat'].float().shape)
    
    labels = batched_g.edata['label'].to(device)
#     print("Labels shape:", labels.shape)
    
    logits = model(batched_g, batched_g.ndata['feat'].float())
#     print("Logits shape:", logits.shape)

    loss = criterion(logits, labels)

    output = torch.softmax(logits, dim=1)
    preds = output.argmax(1)
    
    # Compute accuracy
    accuracy = torch.mean((preds == labels).float())
        
    return loss, accuracy, preds

### Training

- in_dim means the dimension of the node_feat(50 dim, since the 50-dim embedding)
- num_classed means the # of the categories -> 274 for out task (1~274) setting 275 for the first one is 1

In [70]:
import csv
import pandas as pd
from sklearn.metrics import classification_report
from torch.optim import AdamW, lr_scheduler

seed = 8787
same_seeds(seed)

model = Model(in_features=256, hidden_features=64, out_features=128, num_classes=274)
best_model_path = "./checkpoint_graphSAGE/best_model_GraphSAGE_secureBERT_50.pt"

model = model.to(device)

optimizer = AdamW(model.parameters(), lr=5e-4)

# T_max control the period of the lr changing -> set 1/10 first
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=36, eta_min=0, last_epoch=- 1, verbose=False)

criterion = nn.CrossEntropyLoss()
# criterion = torch.nn.BCEWithLogitsLoss()

total_steps = 100

# save the best model
best_val_loss = float('inf')
patience = 10  # Number of epochs with no improvement after which training will be stopped.
waiting = 0  # The number of epochs with no improvement so far.


# Training Part
for epoch in tqdm(range(total_steps)):
    # Train
    model.train()
    total_loss = 0.0
    total_accuracy = 0.0
    num_batches = 0
    
    for batched_g in tqdm(dataloaders['test'], desc="Training", position=0, leave=True):
        num_batches += 1
        loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='train')
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        total_accuracy += accuracy.item()

        
#     scheduler.step()
    add_log_msg(f"total batches: {num_batches}")

    avg_loss = total_loss / num_batches
    avg_accuracy = total_accuracy / num_batches

    add_log_msg(f'Epoch {epoch} | Train Loss: {avg_loss:.4f} | Train Accuracy: {avg_accuracy:.4f}')

    
    # Validation Part
    model.eval()
    total_accuracy = 0.0
    total_loss = 0.0
    num_batches = 0


    with torch.no_grad():
        for batched_g in tqdm(dataloaders['valid'], desc="Validation", position=0, leave=True):
            loss, accuracy, _ = model_fn(batched_g, model, criterion, device, num_batches, which_type='validation')
            total_accuracy += accuracy.item()
            total_loss += loss.item()
            num_batches += 1

    avg_accuracy = total_accuracy / num_batches
    current_loss = total_loss / num_batches
    
    add_log_msg(f'Validation Loss: {current_loss:.4f} | Validation Accuracy: {avg_accuracy:.4f}\n')
    
            
    if current_loss < best_val_loss:
        best_val_loss = current_loss
        waiting = 0
        
        if os.path.exists(best_model_path):
            os.remove(best_model_path)
            add_log_msg("Find a better model!!")

        torch.save(model.state_dict(), best_model_path)

    else:
        waiting += 1
        if waiting >= patience:
            add_log_msg("============================== Early stopping ==================================")
            break

  0%|          | 0/100 [00:00<?, ?it/s]

Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:42:55# total batches: 14
01/25/2024, 21:42:55# Epoch 0 | Train Loss: 5.3579 | Train Accuracy: 0.3674


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:43:18# Validation Loss: 5.1091 | Validation Accuracy: 0.5499

01/25/2024, 21:43:18# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:43:25# total batches: 14
01/25/2024, 21:43:25# Epoch 1 | Train Loss: 4.7191 | Train Accuracy: 0.6564


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:43:41# Validation Loss: 4.3325 | Validation Accuracy: 0.5417

01/25/2024, 21:43:41# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:43:48# total batches: 14
01/25/2024, 21:43:48# Epoch 2 | Train Loss: 3.5249 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:44:03# Validation Loss: 2.7824 | Validation Accuracy: 0.5344

01/25/2024, 21:44:03# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:44:10# total batches: 14
01/25/2024, 21:44:10# Epoch 3 | Train Loss: 1.7558 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:44:26# Validation Loss: 1.5865 | Validation Accuracy: 0.5571

01/25/2024, 21:44:26# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:44:33# total batches: 14
01/25/2024, 21:44:33# Epoch 4 | Train Loss: 1.2480 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:44:49# Validation Loss: 1.4610 | Validation Accuracy: 0.5314

01/25/2024, 21:44:49# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:44:56# total batches: 14
01/25/2024, 21:44:56# Epoch 5 | Train Loss: 1.1845 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:45:10# Validation Loss: 1.3796 | Validation Accuracy: 0.5355

01/25/2024, 21:45:10# Find a better model!!


Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:45:17# total batches: 14
01/25/2024, 21:45:17# Epoch 6 | Train Loss: 1.1467 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:45:31# Validation Loss: 1.3977 | Validation Accuracy: 0.5314



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:45:38# total batches: 14
01/25/2024, 21:45:38# Epoch 7 | Train Loss: 1.1306 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:46:00# Validation Loss: 1.4114 | Validation Accuracy: 0.5517



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:46:07# total batches: 14
01/25/2024, 21:46:07# Epoch 8 | Train Loss: 1.1147 | Train Accuracy: 0.6436


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:46:21# Validation Loss: 1.4104 | Validation Accuracy: 0.5482



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:46:27# total batches: 14
01/25/2024, 21:46:27# Epoch 9 | Train Loss: 1.0927 | Train Accuracy: 0.6437


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:46:42# Validation Loss: 1.4194 | Validation Accuracy: 0.5377



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:46:49# total batches: 14
01/25/2024, 21:46:49# Epoch 10 | Train Loss: 1.0676 | Train Accuracy: 0.6507


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:47:04# Validation Loss: 1.4776 | Validation Accuracy: 0.5332



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:47:11# total batches: 14
01/25/2024, 21:47:11# Epoch 11 | Train Loss: 1.0388 | Train Accuracy: 0.6850


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:47:27# Validation Loss: 1.4260 | Validation Accuracy: 0.5432



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:47:34# total batches: 14
01/25/2024, 21:47:34# Epoch 12 | Train Loss: 1.0046 | Train Accuracy: 0.6877


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:47:50# Validation Loss: 1.5086 | Validation Accuracy: 0.5270



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:47:56# total batches: 14
01/25/2024, 21:47:56# Epoch 13 | Train Loss: 0.9651 | Train Accuracy: 0.7488


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:48:12# Validation Loss: 1.4835 | Validation Accuracy: 0.5347



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:48:19# total batches: 14
01/25/2024, 21:48:19# Epoch 14 | Train Loss: 0.9210 | Train Accuracy: 0.7698


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:48:35# Validation Loss: 1.5561 | Validation Accuracy: 0.5291



Training:   0%|          | 0/14 [00:00<?, ?it/s]

01/25/2024, 21:48:42# total batches: 14
01/25/2024, 21:48:42# Epoch 15 | Train Loss: 0.8740 | Train Accuracy: 0.7947


Validation:   0%|          | 0/30 [00:00<?, ?it/s]

01/25/2024, 21:49:07# Validation Loss: 1.5230 | Validation Accuracy: 0.5383



### test of valid and test part is ``graph``

- 60 APs in training x 10000times
- 5 APs in validation x 4 times
- 3 APs in test x 4 times
- Batch size = 4

In [22]:
# load the pretrained model
# pretrained_model_path = '../checkpoint_graphSAGE/best_model_GraphSAGE_transE_50.pt'
model.load_state_dict(torch.load(best_model_path))

model.to(device)
model.eval()

total = 0
correct = 0
count = 0

true_labels = []
predicted_labels = []

with torch.no_grad():
    for batched_g in tqdm(dataloaders['test'], desc="Testing", position=0, leave=True):
#         print(f"data:{data[1]}")
        loss, accuracy, predicted = model_fn(batched_g, model, criterion, device, count, which_type='test')
        labels = batched_g.edata['label'].to(device)
        
        true_labels.extend(labels.cpu().numpy())
        predicted_labels.extend(predicted.cpu().numpy())
        
        if count % 5000 == 0:
            add_log_msg(f"labels: {labels} {labels.shape}")
            add_log_msg(f"predicted: {predicted} {predicted.shape}")
            
        count += 1
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

add_log_msg(f'Test Accuracy: {100 * correct / total} %\n\n\n')

In [21]:
mapping_file = '../../data/label_to_int_mapping.txt'

label_mapping = {}
with open(mapping_file, 'r') as f:
    for line in f:
        parts = line.strip().split(': ')
        label_mapping[int(parts[1])] = parts[0]
        
# 将映射后的标签应用到true和predicted标签列表
mapped_true_labels = [label_mapping[label] for label in true_labels]
mapped_predicted_labels = [label_mapping[label] for label in predicted_labels]

# 生成Scikit-learn报告信息的DataFrame
report_data = classification_report(mapped_true_labels, mapped_predicted_labels, output_dict=True)
report_df = pd.DataFrame(report_data).transpose()

# mapped_true_labels_np = np.array(mapped_true_labels)
# mapped_predicted_labels_np = np.array(mapped_predicted_labels)

# print("mapped_true_labels 的形状:", mapped_true_labels_np.shape)
# print("mapped_predicted_labels 的形状:", mapped_predicted_labels_np.shape)

report_folder = 'classification_report'
os.makedirs(report_folder, exist_ok=True)

count = 0
while True:
    report_filename = f'classification_report-secureBERT_50-graphSAGE-{count}.xlsx'
    labels_filename = f'mapped_true_predicted_labels-secureBERT_50-graphSAGE-{count}.xlsx'
    
    report_path = os.path.join(report_folder, report_filename)
    labels_path = os.path.join(report_folder, labels_filename)
    
    if not os.path.exists(report_path) and not os.path.exists(labels_path):
        break
    count += 1

    
report_df.to_excel(report_path, index_label='Label')

mapped_labels_df = pd.DataFrame({'true_label': mapped_true_labels, 'predicted_label': mapped_predicted_labels})
# mapped_labels_df.to_excel(labels_path, index=False)

add_log_msg(f"report path: {report_path}")
add_log_msg(f"label path: {labels_path}")

mapped_report = classification_report(mapped_true_labels, mapped_predicted_labels)
add_log_msg(f"mapped_report:\n{mapped_report}")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


01/13/2024, 15:50:21# report path: classification_report/classification_report-secureBERT_50-graphSAGE-1.xlsx
01/13/2024, 15:50:21# label path: classification_report/mapped_true_predicted_labels-secureBERT_50-graphSAGE-1.xlsx


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


01/13/2024, 15:58:04# mapped_report:
                                                  precision    recall  f1-score   support

                                               O       0.95      0.99      0.97   7208750
T1003.001_0ef4cc7b-611c-4237-b20b-db36b6906554_B       0.00      0.00      0.00        20
T1003.001_0ef4cc7b-611c-4237-b20b-db36b6906554_I       0.00      0.00      0.00      1440
    T1003.001_35d92515122effdd73801c6ac3021da7_B       0.00      0.00      0.00        19
    T1003.001_35d92515122effdd73801c6ac3021da7_I       0.00      0.00      0.00        95
    T1003.002_5a484b65c247675e3b7ada4ba648d376_B       0.00      0.00      0.00        20
    T1003.002_5a484b65c247675e3b7ada4ba648d376_I       0.00      0.00      0.00       100
    T1003.002_7fa4ea18694f2552547b65e23952cabb_B       0.00      0.00      0.00        20
    T1003.002_7fa4ea18694f2552547b65e23952cabb_I       0.00      0.00      0.00        80
    T1003.003_9f73269695e54311dd61dc68940fb3e1_B       0.00   

  _warn_prf(average, modifier, msg_start, len(result))


- Check if model really load the model_dict

In [14]:
# model.layer1.fc_self.weight
model.sage.layer1.fc_self.weight

Parameter containing:
tensor([[-0.0682,  0.0153, -0.1769,  ...,  0.0375,  0.2321, -0.2812],
        [-0.2271,  0.2290, -0.1997,  ..., -0.0095,  0.1509,  0.2686],
        [-0.2743,  0.0406, -0.1222,  ...,  0.1036, -0.1590, -0.2555],
        ...,
        [-0.0758,  0.0461,  0.1273,  ...,  0.1367,  0.0671, -0.2605],
        [-0.2425, -0.1362,  0.2474,  ..., -0.3221, -0.0595,  0.3141],
        [ 0.0234, -0.2783,  0.2146,  ..., -0.3020, -0.1751,  0.0528]],
       requires_grad=True)

In [15]:
model = Model(in_features=50, hidden_features=64, out_features=128, num_classes=167)
model.load_state_dict(torch.load('model3_initial(graphsage)/initial_weight.pth'))
model.sage.layer1.fc_self.weight

Parameter containing:
tensor([[-0.0682,  0.0153, -0.1769,  ...,  0.0375,  0.2321, -0.2812],
        [-0.2271,  0.2290, -0.1997,  ..., -0.0095,  0.1509,  0.2686],
        [-0.2743,  0.0406, -0.1222,  ...,  0.1036, -0.1590, -0.2555],
        ...,
        [-0.0758,  0.0461,  0.1273,  ...,  0.1367,  0.0671, -0.2605],
        [-0.2425, -0.1362,  0.2474,  ..., -0.3221, -0.0595,  0.3141],
        [ 0.0234, -0.2783,  0.2146,  ..., -0.3020, -0.1751,  0.0528]],
       requires_grad=True)