In [134]:
import torch
# Sanity check: report whether PyTorch can see a CUDA device in this environment.
print(torch.cuda.is_available())


True


# Continuous Learning | Phase 1 
## 120 Sampling + Pre-Fixed Seed 

In [12]:
### import argparse
import numpy as np
import os
import time
import random
import torch
import torch.nn as nn
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM
import pickle
import math
from sklearn.utils import resample

def set_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    When a CUDA device is available, the generators of all CUDA devices are
    seeded with the same value as well.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

class LLMGraphTransformer(nn.Module):
    """Causal language model combined with a linear projection of edge features.

    ``forward`` returns, for every input text, the language model's logits at
    the last sequence position concatenated with a dropout-regularised linear
    projection of the matching edge-feature row.  ``generate_text`` turns a
    batch of graph entries into question prompts and lets the language model
    continue them.
    """

    def __init__(self, model_name="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device="cpu",
                 edge_feat_dim=77, edge_emb_dim=64):
        """Load tokenizer and language model and create the edge projection.

        Args:
            model_name: identifier passed to ``from_pretrained`` for both the
                tokenizer and the causal LM.
            device: device the language model and the edge projection are
                moved to; tokenized inputs are moved there too.
            edge_feat_dim: number of input features per edge.  Defaults to 77,
                the value that used to be hard-coded.
            edge_emb_dim: output width of the edge projection.  Defaults to 64,
                the value that used to be hard-coded.
        """
        super().__init__()
        self.device = device

        # Load the tokenizer and model for TinyLlama
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)

        # Fall back to the EOS token for padding when the tokenizer has no pad token
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.resize_token_embeddings(len(self.tokenizer))

        # Not referenced by forward() or generate_text(); kept so the module's
        # attributes stay the same as before.
        self.dropout = nn.Dropout(p=0.2)

        # Layers that process the edge features
        self.edge_fc = nn.Linear(edge_feat_dim, edge_emb_dim).to(self.device)
        self.edge_dropout = nn.Dropout(p=0.2)

    def forward(self, batch_text, edge_features):
        """Return last-position LM logits concatenated with the edge embedding.

        Args:
            batch_text: list of strings, one per sample.
            edge_features: tensor fed to the edge projection; its rows must
                line up with ``batch_text`` because the two results are
                concatenated along dim 1.

        Returns:
            Tensor whose second dimension is the LM logit width plus the edge
            embedding width.
        """
        # Tokenize text
        inputs = self.tokenizer(batch_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        outputs = self.model(inputs['input_ids'], attention_mask=inputs['attention_mask'])

        # Logits at the last sequence position of every row.
        # NOTE(review): if the tokenizer pads on the right, position -1 is a pad
        # position for the shorter rows of a batch -- confirm the padding side.
        text_logits = outputs.logits[:, -1, :]

        # Project edge features to a lower-dimensional space, then apply dropout
        edge_emb = self.edge_fc(edge_features)
        edge_emb = self.edge_dropout(edge_emb)

        # Concatenate the text logits and the edge feature embeddings
        combined_logits = torch.cat((text_logits, edge_emb), dim=1)

        return combined_logits

    @staticmethod
    def _build_prompts(graph_data, labels):
        """Build one question prompt per (position, neighbor) pair of ``graph_data``.

        ``node`` is the position of the entry inside ``graph_data``.  An entry
        that is a list, set or ndarray yields one prompt per element; any other
        entry yields a single prompt.
        """
        choices = ', '.join(labels)
        prompts = []
        for node, neighbors in enumerate(graph_data):
            if isinstance(neighbors, (list, set, np.ndarray)):
                targets = neighbors
            else:
                targets = (neighbors,)
            prompts.extend(
                f"What is the relationship between Node {node} and Node {target}? Choices: {choices}."
                for target in targets
            )
        return prompts

    def generate_text(self, graph_data, labels, max_new_tokens=50):
        """Generate a language-model continuation for every prompt built from ``graph_data``.

        Args:
            graph_data: iterable of graph entries (see ``_build_prompts``).
            labels: class names listed as choices inside every prompt.
            max_new_tokens: generation budget passed to ``generate``.

        Returns:
            List of decoded strings, one per prompt.  Generation samples
            (``do_sample=True``), so the text is not deterministic unless the
            RNG state is fixed.
        """
        batch_text = self._build_prompts(graph_data, labels)

        # Tokenize and generate predictions
        inputs = self.tokenizer(batch_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        outputs = self.model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        generated_text = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

        return generated_text


def balance_data(data, labels, n_samples_per_label, seed=42):
    """Draw exactly ``n_samples_per_label`` items for every distinct label.

    Sampling uses a local NumPy generator seeded with ``seed``, so the same
    inputs always give the same selection and no global RNG state is touched.
    (Previously Python's ``random`` module was seeded while the draw itself
    used NumPy's global generator, so the seed did not control the sampling.)
    ``RandomState(seed)`` produces the same stream as ``np.random.seed(seed)``.

    Args:
        data: array indexable by an integer index array; ``data[i]`` belongs
            to ``labels[i]``.
        labels: one label per element of ``data``.
        n_samples_per_label: number of items drawn for each label.  Labels
            with fewer items are sampled with replacement.
        seed: seed of the local generator.

    Returns:
        ``(balanced_data, balanced_labels)``, grouped by label in the order
        returned by ``np.unique``.  Both are empty when ``labels`` is empty.
    """
    labels = np.asarray(labels)
    rng = np.random.RandomState(seed)

    picked = []
    for label in np.unique(labels):
        label_indices = np.where(labels == label)[0]
        # Oversample (with replacement) only when the class is too small.
        oversample = len(label_indices) < n_samples_per_label
        picked.append(rng.choice(label_indices, size=n_samples_per_label, replace=oversample))

    if not picked:
        # np.concatenate rejects an empty list; return empty selections instead.
        return data[:0], labels[:0]

    balanced_indices = np.concatenate(picked)
    return data[balanced_indices], labels[balanced_indices]


def process_llm_output(llm_output):
    """Map free-form model text to a class index.

    The text is matched case-insensitively against the class names, checked
    in the order they are listed below.  (Previously the text was lower-cased
    but compared against mixed-case names, so nothing could ever match.)

    Returns:
        The index of the first class name found in ``llm_output``, or -1 when
        no class name occurs in it.
    """
    normalized = llm_output.lower().strip()
    label_mapping = {
        'Normal':0, 'Audio-Streaming':1, 'Browsing':2, 'Chat':3, 'File-Transfer':4,
        'Email':5, 'P2P':6, 'Video-Streaming':7, 'VOIP':8
    }
    for keyword, index in label_mapping.items():
        if keyword.lower() in normalized:
            return index
    return -1


def save_data_splits(train, val, test, train_labels, val_labels, test_labels, path="data_splits/"):
    """Pickle every split together with its labels into the directory ``path``.

    Writes ``train.pkl``, ``val.pkl`` and ``test.pkl``; each file holds a
    ``(split, labels)`` tuple.  The directory is created when missing.
    """
    os.makedirs(path, exist_ok=True)
    payloads = (
        ("train.pkl", (train, train_labels)),
        ("val.pkl", (val, val_labels)),
        ("test.pkl", (test, test_labels)),
    )
    for filename, payload in payloads:
        with open(os.path.join(path, filename), "wb") as handle:
            pickle.dump(payload, handle)
    print("Data splits and labels saved successfully.")

def load_data_splits(path="data_splits/"):
    """Read back the ``(split, labels)`` pickles stored in the directory ``path``.

    Returns:
        ``(train, val, test, train_labels, val_labels, test_labels)``.
    """
    def _read(filename):
        # NOTE: pickle executes code on load; only point this at trusted files.
        with open(os.path.join(path, filename), "rb") as handle:
            return pickle.load(handle)

    train, train_labels = _read("train.pkl")
    val, val_labels = _read("val.pkl")
    test, test_labels = _read("test.pkl")
    print("Data splits and labels loaded successfully.")
    return train, val, test, train_labels, val_labels, test_labels

def fit(args):
    """Train, validate and test an LLMGraphTransformer, then save a checkpoint.

    Steps: load edge features and labels from ``datasets/<dataset>/``, draw a
    class-balanced sample of 120 edge indices per label, split it into
    train/validation/test (or reuse the splits pickled under ``data_splits/``),
    train for 10 epochs with batches of 10 edges, validate after every epoch,
    evaluate on the test split and call ``save_model``.

    Args:
        args: dict; the keys "dataset" and "binary" are read ("binary" is
            read but not used afterwards).

    Returns:
        None.  The model, the data tensors and the splits are local to this
        function.
    """
    data = args["dataset"]
    binary = args["binary"]  # read but never used below

    # Dataset files are read from datasets/<dataset>/
    path = "datasets/" + data
    if not path.endswith('/'):
        path += '/'

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the data manually (edge_feat, label, adj, adj_lists, config)
    edge_feat = np.load(path + "edge_feat_scaled.npy", allow_pickle=True)
    edge_feat = torch.tensor(edge_feat, dtype=torch.float, device=device)

    # Load the label for multiclass classification
    label = np.load(path + "label_mul.npy", allow_pickle=True)
    label = torch.tensor(label, dtype=torch.long, device=device)
    # NOTE(review): adj, adj_lists and config are loaded/built but not used
    # anywhere else in this function.
    adj = np.load(path + "adj_random.npy", allow_pickle=True)
    with open(path + 'adj_random_list.dict', 'rb') as file:
        adj_lists = pickle.load(file)

    config = {
        "num_of_layers": 3,
        "num_heads_per_layer": [6, 6, 6],
        "num_features_per_layer": [edge_feat.shape[1], 8, 8, 8],
        "num_identity_feats": 8,
        "add_skip_connection": False,
        "bias": True,
        "dropout": 0.2
    }

    # Initialize LLMGraphTransformer using TinyLlama
    llm_graph_transformer = LLMGraphTransformer(model_name="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device=device)

    # Define labels for relationship types
    labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer',
            'Email', 'P2P', 'Video-Streaming', 'VOIP']

    # Define the optimizer with Adam
    optimizer = torch.optim.Adam(llm_graph_transformer.parameters(), lr=1e-5)
    
    loss_fn = nn.CrossEntropyLoss()

    num_edges = len(edge_feat)
    label_cpu = label.cpu().numpy()
    unique, counts = np.unique(label_cpu, return_counts=True)  # not used afterwards

    # The "data" being balanced are edge indices 0..num_edges-1, so the splits
    # below hold indices into edge_feat / label.
    balanced_data, balanced_labels = balance_data(np.arange(num_edges), label_cpu, n_samples_per_label=120)

    # Check if saved splits exist, else create and save them
    # NOTE(review): the cache location does not depend on the dataset name, so
    # splits saved for one dataset are reused for any other -- confirm intended.
    if not os.path.exists("data_splits/train.pkl"):
        # Perform initial train-validation-test split and save the splits
        train_val, test, train_val_labels, test_labels = train_test_split(
            balanced_data, balanced_labels, test_size=0.1, stratify=balanced_labels, random_state=42
        )
        train, val, train_labels, val_labels = train_test_split(
            train_val, train_val_labels, test_size=0.1, stratify=train_val_labels, random_state=42
        )
        save_data_splits(train, val, test, train_labels, val_labels, test_labels)
    else:
        # Load the saved splits and their labels for consistent use
        train, val, test, train_labels, val_labels, test_labels = load_data_splits()

    print(len(train), len(val), len(test))

    # Print the distribution of labels for each set
    print("Label distribution in Train Set:")
    unique_train, counts_train = np.unique(train_labels, return_counts=True)
    print(dict(zip(unique_train, counts_train)))

    print("Label distribution in Validation Set:")
    unique_val, counts_val = np.unique(val_labels, return_counts=True)
    print(dict(zip(unique_val, counts_val)))

    print("Label distribution in Test Set:")
    unique_test, counts_test = np.unique(test_labels, return_counts=True)
    print(dict(zip(unique_test, counts_test)))

    # Per-batch timings and scores are collected but not returned or printed.
    times = []
    trainscores = []
    valscores = []

    # NOTE(review): llm_graph_transformer.train()/.eval() is never called in
    # this function, so the wrapper's dropout mode is the same during training
    # and during the predict_ calls below -- confirm intended.
    for epoch in range(10):
        print("Epoch: ", epoch)
        # Shuffles the index array in place; train_labels is not shuffled, but
        # targets are looked up through label[batch_edges] below.
        random.shuffle(train)
        epoch_start = time.time()
        
        # Print the number of batches
        print(f"Training data size: {len(train)}")
        print(f"Number of batches: {len(train) // 10}")
        
        # Only full batches are visited; a trailing partial batch is dropped.
        for batch in range(int(len(train) / 10)):  # Batch size is 10
            batch_edges = train[10 * batch:10 * (batch + 1)]
            
            # With the range above every slice holds 10 indices, so this guard
            # is not expected to trigger.
            if len(batch_edges) == 0:
                print(f"Skipping empty batch {batch + 1}")
                continue

            start_time = time.time()
            
            # Let the language model continue the prompts built from the batch
            batch_text = llm_graph_transformer.generate_text(batch_edges, labels, max_new_tokens=10)

            # Generate logits from text inputs
            edge_batch = edge_feat[batch_edges]
            logits = llm_graph_transformer(batch_text, edge_batch)
            
            # Ensure logits and labels are both on the same device
            logits = logits.to(device)
            batch_labels = label[batch_edges].to(device)

            # Calculate loss using logits and target labels
            loss = loss_fn(logits, batch_labels)
            # Backpropagation; gradients are cleared after the step so the
            # next batch starts from zero.
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # The value reported as "acc_train" is the weighted F1 score of the batch
            predicted_labels = torch.argmax(logits, dim=-1)
            acc_train = f1_score(label_cpu[batch_edges], predicted_labels.cpu().numpy(), average="weighted")

            end_time = time.time()
            times.append(end_time - start_time)
            trainscores.append(acc_train)

            # Print the result
            print(f'batch: {batch + 1:03d}, loss_train: {loss.item():.4f}, acc_train: {acc_train:.4f}, time: {end_time - start_time:.4f}s')

            # Hard cap of 180 batches per epoch
            if batch >= 179:
                break

        # Perform validation after each epoch ("label acc" is the per-class F1)
        print(f"Validation after epoch {epoch}:")
        val_acc, val_loss, val_output = predict_(llm_graph_transformer, label, loss_fn, val, device, edge_feat)
        print(f"Validation set results: loss= {val_loss:.4f}, accuracy= {val_acc:.4f}, label acc= {f1_score(label_cpu[val], val_output, average=None)}")
        valscores.append(val_acc)

    # Final evaluation on the held-out test split, then checkpoint the model
    acc_test, loss_test, predict_output = predict_(llm_graph_transformer, label, loss_fn, test, device, edge_feat)
    print(f"Test set results: loss= {loss_test:.4f}, accuracy= {acc_test:.4f}, label acc= {f1_score(label_cpu[test], predict_output, average=None)}")
    save_model(llm_graph_transformer, optimizer, epoch)



def save_model(model, optimizer, epoch, path="llm_w_edgefeat.pth"):
    """Write a timestamped checkpoint below ``model/`` and return its path.

    The checkpoint is a dict with the keys ``epoch``, ``model_state_dict`` and
    ``optimizer_state_dict``.  The target directory is created when it does
    not exist yet (previously ``torch.save`` failed in that case).

    Args:
        model: module whose ``state_dict()`` is stored.
        optimizer: optimizer whose ``state_dict()`` is stored.
        epoch: value stored under ``epoch``.
        path: file name appended to the ``model/<timestamp>_`` prefix.

    Returns:
        The path the checkpoint was written to.
    """
    # Prefix the file name with the current local time, e.g. 20241109-162008_
    current_time = time.strftime("%Y%m%d-%H%M%S")
    path = f"model/{current_time}_{path}"
    os.makedirs(os.path.dirname(path), exist_ok=True)

    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(checkpoint, path)

    print(f"Model saved to {path}")
    return path

def predict_(model, label, loss_fn, data_idx, device, edge_feat):
    """Evaluate ``model`` on the edges selected by ``data_idx``.

    Evaluation runs in batches of 10 with gradient tracking disabled.  When
    ``model`` is an ``nn.Module`` it is switched to eval mode for the duration
    of the call, and the previous train/eval flag of every sub-module is
    restored afterwards.

    Args:
        model: callable as ``model(batch_text, edge_batch)`` that also offers
            ``generate_text(batch_edges, labels, max_new_tokens=...)``.
        label: tensor of target class indices, indexable by ``data_idx``.
        loss_fn: loss called as ``loss_fn(logits, batch_labels)``.
        data_idx: non-empty sequence of indices into ``label`` / ``edge_feat``.
        device: device the logits and the targets are moved to.
        edge_feat: tensor of edge features, indexable by ``data_idx``.

    Returns:
        ``(acc, loss, predict_output)``: the weighted F1 score, the mean of
        the per-batch losses, and the list of predicted class indices.

    Raises:
        ValueError: if ``data_idx`` is empty.
    """
    if len(data_idx) == 0:
        raise ValueError("predict_ requires at least one index in data_idx")

    labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer',
              'Email', 'P2P', 'Video-Streaming', 'VOIP']
    batch_size = 10
    num_batches = math.ceil(len(data_idx) / batch_size)

    # Remember the mode of every sub-module so the exact state can be restored.
    is_module = isinstance(model, torch.nn.Module)
    saved_modes = [(module, module.training) for module in model.modules()] if is_module else []
    if is_module:
        model.eval()

    predict_output = []
    loss = 0.0
    try:
        with torch.no_grad():
            for batch in range(num_batches):
                batch_edges = data_idx[batch_size * batch:batch_size * (batch + 1)]

                # Generate text from batch_edges, then get the logits for it
                batch_text = model.generate_text(batch_edges, labels, max_new_tokens=10)
                edge_batch = edge_feat[batch_edges]
                logits = model(batch_text, edge_batch).to(device)

                # Long type labels for cross_entropy
                batch_labels = label[batch_edges].to(device)

                batch_loss = loss_fn(logits, batch_labels)
                loss += batch_loss.item()

                # Predicted class = index of the largest logit
                predicted_labels = torch.argmax(logits, dim=-1).cpu().numpy()
                predict_output.extend(predicted_labels)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    # Normalize loss by the number of batches
    loss /= num_batches

    # The first return value is a weighted F1 score, not plain accuracy
    acc = f1_score(label.cpu().numpy()[data_idx], predict_output, average="weighted")
    return acc, loss, predict_output


if __name__ == '__main__':
    # Seed all RNGs before any sampling, splitting or model construction.
    set_seeds(42) 
    # fit() reads the "dataset" and "binary" keys; "residual" is not read by fit().
    fit({
        "dataset": "Darknet",
        "binary": False,
        "residual": True
    })



Data splits and labels saved successfully.
874 98 108
Label distribution in Train Set:
{0: 97, 1: 97, 2: 97, 3: 97, 4: 97, 5: 97, 6: 97, 7: 98, 8: 97}
Label distribution in Validation Set:
{0: 11, 1: 11, 2: 11, 3: 11, 4: 11, 5: 11, 6: 11, 7: 10, 8: 11}
Label distribution in Test Set:
{0: 12, 1: 12, 2: 12, 3: 12, 4: 12, 5: 12, 6: 12, 7: 12, 8: 12}
Epoch:  0
Training data size: 874
Number of batches: 87
batch: 001, loss_train: 23.7233, acc_train: 0.0000, time: 0.5825s
batch: 002, loss_train: 19.9106, acc_train: 0.0000, time: 0.5758s
batch: 003, loss_train: 18.7829, acc_train: 0.0000, time: 0.5776s
batch: 004, loss_train: 16.1703, acc_train: 0.0000, time: 0.5767s
batch: 005, loss_train: 10.5653, acc_train: 0.0000, time: 0.5665s
batch: 006, loss_train: 11.0256, acc_train: 0.1000, time: 0.5771s
batch: 007, loss_train: 5.4044, acc_train: 0.1000, time: 0.5768s
batch: 008, loss_train: 3.7000, acc_train: 0.0500, time: 0.5665s
batch: 009, loss_train: 6.5287, acc_train: 0.0667, time: 0.5765s
batc

# Continuous Learning | Phase 2
## Unused 120 Sampling + Pre-Fixed Seed 

In [13]:
from sklearn.utils import shuffle

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_model(model, optimizer, path, device='cpu'):
    """Restore model and optimizer state from a checkpoint file.

    The checkpoint must be a dict with the keys ``model_state_dict``,
    ``optimizer_state_dict`` and ``epoch``, as written by ``save_model``.

    Args:
        model: module that receives ``model_state_dict``.
        optimizer: optimizer that receives ``optimizer_state_dict``; it should
            have been built over ``model``'s parameters.
        path: checkpoint file to read.
        device: ``map_location`` used while loading the tensors.

    Returns:
        ``(model, optimizer, epoch)`` with ``epoch`` taken from the checkpoint.
    """
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all such a checkpoint holds, and avoids executing arbitrary
    # pickled code from the file.
    checkpoint = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    print(f"Model loaded from {path}, epoch {epoch}")
    return model, optimizer, epoch

# Phase 2: continue training the Phase 1 checkpoint on a fresh balanced sample
# drawn only from edges that no Phase 1 split used.
# NOTE(review): this cell reads `label`, `edge_feat`, `loss_fn`, `train`, `val`
# and `test` as globals.  No earlier cell shown here defines them at module
# level (fit() keeps them local) -- presumably they come from notebook kernel
# state; confirm before running in a fresh kernel.

# Initialize the model and optimizer
model = LLMGraphTransformer(model_name="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
model, optimizer, start_epoch = load_model(model, optimizer, path="model/20241109-162008_llm_w_edgefeat.pth", device=device)
labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer', 'Email', 'P2P', 'Video-Streaming', 'VOIP']

# Define all indices in the dataset
label_cpu = label.cpu().numpy()  # Assume 'label' is the tensor of actual labels

# Define the indices of the entire dataset
all_data_indices = np.arange(len(label_cpu))

# Find unused data indices: everything not in the Phase 1 train/val/test splits
used_data_indices = np.array(train.tolist() + val.tolist() + test.tolist())
unused_data_indices = np.setdiff1d(all_data_indices, used_data_indices)

# Sample balanced data and labels from unused data only
balanced_data, balanced_labels = balance_data(unused_data_indices, label_cpu[unused_data_indices], n_samples_per_label=120)

# Split unused balanced data into train, val, and test sets (80% / 10% / 10%)
unused_train, unused_temp, unused_train_labels, unused_temp_labels = train_test_split(
    balanced_data, balanced_labels, test_size=0.2, stratify=balanced_labels, random_state=42
)
unused_val, unused_test, unused_val_labels, unused_test_labels = train_test_split(
    unused_temp, unused_temp_labels, test_size=0.5, stratify=unused_temp_labels, random_state=42
)

# Display split results
print(f"Unused Train Data: {len(unused_train)}, Validation: {len(unused_val)}, Test: {len(unused_test)}")

print("Label distribution in Unused Train Set:", dict(zip(*np.unique(unused_train_labels, return_counts=True))))
print("Label distribution in Unused Validation Set:", dict(zip(*np.unique(unused_val_labels, return_counts=True))))
print("Label distribution in Unused Test Set:", dict(zip(*np.unique(unused_test_labels, return_counts=True))))

# Training loop on unused data
# NOTE(review): the loop counts epochs from 0 again; start_epoch returned by
# load_model is not used.
for epoch in range(10):
    model.train()
    print("Epoch:", epoch)
    # The same random_state is passed every epoch; it is applied to the arrays
    # as left by the previous epoch's shuffle.
    unused_train, unused_train_labels = shuffle(unused_train, unused_train_labels, random_state=42)  # Shuffle together
    
    # Only full batches are visited; a trailing partial batch is dropped.
    for batch in range(int(len(unused_train) / 10)):  # Batch size set to 10
        batch_edges = unused_train[10 * batch:10 * (batch + 1)]
        batch_labels = unused_train_labels[10 * batch:10 * (batch + 1)]
        
        # Generate predictions using model
        batch_text = model.generate_text(batch_edges, labels, max_new_tokens=10)
        edge_batch = edge_feat[batch_edges]
        logits = model(batch_text, edge_batch).to(device)
        
        batch_labels_tensor = torch.tensor(batch_labels, dtype=torch.long, device=device)
        
        # Calculate loss and backpropagate
        loss = loss_fn(logits, batch_labels_tensor)
        optimizer.zero_grad()  # Zero gradients before backward pass
        loss.backward()
        optimizer.step()

        # The value printed as "acc_train" is the weighted F1 score of the batch
        predicted_labels = torch.argmax(logits, dim=-1)
        acc_train = f1_score(batch_labels, predicted_labels.cpu().numpy(), average="weighted")
        print(f'[Train] batch: {batch + 1:03d}, loss_train: {loss.item():.4f}, acc_train: {acc_train:.4f}')

    # Validation after each epoch ("label acc" is the per-class F1 score)
    model.eval()  # Set model to evaluation mode for validation
    with torch.no_grad():  # Disable gradient calculation
        val_acc, val_loss, val_output = predict_(model, label, loss_fn, unused_val, device, edge_feat)
        print(f"[Val] loss= {val_loss:.4f}, accuracy= {val_acc:.4f}, label acc= {f1_score(unused_val_labels, val_output, average=None)}")

    

# Final test evaluation
model.eval()  # Set model to evaluation mode for testing
with torch.no_grad():  # Disable gradient calculation
    acc_test, loss_test, predict_output = predict_(model, label, loss_fn, unused_test, device, edge_feat)
    print(f"[Test] loss= {loss_test:.4f}, accuracy= {acc_test:.4f}, label acc= {f1_score(unused_test_labels, predict_output, average=None)}")

print("Training complete.")


  checkpoint = torch.load(path, map_location=device)


Model loaded from model/20241109-162008_llm_w_edgefeat.pth, epoch 9
Unused Train Data: 864, Validation: 108, Test: 108
Label distribution in Unused Train Set: {0: 96, 1: 96, 2: 96, 3: 96, 4: 96, 5: 96, 6: 96, 7: 96, 8: 96}
Label distribution in Unused Validation Set: {0: 12, 1: 12, 2: 12, 3: 12, 4: 12, 5: 12, 6: 12, 7: 12, 8: 12}
Label distribution in Unused Test Set: {0: 12, 1: 12, 2: 12, 3: 12, 4: 12, 5: 12, 6: 12, 7: 12, 8: 12}
Epoch: 0
[Train] batch: 001, loss_train: 0.0130, acc_train: 1.0000
[Train] batch: 002, loss_train: 0.0067, acc_train: 1.0000
[Train] batch: 003, loss_train: 0.0062, acc_train: 1.0000
[Train] batch: 004, loss_train: 0.0760, acc_train: 1.0000
[Train] batch: 005, loss_train: 0.0046, acc_train: 1.0000
[Train] batch: 006, loss_train: 0.0042, acc_train: 1.0000
[Train] batch: 007, loss_train: 0.0114, acc_train: 1.0000
[Train] batch: 008, loss_train: 0.1003, acc_train: 1.0000
[Train] batch: 009, loss_train: 0.0013, acc_train: 1.0000
[Train] batch: 010, loss_train: 1.

Data splits and labels saved successfully.


In [14]:
def con1_save_data_splits(train, val, test, train_labels, val_labels, test_labels, path="data_splits/"):
    """Pickle every split with its labels into ``path`` using a ``con1_`` file prefix.

    Writes ``con1_train.pkl``, ``con1_val.pkl`` and ``con1_test.pkl``; each
    file holds a ``(split, labels)`` tuple.  The directory is created when
    missing.
    """
    os.makedirs(path, exist_ok=True)
    payloads = (
        ("con1_train.pkl", (train, train_labels)),
        ("con1_val.pkl", (val, val_labels)),
        ("con1_test.pkl", (test, test_labels)),
    )
    for filename, payload in payloads:
        with open(os.path.join(path, filename), "wb") as handle:
            pickle.dump(payload, handle)
    print("Data splits and labels saved successfully.")
# Persist the Phase 2 splits (drawn from the previously unused edges) next to the Phase 1 splits.
con1_save_data_splits(unused_train, unused_val, unused_test, unused_train_labels, unused_val_labels, unused_test_labels)

Data splits and labels saved successfully.


In [15]:
# Re-evaluate the Phase 2 model on the Phase 1 test split.  Like the other
# test evaluation in this notebook, run it in eval mode with gradient
# tracking disabled, so no autograd graph is built during inference.
model.eval()
with torch.no_grad():
    acc_test, loss_test, predict_output = predict_(model, label, loss_fn, test, device, edge_feat)
print(f"[Test] loss= {loss_test:.4f}, accuracy= {acc_test:.4f}, label acc= {f1_score(test_labels, predict_output, average=None)}")
    

[Test] loss= 0.2312, accuracy= 0.9814, label acc= [0.95652174 0.96       0.95652174 1.         0.96       1.
 1.         1.         1.        ]


# Model Save

In [16]:
# Checkpoint the Phase 2 model; `epoch` is the last value left by the Phase 2 training loop.
save_model(model, optimizer, epoch)

Model saved to model/20241109-164753_llm_w_edgefeat.pth


# Model Inference

In [23]:
### import argparse
import numpy as np
import time
import random
import torch
import torch.nn as nn
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM
import pickle
import math
from sklearn.utils import resample

class LLMGraphTransformer(nn.Module):
    """Causal language model combined with a linear projection of edge features.

    ``forward`` returns, for every input text, the language model's logits at
    the last sequence position concatenated with a dropout-regularised linear
    projection of the matching edge-feature row.  ``generate_text`` turns a
    batch of graph entries into question prompts and lets the language model
    continue them.
    """

    def __init__(self, model_name="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device="cpu",
                 edge_feat_dim=77, edge_emb_dim=64):
        """Load tokenizer and language model and create the edge projection.

        Args:
            model_name: identifier passed to ``from_pretrained`` for both the
                tokenizer and the causal LM.
            device: device the language model and the edge projection are
                moved to; tokenized inputs are moved there too.
            edge_feat_dim: number of input features per edge.  Defaults to 77,
                the value that used to be hard-coded.
            edge_emb_dim: output width of the edge projection.  Defaults to 64,
                the value that used to be hard-coded.
        """
        super().__init__()
        self.device = device

        # Load the tokenizer and model for TinyLlama
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)

        # Fall back to the EOS token for padding when the tokenizer has no pad token
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.resize_token_embeddings(len(self.tokenizer))

        # Not referenced by forward() or generate_text(); kept so the module's
        # attributes stay the same as before.
        self.dropout = nn.Dropout(p=0.2)

        # Layers that process the edge features
        self.edge_fc = nn.Linear(edge_feat_dim, edge_emb_dim).to(self.device)
        self.edge_dropout = nn.Dropout(p=0.2)

    def forward(self, batch_text, edge_features):
        """Return last-position LM logits concatenated with the edge embedding.

        Args:
            batch_text: list of strings, one per sample.
            edge_features: tensor fed to the edge projection; its rows must
                line up with ``batch_text`` because the two results are
                concatenated along dim 1.

        Returns:
            Tensor whose second dimension is the LM logit width plus the edge
            embedding width.
        """
        # Tokenize text
        inputs = self.tokenizer(batch_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        outputs = self.model(inputs['input_ids'], attention_mask=inputs['attention_mask'])

        # Logits at the last sequence position of every row.
        # NOTE(review): if the tokenizer pads on the right, position -1 is a pad
        # position for the shorter rows of a batch -- confirm the padding side.
        text_logits = outputs.logits[:, -1, :]

        # Project edge features to a lower-dimensional space, then apply dropout
        edge_emb = self.edge_fc(edge_features)
        edge_emb = self.edge_dropout(edge_emb)

        # Concatenate the text logits and the edge feature embeddings
        combined_logits = torch.cat((text_logits, edge_emb), dim=1)

        return combined_logits

    @staticmethod
    def _build_prompts(graph_data, labels):
        """Build one question prompt per (position, neighbor) pair of ``graph_data``.

        ``node`` is the position of the entry inside ``graph_data``.  An entry
        that is a list, set or ndarray yields one prompt per element; any other
        entry yields a single prompt.
        """
        choices = ', '.join(labels)
        prompts = []
        for node, neighbors in enumerate(graph_data):
            if isinstance(neighbors, (list, set, np.ndarray)):
                targets = neighbors
            else:
                targets = (neighbors,)
            prompts.extend(
                f"What is the relationship between Node {node} and Node {target}? Choices: {choices}."
                for target in targets
            )
        return prompts

    def generate_text(self, graph_data, labels, max_new_tokens=50):
        """Generate a language-model continuation for every prompt built from ``graph_data``.

        Args:
            graph_data: iterable of graph entries (see ``_build_prompts``).
            labels: class names listed as choices inside every prompt.
            max_new_tokens: generation budget passed to ``generate``.

        Returns:
            List of decoded strings, one per prompt.  Generation samples
            (``do_sample=True``), so the text is not deterministic unless the
            RNG state is fixed.
        """
        batch_text = self._build_prompts(graph_data, labels)

        # Tokenize and generate predictions
        inputs = self.tokenizer(batch_text, return_tensors="pt", padding=True, truncation=True, max_length=512).to(self.device)
        outputs = self.model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
        generated_text = [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

        return generated_text



def predict_(model, label, loss_fn, data_idx, device, edge_feat):
    """Evaluate ``model`` on the edges selected by ``data_idx``.

    Evaluation runs in batches of 10 with gradient tracking disabled.  When
    ``model`` is an ``nn.Module`` it is switched to eval mode for the duration
    of the call, and the previous train/eval flag of every sub-module is
    restored afterwards.

    Args:
        model: callable as ``model(batch_text, edge_batch)`` that also offers
            ``generate_text(batch_edges, labels, max_new_tokens=...)``.
        label: tensor of target class indices, indexable by ``data_idx``.
        loss_fn: loss called as ``loss_fn(logits, batch_labels)``.
        data_idx: non-empty sequence of indices into ``label`` / ``edge_feat``.
        device: device the logits and the targets are moved to.
        edge_feat: tensor of edge features, indexable by ``data_idx``.

    Returns:
        ``(acc, loss, predict_output)``: the weighted F1 score, the mean of
        the per-batch losses, and the list of predicted class indices.

    Raises:
        ValueError: if ``data_idx`` is empty.
    """
    if len(data_idx) == 0:
        raise ValueError("predict_ requires at least one index in data_idx")

    labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer',
              'Email', 'P2P', 'Video-Streaming', 'VOIP']
    batch_size = 10
    num_batches = math.ceil(len(data_idx) / batch_size)

    # Remember the mode of every sub-module so the exact state can be restored.
    is_module = isinstance(model, torch.nn.Module)
    saved_modes = [(module, module.training) for module in model.modules()] if is_module else []
    if is_module:
        model.eval()

    predict_output = []
    loss = 0.0
    try:
        with torch.no_grad():
            for batch in range(num_batches):
                batch_edges = data_idx[batch_size * batch:batch_size * (batch + 1)]

                # Generate text from batch_edges, then get the logits for it
                batch_text = model.generate_text(batch_edges, labels, max_new_tokens=10)
                edge_batch = edge_feat[batch_edges]
                logits = model(batch_text, edge_batch).to(device)

                # Long type labels for cross_entropy
                batch_labels = label[batch_edges].to(device)

                batch_loss = loss_fn(logits, batch_labels)
                loss += batch_loss.item()

                # Predicted class = index of the largest logit
                predicted_labels = torch.argmax(logits, dim=-1).cpu().numpy()
                predict_output.extend(predicted_labels)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training

    # Normalize loss by the number of batches
    loss /= num_batches

    # The first return value is a weighted F1 score, not plain accuracy
    acc = f1_score(label.cpu().numpy()[data_idx], predict_output, average="weighted")
    return acc, loss, predict_output

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def load_model(model, optimizer, path, device='cpu'):
    """Restore model and optimizer state from a checkpoint file.

    The checkpoint must be a dict with the keys ``model_state_dict``,
    ``optimizer_state_dict`` and ``epoch``.

    Args:
        model: module that receives ``model_state_dict``.
        optimizer: optimizer that receives ``optimizer_state_dict``; it should
            have been built over ``model``'s parameters.
        path: checkpoint file to read.
        device: ``map_location`` used while loading the tensors.

    Returns:
        ``(model, optimizer, epoch)`` with ``epoch`` taken from the checkpoint.
    """
    # weights_only=True restricts unpickling to tensors and plain containers
    # and avoids executing arbitrary pickled code from the file.
    checkpoint = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    print(f"Model loaded from {path}, epoch {epoch}")
    return model, optimizer, epoch
# Build the model and an optimizer over *its own* parameters before restoring
# both from the checkpoint.  (Previously the optimizer handed to load_model was
# whatever `optimizer` already existed, and the one created afterwards was
# built over a different object, `llm_graph_transformer`.)
model = LLMGraphTransformer(model_name="TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T", device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
model, optimizer, start_epoch = load_model(model, optimizer, path="model/20241109-164753_llm_w_edgefeat.pth", device=device)
# This section only runs inference, so keep dropout disabled.
model.eval()
# Loss function used by the evaluation cells below
loss_fn = nn.CrossEntropyLoss()


  checkpoint = torch.load(path, map_location=device)


Model loaded from model/20241109-164753_llm_w_edgefeat.pth, epoch 9


In [35]:
import numpy as np
import torch
import torch.nn as nn
import random
import pickle
from sklearn.metrics import f1_score

# Set device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset directory (the literal already ends with a path separator)
path = "datasets/Darknet/"

# Load dataset files
edge_feat = torch.tensor(np.load(path + "edge_feat_scaled.npy", allow_pickle=True), dtype=torch.float, device=device)
label = torch.tensor(np.load(path + "label_mul.npy", allow_pickle=True), dtype=torch.long, device=device)

# Labels for relationship types
labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer', 'Email', 'P2P', 'Video-Streaming', 'VOIP']

# Test the model on the Phase 2 test split.
# NOTE(review): `model`, `loss_fn` and `unused_test` are not defined in this
# cell -- presumably they come from earlier cells / kernel state; confirm.
label_cpu = label.cpu().numpy()
# Inference only: disable dropout and gradient tracking.
model.eval()
with torch.no_grad():
    acc_test, loss_test, predict_output = predict_(model, label, loss_fn, unused_test, device, edge_feat)
print(f"Test set results: loss= {loss_test:.4f}, accuracy= {acc_test:.4f}, label acc= {f1_score(label_cpu[unused_test], predict_output, average=None)}")


Test set results: loss= 0.1116, accuracy= 0.9907, label acc= [1.         1.         1.         0.95652174 0.96       1.
 1.         1.         1.        ]


In [40]:
import numpy as np
import torch
import torch.nn as nn
import random
import pickle
from sklearn.metrics import f1_score, accuracy_score, classification_report

# Set device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset directory (the literal already ends with a path separator)
path = "datasets/Darknet/"

edge_feat = torch.tensor(np.load(path + "edge_feat_scaled.npy", allow_pickle=True), dtype=torch.float, device=device)
label = torch.tensor(np.load(path + "label_mul.npy", allow_pickle=True), dtype=torch.long, device=device)

# Labels for relationship types
labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer', 'Email', 'P2P', 'Video-Streaming', 'VOIP']

# Test the model on the Phase 1 test split.
# NOTE(review): `model`, `loss_fn` and `test` are not defined in this cell --
# presumably they come from earlier cells / kernel state; confirm.
label_cpu = label.cpu().numpy()
# Inference only: disable dropout and gradient tracking.
model.eval()
with torch.no_grad():
    f1_test, loss_test, predict_output = predict_(model, label, loss_fn, test, device, edge_feat)

# Accuracy, weighted F1 and a per-class report for the same predictions
accuracy = accuracy_score(label_cpu[test], predict_output)
f1_weighted = f1_score(label_cpu[test], predict_output, average="weighted")
report = classification_report(label_cpu[test], predict_output, target_names=labels)

# Print test set results
print(f"Test set results: loss= {loss_test:.4f}, accuracy= {accuracy:.4f}, f1_score(weighted)= {f1_weighted:.4f}")
print("Classification Report:\n", report)


Test set results: loss= 0.2312, accuracy= 0.9815, f1_score(weighted)= 0.9814
Classification Report:
                  precision    recall  f1-score   support

         Normal       1.00      0.92      0.96        12
Audio-Streaming       0.92      1.00      0.96        12
       Browsing       1.00      0.92      0.96        12
           Chat       1.00      1.00      1.00        12
  File-Transfer       0.92      1.00      0.96        12
          Email       1.00      1.00      1.00        12
            P2P       1.00      1.00      1.00        12
Video-Streaming       1.00      1.00      1.00        12
           VOIP       1.00      1.00      1.00        12

       accuracy                           0.98       108
      macro avg       0.98      0.98      0.98       108
   weighted avg       0.98      0.98      0.98       108



In [42]:
import numpy as np
import torch
import torch.nn as nn
import random
import pickle
from sklearn.metrics import f1_score, accuracy_score, classification_report

# Set device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Dataset directory (the literal already ends with a path separator)
path = "datasets/Darknet/"

edge_feat = torch.tensor(np.load(path + "edge_feat_scaled.npy", allow_pickle=True), dtype=torch.float, device=device)
label = torch.tensor(np.load(path + "label_mul.npy", allow_pickle=True), dtype=torch.long, device=device)

# Labels for relationship types
labels = ['Normal', 'Audio-Streaming', 'Browsing', 'Chat', 'File-Transfer', 'Email', 'P2P', 'Video-Streaming', 'VOIP']

# Test the model on the Phase 2 test split.
# NOTE(review): `model`, `loss_fn` and `unused_test` are not defined in this
# cell -- presumably they come from earlier cells / kernel state; confirm.
label_cpu = label.cpu().numpy()
# Inference only: disable dropout and gradient tracking.
model.eval()
with torch.no_grad():
    f1_test, loss_test, predict_output = predict_(model, label, loss_fn, unused_test, device, edge_feat)

# Accuracy, weighted F1 and a per-class report for the same predictions
accuracy = accuracy_score(label_cpu[unused_test], predict_output)
f1_weighted = f1_score(label_cpu[unused_test], predict_output, average="weighted")
report = classification_report(label_cpu[unused_test], predict_output, target_names=labels)

# Print test set results
print(f"Test set results: loss= {loss_test:.4f}, accuracy= {accuracy:.4f}, f1_score(weighted)= {f1_weighted:.4f}")
print("Classification Report:\n", report)


Test set results: loss= 0.1116, accuracy= 0.9907, f1_score(weighted)= 0.9907
Classification Report:
                  precision    recall  f1-score   support

         Normal       1.00      1.00      1.00        12
Audio-Streaming       1.00      1.00      1.00        12
       Browsing       1.00      1.00      1.00        12
           Chat       1.00      0.92      0.96        12
  File-Transfer       0.92      1.00      0.96        12
          Email       1.00      1.00      1.00        12
            P2P       1.00      1.00      1.00        12
Video-Streaming       1.00      1.00      1.00        12
           VOIP       1.00      1.00      1.00        12

       accuracy                           0.99       108
      macro avg       0.99      0.99      0.99       108
   weighted avg       0.99      0.99      0.99       108

