In [1]:
import os
import sys
import json
# Make the project-local modules imported below (parse_dataset, load_models,
# benchmark) importable by adding the current working directory, with every
# occurrence of the substring "model_inference" removed, to the import path.
# NOTE(review): assumes the notebook is launched from the model_inference/
# directory so that this resolves to the repository root — confirm.
sys.path.append(os.getcwd().replace("model_inference", ""))

import torch
from torch.utils.data import DataLoader
from parse_dataset import NetworkDataset
from load_models import models
from benchmark import BenchmarkSplitModel

In [2]:
# Run configuration shared by the cells below.
conf = dict(
    batch_size=128,        # batch size for every DataLoader and for the benchmark
    epochs=10,             # not referenced by the cells shown in this notebook
    learning_rate=0.0001,  # not referenced by the cells shown in this notebook
    dpu_model="dpu_mlp",   # key into `models` + checkpoint file prefix (first stage)
    host_model="host_mlp", # key into `models` + checkpoint file prefix (second stage)
    model="mlp",           # decides whether inputs get an extra trailing dimension
    device="host",         # used to build the benchmark output path/file name
)

In [3]:
# Select the torch device: CUDA when a GPU is visible, CPU otherwise.
# Note: the cells shown in this notebook only print this value; they never
# move a model or tensor onto it.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [4]:
# Locate the "datasets" directory next to the notebook's working directory
# (the cwd with the substring "model_inference" stripped out).
dataset_path = os.path.join(os.getcwd().replace("model_inference", ""), "datasets")

# Paths of the label index and of the three serialized dataset splits.
json_file = os.path.join(dataset_path, "label_index.json")
train_dataset_file = os.path.join(dataset_path, "train_dataset.pt")
val_dataset_file = os.path.join(dataset_path, "val_dataset.pt")
test_dataset_file = os.path.join(dataset_path, "test_dataset.pt")

# Label dictionary, as stored in the JSON file.
with open(json_file, 'r') as file:
    label_dict = json.load(file)

# Each .pt file unpacks into a (features, labels) pair.
X_train, y_train = torch.load(train_dataset_file)
X_val, y_val = torch.load(val_dataset_file)
X_test, y_test = torch.load(test_dataset_file)

# Every model other than the two MLP variants gets a trailing singleton dimension.
if conf["model"] not in ("mlp", "light_mlp"):
    X_train, X_val, X_test = (split.unsqueeze(-1) for split in (X_train, X_val, X_test))

print(X_test.shape)
print(y_test.shape)

# Wrap each split in the project's dataset class ...
train_dataset = NetworkDataset(X_train, y_train)
val_dataset = NetworkDataset(X_val, y_val)
test_dataset = NetworkDataset(X_test, y_test)

# ... and expose it through a DataLoader. Train and validation batches are
# shuffled; the test loader keeps dataset order and uses 16 worker processes.
train_loader = DataLoader(train_dataset, batch_size=conf["batch_size"], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=conf["batch_size"], shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=conf["batch_size"], num_workers=16)

torch.Size([101811, 513])
torch.Size([101811])


In [5]:
# Checkpoint for the first-stage (DPU) model: <root>/checkpoint/<dpu_model>_model.pth,
# where <root> is the cwd with the substring "model_inference" removed.
dpu_path = os.path.join(
    os.getcwd().replace("model_inference", ""),
    "checkpoint",
    f'{conf["dpu_model"]}_model.pth',
)

# Look up the model wrapper by its configured name and restore its weights.
dpu_model = models[conf["dpu_model"]]
dpu_model.load(dpu_path)

Checkpoint loaded from /home/jorgetf/testmodel/Network-Packet-ML-Model/checkpoint/dpu_mlp_model.pth!


In [6]:
# Build a (logits, labels) dataset from the outputs of the DPU model:
# run every test batch through the first-stage network and keep its first
# output together with the matching labels.
logits_data = []
labels_list = []

# Gradients are disabled for the whole loop, so the collected outputs do not
# track autograd history. The previous bare `logits.detach()` call was a no-op
# (detach() returns a new tensor and its result was discarded) and is removed.
# NOTE(review): confirm that dpu_model.load() leaves the network in eval mode;
# otherwise dropout / batch-norm layers would run in training mode here.
with torch.no_grad():
    for data, labels in test_loader:
        # The model returns a pair; only the first element is used here.
        logits, _ = dpu_model.model(data)
        logits_data.append(logits)
        labels_list.append(labels)

# Concatenate the per-batch results along the batch dimension. The names are
# rebound from lists to tensors because the next cell reads them as tensors.
logits_data, labels_list = torch.cat(logits_data, dim=0), torch.cat(labels_list, dim=0)

In [7]:
# Wrap the DPU outputs and their labels as the input dataset of the host model.
logits_test_dataset = NetworkDataset(logits_data, labels_list)

# No shuffling is requested, so batches follow the order of the collected logits.
logits_test_loader = DataLoader(
    dataset=logits_test_dataset,
    batch_size=conf["batch_size"],
)

In [8]:
# Checkpoint for the second-stage (host) model: <root>/checkpoint/<host_model>_model.pth,
# where <root> is the cwd with the substring "model_inference" removed.
host_path = os.path.join(
    os.getcwd().replace("model_inference", ""),
    "checkpoint",
    f'{conf["host_model"]}_model.pth',
)
host_model = models[conf["host_model"]]

# Benchmark the host model on the logits produced by the DPU model.
benchmark = BenchmarkSplitModel(
    host_model,
    host_path,
    logits_test_loader,
    conf["batch_size"],
    conf["host_model"],
)

Checkpoint loaded from /home/jorgetf/testmodel/Network-Packet-ML-Model/checkpoint/host_mlp_model.pth!


In [9]:
# Run every benchmark of the host model, one after another.
# NOTE(review): presumably each call records its report lines in
# `benchmark.results` (read by the next cell), in call order — confirm in
# benchmark.py before reordering these calls.
benchmark.memory_usage()
benchmark.latency()
benchmark.throughput()
benchmark.cpu_usage()
benchmark.accuracy()

[W1126 12:33:55.256594397 CPUAllocator.cpp:245] Memory block of unknown size was allocated before the profiling started, profiler results will not include the deallocation event
ERROR:2025-11-26 12:33:55 793915:793915 DeviceProperties.cpp:47] gpuGetDeviceCount failed with code 35


In [10]:
# Persist the benchmark report to
#   <cwd>/benhcmarks/<device>/<host_model>_<device>_benchmark.txt
# and echo it in the notebook.
# NOTE(review): "benhcmarks" looks like a misspelling of "benchmarks", but the
# recorded run wrote to this location successfully, so the literal is kept;
# rename the directory on disk and this string together if it is ever fixed.
file = os.path.join(os.getcwd(), "benhcmarks", conf["device"], conf["host_model"] + "_" + conf["device"] + "_benchmark.txt")

# Create the target directory when it is missing (e.g. a fresh checkout or a
# new `device` value) instead of failing with FileNotFoundError on open().
os.makedirs(os.path.dirname(file), exist_ok=True)

with open(file, "w") as f:
    for line in benchmark.results:
        # write(), not writelines(): each entry is a single string, and
        # writelines() would iterate over it character by character.
        f.write(line + "\n")

for line in benchmark.results:
    print(line)

Benchmark - host_mlp model:

Memory usage (MB):
Avg memory usage: 0.458MB
Peak memory usage: 1.680MB

Model inference latency on one batch (batch size = 128):
Avg latency: 0.586ms
Min latency: 0.524ms
Max latency: 0.792ms

Model inference throughput (batch size = 128):
Throughput: 88140.80 samples/sec

Model inference CPU usage (number of logical cores) during runtime:
CPU runtime: 0.07 seconds
Average CPU usage: 49.02/96 cores

Model (host_mlp) inference accuracy (%):
Accuracy: 55.47%



