In [1]:
import os
import sys
sys.path.append(os.path.join(os.getcwd().replace("notebooks/split_models", "")))

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from config import Config
from data.dataset import NetworkDataset, load_datasets
from model_config import MLP_Models
from transfer.transfer_tensors import HostSocket
from utils.benchmark import SplitBenchmark

In [2]:
conf = Config()
model_conf = MLP_Models()
model = model_conf.get_model(model_conf.split_mlp_3)
model.load()
host_sock = HostSocket(so_file=conf.sock_so)
location = "host"
name = "split_" + model_conf.type
result_path = os.path.join(conf.benchmark_host, "split_model", name + ".txt")

Checkpoint loaded from /global/D1/homes/jorgetf/Network-Packet-ML-Model/checkpoint/split_model/split_mlp_3.pth!


In [3]:
X_train, y_train, X_val, y_val, X_test, y_test = load_datasets(conf.datasets, "mlp")

# create train, val and test dataloaders
train_dataset = NetworkDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, conf.batch_size, shuffle=True)

val_dataset = NetworkDataset(X_val, y_val)
val_loader = DataLoader(val_dataset, conf.batch_size, shuffle=True)

test_dataset = NetworkDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, conf.batch_size)

In [4]:
data, labels = next(iter(train_loader))
print(data.shape, labels.shape)

torch.Size([32, 513]) torch.Size([32])


In [5]:
# run benchmark
benchmark = SplitBenchmark(model, test_loader, conf.batch_size, name, result_path, socket=host_sock, split=location)
benchmark.open()
benchmark()
benchmark.transfer_time()
benchmark.close()

# print and save result
benchmark.print_result()

Success, opend host socket and listening on port (8065)!
Success, accepted connection from dpu!


[W108 11:53:45.267657630 CPUAllocator.cpp:245] Memory block of unknown size was allocated before the profiling started, profiler results will not include the deallocation event
ERROR:2026-01-08 11:53:45 1649029:1649029 DeviceProperties.cpp:47] gpuGetDeviceCount failed with code 35


Benchmark - split_mlp model:

Memory usage (MB):
Avg memory usage: 0.069MB
Peak memory usage: 0.709MB

Model inference latency on one batch (batch size = 32):
Avg latency: 0.655ms
Min latency: 0.361ms
Max latency: 6.037ms

Model inference throughput (batch size = 32):
Throughput: 11593.01 samples/sec

Model inference CPU usage (number of logical cores) during runtime:
CPU runtime: 0.54 seconds
Average CPU usage: 10.39/96 cores

Model (split_mlp) Macro-F1, Micro-F1 and Macro ROC AUC scores:
Macro-F1 score: 0.37
Micro-F1 score: 0.88
Macro ROC AUC score: nan

Split Model transfer time from dpu to host (batch size = 32):
Avg transfer time: 484.347ms
Min transfer time: 398.163ms
Max transfer time: 874.684ms



In [6]:
"""
y_logits = []
y_true = []

host_sock.open()
model.model.eval()
for i in range(100):
    # signal to dpu that host is ready to receive batch
    if i > 0:
        host_sock.signal()
    
    # receive batch from dpu
    features = host_sock.receive()
    labels = host_sock.receive().to(dtype=torch.long)

    # run inference on batch
    with torch.no_grad():
        _, logits = model.model(features, split="host")
    
    y_logits.append(logits)
    y_true.append(labels)

host_sock.close()

y_logits, y_true = torch.cat(y_logits, dim=0), torch.cat(y_true, dim=0)
y_pred = torch.argmax(y_logits, dim=1)
acc = (y_pred == y_true).float().mean()
print(f"Accuracy: {100*acc:.2f}%")
"""

'\ny_logits = []\ny_true = []\n\nhost_sock.open()\nmodel.model.eval()\nfor i in range(100):\n    # signal to dpu that host is ready to receive batch\n    if i > 0:\n        host_sock.signal()\n    \n    # receive batch from dpu\n    features = host_sock.receive()\n    labels = host_sock.receive().to(dtype=torch.long)\n\n    # run inference on batch\n    with torch.no_grad():\n        _, logits = model.model(features, split="host")\n    \n    y_logits.append(logits)\n    y_true.append(labels)\n\nhost_sock.close()\n\ny_logits, y_true = torch.cat(y_logits, dim=0), torch.cat(y_true, dim=0)\ny_pred = torch.argmax(y_logits, dim=1)\nacc = (y_pred == y_true).float().mean()\nprint(f"Accuracy: {100*acc:.2f}%")\n'