# MillenniumAI performance comparison

In [4]:
# `os` is imported here because this configuration cell runs before the
# notebook's main import cell.
import os

# Directory holding the MillenniumDB executables. The default is the original
# development build tree; set the MDB_BIN_DIR environment variable to run the
# notebook on another machine without editing this cell.
_MDB_BIN_DIR = os.environ.get(
    "MDB_BIN_DIR", "/home/zeus/MDB/MillenniumDB-Dev/build/Release/bin"
)

# Path to MillenniumDB executables
SERVER_PYMDB_PATH = os.path.join(_MDB_BIN_DIR, "server_pymdb")
CREATE_DB_PATH = os.path.join(_MDB_BIN_DIR, "create_db")

# Port to run MillenniumDB server on (override with MDB_SERVER_PORT)
SERVER_PORT = int(os.environ.get("MDB_SERVER_PORT", "8080"))

In [11]:
import os
import shutil
import socket
import subprocess
import sys
import time
from typing import Tuple, List, Dict

import networkx as nx
import torch
from torch_geometric.data import Data
from torch_geometric.loader import NeighborLoader
from torch_geometric.utils import from_networkx

# Necessary to import from sibling directory
sys.path.append("..")


from pymdb import (
    MDBClient,
    TrainGraphLoader,
    EvalGraphLoader,
    SamplingGraphLoader,
    Sampler,
)


In [6]:
def generate_graph(
    num_nodes: int,
    num_edges: int,
    num_node_feat: int,
    seed: int = 2023,
) -> Tuple[Data, str]:
    """Generate one random directed graph in both in-memory and on-disk form.

    Args:
        num_nodes: Number of nodes in the graph.
        num_edges: Number of edges in the graph.
        num_node_feat: Number of float features generated per node.
        seed: Seed for the graph topology and for the node features/labels,
            so that repeated runs benchmark exactly the same data.

    Returns:
        A tuple ``(graph, dump_path)`` where ``graph`` is the in-memory graph
        with ``node_feat`` and ``node_label`` attached, and ``dump_path`` is
        the path of the MillenniumDB dump file written to the current
        directory.
    """
    # In-memory graph
    graph = from_networkx(
        nx.gnm_random_graph(num_nodes, num_edges, seed=seed, directed=True)
    )

    # A dedicated generator makes features and labels reproducible without
    # touching the global torch RNG state.
    rng = torch.Generator().manual_seed(seed)
    graph.node_feat = torch.rand(
        num_nodes, num_node_feat, generator=rng, dtype=torch.float32
    )
    graph.node_label = torch.randint(
        0, 2, (num_nodes,), generator=rng, dtype=torch.uint8
    )

    # Convert to plain Python values once instead of indexing tensors
    # element by element inside the write loops.
    labels = graph.node_label.tolist()
    features = graph.node_feat.tolist()
    edges = graph.edge_index.T.tolist()

    # On-disk MillenniumDB graph
    dump_path = f"N{num_nodes}_E{num_edges}_F{num_node_feat}.milldb"
    with open(dump_path, "w", encoding="utf-8") as f:
        for idx, (label, feat) in enumerate(zip(labels, features)):
            f.write(f"N{idx} :L{label} feat:{feat}\n")
        for source, target in edges:
            f.write(f"N{source}->N{target} :T\n")
    return graph, dump_path


def create_db(dump_path: str) -> str:
    """Create a MillenniumDB database from a dump file and return its path.

    The database directory is created in the current working directory and is
    named after the dump file without its ``.milldb`` extension. A directory
    already present under that name is removed first.

    Args:
        dump_path: Path to the MillenniumDB dump file.

    Returns:
        The path of the created database directory.

    Raises:
        RuntimeError: If the ``create_db`` executable exits with a non-zero
            code; the message carries its stderr output.
    """
    extension = ".milldb"
    dest_path = os.path.basename(dump_path)
    # Strip only a trailing extension: str.replace would also remove
    # ".milldb" when it appears in the middle of the file name.
    if dest_path.endswith(extension):
        dest_path = dest_path[: -len(extension)]

    # Start from a clean directory so stale data never mixes with the new
    # database; rmtree raises instead of failing silently.
    if os.path.isdir(dest_path):
        shutil.rmtree(dest_path)

    result = subprocess.run(
        [CREATE_DB_PATH, dump_path, dest_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"create_db: {result.stderr.decode('utf-8', errors='replace')}"
        )
    return dest_path


def start_server(db_path: str):
    """Start a MillenniumDB server for a database directory.

    Blocks until the server accepts TCP connections on ``SERVER_PORT``.

    Args:
        db_path: Path of the database directory to serve.

    Returns:
        The ``subprocess.Popen`` handle of the running server.

    Raises:
        RuntimeError: If the server process exits before it starts listening;
            the message carries its exit code and stderr output.
    """
    process = subprocess.Popen(
        [SERVER_PYMDB_PATH, db_path, "-p", str(SERVER_PORT)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )

    # Wait for server to listen to port
    while True:
        # A server that already exited will never listen, so fail loudly
        # instead of polling forever.
        if process.poll() is not None:
            details = process.stderr.read().decode("utf-8", errors="replace")
            raise RuntimeError(
                f"start_server: server exited with code {process.returncode}: {details}"
            )

        # Close every probe socket explicitly so file descriptors do not
        # accumulate while waiting.
        with socket.socket() as probe:
            if probe.connect_ex(("localhost", SERVER_PORT)) == 0:
                return process

        time.sleep(0.5)


def kill_server(process) -> int:
    """Kill a MillenniumDB server process and return its exit code.

    Args:
        process: Handle of the server process; it must provide ``kill()``
            and ``wait()``.

    Returns:
        The value returned by ``process.wait()`` after the kill request.
    """
    process.kill()
    # wait() reaps the process and yields its exit status.
    exit_code = process.wait()
    return exit_code


In [16]:
def run_performance_tests(instances: List[Dict]) -> List[Dict]:
    """Time batch loading from an in-memory graph and from MillenniumDB.

    For every instance a random graph is generated, loaded into a fresh
    MillenniumDB database, and iterated once with each loader. The server
    process and all on-disk artefacts are cleaned up even when a step fails.

    Args:
        instances: Keyword-argument dictionaries forwarded to
            ``generate_graph``.

    Returns:
        One dictionary per instance with the keys ``"instance"`` (the input
        dictionary), ``"time_mem_ns"`` and ``"time_mdb_ns"`` (elapsed
        nanoseconds for the in-memory and MillenniumDB loaders).
    """
    results = []
    for instance in instances:
        # Generate graphs and create database
        graph, dump_path = generate_graph(**instance)
        db_path = None
        server_process = None
        try:
            db_path = create_db(dump_path)

            print(f"Running for instance: {instance}...")

            # 1. In-memory graph
            print("  In-memory graph...")
            t0_mem = time.perf_counter_ns()
            # TODO: Change this to GraphSAGE training phase
            for _ in NeighborLoader(graph, num_neighbors=[5], batch_size=10):
                pass
            time_mem_ns = time.perf_counter_ns() - t0_mem

            # 2. MillenniumDB graph
            print("  MillenniumDB graph...")
            # Start MillenniumDB server
            server_process = start_server(db_path)
            # Connect to server
            with MDBClient("localhost", SERVER_PORT) as client:
                # TODO: Change this to GraphSAGE training phase
                t0_mdb = time.perf_counter_ns()
                for _ in EvalGraphLoader(client, num_neighbors=[5], batch_size=10):
                    pass
                time_mdb_ns = time.perf_counter_ns() - t0_mdb

            # Keep the measurements; previously they were computed and dropped.
            results.append(
                {
                    "instance": instance,
                    "time_mem_ns": time_mem_ns,
                    "time_mdb_ns": time_mdb_ns,
                }
            )
        finally:
            # Clean up, also on failure, so no server keeps the port busy and
            # no database or dump file is left behind.
            if server_process is not None:
                kill_server(server_process)
            if db_path is not None:
                shutil.rmtree(db_path, ignore_errors=True)
            if os.path.exists(dump_path):
                os.remove(dump_path)
    return results


# Benchmark configurations: each dictionary is forwarded as keyword arguments
# to generate_graph by run_performance_tests.
instances = [
    {
        "num_nodes": 20,
        "num_edges": 20,
        "num_node_feat": 10,
    },
]
run_performance_tests(instances)


Running for instance: {'num_nodes': 20, 'num_edges': 20, 'num_node_feat': 10}...
  In-memory graph...
  MillenniumDB graph...


In [8]:
# Disabled scratch cell: the generate_graph call is wrapped in a string
# literal, so nothing is executed and the cell only echoes the text.
"""graph, mdb_graph_path = generate_graph(100, 200, 10)"""

'graph, mdb_graph_path = generate_graph(100, 200, 10)'

In [9]:
# Disabled scratch cell: the create_db call is wrapped in a string literal,
# so nothing is executed and the cell only echoes the text.
"""create_db(mdb_graph_path)"""

'create_db(mdb_graph_path)'

In [10]:
# Disabled scratch cell: the server start / client connect / server kill
# sequence below is wrapped in a string literal, so nothing is executed and
# the cell only echoes the text.
"""server_process = start_server("N100_E200_F10")
with MDBClient("localhost", SERVER_PORT) as client:
    pass
kill_server(server_process)"""

'server_process = start_server("N100_E200_F10")\nwith MDBClient("localhost", SERVER_PORT) as client:\n    pass\nkill_server(server_process)'