# MillenniumDB

It is assumed that the `server_pymdb` executable is running at `localhost:8080`.

In [41]:
import sys
import time
import numpy as np

# Necessary to import from sibling directory
sys.path.append("..")

from pymdb import (
    MDBClient,
    TrainGraphLoader,
    EvalGraphLoader,
    SamplingGraphLoader,
    Sampler,
)

In [42]:
# Node and edge counts, presumably of the graph served by `server_pymdb`
# (TODO confirm) -- neither value is referenced by the cells visible here.
num_nodes, num_edges = 364_253_648, 1_257_169_959

## Training/Sampling

In [43]:
# Benchmark settings for the sampling loader.
epochs = 100
batch_size = 1000
num_seeds = 10000

# Wall-clock duration of every epoch, in nanoseconds.
time_epoch_train = []

with MDBClient(host="localhost", port=8080) as client:
    for epoch in range(epochs):
        print(f"Running for epoch {epoch+1}/{epochs}...")
        started_ns = time.perf_counter_ns()

        # A fresh loader is built inside the timed region, so its
        # construction cost is part of each epoch's measurement.
        loader = SamplingGraphLoader(
            client=client,
            num_seeds=num_seeds,
            num_neighbors=[5, 5],
            batch_size=batch_size,
        )

        # Drain the loader completely; the batches themselves are discarded.
        for batch in loader:
            pass

        time_epoch_train.append(time.perf_counter_ns() - started_ns)

Running for epoch 1/100...
Running for epoch 2/100...
Running for epoch 3/100...
Running for epoch 4/100...
Running for epoch 5/100...
Running for epoch 6/100...
Running for epoch 7/100...
Running for epoch 8/100...
Running for epoch 9/100...
Running for epoch 10/100...
Running for epoch 11/100...
Running for epoch 12/100...
Running for epoch 13/100...
Running for epoch 14/100...
Running for epoch 15/100...
Running for epoch 16/100...
Running for epoch 17/100...
Running for epoch 18/100...
Running for epoch 19/100...
Running for epoch 20/100...
Running for epoch 21/100...
Running for epoch 22/100...
Running for epoch 23/100...
Running for epoch 24/100...
Running for epoch 25/100...
Running for epoch 26/100...
Running for epoch 27/100...
Running for epoch 28/100...
Running for epoch 29/100...
Running for epoch 30/100...
Running for epoch 31/100...
Running for epoch 32/100...
Running for epoch 33/100...
Running for epoch 34/100...
Running for epoch 35/100...
Running for epoch 36/100...
R

In [47]:
# Per-epoch timings are stored in nanoseconds; the statistics are computed
# first and only then converted to seconds for display.
avg_epoch_train_s = np.mean(time_epoch_train) / 1e9
std_epoch_train_s = np.std(time_epoch_train) / 1e9

print(
    f"epochs               : {epochs}",
    f"batch_size           : {batch_size}",
    f"num_seeds            : {num_seeds}",
    f"AVG time_epoch_train : {avg_epoch_train_s}s",
    f"STD time_epoch_train : {std_epoch_train_s}s",
    sep="\n",
)

epochs               : 100
batch_size           : 1000
num_seeds            : 10000
AVG time_epoch_train : 100.68494127778999s
STD time_epoch_train : 22.27753825252498s


## Evaluation

In [45]:
# Benchmark settings for the evaluation loader.
epochs = 100
batch_size = 1000
max_iterations = 10

# Wall-clock duration of every epoch, in nanoseconds.
time_epoch_eval = []

with MDBClient(host="localhost", port=8080) as client:
    for epoch in range(epochs):
        print(f"Running for epoch {epoch+1}/{epochs}...")
        started_ns = time.perf_counter_ns()

        # A fresh loader is built inside the timed region, so its
        # construction cost is part of each epoch's measurement.
        eval_loader = EvalGraphLoader(
            client=client,
            num_neighbors=[5, 5],
            batch_size=batch_size,
        )

        # Pull at most `max_iterations` batches per epoch, so each timing
        # covers a capped number of batches rather than a full pass.
        for count, batch in enumerate(eval_loader, start=1):
            if count == max_iterations:
                break

        time_epoch_eval.append(time.perf_counter_ns() - started_ns)

Running for epoch 1/100...
Running for epoch 2/100...
Running for epoch 3/100...
Running for epoch 4/100...
Running for epoch 5/100...
Running for epoch 6/100...
Running for epoch 7/100...
Running for epoch 8/100...
Running for epoch 9/100...
Running for epoch 10/100...
Running for epoch 11/100...
Running for epoch 12/100...
Running for epoch 13/100...
Running for epoch 14/100...
Running for epoch 15/100...
Running for epoch 16/100...
Running for epoch 17/100...
Running for epoch 18/100...
Running for epoch 19/100...
Running for epoch 20/100...
Running for epoch 21/100...
Running for epoch 22/100...
Running for epoch 23/100...
Running for epoch 24/100...
Running for epoch 25/100...
Running for epoch 26/100...
Running for epoch 27/100...
Running for epoch 28/100...
Running for epoch 29/100...
Running for epoch 30/100...
Running for epoch 31/100...
Running for epoch 32/100...
Running for epoch 33/100...
Running for epoch 34/100...
Running for epoch 35/100...
Running for epoch 36/100...
R

In [46]:
# Summary of the evaluation benchmark. The statistics come from
# `time_epoch_eval` (nanoseconds, shown in seconds), so the labels name that
# list instead of the copy-pasted `time_epoch_train`.
print(f"epochs               : {epochs}")
print(f"batch_size           : {batch_size}")
print(f"AVG time_epoch_eval  : {np.mean(time_epoch_eval)/1e9}s")
print(f"STD time_epoch_eval  : {np.std(time_epoch_eval)/1e9}s")

epochs               : 100
batch_size           : 1000
AVG time_epoch_train : 0.44371102799s
STD time_epoch_train : 0.00010089918852493264s
