In [1]:
import numpy as np
import pyarrow

from katana.loops import do_all, do_all_operator
from katana.property_graph import PropertyGraph
from katana.timer import StatTimer
from katana.example_utils import get_input
from katana.galois import setActiveThreads

In [2]:
@do_all_operator()
def jaccard_operator(g, n1_neighbors, n1_size, output, n2):
    """Write the Jaccard similarity between a reference node and ``n2`` to ``output[n2]``.

    Args:
        g: Graph providing ``edges(node)`` and ``get_edge_dst(edge)``.
        n1_neighbors: Node-indexed flags; a truthy entry marks a node as a
            neighbor of the reference node.
        n1_size: Neighborhood size of the reference node, as supplied by the caller.
        output: Node-indexed array that receives the similarity for ``n2``.
        n2: Node being compared against the reference node.
    """
    n2_size = len(g.edges(n2))

    # Every out-edge of n2 that lands on a flagged node counts once toward
    # the intersection.
    shared = 0
    for edge in g.edges(n2):
        dst = g.get_edge_dst(edge)
        if n1_neighbors[dst]:
            shared += 1

    # Inclusion-exclusion: size of the union from the two sizes and the overlap.
    union_size = n1_size + n2_size - shared
    if union_size <= 0:
        # No neighbors on either side: score the pair as 1.
        similarity = 1
    else:
        similarity = float(shared) / union_size
    output[n2] = similarity


def jaccard(g, key_node, property_name):
    """Compute Jaccard similarity of every node to ``key_node`` and store it on the graph.

    Args:
        g: Graph to analyze; the result is attached to it as a node property.
        key_node: Node whose neighborhood every other node is compared against.
        property_name: Name of the node property column that receives the scores.
    """
    num_nodes = len(g)
    key_degree = len(g.edges(key_node))

    # Membership mask: is_key_neighbor[v] is True when key_node has an edge to v.
    is_key_neighbor = np.zeros(num_nodes, dtype=bool)
    for edge in g.edges(key_node):
        is_key_neighbor[g.get_edge_dst(edge)] = True

    # One slot per node; the operator stores each node's score at its own index.
    scores = np.empty(num_nodes, dtype=float)
    do_all(
        g,
        jaccard_operator(g, is_key_neighbor, key_degree, scores),
        steal=True,
        loop_name="jaccard",
    )

    g.add_node_property(pyarrow.table({property_name: scores}))

In [3]:
# Request 8 threads and report the value setActiveThreads returns.
active_threads = setActiveThreads(8)
print("Using threads:", active_threads)

Using threads: 8


In [5]:
# Load the example input graph "propertygraphs/ldbc_003".
g = PropertyGraph(get_input("propertygraphs/ldbc_003"))

# Time only the Jaccard computation; the graph load above is not included.
timer = StatTimer("Jaccard (Property Graph) Numba")
timer.start()
jaccard(g, 1, "JM")
timer.stop()
timing = timer.get()
print(timing)

# Spot-check one entry of the "JM" property: node 2 scored against key node 1.
jm_scores = g.get_node_property("JM")
print("Node {}: {}".format(2, jm_scores[2]))

4.601
Node 2: 0.3333333333333333
