In [1]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21+0.gcf6eeacde.dirty


In [2]:
# NOTE: The server address defaults to the values this notebook was recorded with ("n31", 5555).
# To point the notebook at a different Arkouda server without editing this cell, export
# ARKOUDA_SERVER_HOST and/or ARKOUDA_SERVER_PORT before starting Jupyter. If running locally,
# then a bare ak.connect() is enough.
ak.connect(
    os.environ.get("ARKOUDA_SERVER_HOST", "n31"),
    # Environment variables are strings; the port is converted to an int before connecting.
    int(os.environ.get("ARKOUDA_SERVER_PORT", "5555")),
)

connected to arkouda server tcp://*:5555


In [3]:
# Size of the random graph: vertex ids are drawn with ak.randint(0, n, ...) and
# m source/destination pairs are generated.
n, m = 100_000, 100_000_000

# Base value from which the two random seeds (s*2 and s*4) are derived.
s = 2

# Number of distinct node-label values (x) and edge-relationship values (y) to generate.
x = y = 2

In [4]:
import time

### Build graph from randomly generated source and destination arrays.
# 1. Use Arkouda's randint to generate the random edge arrays. The two arrays get distinct
#    seeds, both derived from `s`.
src = ak.randint(0, n, m, seed=s*2)
dst = ak.randint(0, n, m, seed=s*4)

# 2. Build temporary property graph to get sorted edges and nodes lists.
temp_prop_graph = ar.PropGraph()
# perf_counter() is a monotonic, high-resolution clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start = time.perf_counter()
temp_prop_graph.add_edges_from(src, dst)
end = time.perf_counter()
build_time = end - start

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information. The counts are queried once and reused for both the
#    timing report and the attribute arrays below.
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()
print(f"Building property graph with {num_nodes} vertices and "
        f"{num_edges} "
        f"edges took {round(build_time,2)} seconds.")

# 2. Generate sets of node labels and edge relationships ("lbl1".."lbl<x>", "rel1".."rel<y>").
#    The loop variable is named `i` so it does not shadow the config values `x` and `y`.
labels_set = ak.array([f"lbl{i}" for i in range(1, x + 1)])
relationships_set = ak.array([f"rel{i}" for i in range(1, y + 1)])

# 3. Give edges and nodes same exact labels and relationships.
#    Indices 0 and 1 are used below, so this requires x >= 2 and y >= 2.
node_labels_1 = ak.full(num_nodes, labels_set[0])
node_labels_2 = ak.full(num_nodes, labels_set[1])
edge_rels_1 = ak.full(num_edges, relationships_set[0])
edge_rels_2 = ak.full(num_edges, relationships_set[1])

# 4. Create dataframe to load into a new property graph.
edge_df = ak.DataFrame({"src":edges[0], "dst":edges[1], "rels1":edge_rels_1, "rels2":edge_rels_2})
node_df = ak.DataFrame({"nodes":nodes, "lbls1":node_labels_1, "lbls2":node_labels_2})

# 5. Create new property graph with node labels and edge relationships.
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst",
                                relationship_columns=["rels1", "rels2"])
prop_graph.load_node_attributes(node_df, node_column="nodes", label_columns=["lbls1", "lbls2"])

### Create the subgraph we are searching for.
# 1. Create labels and relationships to search for.
src_list = [0, 1, 2, 2, 3]
dst_list = [1, 2, 0, 3, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Sorted list of every vertex id that appears as a source or a destination.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

# Attribute arrays are sized from the node/edge lists so they stay in sync if the subgraph
# above is edited. With x = y = 2 the main graph only carries lbl1/lbl2 and rel1/rel2, so the
# second column of each kind ("lbl3", "rel3") holds a value that is absent from the main graph.
labels1_subgraph = ak.array(["lbl1"] * len(subgraph_nodes))
labels2_subgraph = ak.array(["lbl3"] * len(subgraph_nodes))
rels1_subgraph = ak.array(["rel1"] * len(src_list))
rels2_subgraph = ak.array(["rel3"] * len(src_list))

#2. Populate the subgraph.
subgraph = ar.PropGraph()
edge_df_h = ak.DataFrame({"src":src_subgraph, "dst":dst_subgraph,
                        "rels1":rels1_subgraph, "rels2":rels2_subgraph})
# node_df_h is only constructed here; this cell loads the edge attributes alone.
node_df_h = ak.DataFrame({"nodes": ak.array(subgraph_nodes), "lbls1":labels1_subgraph,
                          "lbls2":labels2_subgraph})
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst",
                                relationship_columns=["rels1","rels2"])

Building property graph with 100000 vertices and 99500951 edges took 5.96 seconds.


In [5]:
# Search prop_graph for occurrences of subgraph, requesting the matches as vertices.
# NOTE(review): size_limit presumably bounds how many matches are collected — confirm the exact
# semantics against the Arachne documentation.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
    size_limit=500,
)
# The match count is the length of the first returned element divided by the number of
# subgraph vertices (assumes one entry per subgraph vertex per match — TODO confirm).
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

We found 658.0 isos inside of the graph


In [6]:
# Search prop_graph for occurrences of subgraph, requesting the matches as edges.
# NOTE(review): time_limit presumably bounds the search duration; its unit is not visible
# here — confirm against the Arachne documentation.
isos_as_edges = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="edges",
    time_limit=2,
)
# The match count is the length of the first returned element divided by the number of
# subgraph edges (assumes one entry per subgraph edge per match — TODO confirm).
print(f"We found {len(isos_as_edges[0])/subgraph.size()} isos inside of the graph")

We found 16487961.0 isos inside of the graph


In [7]:
# Attach the node labels to the subgraph, using the "lbls1" and "lbls2" columns of node_df_h.
subgraph.load_node_attributes(
    node_df_h,
    node_column="nodes",
    label_columns=["lbls1", "lbls2"],
)

In [8]:
# Search prop_graph for occurrences of subgraph, requesting the matches as edges.
# NOTE(review): time_limit presumably bounds the search duration; its unit is not visible
# here — confirm against the Arachne documentation.
isos_as_edges = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="edges",
    time_limit=2,
)
# The match count is the length of the first returned element divided by the number of
# subgraph edges (assumes one entry per subgraph edge per match — TODO confirm).
print(f"We found {len(isos_as_edges[0])/subgraph.size()} isos inside of the graph")

We found 16099278.0 isos inside of the graph


In [9]:
# Search prop_graph for occurrences of subgraph, requesting the matches as vertices.
# NOTE(review): size_limit presumably bounds how many matches are collected — confirm the exact
# semantics against the Arachne documentation.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
    size_limit=500,
)
# The match count is the length of the first returned element divided by the number of
# subgraph vertices (assumes one entry per subgraph vertex per match — TODO confirm).
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

We found 766.0 isos inside of the graph
