In [None]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os

In [None]:
# NOTE: The host name below is site-specific; replace it with whatever applies
# in your environment. When running locally, call ak.connect() with no arguments.
ak.connect(server="n115", port=5555)

### Simple Graph Experiments

In [3]:
# Edge list of the main (host) graph: edge i runs from src_main[i] to dst_main[i].
src_main = [1, 2, 1, 3, 4, 2, 5, 4, 5, 6, 3, 8, 9, 4, 9, 10]
dst_main = [0, 1, 3, 4, 1, 4, 2, 5, 6, 7, 8, 9, 3, 9, 10, 4]
src_maingraph = ak.array(src_main)
dst_maingraph = ak.array(dst_main)

# Every vertex id that appears as an endpoint, in ascending order.
maingraph_nodes = sorted(set(src_main) | set(dst_main))

# Node attributes: one entry per vertex, in the same order as maingraph_nodes.
labels1_maingraph = ak.array(["yellow", "red", "blue", "blue", "blue", "red", "blue","yellow", "blue", "yellow", "blue"])
labels2_maingraph = ak.array([10, 12, 10, 11, 11, 12, 11, 13, 10, 13, 10])
labels3_maingraph = ak.array([False, True, True, True, True, True, False, False, True, False, False])
# Categorical built from the same color strings as labels1_maingraph.
labels4_maingraph = ak.Categorical(labels1_maingraph)

# Edge attribute: one entry per edge, in the same order as src_main / dst_main.
rels1_maingraph = ak.array([10, 5, 10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 5, 5, 10])

# Dataframes consumed when the main property graph is built.
edge_df = ak.DataFrame(
    {
        "src": src_maingraph,
        "dst": dst_maingraph,
        "value": rels1_maingraph,
    }
)
node_df = ak.DataFrame(
    {
        "nodes": ak.array(maingraph_nodes),
        "color": labels1_maingraph,
        "value": labels2_maingraph,
        "True?": labels3_maingraph,
        "cats": labels4_maingraph,
    }
)

In [4]:
# Main property graph: edges come from edge_df, vertex attributes from node_df.
pg = ar.PropGraph()
pg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
)
pg.load_node_attributes(
    node_df,
    node_column="nodes",
)

In [5]:
# Edge list of the pattern (subgraph) searched for in the simple-graph tests:
# edge i runs from src_list[i] to dst_list[i].
src_list = [0, 1, 2, 1]
dst_list = [1, 2, 0, 3]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Every vertex id that appears as an endpoint, in ascending order.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

# Node attributes: one entry per pattern vertex, in the order of subgraph_nodes.
labels1_subgraph = ak.array(["blue", "blue", "red", "yellow"])
labels2_subgraph = ak.array([10, 11, 12, 13])
labels3_subgraph = ak.array([True, True, True, False])
# Categorical built from the same color strings as labels1_subgraph.
labels4_subgraph = ak.Categorical(labels1_subgraph)

# Edge attribute: one entry per pattern edge.
rels1_subgraph = ak.array([10, 10, 10, 5])

In [None]:
"""TEST COMBINED PICKER ON SIMPLE GRAPH"""
# Pattern dataframes. They use their own names (edge_df_h / node_df_h, the same
# names the later experiment cells use for pattern dataframes) so that edge_df /
# node_df keep holding the main graph that pg was built from; re-running the pg
# cell after this one therefore still loads the main graph.
edge_df_h = ak.DataFrame({"src":src_subgraph, "dst":dst_subgraph,
                          "value":rels1_subgraph
})
node_df_h = ak.DataFrame({"nodes":ak.array(subgraph_nodes),
                          "color":labels1_subgraph,
                          "value":labels2_subgraph,
                          "True?":labels3_subgraph,
                          "cats":labels4_subgraph
})

# Pattern graph; string columns are loaded without conversion to categoricals.
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst", convert_string_relationships_to_categoricals=False)
sg.load_node_attributes(node_df_h, node_column="nodes", convert_string_labels_to_categoricals=False)

# Search pg for sg with algorithm_type "si" and reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(pg, sg,
                                           semantic_check = "or", algorithm_type = "si",
                                           reorder = True, return_isos_as = "vertices")
# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph with reordering")

In [None]:
"""TEST COMBINED PICKER ON SIMPLE GRAPH"""
# Pattern dataframes. They use their own names (edge_df_h / node_df_h, the same
# names the later experiment cells use for pattern dataframes) so that edge_df /
# node_df keep holding the main graph that pg was built from; re-running the pg
# cell after this one therefore still loads the main graph.
edge_df_h = ak.DataFrame({"src":src_subgraph, "dst":dst_subgraph,
                          "value":rels1_subgraph
})
node_df_h = ak.DataFrame({"nodes":ak.array(subgraph_nodes),
                          "color":labels1_subgraph,
                          "value":labels2_subgraph,
                          "True?":labels3_subgraph,
                          "cats":labels4_subgraph
})

# Pattern graph; string columns are loaded without conversion to categoricals.
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst", convert_string_relationships_to_categoricals=False)
sg.load_node_attributes(node_df_h, node_column="nodes", convert_string_labels_to_categoricals=False)

# Search pg for sg with algorithm_type "si" and reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(pg, sg,
                                           semantic_check = "or", algorithm_type = "si",
                                           reorder = False, return_isos_as = "vertices")
# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph without reordering")

In [None]:
"""TEST VF2-PS ON SIMPLE GRAPH"""
# Pattern dataframes. They use their own names (edge_df_h / node_df_h, the same
# names the later experiment cells use for pattern dataframes) so that edge_df /
# node_df keep holding the main graph that pg was built from; re-running the pg
# cell after this one therefore still loads the main graph.
edge_df_h = ak.DataFrame({"src":src_subgraph, "dst":dst_subgraph,
                          "value":rels1_subgraph
})
node_df_h = ak.DataFrame({"nodes":ak.array(subgraph_nodes),
                          "color":labels1_subgraph,
                          "value":labels2_subgraph,
                          "True?":labels3_subgraph,
                          "cats":labels4_subgraph
})

# Pattern graph; string columns are loaded without conversion to categoricals.
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst", convert_string_relationships_to_categoricals=False)
sg.load_node_attributes(node_df_h, node_column="nodes", convert_string_labels_to_categoricals=False)

# Search pg for sg with algorithm_type "ps" (the VF2-PS run) and reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(pg, sg,
                                           semantic_check = "or", algorithm_type = "ps",
                                           reorder = False, return_isos_as = "vertices")
# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph with VF2-PS")

### Experiments from Mohammad

In [48]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os
import time

# NOTE(review): this cell connects to "n114", whereas the first connect cell of
# the notebook uses "n115" - confirm which server is intended before running the
# notebook top to bottom.
ak.connect(server="n114", port=5555)

# Inputs to ar.gnp further down in this cell.
num_nodes = 100_000
p = 0.0005

# Not read again inside this cell; the Erdos-Renyi section later assigns the
# same names before using them.
n = 100
m = 1000
s = 2
x = 2
y = 2

# ### Build graph from randomly generated source and destination arrays.
# # 1. Use Arkouda's randint to generate the random edge arrays.

# def create_random_directed_graph(num_nodes, p):
#     """
#     Generates a random directed graph (Erdős-Rényi model) and returns the src and dst arrays.
    
#     Parameters:
#     num_nodes (int): Number of nodes in the graph.
#     p (float): Probability of creating an edge between two nodes.
    
#     Returns:
#     tuple: A tuple containing two lists (src, dst) representing the source and destination of each edge.
#     """
#     # Create a random graph using Erdős–Rényi model
#     random_graph = nx.gnp_random_graph(num_nodes, p, seed=42, directed=True)

#     # Extract src and dst arrays
#     src = [edge[0] for edge in random_graph.edges()]
#     dst = [edge[1] for edge in random_graph.edges()]

#     return src, dst


# p = 0.0005  # Probability of edge creation
# num_nodes = 2000

# print("Begining of Random Directed graph with P= ",p)
# src_rand, dst_rand = create_random_directed_graph(num_nodes, p)
# print("Random Directed graph created")


# src = ak.array(src_rand)
# dst = ak.array(dst_rand)


# # 2. Build temporary property graph to get sorted edges and nodes lists.
# temp_prop_graph = ar.PropGraph()
# start = time.time()
# temp_prop_graph.add_edges_from(src, dst)
# end = time.time()
# build_time = end - start

# print(f"Building property graph with {len(temp_prop_graph)} vertices and "
#         f"{temp_prop_graph.size()} "
#         f"edges took {round(build_time,2)} seconds.")

# Build a random property graph with ar.gnp(num_nodes, p) and time the call.
start = time.time()
temp_prop_graph = ar.gnp(num_nodes, p, create_using=ar.PropGraph)
end = time.time()
build_time = end - start

print(
    f"Building property graph with {len(temp_prop_graph)} vertices and "
    f"{temp_prop_graph.size()} "
    f"edges took {round(build_time,2)} seconds."
)

### Generate node labels and edge relationships for the graph.
# 1. Pull the vertex and edge arrays and their counts out of the generated graph.
#    num_nodes is rebound here to the vertex count reported by the graph.
nodes = temp_prop_graph.nodes()
edges = temp_prop_graph.edges()
num_nodes = len(temp_prop_graph)
num_edges = temp_prop_graph.size()

##############################################
# Random node and edge attributes for the host graph
# NOTE: no seed is passed to ak.randint here, so values differ between runs.
##############################################

# Node attributes.
# 2.1. Color string drawn from ["blue", "red"].
colors = ak.array(["blue", "red"])
node_colors = colors[ak.randint(0, len(colors), num_nodes)]

# 2.2. Integer label in {10, 11}: ak.randint excludes the upper bound for
#      integer dtypes, so 12 is never produced.
node_ints = ak.randint(10, 12, num_nodes)

# 2.3. Boolean label drawn from [False, True].
bool_choices = ak.array([False, True])
node_bools = bool_choices[ak.randint(0, len(bool_choices), num_nodes)]

# Edge attributes.
# 2.4. Integer value drawn from [5, 10].
edge_choices = ak.array([5, 10])
edge_vals = edge_choices[ak.randint(0, len(edge_choices), num_edges)]

##############################################
# Dataframes holding the generated attributes
##############################################

# One row per edge: endpoints plus the random edge value.
edge_df = ak.DataFrame(
    {"src": edges[0], "dst": edges[1], "edge_attr": edge_vals}
)

# One row per vertex: id plus the three random node attributes.
node_df = ak.DataFrame(
    {
        "nodes": nodes,
        "color": node_colors,
        "int_attr": node_ints,
        "bool_attr": node_bools,
    }
)

##############################################
# Host property graph carrying those attributes
##############################################

prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    relationship_columns=["edge_attr"],
)
prop_graph.load_node_attributes(
    node_df,
    node_column="nodes",
    label_columns=["color", "int_attr", "bool_attr"],
)

print("Property graph created with random node and edge attributes.")



### Create the subgraph (pattern) we are searching for.
# 1. Pattern edge list: edge i runs from src_list[i] to dst_list[i].
src_list = [0, 1, 2, 1]
dst_list = [1, 2, 0, 3]

src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Sorted list of every vertex id used by the pattern.
subgraph_nodes = list(set(src_list) | set(dst_list))
subgraph_nodes.sort()

# Candidate node labels and edge values for the pattern.
# NOTE(review): in the current state of this cell none of these reach `subgraph`:
# rels1_subgraph is not placed in edge_df_h, and node_df_h is built but the call
# that would load it is commented out below.
labels1_subgraph = ak.array(["blue", "blue", "red", "red"])
labels2_subgraph = ak.array([10, 11, 11, 10])
labels3_subgraph = ak.array([True, True, True, False])

rels1_subgraph = ak.array([10, 10, 10, 5])

subgraph = ar.PropGraph()

# Edge dataframe: endpoints only (the "rels1" column is currently disabled).
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    # "rels1": rels1_subgraph
})

# Node dataframe with all three label columns (built, but not loaded below).
# NOTE(review): the host graph's columns are named "color"/"int_attr"/"bool_attr"
# and "edge_attr", while the pattern uses "lbls1"/"lbls2"/"lbls3" and "rels1".
# If attribute matching is keyed on column names, these would need to agree
# before the disabled lines are re-enabled - TODO confirm.
node_df_h = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "lbls1": labels1_subgraph,
    "lbls2": labels2_subgraph,
    "lbls3": labels3_subgraph
})

# Load the pattern's edges; only the structure is loaded because the
# relationship_columns argument is commented out.
subgraph.load_edge_attributes(edge_df_h, 
                              source_column="src", 
                              destination_column="dst",
                              # relationship_columns=["rels1"]
                            )

# Node label loading is currently disabled:
# subgraph.load_node_attributes(node_df_h, 
#                               node_column="nodes",
#                               label_columns=["lbls1","lbls2","lbls3"])

# NOTE(review): this message overstates what was loaded above - the pattern
# currently has edges only, no node or edge attributes.
print("Subgraph created with node and edge attributes successfully.")

connected to arkouda server tcp://*:5555


Building property graph with 100000 vertices and 2498794 edges took 2.06 seconds.
Property graph created with random node and edge attributes.
Subgraph created with node and edge attributes successfully.


In [49]:
# algorithm_type "si", semantic_check "and", reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    algorithm_type="si",
    semantic_check="and",
    reorder=True,
    return_isos_as="vertices",
)

# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 0.0 monos inside of the graph


In [50]:
# algorithm_type "si", no semantic check (None), reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    algorithm_type="si",
    semantic_check=None,
    reorder=False,
    return_isos_as="vertices",
)

# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 0.0 monos inside of the graph


In [None]:
# algorithm_type "ps", semantic_check "and", reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    algorithm_type="ps",
    semantic_check="and",
    reorder=True,
    return_isos_as="vertices",
)

# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

In [51]:
# algorithm_type "ps", no semantic check (None), reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    algorithm_type="ps",
    semantic_check=None,
    reorder=False,
    return_isos_as="vertices",
)

# Reported count = length of the first returned array / number of pattern vertices.
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 0.0 monos inside of the graph


### Erdos-Renyi Experiments Below

In [35]:
# Parameters for the Erdos-Renyi experiments below.
n = 100      # upper bound handed to ak.randint when drawing edge endpoints
m = 10_000   # number of (src, dst) pairs drawn
x = 2        # upper bound handed to ak.randint for the random attribute arrays
y = 2        # not read by any cell visible in this notebook
s = 2        # the src / dst seeds are derived from it (s*2 and s*4)

In [None]:
import time

### Build graph from randomly generated source and destination arrays.
# 1. Draw m endpoints per side with ak.randint(0, n, ...); both draws are seeded
#    from s so the edge list is repeatable.
src = ak.randint(0, n, m, seed=s*2)
dst = ak.randint(0, n, m, seed=s*4)

# 2. Temporary property graph, used to obtain the edge and node arrays; only
#    the add_edges_from call is timed.
temp_prop_graph = ar.PropGraph()
start = time.time()
temp_prop_graph.add_edges_from(src, dst)
end = time.time()
build_time = end - start
print(
    f"Building property graph with {len(temp_prop_graph)} vertices and "
    f"{temp_prop_graph.size()} "
    f"edges took {round(build_time,2)} seconds."
)

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information from the temporary graph.
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()

# 2. Generate one random attribute array per dtype (int64, uint64, float64, bool,
#    strings, categorical) for the vertices, then the same set for the edges.
#    NOTE: none of these calls passes a seed (unlike the src/dst draws above),
#    so the attribute values change from run to run.
int_array_nodes = ak.randint(0, x, num_nodes, dtype=ak.dtype('int64'))
uint_array_nodes = ak.randint(0, x, num_nodes, dtype=ak.dtype('uint64'))
real_array_nodes = ak.randint(0, x, num_nodes, dtype=ak.dtype('float64'))
bool_array_nodes = ak.randint(0, x, num_nodes, dtype=ak.dtype('bool'))
# Uppercase strings of length 1 to 2 as requested via minlen/maxlen.
strings_array_nodes = ak.random_strings_uniform(minlen=1, maxlen=2, size=num_nodes, characters="uppercase")
# Categorical built from the same strings.
categorical_array_nodes = ak.Categorical(strings_array_nodes)

int_array_edges = ak.randint(0, x, num_edges, dtype=ak.dtype('int64'))
uint_array_edges = ak.randint(0, x, num_edges, dtype=ak.dtype('uint64'))
real_array_edges = ak.randint(0, x, num_edges, dtype=ak.dtype('float64'))
bool_array_edges = ak.randint(0, x, num_edges, dtype=ak.dtype('bool'))
strings_array_edges = ak.random_strings_uniform(minlen=1, maxlen=2, size=num_edges, characters="uppercase")
categorical_array_edges = ak.Categorical(strings_array_edges)

# 3. Dataframes to load into the host property graph. The float ("reals")
#    columns are generated above but deliberately left out here.
edge_df = ak.DataFrame(
    {
        "src": edges[0],
        "dst": edges[1],
        "ints": int_array_edges,
        "uints": uint_array_edges,
        # "reals": real_array_edges,
        "bools": bool_array_edges,
        "strings": strings_array_edges,
        "categoricals": categorical_array_edges,
    }
)
node_df = ak.DataFrame(
    {
        "nodes": nodes,
        "ints": int_array_nodes,
        "uints": uint_array_nodes,
        # "reals": real_array_nodes,
        "bools": bool_array_nodes,
        "strings": strings_array_nodes,
        "categoricals": categorical_array_nodes,
    }
)

# 4. Host property graph with node labels and edge relationships.
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
)
prop_graph.load_node_attributes(
    node_df,
    node_column="nodes",
)

### Create the subgraph we are searching for.
# 1. Pattern edge list: edge i runs from src_list[i] to dst_list[i].
src_list = [0, 1, 2, 2, 3]
dst_list = [1, 2, 0, 3, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Every vertex id used by the pattern, in ascending order.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

# Node attributes: a single constant value repeated once per pattern vertex.
# The real and string constants are copied from element 0 of the host data.
subgraph_int_array_nodes = ak.array([1] * len(subgraph_nodes))
subgraph_uint_array_nodes = ak.array([0] * len(subgraph_nodes), dtype=ak.dtype('uint64'))
subgraph_real_array_nodes = ak.array([float(real_array_nodes[0])] * len(subgraph_nodes))
subgraph_bool_array_nodes = ak.array([False] * len(subgraph_nodes))
subgraph_strings_array_nodes = ak.array([str(prop_graph.node_attributes["strings"][0])] * len(subgraph_nodes))
subgraph_categorical_array_nodes = ak.Categorical(subgraph_strings_array_nodes)

# Edge attributes: a single constant value repeated once per pattern edge.
subgraph_int_array_edges = ak.array([1] * len(src_list))
subgraph_uint_array_edges = ak.array([0] * len(src_list), dtype=ak.dtype('uint64'))
subgraph_real_array_edges = ak.array([float(real_array_edges[0])] * len(src_list))
subgraph_bool_array_edges = ak.array([True] * len(src_list))
subgraph_strings_array_edges = ak.array([str(prop_graph.edge_attributes["strings"][0])] * len(src_list))
subgraph_categorical_array_edges = ak.Categorical(subgraph_strings_array_edges)

In [None]:
"""TEST COMBINED PICKER"""
# Pattern carrying both edge and node attribute columns ("reals" left out).
edge_df_h = ak.DataFrame(
    {
        "src": src_subgraph,
        "dst": dst_subgraph,
        "ints": subgraph_int_array_edges,
        "uints": subgraph_uint_array_edges,
        # "reals": subgraph_real_array_edges,
        "bools": subgraph_bool_array_edges,
        "strings": subgraph_strings_array_edges,
        "categoricals": subgraph_categorical_array_edges,
    }
)
node_df_h = ak.DataFrame(
    {
        "nodes": ak.array(subgraph_nodes),
        "ints": subgraph_int_array_nodes,
        "uints": subgraph_uint_array_nodes,
        # "reals": subgraph_real_array_nodes,
        "bools": subgraph_bool_array_nodes,
        "strings": subgraph_strings_array_nodes,
        "categoricals": subgraph_categorical_array_nodes,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

# algorithm_type "si", semantic_check "or", reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
    reorder=True,
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""TEST VERTEX PICKER"""
# Pattern with node attribute columns only ("reals" left out); edges carry no attributes.
edge_df_h = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
node_df_h = ak.DataFrame(
    {
        "nodes": ak.array(subgraph_nodes),
        "ints": subgraph_int_array_nodes,
        "uints": subgraph_uint_array_nodes,
        # "reals": subgraph_real_array_nodes,
        "bools": subgraph_bool_array_nodes,
        "strings": subgraph_strings_array_nodes,
        "categoricals": subgraph_categorical_array_nodes,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

# algorithm_type "si", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""TEST EDGE PICKER"""
# Pattern with edge attribute columns only ("reals" left out); no node attributes are loaded.
edge_df_h = ak.DataFrame(
    {
        "src": src_subgraph,
        "dst": dst_subgraph,
        "ints": subgraph_int_array_edges,
        "uints": subgraph_uint_array_edges,
        # "reals": subgraph_real_array_edges,
        "bools": subgraph_bool_array_edges,
        "strings": subgraph_strings_array_edges,
        "categoricals": subgraph_categorical_array_edges,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")

# algorithm_type "si", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""TEST NO PICKER"""
# Pattern with structure only: no edge or node attribute columns.
edge_df_h = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")

# algorithm_type "si", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="si",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""VF2-PS COMBINED GROUND TRUTH"""
# Pattern carrying both edge and node attribute columns ("reals" left out).
edge_df_h = ak.DataFrame(
    {
        "src": src_subgraph,
        "dst": dst_subgraph,
        "ints": subgraph_int_array_edges,
        "uints": subgraph_uint_array_edges,
        # "reals": subgraph_real_array_edges,
        "bools": subgraph_bool_array_edges,
        "strings": subgraph_strings_array_edges,
        "categoricals": subgraph_categorical_array_edges,
    }
)
node_df_h = ak.DataFrame(
    {
        "nodes": ak.array(subgraph_nodes),
        "ints": subgraph_int_array_nodes,
        "uints": subgraph_uint_array_nodes,
        # "reals": subgraph_real_array_nodes,
        "bools": subgraph_bool_array_nodes,
        "strings": subgraph_strings_array_nodes,
        "categoricals": subgraph_categorical_array_nodes,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

# algorithm_type "ps", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="ps",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""VF2-PS VERTEX PICKER GROUND TRUTH"""
# Pattern with node attribute columns only ("reals" left out); edges carry no attributes.
edge_df_h = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
node_df_h = ak.DataFrame(
    {
        "nodes": ak.array(subgraph_nodes),
        "ints": subgraph_int_array_nodes,
        "uints": subgraph_uint_array_nodes,
        # "reals": subgraph_real_array_nodes,
        "bools": subgraph_bool_array_nodes,
        "strings": subgraph_strings_array_nodes,
        "categoricals": subgraph_categorical_array_nodes,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

# algorithm_type "ps", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="ps",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""VF2-PS EDGE PICKER GROUND TRUTH"""
# Pattern with edge attribute columns only ("reals" left out); no node attributes are loaded.
edge_df_h = ak.DataFrame(
    {
        "src": src_subgraph,
        "dst": dst_subgraph,
        "ints": subgraph_int_array_edges,
        "uints": subgraph_uint_array_edges,
        # "reals": subgraph_real_array_edges,
        "bools": subgraph_bool_array_edges,
        "strings": subgraph_strings_array_edges,
        "categoricals": subgraph_categorical_array_edges,
    }
)

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")

# algorithm_type "ps", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="ps",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")

In [None]:
"""VF2-PS NO ATTRIBUTES GROUND TRUTH"""
# Pattern with structure only: no edge or node attribute columns.
edge_df_h = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})

subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")

# algorithm_type "ps", semantic_check "or"; reorder is left at its default.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="or",
    algorithm_type="ps",
    return_isos_as="vertices",
)
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} isos inside of the graph")