In [1]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os
import time

ak.connect("n28", 5555)
p = 0.0005
num_nodes = 50_000
seed = 42

# Define probabilities for labels and relationships
node_lbl_probs = {"lbls2": [0.6, 0.4],  # Probabilities for integers 10 and 11
                  "lbls3": [0.5, 0.5]}  # Probabilities for True and False
edge_rel_probs = {"rels1": [0.7, 0.3]}  # Probabilities for integers 10 and 11


start = time.time()
temp_prop_graph = ar.gnp(num_nodes, p, create_using=ar.PropGraph, seed=seed)
end = time.time()
build_time = end - start

print(f"Building property graph with {len(temp_prop_graph)} vertices and "
      f"{temp_prop_graph.size()} "
      f"edges took {round(build_time,2)} seconds.")

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information.
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()

##############################################
# Generate random node and edge attributes for the main graph
##############################################

# # For nodes:
# node_ints = ak.randint(10, 12, num_nodes, seed=seed)
# node_bools = ak.randint(0, 2, num_nodes, dtype=ak.bool, seed=seed)

# # For edges:
# edge_ints = ak.randint(10, 12, num_edges, seed=seed)

node_ints = ak.randint(10, 12, num_nodes, seed=seed)
node_ints = ak.where(ak.randint(0, 100, num_nodes, seed=seed) < node_lbl_probs["lbls2"][0] * 100, 10, 11)

node_bools = ak.randint(0, 100, num_nodes, seed=seed) < node_lbl_probs["lbls3"][0] * 100

# For edges
edge_ints = ak.where(ak.randint(0, 100, num_edges, seed=seed) < edge_rel_probs["rels1"][0] * 100, 10, 11)

# Create dataframes with the new attributes
edge_df = ak.DataFrame({
    "src": edges[0],
    "dst": edges[1],
    "rels1": edge_ints
})

node_df = ak.DataFrame({
    "nodes": nodes,
    "lbls2": node_ints,
    "lbls3": node_bools
})

# Create the new property graph with these attributes
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
prop_graph.load_node_attributes(node_df, node_column="nodes")
print("Property graph created with random node and edge attributes.")

##############################################
# Create the subgraph and assign random attributes
##############################################

# Subgraph structure
src_list = [2, 3, 1, 3]
dst_list = [3, 1, 2, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

subgraph_nodes = list(set(src_list) | set(dst_list))
subgraph_nodes.sort()

# Generate random node and edge attributes for the subgraph
num_subgraph_nodes = len(subgraph_nodes)
num_subgraph_edges = len(src_list)

# subgraph_node_ints = ak.randint(10, 12, num_subgraph_nodes, seed=seed)
# subgraph_node_bools = ak.randint(0, 2, num_subgraph_nodes, dtype=ak.bool, seed=seed)
# subgraph_edge_ints = ak.randint(10, 12, num_subgraph_edges, seed=seed)

# Subgraph attributes
subgraph_node_ints = ak.where(ak.randint(0, 100, num_subgraph_nodes, seed=seed) < node_lbl_probs["lbls2"][0] * 100, 10, 11)
subgraph_node_bools = ak.randint(0, 100, num_subgraph_nodes, seed=seed) < node_lbl_probs["lbls3"][0] * 100
subgraph_edge_ints = ak.where(ak.randint(0, 100, num_subgraph_edges, seed=seed) < edge_rel_probs["rels1"][0] * 100, 10, 11)

# Create dataframes for subgraph attributes
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "rels1": subgraph_edge_ints
})

node_df_h = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "lbls2": subgraph_node_ints,
    "lbls3": subgraph_node_bools
})

# Create the subgraph with these attributes
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
sg.load_node_attributes(node_df_h, node_column="nodes")

print("Subgraph created with random node and edge attributes.")


    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21


connected to arkouda server tcp://*:5555


Building property graph with 50000 vertices and 1250667 edges took 18.73 seconds.
Property graph created with random node and edge attributes.
Subgraph created with random node and edge attributes.


In [9]:
"""Correctness check with NetworkX """
# Get node and edge attributes from Arachne property graphs.
subgraph_node_attributes = sg.get_node_attributes()
subgraph_edge_attributes = sg.get_edge_attributes()
graph_node_attributes = prop_graph.get_node_attributes()
graph_edge_attributes = prop_graph.get_edge_attributes()

# Create NetworkX subgraph.
subgraph_networkx = nx.from_pandas_edgelist(subgraph_edge_attributes.to_pandas(), source="src", target="dst", edge_attr=True, create_using=nx.DiGraph)
subgraph_node_attribute_dict = subgraph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
print(subgraph_node_attribute_dict)
nx.set_node_attributes(subgraph_networkx, subgraph_node_attribute_dict)

# Create NetworkX main graph.
graph_networkx = nx.from_pandas_edgelist(graph_edge_attributes.to_pandas(), source="src", target="dst", edge_attr=True, create_using=nx.DiGraph)
graph_node_attribute_dict = graph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
print(graph_node_attribute_dict)
nx.set_node_attributes(graph_networkx, graph_node_attribute_dict)

# Attribute matching functions that need to be used by the NetworkX DiGraphMatcher.
def node_matcher(u, v):
    return u == v

def edge_matcher(e1, e2):
    return e1 == e2

# Find subgraph isomorphisms of H in G.
structural_matcher = nx.algorithms.isomorphism.DiGraphMatcher(graph_networkx, subgraph_networkx)
attribute_matcher = nx.algorithms.isomorphism.DiGraphMatcher(graph_networkx, subgraph_networkx, node_match=node_matcher, edge_match=edge_matcher)

# List of dicts. For each dict, keys is original graph vertex, values are subgraph vertices.
subgraph_isomorphisms_structural = list(structural_matcher.subgraph_monomorphisms_iter())
print("Structural monomorphisms found =", len(subgraph_isomorphisms_structural))

start_time = time.time()

subgraph_isomorphisms_attributed = list(attribute_matcher.subgraph_monomorphisms_iter())
end_time = time.time()

elapsed_time = end_time - start_time
print("Attributed monomorphisms found =", len(subgraph_isomorphisms_attributed))
print(f"Time taken to find attributed monomorphisms: {elapsed_time:.2f} seconds")


{0: {'lbls2': 11, 'lbls3': False}, 1: {'lbls2': 10, 'lbls3': True}, 2: {'lbls2': 11, 'lbls3': False}, 3: {'lbls2': 11, 'lbls3': False}}
This transfer will use 28 MB .
{0: {'lbls2': 11, 'lbls3': False}, 1: {'lbls2': 10, 'lbls3': True}, 2: {'lbls2': 11, 'lbls3': False}, 3: {'lbls2': 11, 'lbls3': False}, 4: {'lbls2': 11, 'lbls3': False}, 5: {'lbls2': 10, 'lbls3': True}, 6: {'lbls2': 10, 'lbls3': False}, 7: {'lbls2': 10, 'lbls3': True}, 8: {'lbls2': 10, 'lbls3': True}, 9: {'lbls2': 10, 'lbls3': True}, 10: {'lbls2': 10, 'lbls3': True}, 11: {'lbls2': 10, 'lbls3': True}, 12: {'lbls2': 11, 'lbls3': False}, 13: {'lbls2': 11, 'lbls3': False}, 14: {'lbls2': 10, 'lbls3': True}, 15: {'lbls2': 11, 'lbls3': False}, 16: {'lbls2': 10, 'lbls3': True}, 17: {'lbls2': 11, 'lbls3': False}, 18: {'lbls2': 10, 'lbls3': True}, 19: {'lbls2': 10, 'lbls3': True}, 20: {'lbls2': 10, 'lbls3': False}, 21: {'lbls2': 10, 'lbls3': True}, 22: {'lbls2': 11, 'lbls3': False}, 23: {'lbls2': 10, 'lbls3': True}, 24: {'lbls2': 1

In [3]:
"""VF2-SI """
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, 
                                           semantic_check = "and", algorithm_type = "si",
                                           reorder_type = "structural", return_isos_as = "vertices")

print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph")

We found 232.0 monos inside of the graph


In [4]:
"""VF2-PS MVE-REORDERING"""
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, 
                                           semantic_check = "and", algorithm_type = "ps", 
                                           reorder_type = "structural", return_isos_as = "vertices")

print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph")

We found 232.0 monos inside of the graph


In [5]:
"""VF2-PS DEFAULT"""
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, 
                                           semantic_check = "and", algorithm_type = "ps", 
                                           reorder_type = None, return_isos_as = "vertices")

print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph")

We found 232.0 monos inside of the graph


In [6]:
"""VF2-SI PROBABILITY-MVE"""
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, 
                                           semantic_check = "and", algorithm_type = "si",
                                           reorder_type = "probability", return_isos_as = "vertices")

print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph")

We found 232.0 monos inside of the graph


In [7]:
"""VF2-PS PROBABILITY-MVE"""
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, 
                                           semantic_check = "and", algorithm_type = "ps", 
                                           reorder_type = "probability", return_isos_as = "vertices")

print(f"We found {len(isos_as_vertices[0])/len(sg)} monos inside of the graph")

We found 232.0 monos inside of the graph
