In [None]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os
import numpy as np
import time

# Open the session with the Arkouda server; every ak/ar call in this notebook goes through it.
ak.connect("n82", 5555)

# Inputs handed to the random graph generator.
seed = 42
num_nodes = 500
p = 0.05

# Attribute probabilities. Only index 0 of each list is read when the
# attributes are drawn: it is the chance of the first value of each pair
# (10 vs 11, True vs False, 5 vs 10, True vs False).
node_lbl_probs = {
    "lbls2": [1, 0.0],
    "lbls3": [1, 0.0],
}
edge_rel_probs = {
    "rels1": [1, 0.0],
    "rels2": [1, 0.0],
}

# Time the construction of the attribute-less graph.
start = time.time()
temp_prop_graph = ar.gnp(num_nodes, p, create_using=ar.PropGraph, seed=seed)
end = time.time()
build_time = end - start

print(
    f"Building property graph with {len(temp_prop_graph)} vertices and "
    f"{temp_prop_graph.size()} "
    f"edges took {round(build_time,2)} seconds."
)

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information.
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()

##############################################
# Generate random node and edge attributes for the main graph
##############################################

# 2. Draw one uniform integer in [0, 100) per vertex/edge and threshold it at
#    probability * 100. Every column gets its own seed offset: reusing the same
#    seed for two draws of the same size yields identical streams, which would
#    make e.g. "lbls2 == 10" and "lbls3 == True" perfectly correlated whenever
#    their probabilities match.
node_ints = ak.where(
    ak.randint(0, 100, num_nodes, seed=seed) < node_lbl_probs["lbls2"][0] * 100, 10, 11
)
node_bools = ak.randint(0, 100, num_nodes, seed=seed + 1) < node_lbl_probs["lbls3"][0] * 100

edge_ints = ak.where(
    ak.randint(0, 100, num_edges, seed=seed + 2) < edge_rel_probs["rels1"][0] * 100, 5, 10
)
edge_bools = ak.randint(0, 100, num_edges, seed=seed + 3) < edge_rel_probs["rels2"][0] * 100

# 3. Create dataframes with the new attributes.
edge_df = ak.DataFrame({
    "src": edges[0],
    "dst": edges[1],
    "rels1": edge_ints,
    "rels2": edge_bools
})

node_df = ak.DataFrame({
    "nodes": nodes,
    "lbls2": node_ints,
    "lbls3": node_bools
})

# 4. Create the new property graph with these attributes.
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
prop_graph.load_node_attributes(node_df, node_column="nodes")
print("Property graph created with random node and edge attributes.")

##############################################
# Create the subgraph and assign fixed attributes
##############################################

# Subgraph structure: edge i goes from src_list[i] to dst_list[i].
src_list = [2, 3, 1, 3]
dst_list = [3, 1, 2, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Sorted list of every vertex that appears as an endpoint.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

num_subgraph_nodes = len(subgraph_nodes)
num_subgraph_edges = len(src_list)

# Fixed attributes, sized from the structure above so that editing
# src_list/dst_list cannot leave the attribute columns with a stale length.
subgraph_node_ints = ak.full(num_subgraph_nodes, 10, dtype=ak.int64)
subgraph_node_bools = ak.full(num_subgraph_nodes, True, dtype=ak.bool)
subgraph_edge_ints = ak.full(num_subgraph_edges, 5, dtype=ak.int64)
subgraph_edge_bools = ak.full(num_subgraph_edges, True, dtype=ak.bool)

# Create dataframes for subgraph attributes
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "rels1": subgraph_edge_ints,
    "rels2": subgraph_edge_bools
})

node_df_h = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "lbls2": subgraph_node_ints,
    "lbls3": subgraph_node_bools
})

# Create the subgraph with these attributes
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
sg.load_node_attributes(node_df_h, node_column="nodes")

# The attributes are constants, not random draws, so say so.
print("Subgraph created with fixed node and edge attributes.")


In [None]:
# First, find structural motifs.
# The structure-only subgraph gets its own names so that the attributed `sg`
# and `edge_df_h` built in the previous cell are not silently overwritten.
sg_structural = ar.PropGraph()
edge_df_h_structural = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
sg_structural.load_edge_attributes(
    edge_df_h_structural, source_column="src", destination_column="dst"
)

(isos_vertices, _, isos_src, isos_dst) = ar.subgraph_isomorphism(
    prop_graph, sg_structural, algorithm_type="ps", reorder_type=None, return_isos_as="complete"
)

# isos_vertices is flat: each match contributes one entry per subgraph vertex.
# num_nodes / num_edges / num_isos are read by the following cells.
num_nodes = len(sg_structural)
num_edges = sg_structural.size()
num_isos = len(isos_vertices) // num_nodes
print("Number of monos found =", num_isos)

In [None]:
# Get the number of isos to modify, capped at the number actually found so
# that sampling without replacement cannot fail on a sparse graph.
# num_to_modify = max(1, int(0.001 * num_isos))
num_to_modify = min(10, num_isos)

# Pick the indices for the isos to be data injected from isos as edges. This will also give us the vertices.
# A generator seeded with the notebook-wide `seed` makes the selection
# reproducible across "Restart & Run All".
rng = np.random.default_rng(seed)
selected_edges = rng.choice(num_isos, size=num_to_modify, replace=False)

print("num_to_modify =", num_to_modify)
# print("selected_edges =", selected_edges)

In [4]:
# Extract the edges.
# isos_src / isos_dst are flat: match number idx owns the half-open slice
# [idx * num_edges, (idx + 1) * num_edges). The upper bound must not be
# extended by one, otherwise the first edge of the neighbouring match leaks in.
edges = []
for idx in selected_edges:
    lo = idx * num_edges
    hi = lo + num_edges
    src = isos_src[lo:hi].to_list()
    dst = isos_dst[lo:hi].to_list()
    edges.extend(zip(src, dst))

# Matches may share edges; keep each (src, dst) pair once.
edge_set = set(edges)

# Two passes over the same unmodified set visit it in the same order, so
# position i of both lists describes the same edge.
src_to_inject = [e[0] for e in edge_set]
dst_to_inject = [e[1] for e in edge_set]

# print("src_to_inject =", src_to_inject)
# print("dst_to_inject =", dst_to_inject)

In [5]:
# Extract the vertices.
# Flatten every selected edge into its two endpoints (source first), then
# deduplicate before shipping the result to the server as an Arkouda array.
vertices = [endpoint for e in edge_set for endpoint in (e[0], e[1])]
vertices_set = ak.array(list(set(vertices)))
# print("vertices_set =", vertices_set)

In [None]:
# Look up where each (src, dst) pair to inject sits in the main graph's edge arrays.
graph_src, graph_dst = prop_graph.edges()
edges_to_locate = [ak.array(src_to_inject), ak.array(dst_to_inject)]
edge_idx = ak.find(edges_to_locate, [graph_src, graph_dst])
print("edge_idx =", edge_idx)

In [7]:
# Look up where each selected vertex sits in the main graph's vertex array.
graph_nodes = prop_graph.nodes()
vertices_idx = ak.find(vertices_set, graph_nodes)

In [None]:
# Inject the data at the right vertices and edges.
n, m = len(prop_graph), prop_graph.size()

# Background values: 1 / False / 1 everywhere ...
node_ints_new = ak.full(n, 1, dtype=ak.int64)
node_ints_new[vertices_idx] = 0  # ... and 0 on the selected vertices.

node_bools_new = ak.full(n, False, dtype=ak.bool)
node_bools_new[vertices_idx] = True  # ... True on the selected vertices.

edge_ints_new = ak.full(m, 1, dtype=ak.int64)
edge_ints_new[edge_idx] = 0  # ... and 0 on the selected edges.

# print(node_ints_new)
# print(node_bools_new)
# print(edge_ints_new)

In [9]:
# Assemble the injected attribute columns into edge and node tables.
edge_columns_new = {"src": graph_src, "dst": graph_dst, "rels1": edge_ints_new}
edge_df_new = ak.DataFrame(edge_columns_new)

node_columns_new = {
    "nodes": prop_graph.nodes(),
    "lbls2": node_ints_new,
    "lbls3": node_bools_new,
}
node_df_new = ak.DataFrame(node_columns_new)

# Build the property graph that carries the injected attributes.
prop_graph_new = ar.PropGraph()
prop_graph_new.load_edge_attributes(
    edge_df_new,
    source_column="src",
    destination_column="dst",
)
prop_graph_new.load_node_attributes(node_df_new, node_column="nodes")

In [None]:
# Query subgraph carrying the injected values (0 / True on vertices, 0 on edges).
# Column lengths follow the subgraph structure instead of a hard-coded 4, so
# they stay valid if src_list / dst_list are edited.
subgraph_node_ints_new = ak.full(len(subgraph_nodes), 0, dtype=ak.int64)
subgraph_node_bools_new = ak.full(len(subgraph_nodes), True, dtype=ak.bool)
subgraph_edge_ints_new = ak.full(len(src_list), 0, dtype=ak.int64)

sg_new = ar.PropGraph()
edge_df_h_new = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph, "rels1": subgraph_edge_ints_new})
node_df_h_new = ak.DataFrame({"nodes": ak.array(subgraph_nodes), "lbls2": subgraph_node_ints_new, "lbls3": subgraph_node_bools_new})

sg_new.load_edge_attributes(edge_df_h_new, source_column="src", destination_column="dst")
sg_new.load_node_attributes(node_df_h_new, node_column="nodes")

# NOTE(review): unlike the VF2 cells that follow, this call passes no
# semantic_check argument. Confirm that the library default performs the
# attribute matching expected here; otherwise the count printed below is not
# comparable with num_to_modify.
(isos_vertices, _, isos_src, isos_dst) = ar.subgraph_isomorphism(
    prop_graph_new, sg_new, algorithm_type="si", reorder_type="probability", return_isos_as="complete"
)

# isos_vertices is flat: each match contributes one entry per subgraph vertex.
num_nodes = len(sg_new)
num_edges = sg_new.size()
num_isos = len(isos_vertices) // num_nodes
print("Number of monos found =", num_isos)
print("num_to_modify =", num_to_modify)


In [None]:
"""VF2-SI """
# Search the injected graph: "si" algorithm, "structural" reordering, "and" semantic check.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph_new,
    sg_new,
    semantic_check="and",
    algorithm_type="si",
    reorder_type="structural",
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_new)} monos inside of the graph")

In [None]:
"""VF2-SI PROBABILITY-MVE"""
# Search the injected graph: "si" algorithm, "probability" reordering, "and" semantic check.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph_new,
    sg_new,
    semantic_check="and",
    algorithm_type="si",
    reorder_type="probability",
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_new)} monos inside of the graph")

In [None]:
"""VF2-PS DEFAULT"""
# Search the injected graph: "ps" algorithm, no reordering, "and" semantic check.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph_new,
    sg_new,
    semantic_check="and",
    algorithm_type="ps",
    reorder_type=None,
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_new)} monos inside of the graph")
#print(isos_as_vertices)

In [None]:
"""VF2-PS MVE-REORDERING"""
# Search the injected graph: "ps" algorithm, "structural" reordering, "and" semantic check.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph_new,
    sg_new,
    semantic_check="and",
    algorithm_type="ps",
    reorder_type="structural",
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_new)} monos inside of the graph")

In [None]:
"""VF2-PS PROBABILITY-MVE"""
# Search the injected graph: "ps" algorithm, "probability" reordering, "and" semantic check.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph_new,
    sg_new,
    semantic_check="and",
    algorithm_type="ps",
    reorder_type="probability",
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_new)} monos inside of the graph")

In [None]:
"""Correctness check with NetworkX """
# Get node and edge attributes from Arachne property graphs.
# The check runs on prop_graph_new / sg_new, i.e. the same pair the VF2 cells
# above searched. `sg` is rebound earlier in the notebook to a subgraph with no
# attributes loaded, so reading node attributes from it here would not
# validate the attributed searches.
subgraph_node_attributes = sg_new.get_node_attributes()
subgraph_edge_attributes = sg_new.get_edge_attributes()
graph_node_attributes = prop_graph_new.get_node_attributes()
graph_edge_attributes = prop_graph_new.get_edge_attributes()

# Create NetworkX subgraph.
subgraph_networkx = nx.from_pandas_edgelist(subgraph_edge_attributes.to_pandas(), source="src", target="dst", edge_attr=True, create_using=nx.DiGraph)
subgraph_node_attribute_dict = subgraph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
print(subgraph_node_attribute_dict)
nx.set_node_attributes(subgraph_networkx, subgraph_node_attribute_dict)

# Create NetworkX main graph.
graph_networkx = nx.from_pandas_edgelist(graph_edge_attributes.to_pandas(), source="src", target="dst", edge_attr=True, create_using=nx.DiGraph)
graph_node_attribute_dict = graph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
print(graph_node_attribute_dict)
nx.set_node_attributes(graph_networkx, graph_node_attribute_dict)

# Attribute matching functions that need to be used by the NetworkX DiGraphMatcher.
def node_matcher(u, v):
    """Return True when the two node attribute dicts are equal."""
    return u == v

def edge_matcher(e1, e2):
    """Return True when the two edge attribute dicts are equal."""
    return e1 == e2

# Find subgraph isomorphisms of H in G.
structural_matcher = nx.algorithms.isomorphism.DiGraphMatcher(graph_networkx, subgraph_networkx)
attribute_matcher = nx.algorithms.isomorphism.DiGraphMatcher(graph_networkx, subgraph_networkx, node_match=node_matcher, edge_match=edge_matcher)

# List of dicts. For each dict, keys is original graph vertex, values are subgraph vertices.
subgraph_isomorphisms_structural = list(structural_matcher.subgraph_monomorphisms_iter())
print("Structural monomorphisms found =", len(subgraph_isomorphisms_structural))

# Only the attributed search is timed.
start_time = time.time()
subgraph_isomorphisms_attributed = list(attribute_matcher.subgraph_monomorphisms_iter())
end_time = time.time()

elapsed_time = end_time - start_time
print("Attributed monomorphisms found =", len(subgraph_isomorphisms_attributed))
print(f"Time taken to find attributed monomorphisms: {elapsed_time:.2f} seconds")


In [None]:
"""VF2-SI No Label No Rel"""
# Structure-only query subgraph: just the edge list, no attribute columns.
sg_1 = ar.PropGraph()
edge_df_h1 = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
sg_1.load_edge_attributes(
    edge_df_h1,
    source_column="src",
    destination_column="dst",
)

# Search the original graph: "si" algorithm, "structural" reordering.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    sg_1,
    semantic_check="and",
    algorithm_type="si",
    reorder_type="structural",
    return_isos_as="vertices",
)

# Length of element 0 of the result divided by the subgraph's vertex count;
# true division, so the number prints as a float.
print(f"We found {len(isos_as_vertices[0])/len(sg_1)} monos inside of the graph")

In [None]:
"""VF2-PS MVE-REORDERING No Label No Rel"""
# Structure-only query subgraph: just the edge list, no attribute columns.
edge_df_h1 = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})

sg_1 = ar.PropGraph()
sg_1.load_edge_attributes(edge_df_h1, source_column="src", destination_column="dst")

# Search with the subgraph built in this cell. The earlier version passed `sg`,
# leaving sg_1 unused and making the result depend on whatever `sg` happened
# to be bound to at execution time.
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg_1,
                                           semantic_check="and", algorithm_type="ps",
                                           reorder_type="structural", return_isos_as="vertices")

# Normalise by the vertex count of the same subgraph that was searched for.
print(f"We found {len(isos_as_vertices[0])/len(sg_1)} monos inside of the graph")