In [1]:
import arkouda as ak
import arachne as ar
import networkx as nx
import numpy as np
import time

ak.connect("n116", 5555)
p = 0.0005
num_nodes = 50_000
seed = 42

# Define probabilities for labels and relationships
node_lbl_probs = {"lbls2": [0.6, 0.4],  # Probabilities for integers 10 and 11
                  "lbls3": [0.5, 0.5]}  # Probabilities for True and False
edge_rel_probs = {"rels1": [0.7, 0.3]}  # Probabilities for integers 10 and 11


start = time.time()
temp_prop_graph = ar.gnp(num_nodes, p, create_using=ar.PropGraph, seed=seed)
end = time.time()
build_time = end - start

print(f"Building property graph with {len(temp_prop_graph)} vertices and "
      f"{temp_prop_graph.size()} "
      f"edges took {round(build_time,2)} seconds.")

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information.
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()

##############################################
# Generate random node and edge attributes for the main graph
##############################################

node_ints = ak.randint(10, 12, num_nodes, seed=seed)
node_ints = ak.where(ak.randint(0, 100, num_nodes, seed=seed) < node_lbl_probs["lbls2"][0] * 100, 10, 11)

node_bools = ak.randint(0, 100, num_nodes, seed=seed) < node_lbl_probs["lbls3"][0] * 100

# For edges
edge_ints = ak.where(ak.randint(0, 100, num_edges, seed=seed) < edge_rel_probs["rels1"][0] * 100, 10, 11)

# Create dataframes with the new attributes
edge_df = ak.DataFrame({
    "src": edges[0],
    "dst": edges[1],
    "rels1": edge_ints
})

node_df = ak.DataFrame({
    "nodes": nodes,
    "lbls2": node_ints,
    "lbls3": node_bools
})

# Create the new property graph with these attributes
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
prop_graph.load_node_attributes(node_df, node_column="nodes")
print("Property graph created with random node and edge attributes.")

##############################################
# Create the subgraph and assign random attributes
##############################################

# Subgraph structure
src_list = [2, 3, 1, 3]
dst_list = [3, 1, 2, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

subgraph_nodes = list(set(src_list) | set(dst_list))
subgraph_nodes.sort()

# Generate random node and edge attributes for the subgraph
num_subgraph_nodes = len(subgraph_nodes)
num_subgraph_edges = len(src_list)

# Subgraph attributes
subgraph_node_ints = ak.array([10, 10, 10, 10])
subgraph_node_bools = ak.array([True, True, True, True])
subgraph_edge_ints = ak.array([5, 5, 5, 5])
subgraph_edge_bools = ak.array([True, True, True, True])

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21+0.gcf6eeacde.dirty


connected to arkouda server tcp://*:5555


Building property graph with 50000 vertices and 1250667 edges took 7.49 seconds.
Property graph created with random node and edge attributes.


In [37]:
# First, find structural motifs.
sg = ar.PropGraph()
edge_df_h = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")

(isos_vertices,_,isos_src,isos_dst) = ar.subgraph_isomorphism(
    prop_graph, sg, algorithm_type="ps", reorder_type = None, return_isos_as="complete"
)

num_nodes = len(sg)
num_edges = sg.size()
num_isos = len(isos_vertices) // num_nodes
print("Number of monos found =", num_isos)
print(isos_src.size == isos_dst.size)

Number of monos found = 388559
True


In [38]:
print(isos_src.size / sg.size())

388559.0


In [394]:
# Get the number of isos to modify.
num_to_modify = 100

# Pick the indices for the isos to be data injected from isos as edges. This will also give us the vertices.
selected_edges = np.random.choice(range(0, num_isos), size=num_to_modify, replace=False)

print("num_to_modify =", num_to_modify)
# print("selected_edges =", selected_edges)

print(np.unique(selected_edges > isos_src.size))
print(np.unique(selected_edges > isos_dst.size))
print(len(selected_edges))

# Extract the edges.
edges = []
for idx in selected_edges:
    start = idx * num_edges
    end = start + num_edges
    for i in range(start, end):
        edges.append(i)

src_to_inject = isos_src[ak.array(edges)]
dst_to_inject = isos_dst[ak.array(edges)]

print(len(src_to_inject) == len(dst_to_inject))
print(len(src_to_inject) / num_edges)

# Extract the vertices.
vertices = ak.concatenate([src_to_inject, dst_to_inject])
vertices_set = ak.GroupBy(vertices).unique_keys

(graph_src, graph_dst) = prop_graph.edges()
edge_idx = ak.find([ak.array(src_to_inject),ak.array(dst_to_inject)],[graph_src,graph_dst])
print("edge_idx =", edge_idx)

print(src_to_inject.to_list())
print(dst_to_inject.to_list())
edge_idx_bool = edge_idx == -1
print(src_to_inject[edge_idx_bool].to_list())
print(dst_to_inject[edge_idx_bool].to_list())
print(len(src_to_inject[edge_idx_bool]))
print(len(dst_to_inject[edge_idx_bool]))

src_to_inject_list = src_to_inject.to_list()
dst_to_inject_list = dst_to_inject.to_list()

missing_src = src_to_inject[edge_idx_bool].to_list()
missing_dst = dst_to_inject[edge_idx_bool].to_list()

missing_tup = list(zip(missing_src, missing_dst))

count = 0
for (u,v) in zip(src_to_inject_list, dst_to_inject_list):
    count += 1
    print(f"{u} --> {v}         {(u,v) in missing_tup}")
    if count % 4 == 0:
        print()

print(count / 4)

num_to_modify = 100
[False]
[False]
100
True
100.0
edge_idx = [576614 625643 19332 ... 529177 780145 780152]
[23095, 25054, 779, 779, 35114, 14570, 33288, 33288, 10743, 14403, 27772, 27772, 36734, 24298, 25230, 25230, 29271, 21712, 37159, 37159, 12610, 26488, 24894, 24894, 21660, 21545, 16128, 16128, 44781, 28250, 19854, 19854, 33902, 18113, 9340, 9340, 16067, 29620, 23281, 23281, 40416, 21683, 30550, 30550, 43612, 26072, 26075, 26075, 10196, 17644, 10368, 10368, 14927, 19474, 44312, 44312, 30234, 20829, 9684, 9684, 29829, 25300, 29057, 29057, 45302, 19668, 24998, 24998, 20703, 21765, 11478, 11478, 35675, 48666, 22102, 22102, 11679, 49910, 1157, 1157, 32873, 5600, 20882, 20882, 17389, 45415, 47090, 47090, 26612, 39365, 20652, 20652, 42877, 41723, 46119, 46119, 20338, 2823, 45545, 45545, 16262, 22113, 12016, 12016, 32680, 19439, 17416, 17416, 5405, 49882, 44361, 44361, 26633, 29181, 44460, 44460, 2353, 7131, 42798, 42798, 10746, 14248, 41610, 41610, 5934, 17551, 45323, 45323, 14138, 466

In [44]:
vertices_idx = ak.find(vertices_set, prop_graph.nodes())

In [45]:
vertices_idx_bool = vertices_idx == -1
print(vertices_set[vertices_idx_bool])

[]


In [30]:
# Inject the data at the right vertices and edges.
n = len(prop_graph)
m = prop_graph.size()

node_ints_new = ak.full(n, 1, dtype=ak.int64)
node_bools_new = ak.full(n, False, dtype=ak.bool)
edge_ints_new = ak.full(m, 1, dtype=ak.int64)

node_ints_new[vertices_idx] = 0
node_bools_new[vertices_idx] = True
edge_ints_new[edge_idx] = 0

print(node_ints_new)
print(node_bools_new)
print(edge_ints_new)

[1 1 1 ... 1 1 1]
[False False False ... False False False]
[1 1 1 ... 1 1 1]


In [31]:
edge_df_new = ak.DataFrame({
    "src": graph_src,
    "dst": graph_dst,
    "rels1": edge_ints_new
})

node_df_new = ak.DataFrame({
    "nodes": prop_graph.nodes(),
    "lbls2": node_ints_new,
    "lbls3": node_bools_new
})

# Create the new property graph with these attributes
prop_graph_new = ar.PropGraph()
prop_graph_new.load_edge_attributes(edge_df_new, source_column="src", destination_column="dst")
prop_graph_new.load_node_attributes(node_df_new, node_column="nodes")

In [32]:
subgraph_node_ints_new =ak.full(len(sg), 0, dtype=ak.int64)
subgraph_node_bools_new = ak.full(len(sg), True, dtype=ak.bool)
subgraph_edge_ints_new = ak.full(sg.size(), 0, dtype=ak.int64)

sg_new = ar.PropGraph()
edge_df_h_new = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph, "rels1": subgraph_edge_ints_new})
node_df_h_new = ak.DataFrame({"nodes": ak.array(subgraph_nodes), "lbls2": subgraph_node_ints_new, "lbls3": subgraph_node_bools_new})

sg_new.load_edge_attributes(edge_df_h_new, source_column = "src", destination_column = "dst")
sg_new.load_node_attributes(node_df_h_new, node_column = "nodes")

(isos_vertices,_,isos_src,isos_dst) = ar.subgraph_isomorphism(
    prop_graph_new, sg_new, algorithm_type = "si", reorder_type = "probability", return_isos_as="complete"
)

num_nodes = len(sg_new)
num_edges = sg_new.size()
num_isos = len(isos_vertices) // num_nodes
print("Number of monos found =", num_isos)

Number of monos found = 38
