In [1]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21+0.gcf6eeacde.dirty


In [2]:
# NOTE: The host below is specific to the environment this notebook was run in.
# Point it at the node running your Arkouda server; for a local server, call
# ak.connect() with no arguments instead.
ARKOUDA_SERVER = "n119"
ARKOUDA_PORT = 5555
ak.connect(ARKOUDA_SERVER, ARKOUDA_PORT)

connected to arkouda server tcp://*:5555


### Simple Graph Experiments

In [3]:
# Directed edge list of the host ("main") graph: edge i is src_main[i] -> dst_main[i].
src_main = [1, 2, 1, 3, 4, 2, 5, 4, 5, 6, 3, 8, 9, 4, 9, 10]
dst_main = [0, 1, 3, 4, 1, 4, 2, 5, 6, 7, 8, 9, 3, 9, 10, 4]
src_maingraph, dst_maingraph = ak.array(src_main), ak.array(dst_main)

# Every vertex id that appears as an edge endpoint, in ascending order.
maingraph_nodes = sorted(set(src_main) | set(dst_main))

# Vertex attribute columns: one entry per vertex, paired with maingraph_nodes in node_df.
labels1_maingraph = ak.array(["yellow", "red", "blue", "blue", "blue", "red", "blue","yellow", "blue", "yellow", "blue"])
labels2_maingraph = ak.array([10, 12, 10, 11, 11, 12, 11, 13, 10, 13, 10])
labels3_maingraph = ak.array([False, True, True, True, True, True, False, False, True, False, False])
labels4_maingraph = ak.Categorical(labels1_maingraph)  # same colors as labels1, stored as a Categorical

# Edge attribute column: one entry per edge, paired with src/dst in edge_df.
rels1_maingraph = ak.array([10, 5, 10, 10, 10, 10, 10, 10, 10, 5, 5, 10, 10, 5, 5, 10])

edge_df = ak.DataFrame({
    "src": src_maingraph,
    "dst": dst_maingraph,
    "value": rels1_maingraph,
})
node_df = ak.DataFrame({
    "nodes": ak.array(maingraph_nodes),
    "color": labels1_maingraph,
    "value": labels2_maingraph,
    "True?": labels3_maingraph,
    "cats": labels4_maingraph,
})

In [4]:
# Build the host property graph from the main-graph dataframes.
# NOTE: later cells rebind edge_df / node_df to pattern-graph data, so re-run the
# cell that defines the main-graph dataframes before re-running this one.
pg = ar.PropGraph()
pg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
)
pg.load_node_attributes(
    node_df,
    node_column="nodes",
)

In [5]:
# Directed edge list of the pattern graph: the cycle 0 -> 1 -> 2 -> 0 plus the edge 1 -> 3.
src_list = [0, 1, 2, 1]
dst_list = [1, 2, 0, 3]
src_subgraph, dst_subgraph = ak.array(src_list), ak.array(dst_list)

# Every pattern vertex id that appears as an edge endpoint, in ascending order.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

# Vertex attribute columns: one entry per pattern vertex.
labels1_subgraph = ak.array(["blue", "blue", "red", "yellow"])
labels2_subgraph = ak.array([10, 11, 12, 13])
labels3_subgraph = ak.array([True, True, True, False])
labels4_subgraph = ak.Categorical(labels1_subgraph)  # same colors as labels1, stored as a Categorical

# Edge attribute column: one entry per pattern edge.
rels1_subgraph = ak.array([10, 10, 10, 5])

In [6]:
"""TEST NO PICKER AND STRUCTURAL REORDERING ON SIMPLE GRAPH"""
# Pattern graph built from the edge list only, so no attribute columns are loaded.
edge_df = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="si",
    reorder=True, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with reordering")

We found 17.0 monos inside of the graph with reordering


In [7]:
"""TEST VERTEX PICKER AND VERTEX REORDERING ON SIMPLE GRAPH"""
# Pattern graph with vertex attribute columns only; the edge dataframe carries topology alone.
edge_df = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
node_df = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "color": labels1_subgraph,
    "value": labels2_subgraph,
    "True?": labels3_subgraph,
    "cats": labels4_subgraph,
})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)
sg.load_node_attributes(
    node_df,
    node_column="nodes",
    convert_string_labels_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="si",
    reorder=True, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with reordering")

We found 4.0 monos inside of the graph with reordering


In [8]:
"""TEST COMBINED (EDGE) PICKER AND COMBINED (EDGE) REORDERING ON SIMPLE GRAPH"""
# Pattern graph with both an edge attribute column and vertex attribute columns.
edge_df = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "value": rels1_subgraph,
})
node_df = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "color": labels1_subgraph,
    "value": labels2_subgraph,
    "True?": labels3_subgraph,
    "cats": labels4_subgraph,
})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)
sg.load_node_attributes(
    node_df,
    node_column="nodes",
    convert_string_labels_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="si",
    reorder=True, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with reordering")

We found 2.0 monos inside of the graph with reordering


In [9]:
"""VF2-PS NO ATTRIBUTES GROUND TRUTH"""
# Reference run: same topology-only pattern, searched with algorithm_type "ps" and no reordering.
edge_df = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="ps",
    reorder=False, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with VF2-PS")

We found 17.0 monos inside of the graph with VF2-PS


In [10]:
"""VF2-PS ONLY NODE ATTRIBUTES GROUND TRUTH"""
# Reference run: pattern with vertex attribute columns only, searched with
# algorithm_type "ps" and no reordering.
edge_df = ak.DataFrame({"src": src_subgraph, "dst": dst_subgraph})
node_df = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "color": labels1_subgraph,
    "value": labels2_subgraph,
    "True?": labels3_subgraph,
    "cats": labels4_subgraph,
})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)
sg.load_node_attributes(
    node_df,
    node_column="nodes",
    convert_string_labels_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="ps",
    reorder=False, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with VF2-PS")

We found 4.0 monos inside of the graph with VF2-PS


In [None]:
"""VF2-PS BOTH EDGE AND NODE ATTRIBUTES GROUND TRUTH"""
# Reference run: pattern with both edge and vertex attribute columns, searched with
# algorithm_type "ps" and no reordering.
edge_df = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "value": rels1_subgraph,
})
node_df = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "color": labels1_subgraph,
    "value": labels2_subgraph,
    "True?": labels3_subgraph,
    "cats": labels4_subgraph,
})

sg = ar.PropGraph()
sg.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    convert_string_relationships_to_categoricals=False,
)
sg.load_node_attributes(
    node_df,
    node_column="nodes",
    convert_string_labels_to_categoricals=False,
)

isos_as_vertices = ar.subgraph_isomorphism(
    pg, sg,
    semantic_check="or", algorithm_type="ps",
    reorder=False, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(sg) host
# vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(sg)
print(f"We found {num_monos} monos inside of the graph with VF2-PS")

### Experiments from Mohammad

In [None]:
def compute_degrees(src, dst):
    """Compute in-, out- and total degree for every node of a directed edge list.

    Parameters
    ----------
    src, dst
        Arkouda arrays of equal length; edge i goes from src[i] to dst[i].

    Returns
    -------
    tuple
        (unique_nodes_list, node_to_index, in_degree, out_degree, total_degree)
        where unique_nodes_list holds the distinct endpoint ids as returned by
        ak.unique, node_to_index maps each id to its position in that list, and
        the three degree lists are plain Python lists aligned with it.
    """
    # Distinct endpoint ids are still computed by Arkouda.
    unique_nodes = ak.unique(ak.concatenate([src, dst]))
    unique_nodes_list = unique_nodes.to_list()

    # Map each node id to its slot in the degree lists.
    node_to_index = {node: idx for idx, node in enumerate(unique_nodes_list)}

    # Count in plain Python lists. Updating an Arkouda array one element at a
    # time inside a Python loop goes through the Arkouda client for every edge;
    # the values are pulled to the client below anyway, so the counts are identical.
    in_degree = [0] * len(unique_nodes_list)
    out_degree = [0] * len(unique_nodes_list)

    for node in src.to_list():
        out_degree[node_to_index[node]] += 1

    for node in dst.to_list():
        in_degree[node_to_index[node]] += 1

    total_degree = [ins + outs for ins, outs in zip(in_degree, out_degree)]

    return unique_nodes_list, node_to_index, in_degree, out_degree, total_degree

def update_degrees(src_temp, dst_temp, unique_nodes_list):
    """Recount node degrees from plain Python edge lists.

    Edge i goes from src_temp[i] to dst_temp[i]. Every endpoint must appear in
    unique_nodes_list; an unknown endpoint raises KeyError.

    Returns (in_degree, out_degree, total_degree), three lists aligned with
    unique_nodes_list.
    """
    node_count = len(unique_nodes_list)
    slot_of = dict(zip(unique_nodes_list, range(node_count)))

    in_degree = [0] * node_count
    out_degree = [0] * node_count

    # Walk the edges by position so src and dst endpoints stay paired.
    for edge_pos, tail in enumerate(src_temp):
        out_degree[slot_of[tail]] += 1
        in_degree[slot_of[dst_temp[edge_pos]]] += 1

    total_degree = [ins + outs for ins, outs in zip(in_degree, out_degree)]

    return in_degree, out_degree, total_degree

def _swap_node_labels(src_temp, dst_temp, node_a, node_b):
    """Exchange the labels node_a and node_b everywhere in both endpoint lists, in place."""
    relabel = {node_a: node_b, node_b: node_a}
    for endpoints in (src_temp, dst_temp):
        for pos, node in enumerate(endpoints):
            endpoints[pos] = relabel.get(node, node)


def SubgraphMatchingOrder(src, dst):
    """Relabel the vertices of a directed edge list into a greedy matching order.

    The vertex chosen at step k is swapped with the label unique_nodes_list[k],
    so after the run the labels themselves, read in ascending position, give the
    order. Selection rule:

    1. Start with the vertex of highest total degree (ties broken by higher
       out-degree).
    2. Then repeatedly take the highest-degree, not-yet-placed out-neighbor of
       the most recently placed label.
    3. If there is none, fall back to the highest-degree not-yet-placed vertex.

    Degrees are recomputed from the relabeled edge lists on every iteration.
    Progress is printed to stdout at every step.

    Parameters
    ----------
    src, dst
        Arkouda arrays of equal length; edge i goes from src[i] to dst[i].

    Returns
    -------
    tuple
        (updated_src, updated_dst, unique_nodes_list, replaced_nodes): the
        relabeled edge endpoints as Arkouda arrays, the distinct vertex ids from
        compute_degrees, and the list of labels assigned so far (one per step).
    """
    # Work on client-side copies of the edge lists.
    src_temp = src.to_list()
    dst_temp = dst.to_list()

    unique_nodes_list, node_to_index, in_degree, out_degree, total_degree = compute_degrees(src, dst)

    print("initial src_temp  = ", src_temp)
    print("initial dst_temp  = ", dst_temp)
    print("unique_nodes_list = ", unique_nodes_list)
    print("in_degree         = ", in_degree)
    print("out_degree        = ", out_degree)
    print("total_degree      = ", total_degree)

    # Step 1: rank every vertex by total degree, breaking ties with out-degree.
    candidates = [(unique_nodes_list[i], total_degree[i], out_degree[i]) for i in range(len(unique_nodes_list))]
    candidates.sort(key=lambda x: (-x[1], -x[2]))
    replaced_nodes = []  # Labels already assigned, in assignment order.

    print("candidates = ", candidates)

    print("\nSelecting and remapping the first given node...")
    if candidates:
        selected_node = candidates[0][0]
        sorted_index = 0
        print(f"Initially selected node {selected_node} was given sorted index {sorted_index}")

        # Step 2: give the selected vertex the label at position sorted_index.
        _swap_node_labels(src_temp, dst_temp, selected_node, unique_nodes_list[sorted_index])
        replaced_nodes.append(unique_nodes_list[sorted_index])

        print("replaced_nodes   = ", replaced_nodes)
        print("updated src_temp = ", src_temp)
        print("updated dst_temp = ", dst_temp)

    print("\nFirst node remapping finished, while loop begins...")
    # Exactly one vertex is placed per iteration, so the loop runs once per remaining vertex.
    while len(replaced_nodes) < len(unique_nodes_list):
        current_node = replaced_nodes[-1]

        # Labels moved in the previous step, so degrees per label must be recounted.
        in_degree, out_degree, total_degree = update_degrees(src_temp, dst_temp, unique_nodes_list)

        # Unplaced out-neighbors of the most recently placed label, best first.
        out_neighbors = [dst_temp[i] for i in range(len(src_temp)) if src_temp[i] == current_node and dst_temp[i] not in replaced_nodes]
        out_neighbors.sort(key=lambda x: (-total_degree[node_to_index[x]], -out_degree[node_to_index[x]]))

        print(f"\nChecking node {current_node} with out-neighbors {out_neighbors}...")
        print("unique_nodes_list = ", unique_nodes_list)
        print("in_degree         = ", in_degree)
        print("out_degree        = ", out_degree)
        print("total_degree      = ", total_degree)

        sorted_index = len(replaced_nodes)
        if out_neighbors:
            chosen_node = out_neighbors[0]
            announcement = f"Next selected node {chosen_node} was given sorted index {sorted_index}"
        else:
            # No unplaced out-neighbor: fall back to the best remaining vertex.
            # The loop condition guarantees at least one label is still unplaced,
            # so this list is never empty.
            remaining_candidates = [(unique_nodes_list[i], total_degree[i], out_degree[i]) for i in range(len(unique_nodes_list)) if unique_nodes_list[i] not in replaced_nodes]
            remaining_candidates.sort(key=lambda x: (-x[1], -x[2]))
            chosen_node = remaining_candidates[0][0]
            announcement = f"Next selected node (no out-neighbors) {chosen_node} was given sorted index {sorted_index}"

        _swap_node_labels(src_temp, dst_temp, chosen_node, unique_nodes_list[sorted_index])
        replaced_nodes.append(unique_nodes_list[sorted_index])

        print(announcement)
        print("replaced_nodes   = ", replaced_nodes)
        print("updated src_temp = ", src_temp)
        print("updated dst_temp = ", dst_temp)

    # Hand the relabeled edge lists back as Arkouda arrays.
    updated_src = ak.array(src_temp)
    updated_dst = ak.array(dst_temp)

    return updated_src, updated_dst, unique_nodes_list, replaced_nodes

# Pattern edge list: the cycle 0 -> 1 -> 2 -> 0 plus the edge 1 -> 3.
src_list, dst_list = [0, 1, 2, 1], [1, 2, 0, 3]

# Kept as the cell's final expression so the notebook displays the returned tuple.
SubgraphMatchingOrder(src=ak.array(src_list), dst=ak.array(dst_list))

In [None]:
import arkouda as ak
import arachne as ar
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import os
import time

# NOTE: this host differs from the one used in the first connect cell of the notebook;
# change it to the node running your Arkouda server.
ak.connect("n114", 5555)

# Arguments for ar.gnp: requested vertex count and p.
# NOTE(review): p is presumably the per-edge probability — confirm against Arachne docs.
num_nodes = 100_000
p = 0.0005

# Wall-clock timing of graph construction only.
start = time.time()
temp_prop_graph = ar.gnp(num_nodes, p, create_using=ar.PropGraph)
end = time.time()
build_time = end - start

built_vertices = len(temp_prop_graph)
built_edges = temp_prop_graph.size()
print(
    f"Building property graph with {built_vertices} vertices and "
    f"{built_edges} "
    f"edges took {round(build_time,2)} seconds."
)

### Generate node labels and edge relationships for the graph.
# 1. Extract node and edge information from the generated graph. num_nodes is
#    re-read from the graph rather than reusing the value passed to ar.gnp.
edges = temp_prop_graph.edges()
nodes = temp_prop_graph.nodes()
num_edges = temp_prop_graph.size()
num_nodes = len(temp_prop_graph)

##############################################
# 2. Random node and edge attributes
##############################################
# ak.randint is used with an exclusive upper bound throughout: randint(0, 2, ...)
# below indexes two-element arrays.

# 2.1. Node color: one of ["blue", "red"].
colors = ak.array(["blue", "red"])
color_picks = ak.randint(0, len(colors), num_nodes)
node_colors = colors[color_picks]

# 2.2. Node integer: 10 or 11 (the upper bound 12 is excluded).
node_ints = ak.randint(10, 12, num_nodes)

# 2.3. Node boolean: one of [False, True].
bool_choices = ak.array([False, True])
bool_picks = ak.randint(0, 2, num_nodes)
node_bools = bool_choices[bool_picks]

# 2.4. Edge value: one of [5, 10].
edge_choices = ak.array([5, 10])
edge_picks = ak.randint(0, 2, num_edges)
edge_vals = edge_choices[edge_picks]

##############################################
# 3. Dataframes carrying the new attributes
##############################################
edge_df = ak.DataFrame({"src": edges[0], "dst": edges[1], "edge_attr": edge_vals})

node_df = ak.DataFrame({
    "nodes": nodes,
    "color": node_colors,
    "int_attr": node_ints,
    "bool_attr": node_bools,
})

##############################################
# 4. Property graph built from those dataframes
##############################################
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(
    edge_df,
    source_column="src",
    destination_column="dst",
    relationship_columns=["edge_attr"],
)
prop_graph.load_node_attributes(
    node_df,
    node_column="nodes",
    label_columns=["color", "int_attr", "bool_attr"],
)

print("Property graph created with random node and edge attributes.")



### Create the subgraph we are searching for.
# 1. Topology: the cycle 0 -> 1 -> 2 -> 0 plus the edge 1 -> 3.
src_list = [0, 1, 2, 1]
dst_list = [1, 2, 0, 3]

src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Every pattern vertex id that appears as an edge endpoint, in ascending order.
subgraph_nodes = sorted(set(src_list) | set(dst_list))

# 2. Attribute values to search for: one entry per pattern vertex / per pattern edge.
#    The values are restricted to those generated for prop_graph
#    (colors blue/red, integers 10/11, booleans, edge values 5/10).
labels1_subgraph = ak.array(["blue", "blue", "red", "red"])
labels2_subgraph = ak.array([10, 11, 11, 10])
labels3_subgraph = ak.array([True, True, True, False])

rels1_subgraph = ak.array([10, 10, 10, 5])

subgraph = ar.PropGraph()

# 3. Dataframes. The attribute columns use the SAME names as prop_graph's
#    ("edge_attr", "color", "int_attr", "bool_attr"), as every simple-graph
#    experiment in this notebook does for its host/pattern pair. The previous
#    names ("rels1", "lbls1", "lbls2", "lbls3") had no counterpart in prop_graph.
#    NOTE(review): assumes ar.subgraph_isomorphism pairs pattern attributes with
#    host attributes by column name — confirm against the Arachne documentation.
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "edge_attr": rels1_subgraph
})

node_df_h = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "color": labels1_subgraph,
    "int_attr": labels2_subgraph,
    "bool_attr": labels3_subgraph
})

# 4. Load the attributes into the subgraph, mirroring how prop_graph was loaded.
subgraph.load_edge_attributes(edge_df_h,
                              source_column="src",
                              destination_column="dst",
                              relationship_columns=["edge_attr"]
                            )

subgraph.load_node_attributes(node_df_h,
                              node_column="nodes",
                              label_columns=["color", "int_attr", "bool_attr"])

print("Subgraph created with node and edge attributes successfully.")

In [None]:
# Attributed search on the random graph: algorithm_type "si", reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph, subgraph,
    semantic_check="and", algorithm_type="si",
    reorder=True, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(subgraph)
# host vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {num_monos} monos inside of the graph")

In [None]:
# Attributed search on the random graph: algorithm_type "si", reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph, subgraph,
    semantic_check="and", algorithm_type="si",
    reorder=False, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(subgraph)
# host vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {num_monos} monos inside of the graph")

In [None]:
# Attributed search on the random graph: algorithm_type "ps", reordering enabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph, subgraph,
    semantic_check="and", algorithm_type="ps",
    reorder=True, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(subgraph)
# host vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {num_monos} monos inside of the graph")

In [None]:
# Attributed search on the random graph: algorithm_type "ps", reordering disabled.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph, subgraph,
    semantic_check="and", algorithm_type="ps",
    reorder=False, return_isos_as="vertices",
)

# NOTE(review): assumes isos_as_vertices[0] is a flat array holding len(subgraph)
# host vertices per match, so the quotient is the number of matches — confirm.
num_monos = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {num_monos} monos inside of the graph")