In [13]:
import arkouda as ak
import arachne as ar
import pandas as pd
import numpy as np
import networkx as nx
import time
import json

In [14]:
# These are just wedges, we do not need subgraph isomorphism for these.
src0 = [    1,     1]
dst0 = [10002, 10003]
connection_type0 = [0, 0]

src1 = [    1,     1, 10003, 10003]
dst1 = [10002, 10003, 30004, 30005]
connection_type1 = [0, 0, 0, 0]

src2 = [    1,     1, 40005]
dst2 = [10002, 10003,     1]
connection_type2 = [0, 0, 1]

src3 = [    1,     1, 40005, 40005, 50008]
dst3 = [10002, 10003, 50008,     1, 10003]
connection_type3 = [0, 0, 0, 1, 1]

src4 = [1, 1, 10003, 10003, 60007]
dst4 = [10002, 10003, 30004, 30005, 1]
connection_type4 = [0, 0, 0, 0, 1]

src5 = [1, 1, 10003, 10003, 60007, 60007, 70010]
dst5 = [10002, 10003, 30004, 30005, 70010, 1, 30005]
connection_type5 = [0, 0, 0, 0, 0, 1, 1]

src6 = [1, 1, 40005, 40005, 80009, 80009, 10003, 90010]
dst6 = [10002, 10003, 50006, 50007, 90010, 90011, 50006, 50007]
connection_type6 = [0, 0, 0, 0, 0, 0, 1, 1]

src7 = [1, 10002, 40005, 60007, 80009]
dst7 = [10002, 20003, 1, 10002, 20003]
connection_type7 = [0, 0, 1, 1, 1]

src00 = [1, 1, 1, 1, 10002]
dst00 = [10002, 10003, 10004, 50006, 70008]
connection_type00 = [0, 0, 0, 1, 1]

src01 = [1, 10002, 20003, 30004, 1, 40005]
dst01 = [10002, 20003, 30004, 40005, 60007, 80009]
connection_type01 = [0, 0, 0, 0, 1, 1]

src02 = [1, 30004, 60007, 90010, 110012, 130014]
dst02 = [10002, 40005, 70008, 1, 30004, 60007]
connection_type02 = [0, 0, 0, 1, 1, 1]

src03 = [1, 1, 1, 50006, 50006, 50006, 120013, 120013, 120013, 10003, 10004]
dst03 = [10002, 10003, 10004, 60007, 60008, 60009, 130014, 130015, 130016, 60007, 130015]
connection_type03 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

raw_subgraph_data = {
    "0": (src0, dst0, connection_type0),
    "3": (src3, dst3, connection_type3),
    "2": (src2, dst2, connection_type2),
    "7": (src7, dst7, connection_type7),
    "5": (src5, dst5, connection_type5),
    # "4": (src4, dst4, connection_type4),
    # "1": (src1, dst1, connection_type1),
    # "6": (src6, dst6, connection_type6),
    "00": (src00, dst00, connection_type00),
    "01": (src01, dst01, connection_type01),
    "03": (src03, dst03, connection_type03),
    # "02": (src02, dst02, connection_type02)
}

In [15]:
# NOTE: Make sure to change the server name to whatever is applicable in your environment. If running locally, then use only ak.connect().
ak.connect("n81", 5555)

connected to arkouda server tcp://*:5555


In [16]:
# Read in the dataset with pandas.
df = pd.read_csv("/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv")
transformed_dataset = ak.DataFrame(df.to_dict(orient='list'))

  df = pd.read_csv("/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv")


In [17]:
# Change to string data to integers.
transformed_dataset["connection_type"] = ak.where(transformed_dataset["connection_type"] == "n", 0, 1)

In [18]:
# Keep only the edge information and "connection_type" attribute.
reduced_dataset = transformed_dataset["src", "dst", "connection_type"]
reduced_dataset

Unnamed: 0,src,dst,connection_type
0,328535863013441,285718156140881,0
1,328535863013441,361349432766191,0
2,340540827557177,328535863013441,0
3,340540827557177,357747943403111,0
4,354946785009842,340540827557177,0
...,...,...,...
1187303,914176572848441,59545684001365,1
1187304,914176572848441,59545684001365,1
1187305,1123552497597685,174465061404730,1
1187306,1094800169903761,167069259889183,1


In [19]:
# Create property graph.
graph = ar.PropGraph()
graph.load_edge_attributes(reduced_dataset, source_column="src", destination_column="dst")
print(f"Graph has {len(graph):_} vertices and {graph.size():_} edges.")

Graph has 667_711 vertices and 962_796 edges.


In [20]:
def vf2_si(g, h):
    isos_as_vertices = ar.subgraph_isomorphism(g, h, 
                                            semantic_check = "and", algorithm_type = "si",
                                            reorder_type = "structural", return_isos_as = "vertices")
    print(f"We found {len(isos_as_vertices[0])/len(h)} monos inside of the graph")

In [21]:
def vf2_si_probability_reordering(g,h):
    isos_as_vertices = ar.subgraph_isomorphism(g, h, 
                                            semantic_check = "and", algorithm_type = "si",
                                            reorder_type = "probability", return_isos_as = "vertices")
    print(f"We found {len(isos_as_vertices[0])/len(h)} monos inside of the graph")

In [22]:
def vf2_ps(g,h):
    isos_as_vertices = ar.subgraph_isomorphism(g, h, 
                                            semantic_check = "and", algorithm_type = "ps", 
                                            reorder_type = None, return_isos_as = "vertices")
    print(f"We found {len(isos_as_vertices[0])/len(h)} monos inside of the graph")

In [23]:
def vf2_ps_structural_reordering(g,h):
    isos_as_vertices = ar.subgraph_isomorphism(g, h, 
                                            semantic_check = "and", algorithm_type = "ps", 
                                            reorder_type = "structural", return_isos_as = "vertices")
    print(f"We found {len(isos_as_vertices[0])/len(h)} monos inside of the graph")

In [24]:
for key,value in raw_subgraph_data.items():
    print(f"Building subgraph{key}...")
    subgraph_dict = {
        "src": value[0],
        "dst": value[1],
        "connection_type": value[2]
    }
    
    subgraph = ar.PropGraph()
    df = ak.DataFrame(subgraph_dict)
    subgraph.load_edge_attributes(df, source_column="src", destination_column="dst")

    # print(f"Running VF2-SI on subgraph{key}...")
    start_time = time.time()
    vf2_si(graph, subgraph)
    end_time = time.time()
    print(f"Time taken for VF2-SI on subgraph{key}: {end_time - start_time:.4f} seconds\n")
    # print()

    # print(f"Running VF2-SI with probability reordering on subgraph{key}...")
    # start_time = time.time()
    # vf2_si_probability_reordering(graph, subgraph)
    # end_time = time.time()
    # print(f"Time taken for VF2-SI (probability reordering) on subgraph{key}: {end_time - start_time:.4f} seconds\n")
    # print()

    # print(f"Running VF2-PS on subgraph{key}...")
    # start_time = time.time()
    # vf2_ps(graph, subgraph)
    # end_time = time.time()
    # print(f"Time taken for VF2-PS on subgraph{key}: {end_time - start_time:.4f} seconds\n")
    # print()

    # print(f"Running VF2-PS with structural reordering on subgraph{key}...")
    # start_time = time.time()
    # vf2_ps_structural_reordering(graph, subgraph)
    # end_time = time.time()
    # print(f"Time taken for VF2-PS (structural reordering) on subgraph{key}: {end_time - start_time:.4f} seconds\n")
    # print()
    
    # Get node and edge attributes from Arachne property graphs.
    subgraph_node_attributes = subgraph.get_node_attributes()
    subgraph_edge_attributes = subgraph.get_edge_attributes()
    graph_node_attributes = graph.get_node_attributes()
    graph_edge_attributes = graph.get_edge_attributes()

    # Check if attributes are empty and handle accordingly.
    if subgraph_edge_attributes.size == 0:
        print("Subgraph edge attributes are empty.")
    else:
        print("Subgraph edge attributes:")
        print(subgraph_edge_attributes.to_pandas().head())

    if subgraph_node_attributes.size == 0:
        print("Subgraph node attributes are empty.")
    else:
        print("Subgraph node attributes:")
        print(subgraph_node_attributes.to_pandas().head())

    if graph_edge_attributes.size == 0:
        print("Main graph edge attributes are empty.")
    else:
        print("Main graph edge attributes:")
        print(graph_edge_attributes.to_pandas().head())

    if graph_node_attributes.size == 0:
        print("Main graph node attributes are empty.")
    else:
        print("Main graph node attributes:")
        print(graph_node_attributes.to_pandas().head())

    # Create NetworkX subgraph.
    subgraph_networkx = nx.from_pandas_edgelist(
        subgraph_edge_attributes.to_pandas(), 
        source="src", 
        target="dst", 
        edge_attr=True, 
        create_using=nx.DiGraph
    )

    if subgraph_node_attributes.size > 0:
        subgraph_node_attribute_dict = subgraph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
        nx.set_node_attributes(subgraph_networkx, subgraph_node_attribute_dict)

    # Create NetworkX main graph.
    graph_networkx = nx.from_pandas_edgelist(
        graph_edge_attributes.to_pandas(), 
        source="src", 
        target="dst", 
        edge_attr=True, 
        create_using=nx.DiGraph
    )

    if graph_node_attributes.size > 0:
        graph_node_attribute_dict = graph_node_attributes.to_pandas().set_index('nodes').to_dict('index')
        nx.set_node_attributes(graph_networkx, graph_node_attribute_dict)

    # Attribute matching functions that need to be used by the NetworkX DiGraphMatcher.
    def node_matcher(u, v):
        return u == v

    def edge_matcher(e1, e2):
        return e1 == e2

    print("Networkx running")
    # Perform structural subgraph isomorphism.
    structural_matcher = nx.algorithms.isomorphism.DiGraphMatcher(graph_networkx, subgraph_networkx)
    subgraph_isomorphisms_structural = list(structural_matcher.subgraph_monomorphisms_iter())
    print("Structural monomorphisms found =", len(subgraph_isomorphisms_structural))

    # # Perform attributed subgraph isomorphism.
    # start_time = time.time()
    # attribute_matcher = nx.algorithms.isomorphism.DiGraphMatcher(
    #     graph_networkx, subgraph_networkx, 
    #     node_match=node_matcher, 
    #     edge_match=edge_matcher
    # )
    # subgraph_isomorphisms_attributed = list(attribute_matcher.subgraph_monomorphisms_iter())
    # end_time = time.time()

    # print("Attributed monomorphisms found =", len(subgraph_isomorphisms_attributed))
    # print(f"Time taken to find attributed monomorphisms: {end_time - start_time:.2f} seconds")
    

Building subgraph0...
We found 696460.0 monos inside of the graph
Time taken for VF2-SI on subgraph0: 3.2901 seconds

Subgraph edge attributes:
   src    dst  connection_type
0    1  10002                0
1    1  10003                0
Subgraph node attributes are empty.
Main graph edge attributes:
This transfer will use 22 MB .
          src              dst  connection_type
0  4721570023      29419008432                0
1  4721570023      34564308247                0
2  4721570023   62325779916524                1
3  4721570023   99268253475632                1
4  4721570023  308599174432198                1
Main graph node attributes are empty.
This transfer will use 22 MB .
Networkx running


In [None]:
print("Graph node attributes:")
print(graph_node_attributes)

print("Graph node attributes (Pandas DataFrame):")
print(graph_node_attributes.to_pandas().head())
print("Columns:", graph_node_attributes.to_pandas().columns)


In [None]:
def save_to_lad(file_name, src, dst, edge_labels=None):
    """
    Save a directed graph or subgraph to LAD format.
    :param file_name: The output file name.
    :param src: List of source nodes.
    :param dst: List of destination nodes.
    :param edge_labels: List of edge labels (optional).
    """
    # Map original node IDs to sequential LAD node IDs
    unique_nodes = sorted(set(src + dst))
    node_map = {node: idx for idx, node in enumerate(unique_nodes)}  # Map nodes to indices

    # Prepare adjacency list with labels
    adjacency_list = {node: [] for node in unique_nodes}
    for i in range(len(src)):
        edge = (node_map[dst[i]], edge_labels[i]) if edge_labels else (node_map[dst[i]],)
        adjacency_list[src[i]].append(edge)

    # Write to LAD format
    with open(file_name, "w") as file:
        file.write(f"{len(unique_nodes)}\n")  # Number of nodes
        for node in unique_nodes:
            edges = adjacency_list[node]
            file.write(f"{len(edges)}")
            for edge in edges:
                file.write(f" {edge[0]}")  # Destination node
                if edge_labels:  # Add edge label if available
                    file.write(f" {edge[1]}")
            file.write("\n")


# Process the main graph
print("Processing main graph...")
graph_edge_attributes = graph.get_edge_attributes()
src_main = graph_edge_attributes["src"].to_list()
dst_main = graph_edge_attributes["dst"].to_list()
connection_type_main = graph_edge_attributes["connection_type"].to_list()
save_to_lad("main_graph.lad", src_main, dst_main, connection_type_main)
print("Main graph saved to main_graph.lad")

# Process each subgraph
for key, value in raw_subgraph_data.items():
    print(f"Processing subgraph {key}...")
    subgraph_dict = {
        "src": value[0],
        "dst": value[1],
        "connection_type": value[2]
    }
    
    # Create the subgraph in Arachne
    subgraph = ar.PropGraph()
    df = ak.DataFrame(subgraph_dict)
    subgraph.load_edge_attributes(df, source_column="src", destination_column="dst")
    
    # Extract attributes for LAD format
    subgraph_edge_attributes = subgraph.get_edge_attributes()
    src_subgraph = subgraph_edge_attributes["src"].to_list()
    dst_subgraph = subgraph_edge_attributes["dst"].to_list()
    connection_type_subgraph = subgraph_edge_attributes["connection_type"].to_list()

    # Save the subgraph to LAD format
    subgraph_file_name = f"subgraph_{key}.lad"
    save_to_lad(subgraph_file_name, src_subgraph, dst_subgraph, connection_type_subgraph)
    print(f"Subgraph {key} saved to {subgraph_file_name}")
