In [1]:
import arkouda as ak
import arachne as ar
import time

def _optional_column(frame, name):
    """Return ``frame[name]`` if the pandas frame has that column, else ``None``."""
    return frame[name] if name in frame.columns else None


def _label_at(column, position, default):
    """Return ``str(column.iloc[position])``, or ``default`` when ``column`` is ``None``."""
    return default if column is None else str(column.iloc[position])


def save_graph_to_ged(graph, file_name):
    """
    Save a graph in GED format with directed edges and two labels per node and edge.

    Lines written, in order:
      ``#graph_name``
      number of node-attribute rows
      ``<lbls2>,<lbls3>`` for every node-attribute row
      number of internal edges
      ``<src> <dst> <rels1>,<rels2>`` for every internal edge

    When a label column is absent, the first label is written as "0" and the
    second as "False". A confirmation message is printed after the file is written.

    :param graph: Arachne property graph. Must provide ``_internal_edges()``,
        ``get_edge_attributes()`` and ``get_node_attributes()``.
    :param file_name: Output GED file name.
    """
    # Internal edge endpoints, pulled to the client as plain Python lists.
    internal_src, internal_dst = graph._internal_edges()
    src = internal_src.to_list()
    dst = internal_dst.to_list()

    # Edge and node attributes as pandas frames.
    edge_df = graph.get_edge_attributes().to_pandas()
    node_df = graph.get_node_attributes().to_pandas()

    # Labels are read column by column rather than through DataFrame.iterrows():
    # iterrows() coerces every row to a single dtype, so an integer label would
    # be written as e.g. "10.0" whenever the frame also holds a float column.
    node_label1 = _optional_column(node_df, 'lbls2')
    node_label2 = _optional_column(node_df, 'lbls3')
    edge_label1 = _optional_column(edge_df, 'rels1')
    edge_label2 = _optional_column(edge_df, 'rels2')

    # Header: graph name followed by the node count.
    num_nodes = len(node_df)
    ged_content = ["#graph_name", str(num_nodes)]

    # One "<label1>,<label2>" line per node-attribute row.
    for i in range(num_nodes):
        label1 = _label_at(node_label1, i, "0")
        label2 = _label_at(node_label2, i, "False")
        ged_content.append(f"{label1},{label2}")

    # Edge count, then one "<src> <dst> <label1>,<label2>" line per edge.
    # NOTE(review): row i of the edge-attribute frame is assumed to describe
    # internal edge i — confirm against Arachne's attribute ordering.
    num_edges = len(src)
    ged_content.append(str(num_edges))
    for i in range(num_edges):
        label1 = _label_at(edge_label1, i, "0")
        label2 = _label_at(edge_label2, i, "False")
        ged_content.append(f"{src[i]} {dst[i]} {label1},{label2}")

    # Explicit encoding so the file does not depend on the platform default.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write("\n".join(ged_content) + "\n")

    print(f"Graph saved to {file_name} in GED format")



    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21


In [2]:


# Connect to the Arkouda server; host name and port are specific to this cluster.
ak.connect("n119", 5555)

# Parameters
p = 0.005              # edge probability passed to gnp_random_graph
node_sizes = [10_000]  # one benchmark round is run per entry
# NOTE(review): `seed` is never used below, so the random host graph differs
# between runs. Pass it to the generator if the installed Arachne version
# accepts a seed argument — TODO confirm.
seed = 42
num_tests = 1          # timed repetitions per graph size

# Fixed attributes for subgraph
subgraph_node_ints = ak.array([10, 10, 10, 10])
subgraph_node_bools = ak.array([True, True, True, True])
subgraph_edge_ints = ak.array([5, 5, 5, 5])
subgraph_edge_bools = ak.array([True,  True, True, True])

# Subgraph structure: directed triangle 0 -> 1 -> 2 -> 0 plus the edge 0 -> 3
src_list = [0, 1, 2, 0]
dst_list = [1, 2, 0, 3]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Subgraph dataframes
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "rels1": subgraph_edge_ints,
    "rels2": subgraph_edge_bools
})

node_df_h = ak.DataFrame({
    # sorted() makes the node order explicit: labels are attached by position,
    # and the iteration order of a bare set is not something to rely on.
    "nodes": ak.array(sorted(set(src_list + dst_list))),
    "lbls2": subgraph_node_ints,
    "lbls3": subgraph_node_bools
})

# Create the subgraph
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
sg.load_node_attributes(node_df_h, node_column="nodes")

# Save the subgraph to a GED file.
# The file name (including its spelling) is kept as-is so existing consumers
# of the output still find it.
print("Processing subgraph...")
save_graph_to_ged(sg, "Traiangle_Ordered.ged")
print("Subgraph saved to Traiangle_Ordered.ged")

# Run for different node sizes
for num_nodes in node_sizes:
    print(f"\nRunning for num_nodes = {num_nodes} with p = {p}")

    # Generate the property graph; perf_counter() is a monotonic clock meant
    # for measuring elapsed intervals.
    start = time.perf_counter()
    temp_prop_graph = ar.gnp_random_graph(num_nodes, p, create_using=ar.PropGraph)
    end = time.perf_counter()
    build_time = end - start

    print(f"Graph with {len(temp_prop_graph)} vertices and {temp_prop_graph.size()} edges "
          f"built in {round(build_time, 2)} seconds.")

    # Attributes for the main graph. Every node and edge gets the same values
    # as the subgraph, so the semantic check below never rejects a candidate.
    num_edges = temp_prop_graph.size()
    edges = temp_prop_graph.edges()
    nodes = temp_prop_graph.nodes()

    node_ints = ak.array([10] * len(nodes))  # Fixed for simplicity
    node_bools = ak.array([True] * len(nodes))  # Fixed for simplicity
    edge_ints = ak.array([5] * len(edges[0]))  # Fixed for simplicity
    edge_bools = ak.array([True] * len(edges[0]))  # Fixed for simplicity

    edge_df = ak.DataFrame({
        "src": edges[0],
        "dst": edges[1],
        "rels1": edge_ints,
        "rels2": edge_bools
    })

    node_df = ak.DataFrame({
        "nodes": nodes,
        "lbls2": node_ints,
        "lbls3": node_bools
    })

    # Rebuild the random graph as a property graph carrying the attributes above.
    prop_graph = ar.PropGraph()
    prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
    prop_graph.load_node_attributes(node_df, node_column="nodes")

    # Accumulators; they are summed over the runs and divided by num_tests below.
    test_results = {
        "VF2-SI": {"monos": 0, "time": 0},
        # "VF2-SI PROBABILITY-MVE": {"monos": 0, "time": 0},
        # "VF2-PS DEFAULT": {"monos": 0, "time": 0},
        # "VF2-PS MVE-REORDERING": {"monos": 0, "time": 0},
        # "VF2-PS PROBABILITY-MVE": {"monos": 0, "time": 0},
    }

    # Run tests
    for test_run in range(num_tests):
        print(f"  Test run {test_run + 1}/{num_tests}")

        # VF2-SI
        start = time.perf_counter()
        isos_as_vertices = ar.subgraph_isomorphism(prop_graph, sg, semantic_check="and",
            algorithm_type="si", reorder_type="structural",return_isos_as="vertices"
        )
        end = time.perf_counter()
        elapsed = end - start
        # NOTE(review): element 0 is presumably a flat array holding len(sg)
        # host vertices per match, so this division yields the match count —
        # confirm against the Arachne return format.
        result = len(isos_as_vertices[0]) / len(sg)
        test_results["VF2-SI"]["monos"] += result
        test_results["VF2-SI"]["time"] += elapsed
        print("Time: ", elapsed)

    # Compute averages
    for test in test_results:
        test_results[test]["monos"] /= num_tests
        test_results[test]["time"] /= num_tests

    # Print results
    print(f"\nResults for num_nodes = {num_nodes}:")
    for test, results in test_results.items():
        print(f"  {test}:")
        print(f"    Average monos found: {results['monos']}")
        print(f"    Average execution time: {results['time']:.2f} seconds")


connected to arkouda server tcp://*:5555


Processing subgraph...
Graph saved to Traiangle_Ordered.ged in GED format
Main graph saved to Traiangle_Ordered

Running for num_nodes = 10000 with p = 0.005
Graph with 10000 vertices and 499380 edges built in 4.84 seconds.
  Test run 1/1
Time:  282.0742816925049

Results for num_nodes = 10000:
  VF2-SI:
    Average monos found: 6221597.0
    Average execution time: 282.07 seconds


In [3]:
# Save the main graph to a GED file
# `prop_graph` is the graph left over from the last iteration of the benchmark
# loop in the previous cell, so that cell must have been run first.
print("Processing main graph...")
save_graph_to_ged(prop_graph, "main_graph_Random.ged")
# Report the file that was actually written (.ged, not .csv).
print("Main graph saved to main_graph_Random.ged")
# NOTE(review): ak.shutdown() presumably stops the Arkouda server itself rather
# than only closing this client session — confirm that is intended here.
ak.shutdown()

Processing main graph...
This transfer will use 11 MB .
Graph saved to main_graph_Random.ged in GED format
Main graph saved to main_graph_Random.csv
