In [4]:
import arkouda as ak
import arachne as ar
import time

def save_graph_to_csv(graph, file_name):
    """
    Save a graph in CSV format with directed edges and optional labels.
    :param graph: Arachne property graph.
    :param file_name: Output CSV file name.
    """
    # Extract internal edges and attributes
    internal_src, internal_dst = graph._internal_edges()
    src = internal_src.to_list()
    dst = internal_dst.to_list()

    print("src = ", src)
    print("dst = ", dst)
    # Extract edge attributes.
    graph_edge_attributes = graph.get_edge_attributes()
    edge_df = graph_edge_attributes.to_pandas()
    edge_rels1 = edge_df['rels1'] if 'rels1' in edge_df.columns else None
    edge_rels2 = edge_df['rels2'] if 'rels2' in edge_df.columns else None
    
    
    # Generate edge data
    edge_data = []
    for i in range(len(src)):
        if edge_rels1 is not None and edge_rels2 is not None:
            edge_data.append(f"{src[i]}>{dst[i]},{edge_rels1[i]},{edge_rels2[i]}")
        else:
            edge_data.append(f"{src[i]}>{dst[i]}")

    print("Preparing node data...")

    graph_node_attributes = graph.get_node_attributes()
    if graph_node_attributes.size > 0:
        # If labels are present, include them in the node data
        node_df = graph_node_attributes.to_pandas()
        node_data = [
            f"{row['nodes']},,{row['lbls2']},{row['lbls3']}"
            for _, row in node_df.iterrows()
        ]
    else:
        # If no labels, generate blank labels
        # num_nodes = graph.num_nodes()
        num_nodes = sorted(set(src).union(dst))

        unique_nodes = ak.arange(0, num_nodes).to_list()
        node_data = [f"{node},," for node in unique_nodes]

    print(" Write to CSV")

    # Write to CSV
    with open(file_name, "w") as f:
        # f.write("\n".join(edge_data + node_data))
        # f.write("\n".join(edge_data))
        f.write("\n".join(edge_data) + "\n")
        f.write("\n".join(node_data) + "\n")

    print(f"Graph saved to {file_name}")
    



In [5]:


ak.connect("n0126", 5555)

# Parameters
p = 0.0005
node_sizes = [50000]
seed = 42
num_tests = 1

# Fixed attributes for subgraph
subgraph_node_ints = ak.array([10, 10, 10,10])
subgraph_node_bools = ak.array([True, True, True, True])
subgraph_edge_ints = ak.array([5, 5, 5, 5])
subgraph_edge_bools = ak.array([True, True, True, True])

# Subgraph structure
src_list = [0, 1, 2, 1]
dst_list = [1, 2, 0, 3]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Subgraph dataframes
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "rels1": subgraph_edge_ints,
    "rels2": subgraph_edge_bools
})

node_df_h = ak.DataFrame({
    "nodes": ak.array(list(set(src_list + dst_list))),
    "lbls2": subgraph_node_ints,
    "lbls3": subgraph_node_bools
})

# Create the subgraph
sg = ar.PropGraph()
sg.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
sg.load_node_attributes(node_df_h, node_column="nodes")

# Save the main graph to a CSV file
print("Processing subgraph...")
save_graph_to_csv(sg, "Traiangle_notOrdered.csv")
print("Main graph saved to fan_out.csv")

# Run for different node sizes
for num_nodes in node_sizes:
    print(f"\nRunning for num_nodes = {num_nodes} with p = {p}")

    # Generate the property graph
    start = time.time()
    temp_prop_graph = ar.gnp_random_graph(num_nodes, p, create_using=ar.PropGraph, seed=seed)
    end = time.time()
    build_time = end - start

    print(f"Graph with {len(temp_prop_graph)} vertices and {temp_prop_graph.size()} edges "
          f"built in {round(build_time, 2)} seconds.")

    # Generate random attributes for the main graph
    num_edges = temp_prop_graph.size()
    edges = temp_prop_graph.edges()
    nodes = temp_prop_graph.nodes()

    node_ints = ak.array([10] * len(nodes))  # Fixed for simplicity
    node_bools = ak.array([True] * len(nodes))  # Fixed for simplicity
    edge_ints = ak.array([5] * len(edges[0]))  # Fixed for simplicity
    edge_bools = ak.array([True] * len(edges[0]))  # Fixed for simplicity

    edge_df = ak.DataFrame({
        "src": edges[0],
        "dst": edges[1],
        "rels1": edge_ints,
        "rels2": edge_bools
    })

    node_df = ak.DataFrame({
        "nodes": nodes,
        "lbls2": node_ints,
        "lbls3": node_bools
    })

    prop_graph = ar.PropGraph()
    prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
    prop_graph.load_node_attributes(node_df, node_column="nodes")



    # Initialize averages
    test_results = {
        "VF2-SI": {"monos": 0, "time": 0},
        # "VF2-SI PROBABILITY-MVE": {"monos": 0, "time": 0},
        # "VF2-PS DEFAULT": {"monos": 0, "time": 0},
        # "VF2-PS MVE-REORDERING": {"monos": 0, "time": 0},
        # "VF2-PS PROBABILITY-MVE": {"monos": 0, "time": 0},
    }

    # Run tests
    for test_run in range(num_tests):
        print(f"  Test run {test_run + 1}/{num_tests}")

        # VF2-SI
        start = time.time()
        isos_as_vertices = ar.subgraph_isomorphism(
            prop_graph, sg, 
            algorithm_type="ps", reorder_type="structural", return_isos_as="vertices"
        )
        end = time.time()
        result = len(isos_as_vertices[0]) / len(sg)
        test_results["VF2-SI"]["monos"] += result
        test_results["VF2-SI"]["time"] += (end - start)
        print("Time: ",end - start )


    # Compute averages
    for test in test_results:
        test_results[test]["monos"] /= num_tests
        test_results[test]["time"] /= num_tests

    # Print results
    print(f"\nResults for num_nodes = {num_nodes}:")
    for test, results in test_results.items():
        print(f"  {test}:")
        print(f"    Average monos found: {results['monos']}")
        print(f"    Average execution time: {results['time']:.2f} seconds")


connected to arkouda server tcp://*:5555


Processing subgraph...
src =  [0, 1, 1, 2]
dst =  [1, 2, 3, 0]
Preparing node data...
 Write to CSV
Graph saved to Traiangle_notOrdered.csv
Main graph saved to fan_out.csv

Running for num_nodes = 50000 with p = 0.0005
Graph with 50000 vertices and 1250667 edges built in 5.66 seconds.
  Test run 1/1
Time:  12.046776294708252

Results for num_nodes = 50000:
  VF2-SI:
    Average monos found: 387107.0
    Average execution time: 12.05 seconds


In [None]:
    # Save the main graph to a CSV file
print("Processing main graph...")
save_graph_to_csv(prop_graph, "main_graph_Random.csv")
print("Main graph saved to main_graph_Random.csv")
ak.shutdown()

Processing main graph...
