In [1]:
import arkouda as ak
import arachne as ar
import pandas as pd
import numpy as np
import networkx as nx
import time
import json

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21+0.gcf6eeacde.dirty


In [2]:
# These are just wedges, we do not need subgraph isomorphism for these.
def _edge_columns(*edges):
    """Split (src, dst, connection_type) edge triples into three parallel lists."""
    return tuple(list(column) for column in zip(*edges))


# Every pattern below is written one edge per triple: (src, dst, connection_type).
src0, dst0, connection_type0 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
)

src1, dst1, connection_type1 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (10003, 30004, 0),
    (10003, 30005, 0),
)

src2, dst2, connection_type2 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (40005, 1, 1),
)

src3, dst3, connection_type3 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (40005, 50008, 0),
    (40005, 1, 1),
    (50008, 10003, 1),
)

src4, dst4, connection_type4 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (10003, 30004, 0),
    (10003, 30005, 0),
    (60007, 1, 1),
)

src5, dst5, connection_type5 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (10003, 30004, 0),
    (10003, 30005, 0),
    (60007, 70010, 0),
    (60007, 1, 1),
    (70010, 30005, 1),
)

src6, dst6, connection_type6 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (40005, 50006, 0),
    (40005, 50007, 0),
    (80009, 90010, 0),
    (80009, 90011, 0),
    (10003, 50006, 1),
    (90010, 50007, 1),
)

src7, dst7, connection_type7 = _edge_columns(
    (1, 10002, 0),
    (10002, 20003, 0),
    (40005, 1, 1),
    (60007, 10002, 1),
    (80009, 20003, 1),
)

src00, dst00, connection_type00 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (1, 10004, 0),
    (1, 50006, 1),
    (10002, 70008, 1),
)

src01, dst01, connection_type01 = _edge_columns(
    (1, 10002, 0),
    (10002, 20003, 0),
    (20003, 30004, 0),
    (30004, 40005, 0),
    (1, 60007, 1),
    (40005, 80009, 1),
)

src02, dst02, connection_type02 = _edge_columns(
    (1, 10002, 0),
    (30004, 40005, 0),
    (60007, 70008, 0),
    (90010, 1, 1),
    (110012, 30004, 1),
    (130014, 60007, 1),
)

src03, dst03, connection_type03 = _edge_columns(
    (1, 10002, 0),
    (1, 10003, 0),
    (1, 10004, 0),
    (50006, 60007, 0),
    (50006, 60008, 0),
    (50006, 60009, 0),
    (120013, 130014, 0),
    (120013, 130015, 0),
    (120013, 130016, 0),
    (10003, 60007, 1),
    (10004, 130015, 1),
)

# Registry of the patterns that are currently enabled, keyed by pattern id.
# Insertion order is the order in which the patterns are consumed later on.
# Patterns "4", "1", "6" and "02" are defined above but deliberately left out.
raw_subgraph_data = {
    "0": (src0, dst0, connection_type0),
    "3": (src3, dst3, connection_type3),
    "2": (src2, dst2, connection_type2),
    "7": (src7, dst7, connection_type7),
    "5": (src5, dst5, connection_type5),
    "00": (src00, dst00, connection_type00),
    "01": (src01, dst01, connection_type01),
    "03": (src03, dst03, connection_type03),
}

In [3]:
# NOTE: Set the host and port below to match the Arkouda server in your environment.
# If running locally, call ak.connect() with no arguments instead.
arkouda_host, arkouda_port = "n117", 5555
ak.connect(arkouda_host, arkouda_port)

connected to arkouda server tcp://*:5555


In [4]:
# Load the dataset from CSV with pandas, then hand it to Arkouda as a
# column-name -> list-of-values mapping.
dataset_path = "/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv"
df = pd.read_csv(dataset_path)
dataset_columns = df.to_dict(orient='list')
transformed_dataset = ak.DataFrame(dataset_columns)

  df = pd.read_csv("/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv")


In [5]:
# Change the string-valued "connection_type" column to integers:
# entries equal to "n" become 0, every other entry becomes 1.
# NOTE(review): the column is overwritten in place, so this cell is presumably
# not safe to run twice without reloading the dataset first - confirm.
is_type_n = transformed_dataset["connection_type"] == "n"
transformed_dataset["connection_type"] = ak.where(is_type_n, 0, 1)

In [6]:
# Project the dataset down to the edge endpoints plus the "connection_type" attribute.
edge_columns = ("src", "dst", "connection_type")
reduced_dataset = transformed_dataset[edge_columns]

In [7]:
# Build the property graph from the reduced edge table; "src" and "dst" hold
# the edge endpoints, and the table is passed along as edge attributes.
graph = ar.PropGraph()
graph.load_edge_attributes(
    reduced_dataset,
    source_column="src",
    destination_column="dst",
)
print(f"Graph has {len(graph):_} vertices and {graph.size():_} edges.")

Graph has 667_711 vertices and 962_796 edges.


In [8]:
# Create list of subgraphs: one Arachne property graph per enabled pattern in
# raw_subgraph_data, appended in the dictionary's iteration order.
subgraphs_list = []
for key, (pattern_src, pattern_dst, pattern_connection_type) in raw_subgraph_data.items():
    subgraph_name = f"subgraph{key}"
    print(f"Building {subgraph_name}...")

    # Use a dedicated name for the edge table. Assigning to `df` here would
    # replace the pandas dataset loaded earlier in the notebook with the
    # Arkouda frame of whichever pattern happened to be built last.
    subgraph_df = ak.DataFrame({
        "src": pattern_src,
        "dst": pattern_dst,
        "connection_type": pattern_connection_type,
    })

    subgraph = ar.PropGraph()
    subgraph.load_edge_attributes(subgraph_df, source_column="src", destination_column="dst")

    subgraphs_list.append(subgraph)

Building subgraph0...
Building subgraph3...
Building subgraph2...
Building subgraph7...
Building subgraph5...
Building subgraph00...
Building subgraph01...
Building subgraph03...


In [14]:
def save_graph_to_csv(graph, file_name, edge_attribute="connection_type"):
    """
    Save a graph to a text file as directed edge records followed by node records.

    Each edge is written as ``<src>><dst>,<attribute value>`` and each vertex as
    ``<vertex>,,`` (blank label fields). Records are separated by newlines and no
    trailing newline is written. Vertex identifiers are the values returned by
    ``graph._internal_edges()``; node records are emitted in sorted order.

    :param graph: Arachne property graph.
    :param file_name: Output CSV file name.
    :param edge_attribute: Name of the edge-attribute column whose value is
        written after each edge. Defaults to ``"connection_type"``.
    :raises ValueError: If the number of attribute values differs from the
        number of edges.
    """
    # Extract internal edges as plain Python lists.
    internal_src, internal_dst = graph._internal_edges()
    src = internal_src.to_list()
    dst = internal_dst.to_list()

    # Extract the edge attribute column as an array so values are paired with
    # edges by position. Indexing the pandas Series with [i] looks values up by
    # index label, which is only correct when the index is exactly 0..n-1, and
    # is also much slower per element on large graphs.
    attribute_frame = graph.get_edge_attributes().to_pandas()
    attribute_values = attribute_frame[edge_attribute].to_numpy()
    if len(attribute_values) != len(src):
        raise ValueError(
            f"Expected {len(src)} values for edge attribute {edge_attribute!r}, "
            f"got {len(attribute_values)}"
        )

    # Generate edge data
    edge_data = [
        f"{edge_src}>{edge_dst},{value}"
        for edge_src, edge_dst, value in zip(src, dst, attribute_values)
    ]
    print("Prepare node data")

    # Generate node data with blank labels; a vertex appears once even if it
    # is an endpoint of several edges.
    unique_nodes = sorted(set(src).union(dst))
    node_data = [f"{node},," for node in unique_nodes]

    print(" Write to CSV")

    # Write to CSV
    with open(file_name, "w") as f:
        f.write("\n".join(edge_data + node_data))
    print(f"Graph saved to {file_name}")

In [15]:
# Export the full property graph to a CSV file in the current working directory.
output_csv = "OL.csv"
save_graph_to_csv(graph, output_csv)

This transfer will use 22 MB .
Prepare node data
 Write to CSV
Graph saved to OL.csv
