In [1]:
import arkouda as ak
import arachne as ar
import pandas as pd
import numpy as np

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21+0.gcf6eeacde.dirty


In [2]:
# NOTE: Change the server name below to whatever is applicable in your environment.
# If running locally, call ak.connect() with no arguments instead.
ARKOUDA_SERVER = "n115"
ARKOUDA_PORT = 5555
ak.connect(ARKOUDA_SERVER, ARKOUDA_PORT)

connected to arkouda server tcp://*:5555


In [3]:
# Query motifs, each given as three parallel lists: srcK[i] -> dstK[i] is one
# edge and connection_typeK[i] is that edge's "connection_type" value.
#
# Original note: these are just wedges, we do not need subgraph isomorphism for these.
# NOTE(review): it is unclear which motifs that note refers to; motif 0 is the
# only one made of just two edges -- confirm.
src1, dst1 = [1, 1, 10003, 10003], [10002, 10003, 30004, 30005]
connection_type1 = ['n'] * 4

src2, dst2 = [1, 1, 40005], [10002, 10003, 1]
connection_type2 = ['n'] * 2 + ['s']

src3, dst3 = [1, 1, 40005, 40005, 50008], [10002, 10003, 50008, 1, 10003]
connection_type3 = ['n'] * 3 + ['s'] * 2

src4, dst4 = [1, 1, 10003, 10003, 60007], [10002, 10003, 30004, 30005, 1]
connection_type4 = ['n'] * 4 + ['s']

src5 = [1, 1, 10003, 10003, 60007, 60007, 70010]
dst5 = [10002, 10003, 30004, 30005, 70010, 1, 30005]
connection_type5 = ['n'] * 5 + ['s'] * 2

src6 = [1, 1, 40005, 40005, 80009, 80009, 10003, 90010]
dst6 = [10002, 10003, 50006, 50007, 90010, 90011, 50006, 50007]
connection_type6 = ['n'] * 6 + ['s'] * 2

src7, dst7 = [1, 10002, 40005, 60007, 80009], [10002, 20003, 1, 10002, 20003]
connection_type7 = ['n'] * 2 + ['s'] * 3

src0, dst0 = [1, 1], [10002, 10003]
connection_type0 = ['n'] * 2

# One (sources, destinations, connection types, label) tuple per motif, in the
# order they are searched; the label is only used when reporting timings.
subgraphs_as_list_of_tuples = [
    (src1, dst1, connection_type1, "1"),
    (src2, dst2, connection_type2, "2"),
    (src3, dst3, connection_type3, "3"),
    (src4, dst4, connection_type4, "4"),
    (src5, dst5, connection_type5, "5"),
    (src6, dst6, connection_type6, "6"),
    (src7, dst7, connection_type7, "7"),
    (src0, dst0, connection_type0, "0"),
]

In [4]:
# Read the edge list with pandas, then hand it to Arkouda as a
# {column name: list of values} dictionary.
DATASET_PATH = "/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv"
df = pd.read_csv(DATASET_PATH)
columns_as_lists = df.to_dict(orient='list')
transformed_dataset = ak.DataFrame(columns_as_lists)

  df = pd.read_csv("/scratch/users/oaa9/arkouda-njit/arachne/data/OL_dataset.csv")


In [5]:
def get_mapping(g, subgraph):
    """Find occurrences of ``subgraph`` in ``g`` and return them as vertex mappings.

    Calls ``ar.subgraph_isomorphism`` with ``algorithm_type="si"``,
    ``return_isos_as="vertices"``, ``semantic_check="or"`` and
    ``size_limit=10_000``, takes the first element of its result as a flat
    array of host-graph vertices, and cuts that array into one chunk per match.
    The number of matches is printed before returning.

    Args:
        g: Host graph to search in.
        subgraph: Query graph. Must provide ``edges()`` returning a
            (sources, destinations) pair whose members support ``to_ndarray()``.

    Returns:
        A list with one dict per match. Keys are the distinct vertex labels of
        ``subgraph`` in ascending order; the value for the k-th key is the k-th
        entry of that match's chunk.
        NOTE(review): this pairing assumes the flat result lists host vertices
        in ascending subgraph-vertex order -- confirm against Arachne.

    Raises:
        ValueError: If ``subgraph`` has no edges, or if the flat result length
            is not a multiple of the number of subgraph vertices.
    """
    src_sub, dst_sub = subgraph.edges()
    endpoints = np.concatenate((src_sub.to_ndarray(), dst_sub.to_ndarray()))

    # np.unique returns the distinct values already sorted, so no extra sort is needed.
    subgraph_nodes = list(np.unique(endpoints))
    nodes_per_match = len(subgraph_nodes)

    # Fail early with a clear error instead of a modulo-by-zero after the
    # (potentially very slow) isomorphism search.
    if nodes_per_match == 0:
        raise ValueError("The subgraph has no edges, so there are no subgraph nodes to map.")

    isos_as_vertices = ar.subgraph_isomorphism(g, subgraph, algorithm_type="si", return_isos_as="vertices", semantic_check="or", size_limit=10_000)
    isos_ndarray = isos_as_vertices[0].to_ndarray()

    # Validate against the same node count that drives the reshape below, so a
    # malformed result surfaces as this ValueError rather than a reshape error.
    if len(isos_ndarray) % nodes_per_match != 0:
        raise ValueError("The length of isomorphisms is not a multiple of the number of subgraph nodes.")

    # One row per match, one column per subgraph node.
    hostgraph_nodes = isos_ndarray.reshape(-1, nodes_per_match)

    print(f"Number of motifs found is {len(hostgraph_nodes)}")

    return [dict(zip(subgraph_nodes, row)) for row in hostgraph_nodes]

In [6]:
# Columns of the dataset that are attached to every edge as relationship attributes.
EDGE_ATTRIBUTE_COLUMNS = [
    "s_bef", "s_bef_x", "s_bef_y", "s_bef_z",
    "s_af", "s_af_x", "s_af_y", "s_af_z",
    's_x', "s_y", "s_z", "s_distance",
    "d_bef", "d_bef_x", "d_bef_y", "d_bef_z",
    "d_af", "d_af_x", "d_af_y", "d_af_z",
    "d_x", "d_y", "d_z", "d_distance",
    "n_id", "connection_type",
]

# Build the host property graph from the "src"/"dst" columns of the dataset.
g = ar.PropGraph()
g.load_edge_attributes(
    transformed_dataset,
    source_column="src",
    destination_column="dst",
    relationship_columns=EDGE_ATTRIBUTE_COLUMNS,
)

vertex_count = len(g)
edge_count = g.size()
print(f"Built graph with {vertex_count} vertices and {edge_count} edges")

Built graph with 667711 vertices and 962796 edges


In [7]:
def motif_to_viss(src, dst, connection_type):
    """Build a query motif from parallel edge lists and map it onto ``g``.

    The three arguments become the "src", "dst" and "connection_type" columns
    of an Arkouda DataFrame, which is loaded into a fresh ``ar.PropGraph`` with
    "connection_type" as its only relationship column. That motif is then
    passed to ``get_mapping`` together with the notebook-level graph ``g``.

    Args:
        src: Source vertex of each motif edge.
        dst: Destination vertex of each motif edge.
        connection_type: "connection_type" value of each motif edge.

    Returns:
        Whatever ``get_mapping(g, motif)`` returns.
    """
    motif = ar.PropGraph()
    motif_edges = ak.DataFrame(
        {"src": src, "dst": dst, "connection_type": connection_type}
    )
    motif.load_edge_attributes(
        motif_edges,
        source_column="src",
        destination_column="dst",
        relationship_columns=["connection_type"],
    )

    # `g` is not a parameter: it is looked up in the notebook's global namespace.
    return get_mapping(g, motif)

In [8]:
import time

# Time the motif search for each query pattern in turn.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; unlike time.time() it is unaffected by system clock changes,
# which matters for searches that run for several minutes.
for motif_src, motif_dst, motif_connection_type, motif_label in subgraphs_as_list_of_tuples:
    start = time.perf_counter()
    motif_to_viss(motif_src, motif_dst, motif_connection_type)
    end = time.perf_counter()
    print(f"For subgraph {motif_label} finding motifs took: {end-start} seconds")

Number of motifs found is 10001
For subgraph 1 finding motifs took: 80.36615204811096 seconds
Number of motifs found is 10011
For subgraph 2 finding motifs took: 6.3795390129089355 seconds
Number of motifs found is 5048
For subgraph 3 finding motifs took: 8.943705558776855 seconds
Number of motifs found is 10005
For subgraph 4 finding motifs took: 21.315133094787598 seconds
Number of motifs found is 2308
For subgraph 5 finding motifs took: 340.9331910610199 seconds
Number of motifs found is 10000
For subgraph 6 finding motifs took: 717.2857136726379 seconds
Number of motifs found is 10012
For subgraph 7 finding motifs took: 6.522960424423218 seconds
Number of motifs found is 10086
For subgraph 0 finding motifs took: 6.163693428039551 seconds
