In [1]:
import arkouda as ak
import arachne as ar
import pandas as pd
import time as time
import networkx as nx
import random
import argparse

# Open the client session against the Arkouda server on host "n82", port 5555.
ak.connect("n82", 5555)

# Ask the server for its configuration and report the values stored under
# its "numLocales" and "numPUs" entries.
config = ak.get_config()
num_locales, num_pus = config["numLocales"], config["numPUs"]
print(f"Arkouda server running with {num_locales}L and {num_pus}PUs.")

# Attribute probabilities. In every [P_Alpha, P_Beta] pair the first entry
# belongs to the first listed value and the second entry to the second value.
P_Alpha, P_Beta = 0, 1
node_lbl_probs = dict(
    lbls2=[P_Alpha, P_Beta],  # integer node label: 10 vs. 11
    lbls3=[P_Alpha, P_Beta],  # boolean node label: True vs. False
)
edge_rel_probs = dict(
    rels1=[P_Alpha, P_Beta],  # integer edge relationship: 5 vs. 6
    rels2=[P_Alpha, P_Beta],  # boolean edge relationship: True vs. False
)
    
# Note: with every probability pair set to [0.8, 0.2] and a subgraph whose nodes
# are (11, False) and whose edges are (6, False), exactly 1 isomorphism is found.


# Read the raw C. elegans connectome table and echo it for a quick visual check.
c_elegans = pd.read_csv("/scratch/users/oaa9/experimentation/data/connectome/c.elegans/celegans_actual.csv")
print(c_elegans)

# Clean the headers: remove embedded spaces first, then trim whatever
# leading/trailing whitespace is left.
c_elegans.columns = c_elegans.columns.str.replace(" ", "").str.strip()

# Cast the four partner columns to str; the reshape step further down compares
# their values against the literal text "nan" to detect missing partners.
for post_col in ("post1", "post2", "post3", "post4"):
    c_elegans[post_col] = c_elegans[post_col].astype(str)
#c_elegans

# Reshape the wide table (up to four partners per input row, stored in
# post1..post4) into a long table with one output row per (input row, partner).
# Every column except post1..post4 is carried over unchanged, apart from
# "post", which is overwritten with the individual partner value.
post_partner_cols = ["post1", "post2", "post3", "post4"]
temp_cols = [col for col in c_elegans.columns if col not in post_partner_cols]
temp = {k: [] for k in temp_cols}
for index, row in c_elegans.iterrows():
    # Visit the partner columns in their fixed order so the output rows keep
    # the ordering post1, post2, post3, post4 within each input row.
    for post_col in post_partner_cols:
        # The partner columns were cast to str earlier in this cell, so a
        # missing partner appears as the literal string "nan"; skip those.
        if row[post_col] == "nan":
            continue
        for k in temp_cols:
            temp[k].append(row[post_col] if k == "post" else row[k])

c_elegans_from_dict = pd.DataFrame.from_dict(temp)

# Wrap the reshaped pandas frame in a one-element list and convert every entry
# of that list to an Arkouda DataFrame.
neuron_dfs_in_pandas = [c_elegans_from_dict]


neuron_dfs_in_arkouda = [ak.DataFrame(pd_df) for pd_df in neuron_dfs_in_pandas]

# Only one frame was converted, so take it directly.
ak_celegans = neuron_dfs_in_arkouda[0]

# Group the rows by their (pre, post) pair.
ak_celegans_gb = ak_celegans.groupby(["pre", "post"])
print(ak_celegans_gb)

# Select the rows at permutation[segments].
# NOTE(review): presumably this keeps one representative row per unique
# (pre, post) pair (segments being the group start offsets) - confirm against
# the Arkouda GroupBy documentation.
ak_celegans_sorted = ak_celegans[ak_celegans_gb.permutation[ak_celegans_gb.segments]]
# Keep only the rows whose "type" column equals "chemical".
chemical_synapses = ak_celegans_sorted["type"] == "chemical"
ak_celegans_sorted = ak_celegans_sorted[chemical_synapses]

# Relabel the vertices with integers: stack the "pre" and "post" columns into
# one array, group it by value, and broadcast one integer per unique value
# (0 .. number of unique values - 1) back onto every element.
# NOTE(review): assumes broadcast returns values in the original element order
# so the two slices below line up with "pre" and "post" - confirm.
ak_celegans_nodes = ak.concatenate([ak_celegans_sorted["pre"], ak_celegans_sorted["post"]])
gb_celegans_nodes = ak.GroupBy(ak_celegans_nodes)
new_vertex_range = ak.arange(gb_celegans_nodes.unique_keys.size)
all_vertices = gb_celegans_nodes.broadcast(new_vertex_range)
# The first shape[0] entries came from "pre", the remainder from "post".
ak_celegans_sorted["pre"] = all_vertices[0:ak_celegans_sorted.shape[0]]
ak_celegans_sorted["post"] = all_vertices[ak_celegans_sorted.shape[0]:]

# Rename "pre" -> "src" by copying the column and deleting the old one.
ak_celegans_sorted['src'] = ak_celegans_sorted['pre']
del ak_celegans_sorted['pre']  # drop the old name once the copy exists

# Rename "post" -> "dst" the same way.
ak_celegans_sorted['dst'] = ak_celegans_sorted['post']
del ak_celegans_sorted['post']  # drop the old name once the copy exists

# Bare expression in the middle of a cell: evaluated, but not displayed.
ak_celegans_sorted.columns



# Bring the edge endpoints back to the client as plain Python lists.
src_list = ak_celegans_sorted['src'].to_ndarray().tolist()
dst_list = ak_celegans_sorted['dst'].to_ndarray().tolist()

# Every vertex id that occurs as a source or a destination, in ascending order.
all_nodes = sorted(set(src_list).union(dst_list))

# Sizes of the attribute arrays generated next.
num_edges = len(src_list)
num_nodes = len(all_nodes)

# Base seed for the attribute draws, so that a fresh "Restart & Run All"
# regenerates the same node and edge attributes. Each draw gets its own offset
# so the four attribute arrays are not produced from one and the same seed.
ATTRIBUTE_SEED = 42

# Node attributes. Each draw is an integer from randint(0, 100, ...); the
# attribute takes its first value (10 / True) when the draw is below the
# configured first-value probability times 100, otherwise its second value.
node_lbls2 = ak.where(
    ak.randint(0, 100, num_nodes, seed=ATTRIBUTE_SEED) < node_lbl_probs["lbls2"][0] * 100, 10, 11
)
node_lbls3 = ak.randint(0, 100, num_nodes, seed=ATTRIBUTE_SEED + 1) < node_lbl_probs["lbls3"][0] * 100

# Edge attributes, generated the same way (5 / True vs. 6 / False).
edge_rels1 = ak.where(
    ak.randint(0, 100, num_edges, seed=ATTRIBUTE_SEED + 2) < edge_rel_probs["rels1"][0] * 100, 5, 6
)
edge_rels2 = ak.randint(0, 100, num_edges, seed=ATTRIBUTE_SEED + 3) < edge_rel_probs["rels2"][0] * 100


# Edge table: endpoints plus the two generated edge attributes.
edge_df = ak.DataFrame({
    "src": ak.array(src_list),
    "dst": ak.array(dst_list),
    "rels1": edge_rels1,
    "rels2": edge_rels2
})

# Node table: vertex ids plus the two generated node attributes.
node_df = ak.DataFrame({
    "nodes": ak.array(all_nodes),
    "lbls2": node_lbls2,
    "lbls3": node_lbls3
})

# Build the property graph from the two tables.
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
prop_graph.load_node_attributes(node_df, node_column="nodes")

# The attributes are drawn from the configured probabilities rather than fixed
# to 10/True/5/True, so report the probabilities that were actually used.
print(
    "Property graph created with randomly drawn node attributes "
    f"(P(lbls2=10)={node_lbl_probs['lbls2'][0]}, P(lbls3=True)={node_lbl_probs['lbls3'][0]}) "
    "and edge attributes "
    f"(P(rels1=5)={edge_rel_probs['rels1'][0]}, P(rels2=True)={edge_rel_probs['rels2'][0]})."
)


print("Data loaded now we are loading the subgraph....")




    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21


connected to arkouda server tcp://*:5555


Arkouda server running with 1L and 128PUs.
      continNum  EM series  pre    post       type      sec-tions     \
0            653       JSE   PVNL  PQR,hyp    chemical             1   
1            651       JSE     g0     PVCR    chemical             1   
2            652       JSE     g0     PVCR    chemical             1   
3            650       JSE     gx      r30    chemical             1   
4           2097       JSE     gx      r30    chemical             1   
...          ...       ...    ...      ...         ...           ...   
9668         999       N2W   e3VL     e2DL  electrical             1   
9669        1000       N2W  pm5VL      M2L  electrical             1   
9670        1064       N2W   g1AL       M4  electrical             2   
9671        1098       N2W    pm1      e3D  electrical             3   
9672        1153       N2W     M5       I5  electrical             2   

      part-ner Num-ber post1      post2    post3  post4     
0                    2        P

In [2]:
# Pattern graph to search for: vertices {0, 1, 2} with edges (1, 0) and (1, 2).
src_list = [1, 1]
dst_list = [0, 2]
src_subgraph, dst_subgraph = ak.array(src_list), ak.array(dst_list)

# Ascending list of every vertex id used by the pattern.
subgraph_nodes = sorted(set(src_list).union(dst_list))

num_subgraph_edges = len(src_list)
num_subgraph_nodes = len(subgraph_nodes)

# Fixed (not random) attributes: every node carries (11, False) and every edge
# carries (6, False).
subgraph_node_ints = ak.array([11] * num_subgraph_nodes)
subgraph_node_bools = ak.array([False] * num_subgraph_nodes)
subgraph_edge_ints = ak.array([6] * num_subgraph_edges)
subgraph_edge_bools = ak.array([False] * num_subgraph_edges)

# Attribute tables for the pattern, mirroring the column names of the main
# graph's edge and node tables.
node_df_h = ak.DataFrame({
    "nodes": ak.array(subgraph_nodes),
    "lbls2": subgraph_node_ints,
    "lbls3": subgraph_node_bools
})
edge_df_h = ak.DataFrame({
    "src": src_subgraph,
    "dst": dst_subgraph,
    "rels1": subgraph_edge_ints,
    "rels2": subgraph_edge_bools
})

# Assemble the pattern as a property graph.
subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

print("Subgraph created with fixed node and edge attributes.")
print(" Run Arachne....")

Subgraph created with fixed node and edge attributes.
 Run Arachne....


In [3]:
# VF2-SI run with reorder_type="structural".
vf2_si_structural_options = dict(
    semantic_check="and",
    algorithm_type="si",
    reorder_type="structural",
    return_isos_as="vertices",
)
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, subgraph, **vf2_si_structural_options)

# Reported count: length of the first returned element divided by len(subgraph).
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 76118.0 monos inside of the graph


In [4]:
# VF2-SI PROBABILITY-MVE run with reorder_type="probability".
vf2_si_probability_options = dict(
    semantic_check="and",
    algorithm_type="si",
    reorder_type="probability",
    return_isos_as="vertices",
)
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, subgraph, **vf2_si_probability_options)

# Reported count: length of the first returned element divided by len(subgraph).
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 76118.0 monos inside of the graph


In [5]:
# VF2-PS DEFAULT run with no reordering (reorder_type=None).
vf2_ps_default_options = dict(
    semantic_check="and",
    algorithm_type="ps",
    reorder_type=None,
    return_isos_as="vertices",
)
isos_as_vertices = ar.subgraph_isomorphism(prop_graph, subgraph, **vf2_ps_default_options)

# Reported count: length of the first returned element divided by len(subgraph).
print(f"We found {len(isos_as_vertices[0])/len(subgraph)} monos inside of the graph")

We found 76118.0 monos inside of the graph


In [None]:
# Dump the edge and node tables of the main property graph, followed by those
# of the subgraph pattern, each preceded by its heading.
for heading, frame in (
    ("Property Graph Edges:", edge_df),
    ("\nProperty Graph Nodes:", node_df),
    ("\nSubgraph Edges:", edge_df_h),
    ("\nSubgraph Nodes:", node_df_h),
):
    print(heading)
    print(frame)