In [1]:
import argparse
import os
import random
import time as time

import arachne as ar
import arkouda as ak
import networkx as nx
import pandas as pd

# Connect to the Arkouda server. Host and port default to the values of the
# original run ("n82", 5555) and can be overridden with the ARKOUDA_SERVER_HOST /
# ARKOUDA_SERVER_PORT environment variables, so the notebook can be pointed at a
# different server without editing code.
arkouda_host = os.environ.get("ARKOUDA_SERVER_HOST", "n82")
arkouda_port = int(os.environ.get("ARKOUDA_SERVER_PORT", "5555"))
ak.connect(arkouda_host, arkouda_port)

### Get Arkouda server configuration information.
config = ak.get_config()
num_locales = config["numLocales"]  # reported by the server under "numLocales"
num_pus = config["numPUs"]          # reported by the server under "numPUs"
print(f"Arkouda server running with {num_locales}L and {num_pus}PUs.")

# Sampling probabilities for the synthetic node and edge attributes.
# Each entry is [p_first, p_second]; only the first probability is read when the
# attributes are generated, the second documents the complementary outcome.
node_lbl_probs = dict(
    lbls2=[0.6, 0.4],  # integer label: 10 (first) vs 11 (second)
    lbls3=[0.6, 0.4],  # boolean label: True (first) vs False (second)
)
edge_rel_probs = dict(
    rels1=[0.6, 0.4],  # integer relationship: 5 (first) vs 6 (second)
    rels2=[0.6, 0.4],  # boolean relationship: True (first) vs False (second)
)

# Experiment note: with every probability pair set to [0.8, 0.2] and the subgraph
# fixed to lbls2=11, lbls3=False, rels1=6, rels2=False, exactly 1 isomorphism was found.

# Location of the hemibrain "traced-roi-connections.csv" export. The default is the
# path used for the original run; set HEMIBRAIN_ROI_CONNECTIONS_CSV to load the file
# from another location without editing the notebook.
hemibrain_csv_path = os.environ.get(
    "HEMIBRAIN_ROI_CONNECTIONS_CSV",
    "/scratch/users/oaa9/experimentation/data/connectome/hemibrain/exported-traced-adjacencies-v1.2/traced-roi-connections.csv",
)
hemibrain_traced_roi_connections = pd.read_csv(hemibrain_csv_path)

# Move the pandas frame(s) to the Arkouda server.
neuron_dfs_in_pandas = [hemibrain_traced_roi_connections]
neuron_dfs_in_arkouda = [ak.DataFrame(pd_df) for pd_df in neuron_dfs_in_pandas]

ak_hemibrain_traced_roi_connections = neuron_dfs_in_arkouda[0]

# Keep one row per (bodyId_pre, bodyId_post) pair: `segments` holds the start offset
# of every group in the grouped order and `permutation` maps those offsets back to
# original row indices, so permutation[segments] selects the first row of each group.
# NOTE(review): the remaining rows of a pair (its other ROI entries and their weights)
# are dropped here — confirm that is intended for the later "roi"/"weight" statistics.
ak_hemibrain_traced_roi_connections_gb = ak_hemibrain_traced_roi_connections.groupby(["bodyId_pre", "bodyId_post"])
first_row_of_each_pair = ak_hemibrain_traced_roi_connections_gb.permutation[ak_hemibrain_traced_roi_connections_gb.segments]
ak_hemibrain_traced_roi_connections_sorted = ak_hemibrain_traced_roi_connections[first_row_of_each_pair]

# Rename the endpoint columns to the names used for the edge list ("src"/"dst").
# The new columns are added before the originals are deleted, so they end up after
# the remaining columns in the frame's column order.
ak_hemibrain_traced_roi_connections_sorted['src'] = ak_hemibrain_traced_roi_connections_sorted['bodyId_pre']
del ak_hemibrain_traced_roi_connections_sorted['bodyId_pre']  # Remove the original column

ak_hemibrain_traced_roi_connections_sorted['dst'] = ak_hemibrain_traced_roi_connections_sorted['bodyId_post']
del ak_hemibrain_traced_roi_connections_sorted['bodyId_post']  # Remove the original column

print(ak_hemibrain_traced_roi_connections_sorted.columns)

# Build the attributed data graph. Everything stays on the Arkouda server: the edge
# endpoints are used directly as pdarrays instead of being copied to the client as
# Python lists and sent back.
edge_src = ak_hemibrain_traced_roi_connections_sorted['src']
edge_dst = ak_hemibrain_traced_roi_connections_sorted['dst']

# Every vertex that occurs as an edge endpoint, in ascending order.
all_nodes = ak.sort(ak.unique(ak.concatenate([edge_src, edge_dst])))

num_nodes = all_nodes.size
num_edges = edge_src.size

# Seed for the synthetic attributes so a re-run against the same server
# configuration regenerates the same graph. Each draw gets its own seed;
# reusing one seed for equally sized draws would make the attributes identical.
ATTRIBUTE_SEED = 42

# Randomly generate node attributes: lbls2 is 10 with probability
# node_lbl_probs["lbls2"][0] (else 11); lbls3 is True with probability
# node_lbl_probs["lbls3"][0].
node_lbls2 = ak.where(
    ak.randint(0, 100, num_nodes, seed=ATTRIBUTE_SEED) < node_lbl_probs["lbls2"][0] * 100, 10, 11
)
node_lbls3 = ak.randint(0, 100, num_nodes, seed=ATTRIBUTE_SEED + 1) < node_lbl_probs["lbls3"][0] * 100

# Randomly generate edge attributes: rels1 is 5 with probability
# edge_rel_probs["rels1"][0] (else 6); rels2 is True with probability
# edge_rel_probs["rels2"][0].
edge_rels1 = ak.where(
    ak.randint(0, 100, num_edges, seed=ATTRIBUTE_SEED + 2) < edge_rel_probs["rels1"][0] * 100, 5, 6
)
edge_rels2 = ak.randint(0, 100, num_edges, seed=ATTRIBUTE_SEED + 3) < edge_rel_probs["rels2"][0] * 100

# Create dataframes
edge_df = ak.DataFrame({
    "src": edge_src,
    "dst": edge_dst,
    "rels1": edge_rels1,
    "rels2": edge_rels2
})

node_df = ak.DataFrame({
    "nodes": all_nodes,
    "lbls2": node_lbls2,
    "lbls3": node_lbls3
})

# Create the property graph
prop_graph = ar.PropGraph()
prop_graph.load_edge_attributes(edge_df, source_column="src", destination_column="dst")
prop_graph.load_node_attributes(node_df, node_column="nodes")

# The attributes above are sampled, not fixed, so the message says so.
print("Property graph created with randomly generated node (lbls2, lbls3) and edge (rels1, rels2) attributes.")


print("Data loaded now we are loading the subraph....")

# Query pattern on three vertices with directed edges 0->1, 1->2 and 1->0.
src_list, dst_list = [0, 1, 1], [1, 2, 0]
src_subgraph = ak.array(src_list)
dst_subgraph = ak.array(dst_list)

# Distinct vertex ids of the pattern, ascending.
subgraph_nodes = sorted(set(src_list).union(dst_list))

num_subgraph_nodes = len(subgraph_nodes)
num_subgraph_edges = len(src_list)

# Fixed (not random) attribute values: every pattern vertex gets lbls2=11 and
# lbls3=False, every pattern edge gets rels1=6 and rels2=False.
subgraph_node_ints = ak.array([11 for _ in range(num_subgraph_nodes)])
subgraph_node_bools = ak.array([False for _ in range(num_subgraph_nodes)])
subgraph_edge_ints = ak.array([6 for _ in range(num_subgraph_edges)])
subgraph_edge_bools = ak.array([False for _ in range(num_subgraph_edges)])

# Attribute tables for the pattern, using the same column names as the data graph.
edge_df_h = ak.DataFrame(
    {"src": src_subgraph, "dst": dst_subgraph, "rels1": subgraph_edge_ints, "rels2": subgraph_edge_bools}
)

node_df_h = ak.DataFrame(
    {"nodes": ak.array(subgraph_nodes), "lbls2": subgraph_node_ints, "lbls3": subgraph_node_bools}
)

# Assemble the pattern as a property graph from the two tables.
subgraph = ar.PropGraph()
subgraph.load_edge_attributes(edge_df_h, source_column="src", destination_column="dst")
subgraph.load_node_attributes(node_df_h, node_column="nodes")

print("Subgraph created with fixed node and edge attributes.")

print(" Run Arachne....")


    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2024.06.21


connected to arkouda server tcp://*:5555


Arkouda server running with 1L and 128PUs.
Index(['roi', 'weight', 'src', 'dst'], dtype='<U0')
Property graph created with fixed node (lbls2=10, lbls3=True) and edge attributes (rels1=5, rels2=True).
Data loaded now we are loading the subraph....
Subgraph created with fixed node and edge attributes.
 Run Arachne....


In [11]:
# Show the connection table after de-duplication and the src/dst rename; as the
# last expression of the cell it is rendered with the frame's rich repr.
ak_hemibrain_traced_roi_connections_sorted 
# Debug aid (disabled): print(num_edges) reports the number of edges instead.


Unnamed: 0,roi,weight,src,dst
0,SLP(R),3,200326126,264083994
1,ICL(R),2,200326126,295816140
4,SCL(R),1,200326126,296203440
5,SLP(R),2,200326126,325122109
6,SCL(R),1,200326126,326474963
...,...,...,...,...
4259619,SMP(R),1,7112622236,328283521
4259620,SMP(R),1,7112622236,357932060
4259621,SMP(R),1,7112622236,357940977
4259622,SMP(R),1,7112622236,358631450


In [21]:
# Empirical distribution of the "weight" column: one group per distinct weight,
# with the number of rows that carry it.
weight_group = ak.GroupBy(ak_hemibrain_traced_roi_connections_sorted["weight"])
unique_weights, counts = weight_group.size()

# Relative frequency of each weight = its row count / total number of rows.
total_weights = counts.sum()
probabilities = counts / total_weights

# argsort is ascending, so reverse it to list the most frequent weights first.
sorted_indices = ak.argsort(probabilities)[::-1]

# Reorder all three columns with the same permutation so rows stay aligned.
sorted_weights, sorted_counts, sorted_probabilities = (
    column[sorted_indices] for column in (unique_weights, counts, probabilities)
)

# Summary table, most frequent weight first.
sorted_weight_df = ak.DataFrame(
    dict(Weight=sorted_weights, Count=sorted_counts, Probability=sorted_probabilities)
)

# Display the sorted results
sorted_weight_df


Unnamed: 0,Weight,Count,Probability
0,1,1900192,5.352046e-01
1,2,620259,1.747010e-01
2,3,288869,8.136231e-02
3,4,165819,4.670428e-02
4,5,107429,3.025826e-02
...,...,...,...
380,247,1,2.816582e-07
381,244,1,2.816582e-07
382,236,1,2.816582e-07
383,225,1,2.816582e-07


In [17]:
# Quick look at the value sets of the "roi" and "weight" columns.
roi_column = ak_hemibrain_traced_roi_connections_sorted["roi"]
weight_column = ak_hemibrain_traced_roi_connections_sorted["weight"]

print(ak.unique(roi_column))            # distinct ROI values
print(ak.unique(weight_column))         # distinct weight values
print(ak.GroupBy(roi_column).size())    # result of size() for the ROI groups
# Result of size() for the weight groups, kept for inspection (not printed).
Total = ak.GroupBy(weight_column).size()

['EB', 'FB', 'GNG', 'IB', 'NO', 'PB', 'SAD', 'AB(L)', 'AB(R)', 'AL(L)', 'AL(R)', 'BU(L)', 'BU(R)', 'CA(R)', 'LH(R)', 'LO(R)', 'ME(R)', 'aL(L)', 'aL(R)', 'bL(L)', 'bL(R)', 'gL(L)', 'gL(R)', 'AME(R)', 'AOTU(R)', 'ATL(L)', 'ATL(R)', 'AVLP(R)', 'CAN(R)', 'CRE(L)', 'CRE(R)', 'EPA(L)', 'EPA(R)', 'FLA(R)', 'GOR(L)', 'GOR(R)', 'ICL(L)', 'ICL(R)', 'IPS(R)', 'LAL(L)', 'LAL(R)', 'LOP(R)', 'PED(R)', 'PLP(R)', 'PVLP(R)', 'SCL(L)', 'SCL(R)', 'SIP(L)', 'SIP(R)', 'SLP(R)', 'SMP(L)', 'SMP(R)', 'SPS(L)', 'SPS(R)', 'VES(L)', 'VES(R)', 'WED(R)', 'a'L(L)', 'a'L(R)', 'b'L(L)', 'b'L(R)', 'NotPrimary']
[1 2 3 ... 1015 1028 1409]
(array(['EB', 'FB', 'GNG', 'IB', 'NO', 'PB', 'SAD', 'AB(L)', 'AB(R)', 'AL(L)', 'AL(R)', 'BU(L)', 'BU(R)', 'CA(R)', 'LH(R)', 'LO(R)', 'ME(R)', 'aL(L)', 'aL(R)', 'bL(L)', 'bL(R)', 'gL(L)', 'gL(R)', 'AME(R)', 'AOTU(R)', 'ATL(L)', 'ATL(R)', 'AVLP(R)', 'CAN(R)', 'CRE(L)', 'CRE(R)', 'EPA(L)', 'EPA(R)', 'FLA(R)', 'GOR(L)', 'GOR(R)', 'ICL(L)', 'ICL(R)', 'IPS(R)', 'LAL(L)', 'LAL(R)', 'LOP(R)',

In [22]:
# Integer-encode the "roi" column on the Arkouda server. Only the distinct ROI
# values are brought to the client (to build the lookup dict), instead of pulling
# every row over and mapping it in a Python loop.
# NOTE: this cell overwrites the "roi" column in place. Re-running it on the
# already-encoded column rebuilds `roi_to_int` from the integer codes, so the
# string-to-integer mapping is only available from the first run.
roi_groups = ak.GroupBy(ak_hemibrain_traced_roi_connections_sorted["roi"])

# Distinct ROI values in the group order; the position in this array is the code.
unique_rois = roi_groups.unique_keys

# Create a mapping from each unique ROI value to its integer code
roi_to_int = {roi: code for code, roi in enumerate(unique_rois.to_ndarray().tolist())}

# Give every row the index of its group: broadcast assigns one value per group to
# all members of that group and (permute=True) returns them in original row order.
roi_as_int = roi_groups.broadcast(ak.arange(unique_rois.size), permute=True)

# Replace the "roi" column with its integer representation
ak_hemibrain_traced_roi_connections_sorted["roi"] = roi_as_int

# Display the mapping for reference
print("ROI to Integer Mapping:", roi_to_int)

# Verify the updated dataframe
print("Updated ROI Column:", ak_hemibrain_traced_roi_connections_sorted["roi"])


ROI to Integer Mapping: {'EB': 0, 'FB': 1, 'GNG': 2, 'IB': 3, 'NO': 4, 'PB': 5, 'SAD': 6, 'AB(L)': 7, 'AB(R)': 8, 'AL(L)': 9, 'AL(R)': 10, 'BU(L)': 11, 'BU(R)': 12, 'CA(R)': 13, 'LH(R)': 14, 'LO(R)': 15, 'ME(R)': 16, 'aL(L)': 17, 'aL(R)': 18, 'bL(L)': 19, 'bL(R)': 20, 'gL(L)': 21, 'gL(R)': 22, 'AME(R)': 23, 'AOTU(R)': 24, 'ATL(L)': 25, 'ATL(R)': 26, 'AVLP(R)': 27, 'CAN(R)': 28, 'CRE(L)': 29, 'CRE(R)': 30, 'EPA(L)': 31, 'EPA(R)': 32, 'FLA(R)': 33, 'GOR(L)': 34, 'GOR(R)': 35, 'ICL(L)': 36, 'ICL(R)': 37, 'IPS(R)': 38, 'LAL(L)': 39, 'LAL(R)': 40, 'LOP(R)': 41, 'PED(R)': 42, 'PLP(R)': 43, 'PVLP(R)': 44, 'SCL(L)': 45, 'SCL(R)': 46, 'SIP(L)': 47, 'SIP(R)': 48, 'SLP(R)': 49, 'SMP(L)': 50, 'SMP(R)': 51, 'SPS(L)': 52, 'SPS(R)': 53, 'VES(L)': 54, 'VES(R)': 55, 'WED(R)': 56, "a'L(L)": 57, "a'L(R)": 58, "b'L(L)": 59, "b'L(R)": 60, 'NotPrimary': 61}
Updated ROI Column: [49 37 46 ... 51 51 49]


In [2]:
"""VF2-SI """
# Search the data graph for the pattern with algorithm_type "si" and
# reorder_type "structural", requesting the matches as vertices.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="and",
    algorithm_type="si",
    reorder_type="structural",
    return_isos_as="vertices",
)

# Presumably element 0 is a flat array holding len(subgraph) vertices per match,
# so the ratio is the number of matches — TODO confirm against the Arachne docs.
matches_found = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {matches_found} monos inside of the graph")

We found 8119.0 monos inside of the graph


In [3]:
"""VF2-SI PROBABILITY-MVE"""
# Search the data graph for the pattern with algorithm_type "si" and
# reorder_type "probability", requesting the matches as vertices.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="and",
    algorithm_type="si",
    reorder_type="probability",
    return_isos_as="vertices",
)

# Presumably element 0 is a flat array holding len(subgraph) vertices per match,
# so the ratio is the number of matches — TODO confirm against the Arachne docs.
matches_found = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {matches_found} monos inside of the graph")

We found 8119.0 monos inside of the graph


In [4]:
"""VF2-PS DEFAULT"""
# Search the data graph for the pattern with algorithm_type "ps" and no
# reordering (reorder_type=None), requesting the matches as vertices.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="and",
    algorithm_type="ps",
    reorder_type=None,
    return_isos_as="vertices",
)

# Presumably element 0 is a flat array holding len(subgraph) vertices per match,
# so the ratio is the number of matches — TODO confirm against the Arachne docs.
matches_found = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {matches_found} monos inside of the graph")
# Debug aid (disabled): print(isos_as_vertices) shows the raw result.

We found 8119.0 monos inside of the graph


In [5]:
"""VF2-PS MVE-REORDERING"""
# Search the data graph for the pattern with algorithm_type "ps" and
# reorder_type "structural", requesting the matches as vertices.
isos_as_vertices = ar.subgraph_isomorphism(
    prop_graph,
    subgraph,
    semantic_check="and",
    algorithm_type="ps",
    reorder_type="structural",
    return_isos_as="vertices",
)

# Presumably element 0 is a flat array holding len(subgraph) vertices per match,
# so the ratio is the number of matches — TODO confirm against the Arachne docs.
matches_found = len(isos_as_vertices[0]) / len(subgraph)
print(f"We found {matches_found} monos inside of the graph")

We found 8119.0 monos inside of the graph


In [6]:
# Print the four attribute tables (data graph first, then the pattern), each
# preceded by its heading. Headings after the first start with a blank line.
frames_to_show = [
    ("Property Graph Edges:", edge_df),
    ("\nProperty Graph Nodes:", node_df),
    ("\nSubgraph Edges:", edge_df_h),
    ("\nSubgraph Nodes:", node_df_h),
]
for heading, frame in frames_to_show:
    print(heading)
    print(frame)



Property Graph Edges:
DataFrame(['src', 'dst', 'rels1', 'rels2'], 3,550,403 rows, 117163299.00 B)

Property Graph Nodes:
DataFrame(['nodes', 'lbls2', 'lbls3'], 21,739 rows, 543475.00 B)

Subgraph Edges:
DataFrame(['src', 'dst', 'rels1', 'rels2'], 3 rows, 99.00 B)

Subgraph Nodes:
DataFrame(['nodes', 'lbls2', 'lbls3'], 3 rows, 75.00 B)
