In [1]:
import arkouda as ak
import arachne as ar

    _         _                   _       
   / \   _ __| | _____  _   _  __| | __ _ 
  / _ \ | '__| |/ / _ \| | | |/ _` |/ _` |
 / ___ \| |  |   < (_) | |_| | (_| | (_| |
/_/   \_\_|  |_|\_\___/ \__,_|\__,_|\__,_|
                                          

Client Version: v2023.11.15


In [2]:
# NOTE: Point SERVER_HOST at the machine that is actually running the Arkouda
# server before executing this cell.
SERVER_HOST = "n51"
SERVER_PORT = 5555
ak.connect(SERVER_HOST, SERVER_PORT)

connected to arkouda server tcp://*:5555


In [3]:
# Problem size for the synthetic property graph built below.
n = 10 ** 6  # upper bound handed to randint when generating vertex ids
m = 10 ** 9  # number of random edges (and attribute rows) to generate
k = 2        # upper bound handed to the random attribute generators

In [4]:
# Random endpoints for m edges; vertex ids are bounded by n.
src_array = ak.randint(0, n, m, dtype=ak.dtype('int64'), seed=2)
dst_array = ak.randint(0, n, m, dtype=ak.dtype('int64'), seed=4)

# One random attribute column per dtype exercised by this notebook. Every call
# is seeded so the generated data is intended to be repeatable across runs.
int_array = ak.randint(-1, k, m, dtype=ak.dtype('int64'), seed=6)
uint_array = ak.randint(0, k, m, dtype=ak.dtype('uint64'), seed=8)
real_array = ak.randint(0, k, m, dtype=ak.dtype('float64'), seed=10)
bool_array = ak.randint(0, k, m, dtype=ak.dtype('bool'), seed=12)

# `characters` names a character set ("uppercase", "lowercase", "numeric", ...);
# it is not a literal alphabet. The literal alphabet passed here previously was
# not a valid set name, and the recorded label_mapper output in this notebook
# shows uppercase strings, so the set actually in effect is named explicitly.
strings_array = ak.random_strings_uniform(0, k, m, characters="uppercase", seed=14)

In [5]:
# Create the Arachne property graph object that the following cells populate.
prop_graph = ar.PropGraph()

In [6]:
# Assemble the edge table: the two endpoint columns followed by one attribute
# column for each generated array.
test_edge_dict = dict(
    src=src_array,
    dst=dst_array,
    data1=int_array,
    data2=uint_array,
    data3=real_array,
    data4=bool_array,
    data5=strings_array,
)
test_edge_df = ak.DataFrame(test_edge_dict)

In [7]:
# Load the edge table into the graph. "src"/"dst" identify the endpoints, and
# "data5" and "data1" are additionally passed as relationship columns.
prop_graph.load_edge_attributes(
    test_edge_df,
    source_column="src",
    destination_column="dst",
    relationship_columns=["data5", "data1"],
)

In [8]:
# Display the edge attribute table held by the graph (endpoints plus data1..data5).
# NOTE(review): the recorded output shows data5 as integers rather than strings,
# presumably codes for the relationship values — confirm against Arachne docs.
prop_graph.get_edge_attributes()

Unnamed: 0,src,dst,data1,data2,data3,data4,data5
0,0,266,1,0,0.640015,True,0
1,0,1013,-1,0,1.057109,True,0
2,0,2806,1,0,0.569387,False,9
3,0,3646,0,0,1.829369,False,4
4,0,3686,1,1,1.348952,False,0
...,...,...,...,...,...,...,...
999499340,999999,994687,1,1,1.390592,True,0
999499341,999999,995598,0,0,1.099502,True,4
999499342,999999,996090,1,0,0.252629,False,0
999499343,999999,996867,-1,0,0.460681,True,6


In [9]:
# Display only the endpoint columns plus the columns passed as relationships
# ("data5" and "data1") when the edge attributes were loaded.
prop_graph.get_edge_relationships()

Unnamed: 0,src,dst,data5,data1
0,0,266,0,1
1,0,1013,0,-1
2,0,2806,9,1
3,0,3646,4,0
4,0,3686,0,1
...,...,...,...,...
999499340,999999,994687,0,1
999499341,999999,995598,4,0
999499342,999999,996090,0,1
999499343,999999,996867,6,-1


In [10]:
# Attribute bound stays the same as for the edge data.
k = 2
# CAUTION: m is rebound here. Until now it was the number of generated edges;
# from this cell on it is len(prop_graph), which sizes the node attribute
# arrays that get paired with prop_graph.nodes() below. Re-running the edge
# generation cell after this one would therefore use the new value.
m = len(prop_graph)

In [11]:
# Regenerate one random attribute column per dtype, now sized by the current m
# (set from len(prop_graph) in the previous cell). These names overwrite the
# edge attribute arrays created earlier in the notebook.
int_array = ak.randint(-1, k, m, dtype=ak.dtype('int64'), seed=6)
uint_array = ak.randint(0, k, m, dtype=ak.dtype('uint64'), seed=8)
real_array = ak.randint(0, k, m, dtype=ak.dtype('float64'), seed=10)
bool_array = ak.randint(0, k, m, dtype=ak.dtype('bool'), seed=12)

# `characters` names a character set ("uppercase", "lowercase", "numeric", ...);
# it is not a literal alphabet. The literal alphabet passed here previously was
# not a valid set name, and the recorded label_mapper output in this notebook
# shows uppercase strings, so the set actually in effect is named explicitly.
strings_array = ak.random_strings_uniform(0, k, m, characters="uppercase", seed=14)

In [12]:
# Assemble the node table: the graph's node ids followed by one attribute
# column for each regenerated array.
test_node_dict = dict(
    nodes=prop_graph.nodes(),
    data1=int_array,
    data2=uint_array,
    data3=real_array,
    data4=bool_array,
    data5=strings_array,
)
test_node_df = ak.DataFrame(test_node_dict)

In [13]:
# Load the node table into the graph. "nodes" identifies the vertex, and
# "data5" and "data2" are additionally passed as label columns.
prop_graph.load_node_attributes(
    test_node_df,
    node_column="nodes",
    label_columns=["data5", "data2"],
)

In [14]:
# Display the node attribute table held by the graph (node ids plus data1..data5).
prop_graph.get_node_attributes()

Unnamed: 0,nodes,data1,data2,data3,data4,data5
0,0,0,0,0.638154,False,2
1,1,0,0,1.366654,False,12
2,2,0,1,0.887981,True,14
3,3,-1,0,1.219599,False,0
4,4,1,0,1.191131,True,17
...,...,...,...,...,...,...
999995,999995,1,0,1.663987,False,0
999996,999996,-1,0,0.781876,True,9
999997,999997,1,0,1.614944,False,0
999998,999998,1,0,0.226886,True,0


In [15]:
# Display only the node id column plus the columns passed as labels
# ("data5" and "data2") when the node attributes were loaded.
prop_graph.get_node_labels()

Unnamed: 0,nodes,data5,data2
0,0,2,0
1,1,12,0
2,2,14,1
3,3,0,0
4,4,17,0
...,...,...,...
999995,999995,0,0
999996,999996,9,0
999997,999997,0,0
999998,999998,0,0


In [17]:
# Look up the label mapper entry for "data5"; the recorded output lists the
# distinct string values ('' and 'A'..'Z').
# NOTE(review): presumably the integer data5 values shown in the tables above
# index into this array — confirm against the Arachne documentation.
prop_graph.label_mapper["data5"]

array(['', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'])

In [18]:
def node_filter(node_attributes):
    """Select nodes whose "data2" attribute equals 0.

    node_attributes: object indexable by column name that exposes "data2".
    Returns the result of comparing that column with 0 (a boolean mask when
    the column is an array).
    """
    data2_is_zero = node_attributes["data2"] == 0
    return data2_is_zero

In [19]:
def edge_filter(edge_attributes):
    """Select edges whose "data1" attribute is greater than -1.

    edge_attributes: object indexable by column name that exposes "data1".
    Returns the result of comparing that column with -1 (a boolean mask when
    the column is an array).
    """
    data1_above_minus_one = edge_attributes["data1"] > -1
    return data1_above_minus_one

In [20]:
# Subgraph view built with the node filter only (data2 == 0).
subgraph_nodes = prop_graph.subgraph_view(filter_node=node_filter)

In [21]:
# Subgraph view built with the edge filter only (data1 > -1).
subgraph_edges = prop_graph.subgraph_view(filter_edge=edge_filter)

In [22]:
# Subgraph view built with both the node filter (data2 == 0) and the edge
# filter (data1 > -1) supplied in the same call.
subgraph_together = prop_graph.subgraph_view(
    filter_node=node_filter,
    filter_edge=edge_filter,
)

In [26]:
# Report size() of the subgraph built with the node filter only.
print(f"Subgraph generated with edge size: {subgraph_nodes.size()}")

Subgraph generated with edge size: 750442115


In [27]:
# Report size() of the subgraph built with the edge filter only.
print(f"Subgraph generated with edge size: {subgraph_edges.size()}")

Subgraph generated with edge size: 666325452


In [28]:
# Report size() of the subgraph built with both the node and edge filters.
print(f"Subgraph generated with edge size: {subgraph_together.size()}")

Subgraph generated with edge size: 167128950
