### Exercise 1 (aka AnalyzeGraph)

In [1]:
import time
import pygraphblas as grb
import algorithms
from pygraphblas.gviz import draw, draw_graph_op as draw_op
from pygraphblas.gviz import draw_cy

#### Step 1: load a graph from a matrix market file

Nodes are authors of papers and abstracts published at IEEE HPEC through 2019.
Edges connect coauthors of papers.

In [2]:
# Location of the Matrix Market file holding the coauthorship graph.
pathname = './Data/hpec_coauthors.mtx'

# Only the Matrix Market parse is timed: the clock starts after the file
# has been opened and stops as soon as the matrix has been built.
with open(pathname, 'r') as f:
    t0 = time.time()
    M = grb.Matrix.from_mm(f, grb.types.UINT64)
    t1 = time.time()

step1_elapsed = t1 - t0
print("*** Step 1: Elapsed time: %s sec." % step1_elapsed)

*** Step 1: Elapsed time: 0.03556942939758301 sec.


#### Step 2: compute some basic statistics on the graph

In [3]:
# Step 2: gather basic statistics about the graph stored in M.
# Only the GraphBLAS property reads and reductions are timed; the report
# and the max-degree search below run after the clock has stopped.
t0 = time.time()
num_rows = M.nrows
num_cols = M.ncols
num_vals = M.nvals

# Collapse the matrix to one value per node with the PLUS monoid.
# NOTE(review): PLUS adds the stored values, so this equals the neighbour
# count only if every stored edge value is 1 -- confirm against the data file.
degree = M.reduce_vector(grb.types.UINT64.PLUS_MONOID)

# NOTE(review): these reductions presumably cover stored entries only, so a
# node with no edges would not pull the minimum down to 0 -- confirm.
max_degree = degree.reduce_int(grb.types.UINT64.MAX_MONOID)
min_degree = degree.reduce_int(grb.types.UINT64.MIN_MONOID)
t1 = time.time()

print("*** Step 2: elapsed time: %s sec." % (t1 - t0))

print("Num nodes: ", num_rows)
print("Num edges: ", num_vals)
print("Avg degree:", float(num_vals)/float(num_rows))
print("Max degree:", max_degree)
print("Min degree:", min_degree)

# Find the node(s) whose degree equals the maximum.
# The stored entries are pulled out once with to_lists() instead of probing
# every index in range(num_rows) with degree.get(ix); indices without a
# stored entry can never match max_degree, so skipping them changes nothing.
# Sorting keeps the walk in ascending index order, so when several nodes tie
# the highest index is still the one left in target_ID.
target_ID = 0
[degree_indices, degree_vals] = degree.to_lists()
for ix, node_degree in sorted(zip(degree_indices, degree_vals)):
    if node_degree == max_degree:
        target_ID = ix
        print("Node with max degree:", ix)

*** Step 2: elapsed time: 0.0012514591217041016 sec.
Num nodes:  1747
Num edges:  10072
Avg degree: 5.76531196336577
Max degree: 461
Min degree: 1
Node with max degree: 800


#### Step 3: Run a connected components algorithm and find the component containing the highest-degree node

Signature:

`num_components, cc_assignments = connected_components(graph)`

- `graph`: grb.Matrix representing a graph
- `num_components`: integer with the number of connected components found
- `cc_assignments`: grb.Vector containing the component assignment of each node (from 0 to num_components-1)

In [4]:
# Step 3: label every node with its connected component.
# Only the call into the algorithms module is timed.
t0 = time.time()
cc_result = algorithms.connected_components(M)
t1 = time.time()
print("*** Step 3: elapsed time: %s sec." % (t1 - t0))

# The result is a (component count, per-node assignment vector) pair.
num_ccs, components = cc_result
print("Found", num_ccs, "connected components")

# Component that contains the node selected in Step 2 (target_ID).
target_component_ID = components.get(target_ID)
print("Node", target_ID, "component ID is", target_component_ID)

*** Step 3: elapsed time: 0.02256011962890625 sec.
Found 246 connected components
Node 800 component ID is 0


#### Step 4: Find all the nodes in the target node's component

In [8]:
# Step 4: collect every node assigned to the same component as target_ID.
t0 = time.time()

# Keep only the entries of the assignment vector equal to the target
# component ID; the surviving indices are the member nodes.
cluster_mask = components.select('==', target_component_ID)

# Split the selected entries into parallel index / value lists.
cluster_indices, cluster_vals = cluster_mask.to_lists()
component_size = len(cluster_indices)
t1 = time.time()

print("*** Step 4: elapsed time: %s sec." % (t1 - t0))
print("Number of nodes in target ID's component:", component_size)
print("Component members:", cluster_indices)

*** Step 4: elapsed time: 0.0015413761138916016 sec.
Number of nodes in target ID's component: 822
Component members: [0, 1, 7, 9, 10, 11, 12, 13, 19, 23, 24, 31, 33, 34, 38, 39, 41, 43, 44, 48, 49, 50, 51, 52, 55, 61, 65, 67, 69, 72, 75, 78, 80, 81, 83, 84, 87, 88, 89, 90, 91, 93, 95, 96, 100, 101, 102, 104, 106, 107, 110, 111, 113, 114, 117, 120, 121, 122, 123, 125, 126, 128, 130, 133, 136, 143, 149, 150, 151, 153, 154, 160, 161, 163, 166, 169, 170, 175, 176, 177, 178, 180, 181, 182, 183, 186, 187, 189, 192, 193, 194, 196, 198, 203, 204, 205, 206, 209, 210, 211, 214, 216, 217, 221, 223, 225, 226, 228, 229, 234, 235, 236, 243, 244, 247, 248, 249, 255, 256, 259, 260, 261, 262, 263, 266, 269, 270, 273, 276, 277, 278, 281, 283, 284, 285, 286, 288, 289, 291, 295, 296, 297, 300, 305, 306, 308, 309, 310, 312, 313, 315, 317, 318, 321, 323, 324, 326, 327, 328, 330, 332, 334, 335, 338, 342, 343, 347, 348, 349, 356, 357, 362, 363, 364, 371, 374, 375, 376, 377, 391, 393, 399, 400, 410, 411, 413,

#### Step 5: Extract the target component and run PageRank on it

Signature:

`pr = pagerank(graph)`

- `graph`: grb.Matrix representing a graph
- `pr`: grb.Vector containing the rank of each node in the graph

In [36]:
# Step 5: rank the members of the target component.
# The timed region covers extracting the sub-matrix and running PageRank.
t0 = time.time()

# Rows and columns are both restricted to the component members, so
# sub_graph position k corresponds to original node cluster_indices[k].
sub_graph = M.extract_matrix(cluster_indices, cluster_indices)

pr = algorithms.pagerank(sub_graph)
t1 = time.time()
print("*** Step 5: elapsed time: %s sec." % (t1 - t0))

# NOTE(review): the FP32 monoids assume pr holds 32-bit floats -- confirm
# against the type algorithms.pagerank actually returns.
max_rank = pr.reduce_float(grb.types.FP32.MAX_MONOID)
min_rank = pr.reduce_float(grb.types.FP32.MIN_MONOID)
print("min rank:", min_rank)
print("max rank:", max_rank)

# Walk the ranked entries once, reporting the extremes and the node picked
# in Step 2. Sub-graph positions are mapped back to original node IDs
# through cluster_indices.
[pr_indices, pr_ranks] = pr.to_lists()
for sub_ix, rank in zip(pr_indices, pr_ranks):
    author_id = cluster_indices[sub_ix]
    if rank == max_rank:
        print("Author with highest rank: %d, (sub_graph index = %d)" % (author_id, sub_ix))
    if rank == min_rank:
        print("Author with lowest rank:  %d, (sub_graph index = %d)" % (author_id, sub_ix))
    if author_id == target_ID:
        print("Author with higest degree: %d, rank = %s, (sub_graph index = %d)" % (target_ID, rank, sub_ix))

*** Step 5: elapsed time: 0.007628679275512695 sec.
min rank: 0.00019327869813423604
max rank: 0.009313824586570263
Author with higest degree: 800, rank = 0.0075648571364581585, (sub_graph index = 357)
Author with lowest rank:  1094, (sub_graph index = 513)
Author with highest rank: 1424, (sub_graph index = 676)


In [29]:
# Visual settings handed to draw_cy for the component sub-graph.

# Nodes: small blue circles labelled with their element id.
# NOTE(review): the id shown is presumably the sub_graph index rather than
# the original node ID -- confirm before reading labels as author IDs.
node_style = {
    'background-color': 'blue',
    'label': 'data(id)',
    'width': 2,
    'height': 2,
    'shape': 'circle',
    'color': '#000000',
    'font-weight': 400,
    'text-halign': 'right',
    'text-valign': 'bottom',
    'font-size': 4,
}

# Edges: thin, fully opaque green lines; no 'label' entry is set for edges.
edge_style = {
    'width': 0.2,
    'opacity': 1,
    'line-color': 'green',
    'font-size': 4,
}

style = [
    {'selector': 'node', 'style': node_style},
    {'selector': 'edge', 'style': edge_style},
]

draw_cy(sub_graph, visual_style=style)

Cytoscape(data={'directed': True, 'elements': {'nodes': [{'data': {'id': '0'}}, {'data': {'id': '25'}}, {'data…