### Exercise 1: aka AnalyzeGraph

In [1]:
import time
import pygraphblas as grb
import algorithms
from pygraphblas.gviz import draw, draw_graph_op as draw_op
from pygraphblas.gviz import draw_cy

#### Step 1: load a graph from a matrix market file

Nodes are authors of papers and abstracts published at IEEE HPEC through 2019.
Edges connect coauthors of papers.

In [2]:
pathname = './Data/hpec_coauthors.mtx'

# Read the coauthorship graph from the Matrix Market file, requesting UINT64 values.
# Only the parse itself is timed; opening the file is excluded.
# perf_counter() is a monotonic, high-resolution clock, so a step lasting a few
# milliseconds is neither rounded to zero nor skewed by system clock adjustments.
with open(pathname, 'r') as f:
    t0 = time.perf_counter()
    M = grb.Matrix.from_mm(f, grb.UINT64)
    t1 = time.perf_counter()

print("*** Step 1: Elapsed time: %s sec." % (t1 - t0))

*** Step 1: Elapsed time: 0.035506486892700195 sec.


#### Step 2: compute some basic statistics on the graph

In [3]:
# Time only the GraphBLAS work; reporting happens after t1.
# perf_counter() is used because this step takes on the order of a millisecond,
# which is below the resolution time.time() offers on some platforms.
t0 = time.perf_counter()
num_rows = M.nrows
num_cols = M.ncols
num_vals = M.nvals

# Reduce the matrix to a vector with the PLUS monoid.
# NOTE(review): a PLUS reduction sums the stored values, so this is a plain
# neighbor count only if every stored value is 1 -- confirm against the data file.
degree = M.reduce_vector(grb.UINT64.PLUS_MONOID)

max_degree = degree.reduce_int(grb.UINT64.MAX_MONOID)
min_degree = degree.reduce_int(grb.UINT64.MIN_MONOID)
t1 = time.perf_counter()

print("*** Step 2: elapsed time: %s sec." % (t1 - t0))

print("Num nodes: ", num_rows)
print("Num edges: ", num_vals)
print("Avg degree:", float(num_vals)/float(num_rows))
print("Max degree:", max_degree)
print("Min degree:", min_degree)

# Find the node(s) whose degree equals the maximum. Every match is printed;
# if several nodes tie, the last one scanned is kept as target_ID.
# target_ID stays 0 when no entry matches.
target_ID = 0
for ix in range(num_rows):
    if degree.get(ix) == max_degree:
        target_ID = ix
        print("Node with max degree:", ix)

*** Step 2: elapsed time: 0.0018761157989501953 sec.
Num nodes:  1747
Num edges:  10072
Avg degree: 5.76531196336577
Max degree: 461
Min degree: 1
Node with max degree: 800


#### Step 3: Extract the two-hop neighborhood around the node with the highest degree

Signature:

`neighbors = algorithms.neighborhood(graph, src, num_hops)`

- `graph`: grb.Matrix representing a graph
- `src`: index of the node to start with
- `num_hops`: number of hops to traverse
- `neighbors`: grb.Vector with True marking all vertices reached in `num_hops`.

In [4]:
# Single source of truth for the hop count, so the traversal and the
# report below can never disagree.
num_hops = 2

# perf_counter() gives a monotonic, high-resolution reading suitable for
# timing a call that completes in about a millisecond.
t0 = time.perf_counter()
neighbors = algorithms.neighborhood(M, target_ID, num_hops)
t1 = time.perf_counter()
print("*** Step 3: elapsed time: %s sec." % (t1 - t0))

# neighbors.nvals is the number of stored entries, i.e. the vertices marked as reached.
print("Node", target_ID, "%d-hop neigborhood has" % num_hops, neighbors.nvals, "nodes")

*** Step 3: elapsed time: 0.0014147758483886719 sec.
Node 800 2-hop neigborhood has 436 nodes


#### Step 4: Extract the subgraph containing the neighborhood

In [5]:
# perf_counter() is monotonic and high-resolution, which matters for a step
# that finishes in roughly a millisecond.
t0 = time.perf_counter()

# Split the neighborhood vector into its stored indices and values; only the
# indices (the original node IDs of the neighborhood) are used below.
nb_indices, nb_vals = neighbors.to_lists()

# Keep only the rows and columns belonging to the neighborhood.
# Position k in sub_graph corresponds to original node nb_indices[k]
# (later cells map back through nb_indices for exactly this reason).
sub_graph = M.extract_matrix(nb_indices, nb_indices)
t1 = time.perf_counter()

print("*** Step 4: elapsed time: %s sec." % (t1 - t0))
print("Number of nodes in target ID's neighborhood:", len(nb_indices))
print("Number of edges in the subgraph:", sub_graph.nvals)
print("Component members:", nb_indices)

*** Step 4: elapsed time: 0.0013189315795898438 sec.
Number of nodes in target ID's neighborhood: 436
Number of edges in the subgraph: 4502
Component members: [0, 7, 9, 10, 13, 24, 31, 38, 41, 44, 48, 50, 51, 52, 55, 61, 80, 83, 84, 87, 91, 95, 96, 100, 106, 110, 111, 113, 114, 117, 120, 121, 128, 130, 136, 143, 149, 151, 153, 154, 161, 170, 175, 180, 181, 182, 192, 193, 196, 203, 204, 205, 209, 214, 221, 225, 226, 229, 236, 244, 248, 256, 259, 260, 261, 262, 269, 270, 276, 278, 281, 284, 285, 291, 295, 296, 305, 309, 312, 318, 321, 330, 348, 357, 362, 363, 371, 375, 376, 399, 411, 413, 422, 426, 429, 431, 434, 440, 443, 444, 447, 448, 453, 459, 460, 461, 463, 466, 468, 470, 471, 475, 479, 482, 486, 494, 502, 508, 516, 519, 525, 536, 538, 540, 545, 547, 548, 549, 561, 565, 567, 573, 577, 578, 586, 587, 596, 600, 610, 611, 615, 616, 618, 623, 626, 629, 631, 633, 638, 639, 642, 644, 646, 650, 655, 658, 659, 660, 664, 665, 666, 673, 675, 676, 682, 685, 686, 689, 701, 707, 711, 712, 714, 7

#### Step 5: Perform PageRank on the subgraph

Signature:

`pr = algorithms.pagerank(graph)`

- `graph`: grb.Matrix representing a graph
- `pr`: grb.Vector containing the rank of each node in the graph

In [6]:
# perf_counter() is monotonic and high-resolution, so this few-millisecond
# call is measured reliably.
t0 = time.perf_counter()
pr = algorithms.pagerank(sub_graph)
t1 = time.perf_counter()
print("*** Step 5: elapsed time: %s sec." % (t1 - t0))

# NOTE(review): the exact float comparisons below assume the FP32 reduction
# returns a value identical to one of the entries from to_lists(); confirm
# that pagerank() produces an FP32 vector.
max_rank = pr.reduce_float(grb.FP32.MAX_MONOID)
min_rank = pr.reduce_float(grb.FP32.MIN_MONOID)

# Walk the (sub_graph index, rank) pairs together. nb_indices translates a
# sub_graph index back to the author's ID in the full graph.
pr_indices, pr_ranks = pr.to_lists()
for sub_ix, rank in zip(pr_indices, pr_ranks):
    author = nb_indices[sub_ix]
    if rank == max_rank:
        print("Author with highest rank:   %4d, rank = %f, (sub_graph index = %d)" %
              (author, max_rank, sub_ix))
    if rank == min_rank:
        print("Author with lowest rank:    %4d, rank = %f, (sub_graph index = %d)" %
              (author, min_rank, sub_ix))
    if author == target_ID:
        print("Author with highest degree: %4d, rank = %f, (sub_graph index = %d)" %
              (target_ID, rank, sub_ix))

*** Step 5: elapsed time: 0.0052258968353271484 sec.
Author with highest degree:  800, rank = 0.013600, (sub_graph index = 196)
Author with lowest rank:    1094, rank = 0.000364, (sub_graph index = 270)
Author with highest rank:   1169, rank = 0.013663, (sub_graph index = 288)


In [7]:
# Appearance of each node: a tiny blue circle labelled with its id,
# with the label placed to the bottom-right in small black text.
node_style = {
    'background-color': 'blue',
    'label': 'data(id)',
    'width': 2,
    'height': 2,
    'shape': 'circle',
    'color': '#000000',
    'font-weight': 400,
    'text-halign': 'right',
    'text-valign': 'bottom',
    'font-size': 4,
}

# Appearance of each edge: a thin, fully opaque green line.
# Edge labels are intentionally not set.
edge_style = {
    'width': 0.2,
    'opacity': 1,
    'line-color': 'green',
    'font-size': 4,
}

# Stylesheet passed to the Cytoscape widget: one rule per selector.
style = [
    {'selector': 'node', 'style': node_style},
    {'selector': 'edge', 'style': edge_style},
]

# Last expression of the cell, so the returned widget is rendered as the output.
draw_cy(sub_graph, visual_style=style)

Cytoscape(data={'directed': True, 'elements': {'nodes': [{'data': {'id': '0'}}, {'data': {'id': '15'}}, {'data…