# coding: utf-8
# # CS579: Assignment 1
#
# In this assignment, we'll implement community detection and link prediction algorithms using Facebook "like" data.
#
# The file `edges.txt.gz` indicates like relationships between facebook users. This was collected using snowball sampling: beginning with the user "Bill Gates", I crawled all the people he "likes", then, for each newly discovered user, I crawled all the people they liked.
#
# We'll cluster the resulting graph into communities, as well as recommend friends for Bill Gates.
#
# Complete the **15** methods below that are indicated by `TODO`. I've provided some sample output to help guide your implementation.
# You should not use any imports not listed here:
from collections import deque
from itertools import combinations
import networkx as nx
import urllib.request
## Community Detection
def example_graph():
"""
Create the example graph from class. Used for testing.
Do not modify.
"""
g = nx.Graph()
g.add_edges_from(
[('A', 'B'), ('A', 'C'), ('B', 'C'), ('B', 'D'), ('D', 'E'), ('D', 'F'), ('D', 'G'), ('E', 'F'), ('G', 'F')])
return g
def bfs(graph, root, max_depth):
"""
Perform breadth-first search to compute the shortest paths from a root node to all
other nodes in the graph. To reduce running time, the max_depth parameter ends
the search after the specified depth.
E.g., if max_depth=2, only paths of length 2 or less will be considered.
    This means that nodes greater than max_depth distance from the root will not
appear in the result.
You may use these two classes to help with this implementation:
https://docs.python.org/3.5/library/collections.html#collections.defaultdict
https://docs.python.org/3.5/library/collections.html#collections.deque
Params:
graph.......A networkx Graph
root........The root node in the search graph (a string). We are computing
shortest paths from this node to all others.
max_depth...An integer representing the maximum depth to search.
Returns:
node2distances...dict from each node to the length of the shortest path from
the root node
node2num_paths...dict from each node to the number of shortest paths from the
root node to this node.
node2parents.....dict from each node to the list of its parents in the search
tree
In the doctests below, we first try with max_depth=5, then max_depth=2.
>>> node2distances, node2num_paths, node2parents = bfs(example_graph(), 'E', 5)
>>> sorted(node2distances.items())
[('A', 3), ('B', 2), ('C', 3), ('D', 1), ('E', 0), ('F', 1), ('G', 2)]
>>> sorted(node2num_paths.items())
[('A', 1), ('B', 1), ('C', 1), ('D', 1), ('E', 1), ('F', 1), ('G', 2)]
>>> sorted((node, sorted(parents)) for node, parents in node2parents.items())
[('A', ['B']), ('B', ['D']), ('C', ['B']), ('D', ['E']), ('F', ['E']), ('G', ['D', 'F'])]
>>> node2distances, node2num_paths, node2parents = bfs(example_graph(), 'E', 2)
>>> sorted(node2distances.items())
[('B', 2), ('D', 1), ('E', 0), ('F', 1), ('G', 2)]
>>> sorted(node2num_paths.items())
[('B', 1), ('D', 1), ('E', 1), ('F', 1), ('G', 2)]
>>> sorted((node, sorted(parents)) for node, parents in node2parents.items())
[('B', ['D']), ('D', ['E']), ('F', ['E']), ('G', ['D', 'F'])]
"""
    node2distances = {root: 0}
    node2num_paths = {root: 1}
    node2parents = dict()
    q = deque([root])
    while len(q) > 0:
        n = q.popleft()
        depth = node2distances[n]
        if depth >= max_depth:
            # Nodes already max_depth away from the root are not expanded further.
            continue
        for neighbor in graph.neighbors(n):
            if neighbor not in node2distances:
                # First time this node is reached: record its distance, number of
                # shortest paths, and parent, then enqueue it.
                node2distances[neighbor] = depth + 1
                node2num_paths[neighbor] = node2num_paths[n]
                node2parents[neighbor] = [n]
                q.append(neighbor)
            elif node2distances[neighbor] == depth + 1:
                # Another shortest path of the same length reaches this node through n.
                node2num_paths[neighbor] += node2num_paths[n]
                node2parents[neighbor].append(n)
    return node2distances, node2num_paths, node2parents
def lists_overlap(a, b):
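    """Return True if lists a and b share at least one element."""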
return bool(set(a) & set(b))
def complexity_of_bfs(V, E, K):
"""
If V is the number of vertices in a graph, E is the number of
edges, and K is the max_depth of our approximate breadth-first
search algorithm, then what is the *worst-case* run-time of
this algorithm? As usual in complexity analysis, you can ignore
any constant factors. E.g., if you think the answer is 2V * E + 3log(K),
you would return V * E + math.log(K)
>>> v = complexity_of_bfs(13, 23, 7)
>>> type(v) == int or type(v) == float
True
"""
    '''
    Each vertex is enqueued and dequeued at most once - total O(V) (popping elements).
    Each edge is examined at most once from each endpoint - total O(E) (adding elements).
    Summing these operations gives O(V + E).
    For sparse graphs, where E = O(V), this is O(V) overall; for dense graphs,
    where E = O(V^2), it is O(E) overall. Since O(V + E) = O(max(V, E)),
    return the larger of the two.
    '''
    return max(V, E)
def bottom_up(root, node2distances, node2num_paths, node2parents):
"""
Compute the final step of the Girvan-Newman algorithm.
See p 352 From your text:
https://github.com/iit-cs579/main/blob/master/read/lru-10.pdf
The third and final step is to calculate for each edge e the sum
over all nodes Y of the fraction of shortest paths from the root
X to Y that go through e. This calculation involves computing this
sum for both nodes and edges, from the bottom. Each node other
than the root is given a credit of 1, representing the shortest
path to that node. This credit may be divided among nodes and
edges above, since there could be several different shortest paths
to the node. The rules for the calculation are as follows: ...
Params:
root.............The root node in the search graph (a string). We are computing
shortest paths from this node to all others.
node2distances...dict from each node to the length of the shortest path from
the root node
node2num_paths...dict from each node to the number of shortest paths from the
    root node to this node.
node2parents.....dict from each node to the list of its parents in the search
tree
Returns:
A dict mapping edges to credit value. Each key is a tuple of two strings
representing an edge (e.g., ('A', 'B')). Make sure each of these tuples
are sorted alphabetically (so, it's ('A', 'B'), not ('B', 'A')).
    Any edges excluded from the results in bfs should also be excluded here.
>>> node2distances, node2num_paths, node2parents = bfs(example_graph(), 'E', 5)
>>> result = bottom_up('E', node2distances, node2num_paths, node2parents)
>>> sorted(result.items())
[(('A', 'B'), 1.0), (('B', 'C'), 1.0), (('B', 'D'), 3.0), (('D', 'E'), 4.5), (('D', 'G'), 0.5), (('E', 'F'), 1.5), (('F', 'G'), 0.5)]
"""
bottom_up_dict = dict()
for node, parents in node2parents.items():
my_node_children_list = find_children(node, node2parents)
my_node_parents = node2parents[node]
my_parent_proportions = credit_proportion_for_parents(my_node_parents, node2num_paths)
my_credit = recurse_on_child(my_node_children_list, bottom_up_dict, node2parents, node2num_paths, node)
for p in my_node_parents:
            # Key edges by alphabetically sorted endpoints so each edge has one key.
            my_node_parent_edge = tuple(sorted((node, p)))
rounded_credit = round(my_parent_proportions[p] * my_credit, 1)
bottom_up_dict[my_node_parent_edge] = rounded_credit
return bottom_up_dict
def recurse_on_child(child_list, bottom_up_dict, node2parents, node2num_paths, child):
    # type: (list, dict, dict, dict, str) -> float
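    """
    Return the credit of `child` (the node currently being processed): 1 plus the
    credit assigned to each edge from `child` down to the nodes in child_list,
    caching per-edge credits in bottom_up_dict along the way.
    """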
my_credit = 1
if len(child_list) != 0:
for c in child_list:
            child_edge = tuple(sorted((child, c)))
if bottom_up_dict.get(child_edge, -1) == -1:
child_node_children_list = find_children(c, node2parents)
child_node_parents = node2parents[c]
child_parent_proportions = credit_proportion_for_parents(child_node_parents, node2num_paths)
child_credit = child_parent_proportions[child] * recurse_on_child(child_node_children_list,
bottom_up_dict,
node2parents, node2num_paths, c)
rounded_credit = round(child_credit, 1)
bottom_up_dict[child_edge] = rounded_credit
my_credit += rounded_credit
else:
my_credit = my_credit + bottom_up_dict[child_edge]
return round(my_credit, 1)
def find_children(node, node2parents):
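    """Return the nodes whose parent list contains `node`, i.e. the children of
    `node` in the BFS search tree."""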
list_of_children = []
for find_node, find_parent in node2parents.items():
if node in find_parent:
list_of_children.append(find_node)
return list_of_children
def credit_proportion_for_parents(parents, node2num_paths):
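    """Return a dict mapping each parent to the fraction of a node's credit it
    should receive, proportional to that parent's number of shortest paths."""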
list_of_shortest_path = []
parent_proportion_dict = dict()
for p in parents:
list_of_shortest_path.append(node2num_paths[p])
for p in parents:
p_proportion = node2num_paths[p] / float(sum(list_of_shortest_path))
parent_proportion_dict[p] = p_proportion
return parent_proportion_dict
def approximate_betweenness(graph, max_depth):
"""
Compute the approximate betweenness of each edge, using max_depth to reduce
computation time in breadth-first search.
You should call the bfs and bottom_up functions defined above for each node
in the graph, and sum together the results. Be sure to divide by 2 at the
end to get the final betweenness.
Params:
graph.......A networkx Graph
max_depth...An integer representing the maximum depth to search.
Returns:
A dict mapping edges to betweenness. Each key is a tuple of two strings
representing an edge (e.g., ('A', 'B')). Make sure each of these tuples
are sorted alphabetically (so, it's ('A', 'B'), not ('B', 'A')).
>>> sorted(approximate_betweenness(example_graph(), 2).items())
[(('A', 'B'), 2.0), (('A', 'C'), 1.0), (('B', 'C'), 2.0), (('B', 'D'), 6.0), (('D', 'E'), 2.5), (('D', 'F'), 2.0), (('D', 'G'), 2.5), (('E', 'F'), 1.5), (('F', 'G'), 1.5)]
"""
approx_betweenness = dict()
for node in graph.nodes():
node2distances, node2num_paths, node2parents = bfs(graph, node, max_depth)
temp_dict = bottom_up(node, node2distances, node2num_paths, node2parents)
for edge, betweenness in temp_dict.items():
current_bet = approx_betweenness.get(edge, 0)
approx_betweenness[edge] = round(current_bet, 1) + round(betweenness, 1)
for edge, betweenness in approx_betweenness.items():
approx_betweenness[edge] = round(betweenness / 2, 1)
return approx_betweenness
def get_components(graph):
"""
A helper function you may use below.
Returns the list of all connected components in the given graph.
"""
    # nx.connected_component_subgraphs was removed in recent networkx releases,
    # so build each component's subgraph explicitly.
    return [graph.subgraph(c).copy() for c in nx.connected_components(graph)]
def partition_girvan_newman(graph, max_depth):
"""
Use your approximate_betweenness implementation to partition a graph.
Unlike in class, here you will not implement this recursively. Instead,
just remove edges until more than one component is created, then return
those components.
That is, compute the approximate betweenness of all edges, and remove
them until multiple components are created.
You only need to compute the betweenness once.
If there are ties in edge betweenness, break by edge name (e.g.,
(('A', 'B'), 1.0) comes before (('B', 'C'), 1.0)).
Note: the original graph variable should not be modified. Instead,
make a copy of the original graph prior to removing edges.
See the Graph.copy method https://networkx.github.io/documentation/stable/reference/classes/generated/networkx.Graph.copy.html
Params:
graph.......A networkx Graph
max_depth...An integer representing the maximum depth to search.
Returns:
A list of networkx Graph objects, one per partition.
>>> components = partition_girvan_newman(example_graph(), 5)
>>> components = sorted(components, key=lambda x: sorted(x.nodes())[0])
>>> sorted(components[0].nodes())
['A', 'B', 'C']
>>> sorted(components[1].nodes())
['D', 'E', 'F', 'G']
"""
    graph_copy = graph.copy()
    # Compute the approximate betweenness once, then remove edges in order of
    # decreasing betweenness (ties broken by edge name) until the graph splits.
    betweenness = approximate_betweenness(graph_copy, max_depth)
    ranked_edges = sorted(betweenness.items(), key=lambda x: (-x[1], x[0]))
    components = get_components(graph_copy)
    i = 0
    while len(components) <= 1 and i < len(ranked_edges):
        edge = ranked_edges[i][0]
        graph_copy.remove_edge(edge[0], edge[1])
        components = get_components(graph_copy)
        i += 1
    return components
def get_subgraph(graph, min_degree):
"""Return a subgraph containing nodes whose degree is
greater than or equal to min_degree.
We'll use this in the main method to prune the original graph.
Params:
graph........a networkx graph
min_degree...degree threshold
Returns:
a networkx graph, filtered as defined above.
>>> subgraph = get_subgraph(example_graph(), 3)
>>> sorted(subgraph.nodes())
['B', 'D', 'F']
>>> len(subgraph.edges())
2
"""
graph_copy = graph.copy()
for node in graph.nodes():
if graph.degree(node) < min_degree:
graph_copy.remove_node(node)
return graph_copy
""""
Compute the normalized cut for each discovered cluster.
I've broken this down into the three next methods.
"""
def volume(nodes, graph):
"""
Compute the volume for a list of nodes, which
is the number of edges in `graph` with at least one end in
nodes.
Params:
nodes...a list of strings for the nodes to compute the volume of.
graph...a networkx graph
>>> volume(['A', 'B', 'C'], example_graph())
4
"""
count = 0
for e in graph.edges():
if e[0] in nodes or e[1] in nodes:
count += 1
return count
def cut(S, T, graph):
"""
Compute the cut-set of the cut (S,T), which is
the set of edges that have one endpoint in S and
the other in T.
Params:
S.......set of nodes in first subset
T.......set of nodes in second subset
graph...networkx graph
Returns:
An int representing the cut-set.
>>> cut(['A', 'B', 'C'], ['D', 'E', 'F', 'G'], example_graph())
1
"""
count = 0
for e in graph.edges():
if (e[0] in S and e[1] in T) or (e[0] in T and e[1] in S):
count = count + 1
return count
def norm_cut(S, T, graph):
"""
The normalized cut value for the cut S/T. (See lec06.)
Params:
S.......set of nodes in first subset
T.......set of nodes in second subset
graph...networkx graph
Returns:
    A float representing the normalized cut value
>>> norm_cut(['A', 'B', 'C'], ['D', 'E', 'F', 'G'], example_graph())
0.41666666666666663
"""
cut_value = cut(S, T, graph)
vol_s = volume(S, graph)
vol_t = volume(T, graph)
normalized_cut = (cut_value / float(vol_s)) + (cut_value / float(vol_t))
return normalized_cut
def brute_force_norm_cut(graph, max_size):
"""
Enumerate over all possible cuts of the graph, up to max_size, and compute the norm cut score.
Params:
graph......graph to be partitioned
max_size...maximum number of edges to consider for each cut.
    E.g., if max_size=2, consider removing edge sets
of size 1 or 2 edges.
Returns:
(unsorted) list of (score, edge_list) tuples, where
score is the norm_cut score for each cut, and edge_list
is the list of edges (source, target) for each cut.
Note: only return entries if removing the edges results in exactly
two connected components.
You may find itertools.combinations useful here.
>>> r = brute_force_norm_cut(example_graph(), 1)
>>> len(r)
1
>>> r
[(0.41666666666666663, [('B', 'D')])]
>>> r = brute_force_norm_cut(example_graph(), 2)
>>> len(r)
14
>>> sorted(r)[0]
(0.41666666666666663, [('A', 'B'), ('B', 'D')])
"""
max_sub_components = 2
graph_copy = graph.copy()
list_of_cut_score_ebunch = []
for i in range(0, max_size):
for ebunch in combinations(graph.edges(), i + 1):
ebunch = list(ebunch)
graph_copy.remove_edges_from(ebunch)
sub_components = get_components(graph_copy)
if len(sub_components) == max_sub_components:
norm_cut_value = norm_cut(sub_components[0], sub_components[1], graph)
list_of_cut_score_ebunch.append((norm_cut_value, ebunch))
graph_copy.add_edges_from(ebunch)
return list_of_cut_score_ebunch
def score_max_depths(graph, max_depths):
"""
In order to assess the quality of the approximate partitioning method
we've developed, we will run it with different values for max_depth
and see how it affects the norm_cut score of the resulting partitions.
Recall that smaller norm_cut scores correspond to better partitions.
Params:
graph........a networkx Graph
max_depths...a list of ints for the max_depth values to be passed
to calls to partition_girvan_newman
Returns:
A list of (int, float) tuples representing the max_depth and the
norm_cut value obtained by the partitions returned by
partition_girvan_newman. See Log.txt for an example.
"""
list_of_depth_cut_score = []
for depth in max_depths:
girvan_newman_partition = partition_girvan_newman(graph, depth)
list_of_depth_cut_score.append((depth, norm_cut(girvan_newman_partition[0], girvan_newman_partition[1], graph)))
return list_of_depth_cut_score
## Link prediction
# Next, we'll consider the link prediction problem. In particular,
# we will remove 5 of the accounts that Bill Gates likes and
# compute our accuracy at recovering those links.
def make_training_graph(graph, test_node, n):
"""
To make a training graph, we need to remove n edges from the graph.
As in lecture, we'll assume there is a test_node for which we will
remove some edges. Remove the edges to the first n neighbors of
test_node, where the neighbors are sorted alphabetically.
E.g., if 'A' has neighbors 'B' and 'C', and n=1, then the edge
('A', 'B') will be removed.
Be sure to *copy* the input graph prior to removing edges.
Params:
graph.......a networkx Graph
test_node...a string representing one node in the graph whose
edges will be removed.
n...........the number of edges to remove.
Returns:
A *new* networkx Graph with n edges removed.
In this doctest, we remove edges for two friends of D:
>>> g = example_graph()
>>> sorted(g.neighbors('D'))
['B', 'E', 'F', 'G']
>>> train_graph = make_training_graph(g, 'D', 2)
>>> sorted(train_graph.neighbors('D'))
['F', 'G']
"""
graph_copy = graph.copy()
count = 0
for node_neighbor in sorted(graph.neighbors(test_node)):
if count < n:
graph_copy.remove_edge(node_neighbor, test_node)
count = count + 1
return graph_copy
def jaccard(graph, node, k):
"""
Compute the k highest scoring edges to add to this node based on
the Jaccard similarity measure.
Note that we don't return scores for edges that already appear in the graph.
Params:
graph....a networkx graph
node.....a node in the graph (a string) to recommend links for.
k........the number of links to recommend.
Returns:
A list of tuples in descending order of score representing the
recommended new edges. Ties are broken by
alphabetical order of the terminal node in the edge.
In this example below, we remove edges (D, B) and (D, E) from the
example graph. The top two edges to add according to Jaccard are
(D, E), with score 0.5, and (D, A), with score 0. (Note that all the
other remaining edges have score 0, but 'A' is first alphabetically.)
>>> g = example_graph()
>>> train_graph = make_training_graph(g, 'D', 2)
>>> jaccard(train_graph, 'D', 2)
[(('D', 'E'), 0.5), (('D', 'A'), 0.0)]
"""
    graph_neighbors = set(graph.neighbors(node))
    # Score every non-neighbor of `node` (excluding node itself) as a candidate edge.
    candidate_edges = []
    for a in graph.nodes():
        if a not in graph_neighbors and a != node:
            candidate_edges.append((node, a))
    jc_score = nx.jaccard_coefficient(graph, candidate_edges)
    # Sort by descending score, breaking ties alphabetically by the terminal node.
    ranked = sorted(jc_score, key=lambda x: (-x[2], x[1]))[:k]
    return [((u, v), p) for u, v, p in ranked]
def evaluate(predicted_edges, graph):
"""
Return the fraction of the predicted edges that exist in the graph.
Args:
predicted_edges...a list of edges (tuples) that are predicted to
exist in this graph
graph.............a networkx Graph
Returns:
The fraction of edges in predicted_edges that exist in the graph.
In this doctest, the edge ('D', 'E') appears in the example_graph,
but ('D', 'A') does not, so 1/2 = 0.5
>>> evaluate([('D', 'E'), ('D', 'A')], example_graph())
0.5
"""
count = 0
for a in predicted_edges:
if a in graph.edges():
count = count + 1
prob = count / float(len(predicted_edges))
return prob
"""
Next, we'll download a real dataset to see how our algorithm performs.
"""
def download_data():
"""
Download the data. Done for you.
"""
urllib.request.urlretrieve('http://cs.iit.edu/~culotta/cs579/a1/edges.txt.gz', 'edges.txt.gz')
def read_graph():
""" Read 'edges.txt.gz' into a networkx **undirected** graph.
Done for you.
Returns:
A networkx undirected graph.
"""
return nx.read_edgelist('edges.txt.gz', delimiter='\t')
def main():
"""
FYI: This takes ~10-15 seconds to run on my laptop.
"""
download_data()
graph = read_graph()
print('graph has %d nodes and %d edges' %
(graph.order(), graph.number_of_edges()))
subgraph = get_subgraph(graph, 2)
print('subgraph has %d nodes and %d edges' %
(subgraph.order(), subgraph.number_of_edges()))
print('norm_cut scores by max_depth:')
print(score_max_depths(subgraph, range(1, 5)))
clusters = partition_girvan_newman(subgraph, 3)
print('%d clusters' % len(clusters))
print('first partition: cluster 1 has %d nodes and cluster 2 has %d nodes' %
(clusters[0].order(), clusters[1].order()))
print('smaller cluster nodes:')
print(sorted(clusters, key=lambda x: x.order())[0].nodes())
test_node = 'Bill Gates'
train_graph = make_training_graph(subgraph, test_node, 5)
print('train_graph has %d nodes and %d edges' %
(train_graph.order(), train_graph.number_of_edges()))
jaccard_scores = jaccard(train_graph, test_node, 5)
print('\ntop jaccard scores for Bill Gates:')
print(jaccard_scores)
print('jaccard accuracy=%g' %
evaluate([x[0] for x in jaccard_scores], subgraph))
if __name__ == '__main__':
main()