# cuGraph Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import cugraph
import json 
import pandas as pd

### Create graph dataframes

We use the Star Wars social network graph prepared by Evelina Gabasova: Gabasova, E. (2016). Star Wars social network. DOI: https://doi.org/10.5281/zenodo.1411479.

In [2]:
# Parse the Star Wars interaction data. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open until GC.
with open('data/starwars-full-interactions.json', 'r') as interactions_file:
    interactions = json.load(interactions_file)

# The JSON document is read through its two top-level keys, 'links' and 'nodes'.
# Each is first materialised as a pandas frame and then copied to the GPU.
edges = cudf.from_pandas(pd.DataFrame(interactions['links']))
nodes = (
    cudf.from_pandas(pd.DataFrame(interactions['nodes']))
    .drop('colour', axis=1)   # the 'colour' column is not used by this notebook
    .reset_index()            # exposes the row position as an explicit 'index' column
)

# Build the graph from the edge list: 'source' -> 'target', with 'value' as
# the edge attribute; vertex ids are renumbered by cuGraph (renumber=True).
g = cugraph.Graph()
g.from_cudf_edgelist(
    edges
    , source='source'
    , destination='target'
    , edge_attr='value'
    , renumber=True
)

---

# Graph link analysis

---

#### cugraph.link_analysis.hits.hits()

In [3]:
# HITS on the graph with the iteration count capped at 50; the result frame
# (hubs / authorities / vertex columns) is displayed as the cell output.
hits_scores = cugraph.hits(g, max_iter=50)
hits_scores

Unnamed: 0,hubs,authorities,vertex
0,0.005909,0.005909,54
1,0.001112,0.001112,53
2,0.002419,0.002419,69
3,0.004100,0.004100,73
4,0.016282,0.016282,20
...,...,...,...
104,0.015237,0.015237,23
105,0.000671,0.000671,107
106,0.004151,0.004151,22
107,0.001111,0.001111,108


#### cugraph.link_analysis.pagerank.pagerank()

In [4]:
# PageRank with explicit alpha, iteration cap and tolerance; the result frame
# (pagerank / vertex columns) is displayed as the cell output.
pagerank_scores = cugraph.pagerank(g, alpha=0.85, max_iter=500, tol=1.0e-05)
pagerank_scores

Unnamed: 0,pagerank,vertex
0,0.002328,54
1,0.001727,53
2,0.005251,69
3,0.009282,73
4,0.008331,20
...,...,...
104,0.004216,23
105,0.005770,107
106,0.004360,22
107,0.005802,108


---

# Graph link prediction

---

#### cugraph.link_prediction.jaccard.jaccard()

In [5]:
# `pairs` (the result of get_two_hop_neighbors) is reused by later cells as
# the explicit list of vertex pairs to score, so the name must stay as is.
pairs = g.get_two_hop_neighbors()

# Jaccard coefficient for each of those pairs.
jaccard_two_hop = cugraph.jaccard(g, pairs)
jaccard_two_hop

Unnamed: 0,jaccard_coeff,source,destination
0,0.166667,101,27
1,1.000000,101,99
2,0.133333,101,91
3,0.250000,101,96
4,0.200000,25,19
...,...,...,...
4845,0.090909,19,21
4846,0.136364,19,8
4847,0.125000,19,11
4848,0.187500,19,33


#### cugraph.link_prediction.jaccard.jaccard_coefficient()

In [6]:
# Jaccard coefficient with no explicit vertex pairs supplied.
jaccard_coeff_default = cugraph.jaccard_coefficient(g)
jaccard_coeff_default

Unnamed: 0,jaccard_coeff,source,destination
0,0.230769,73,64
1,0.250000,73,66
2,0.133333,73,85
3,0.444444,73,72
4,0.222222,73,71
...,...,...,...
791,0.275862,70,91
792,0.153846,70,96
793,0.083333,70,101
794,0.178571,70,57


In [7]:
# Same call, this time passing the two-hop `pairs` as the second argument.
# NOTE(review): the recorded output for this cell has the same row count as
# the call without `pairs`, not the row count cugraph.jaccard(g, pairs)
# produced - confirm the second argument is honored for a cugraph.Graph input.
jaccard_coeff_pairs = cugraph.jaccard_coefficient(g, pairs)
jaccard_coeff_pairs

Unnamed: 0,jaccard_coeff,source,destination
0,0.357143,85,57
1,0.133333,85,73
2,0.088235,88,21
3,0.125000,88,70
4,0.086957,88,67
...,...,...,...
791,0.041667,70,79
792,0.146341,21,91
793,0.081081,21,74
794,0.147059,21,47


#### cugraph.link_prediction.wjaccard.jaccard_w()

In [8]:
# Weighted Jaccard, using the edge list's 'value' column as the weights.
# NOTE(review): `edges['value']` holds one entry per edge row; confirm that is
# what `weights` expects (it may be meant as one weight per vertex) - TODO verify.
weighted_jaccard = cugraph.jaccard_w(g, weights=edges['value'])
weighted_jaccard

Unnamed: 0,jaccard_coeff,source,destination
0,0.159091,24,0
1,0.166667,24,23
2,0.166667,24,27
3,0.019841,40,17
4,0.039370,40,14
...,...,...,...
791,0.147059,70,79
792,0.075630,21,91
793,0.100000,21,74
794,0.186529,21,47


In [9]:
# Weighted Jaccard restricted to the two-hop `pairs`.
# NOTE(review): `edges['value']` holds one entry per edge row; confirm that is
# what `weights` expects (it may be meant as one weight per vertex) - TODO verify.
weighted_jaccard_pairs = cugraph.jaccard_w(
    g
    , weights=edges['value']
    , vertex_pair=pairs
)
weighted_jaccard_pairs

Unnamed: 0,jaccard_coeff,source,destination
0,0.015625,64,99
1,0.011364,64,56
2,0.040541,64,71
3,0.166667,64,49
4,0.122951,26,11
...,...,...,...
4845,0.200000,90,104
4846,0.586207,90,93
4847,0.358974,90,92
4848,0.222222,90,107


#### cugraph.link_prediction.overlap.overlap()

In [10]:
# Overlap coefficient with no explicit vertex pairs supplied.
overlap_default = cugraph.overlap(g)
overlap_default

Unnamed: 0,overlap_coeff,source,destination
0,0.555556,85,57
1,0.250000,85,73
2,0.750000,88,21
3,0.750000,88,70
4,0.500000,88,67
...,...,...,...
791,0.800000,49,48
792,0.800000,49,47
793,0.750000,58,33
794,0.750000,58,14


In [11]:
# Overlap coefficient for the two-hop `pairs` computed earlier in the notebook.
overlap_two_hop = cugraph.overlap(g, vertex_pair=pairs)
overlap_two_hop

Unnamed: 0,overlap_coeff,source,destination
0,0.200000,70,49
1,0.250000,70,58
2,0.571429,70,91
3,0.500000,70,44
4,0.777778,30,17
...,...,...,...
4845,1.000000,63,61
4846,0.200000,63,3
4847,0.333333,63,51
4848,1.000000,63,54


#### cugraph.link_prediction.overlap.overlap_coefficient()

In [12]:
# overlap_coefficient() called on the graph alone, with no vertex pairs passed.
overlap_coeff_default = cugraph.overlap_coefficient(g)
overlap_coeff_default

Unnamed: 0,overlap_coeff,source,destination
0,0.555556,85,57
1,0.250000,85,73
2,0.750000,88,21
3,0.750000,88,70
4,0.500000,88,67
...,...,...,...
791,0.571429,70,91
792,0.571429,70,96
793,0.666667,70,101
794,0.500000,70,57


#### cugraph.link_prediction.woverlap.overlap_w()

In [13]:
# Weighted overlap, using the edge list's 'value' column as the weights.
# NOTE(review): `edges['value']` holds one entry per edge row; confirm that is
# what `weights` expects (it may be meant as one weight per vertex) - TODO verify.
weighted_overlap = cugraph.overlap_w(g, weights=edges['value'])
weighted_overlap

Unnamed: 0,overlap_coeff,source,destination
0,0.062500,79,64
1,0.970588,43,4
2,0.428571,43,45
3,0.500000,43,44
4,0.920000,43,46
...,...,...,...
791,0.363636,49,48
792,0.909091,49,47
793,0.133333,58,33
794,0.966667,58,14


In [14]:
# Weighted overlap restricted to the two-hop `pairs`.
# NOTE(review): `edges['value']` holds one entry per edge row; confirm that is
# what `weights` expects (it may be meant as one weight per vertex) - TODO verify.
weighted_overlap_pairs = cugraph.overlap_w(
    g
    , weights=edges['value']
    , vertex_pair=pairs
)
weighted_overlap_pairs

Unnamed: 0,overlap_coeff,source,destination
0,0.600000,66,69
1,0.828571,66,73
2,0.125000,66,103
3,0.083333,66,16
4,0.181818,92,98
...,...,...,...
4845,0.276596,85,94
4846,0.571429,85,66
4847,0.520000,85,104
4848,0.043478,85,93
