In [1]:
from pyspark.sql import SparkSession
from pyspark.context import SparkContext
from pyspark.sql import SQLContext

In [None]:
# Build (or reuse) the SparkSession. ``spark.jars.packages`` asks Spark to
# resolve the GraphFrames artifact listed below when the session starts.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL")
    .config("spark.jars.packages", "graphframes:graphframes:0.8.2-spark3.2-s_2.12")
    .getOrCreate()
)

# Bind the SQLContext to the session created above instead of letting the
# constructor look one up on its own: the one-argument form is the deprecated
# Spark 3.x usage, while passing the session makes the link explicit.
# The ``sqlContext`` name is kept because later cells call it.
sqlContext = SQLContext(spark.sparkContext, sparkSession=spark)

In [3]:
# Import the one name this notebook uses from graphframes explicitly, rather
# than a wildcard import that hides where ``GraphFrame`` comes from.
from graphframes import GraphFrame

In [4]:
# Vertex table: one row per entry with its identifier, algorithm label and
# run time. Column names are supplied separately and the types are inferred.
vertex_columns = ["id", "alg", "run_time"]
vertex_rows = [
    ("entry_1", "alg_1", 3.6),
    ("entry_2", "alg_2", 3.7),
    ("entry_3", "alg_3", 1.9),
    ("entry_4", "alg_4", 0.85),
    ("entry_5", "alg_5", 0.85),
    ("entry_6", "alg_6", 2.2),
    ("entry_7", "alg_7", 0.4),
    ("entry_8", "alg_8", 0.3),
]
v = sqlContext.createDataFrame(vertex_rows, schema=vertex_columns)

In [5]:
# Edge table: each row is a directed link (src -> dst) between two entry ids,
# carrying an integer ``wait`` attribute.
edge_columns = ["src", "dst", "wait"]
edge_rows = [
    ("entry_1", "entry_2", 1),
    ("entry_2", "entry_1", 2),
    ("entry_3", "entry_1", 2),
    ("entry_1", "entry_4", 9),
    ("entry_3", "entry_8", 8),
    ("entry_2", "entry_6", 3),
    ("entry_6", "entry_2", 2),
    ("entry_6", "entry_5", 5),
    ("entry_5", "entry_7", 10),
]
e = sqlContext.createDataFrame(edge_rows, schema=edge_columns)

In [6]:
# Combine the vertex and edge DataFrames into a single graph object.
g = GraphFrame(v=v, e=e)

In [7]:
# Print the graph's vertex table (show() defaults spelled out: first 20 rows,
# long cell values truncated).
g.vertices.show(n=20, truncate=True)



+-------+-----+--------+
|     id|  alg|run_time|
+-------+-----+--------+
|entry_1|alg_1|     3.6|
|entry_2|alg_2|     3.7|
|entry_3|alg_3|     1.9|
|entry_4|alg_4|    0.85|
|entry_5|alg_5|    0.85|
|entry_6|alg_6|     2.2|
|entry_7|alg_7|     0.4|
|entry_8|alg_8|     0.3|
+-------+-----+--------+





In [8]:
# Print the graph's edge table (show() defaults spelled out: first 20 rows,
# long cell values truncated).
g.edges.show(n=20, truncate=True)

+-------+-------+----+
|    src|    dst|wait|
+-------+-------+----+
|entry_1|entry_2|   1|
|entry_2|entry_1|   2|
|entry_3|entry_1|   2|
|entry_1|entry_4|   9|
|entry_3|entry_8|   8|
|entry_2|entry_6|   3|
|entry_6|entry_2|   2|
|entry_6|entry_5|   5|
|entry_5|entry_7|  10|
+-------+-------+----+



In [9]:
# Print the per-vertex degree table exposed by the graph.
g.degrees.show(n=20, truncate=True)

+-------+------+
|     id|degree|
+-------+------+
|entry_1|     4|
|entry_2|     4|
|entry_4|     1|
|entry_3|     2|
|entry_8|     1|
|entry_6|     3|
|entry_7|     1|
|entry_5|     2|
+-------+------+



In [10]:
# In-degree table, displayed with the largest in-degree first.
ind = g.inDegrees
ind.orderBy("inDegree", ascending=False).show()

+-------+--------+
|     id|inDegree|
+-------+--------+
|entry_2|       2|
|entry_1|       2|
|entry_4|       1|
|entry_8|       1|
|entry_7|       1|
|entry_6|       1|
|entry_5|       1|
+-------+--------+



In [11]:
# Print the out-degree table exposed by the graph.
g.outDegrees.show(n=20, truncate=True)

+-------+---------+
|     id|outDegree|
+-------+---------+
|entry_1|        2|
|entry_2|        2|
|entry_3|        2|
|entry_6|        2|
|entry_5|        1|
+-------+---------+



In [13]:
# Register a checkpoint directory on the SparkContext before running connected
# components. NOTE(review): the GraphFrames user guide states the default
# algorithm requires one -- confirm for the version in use.
checkpoint_dir = "/tmp/checkpoints2"
spark.sparkContext.setCheckpointDir(checkpoint_dir)

# Compute connected components; the result is kept for display in the next cell.
result = g.connectedComponents()



In [14]:
# Print the connected-components result computed in the previous cell.
result.show(n=20, truncate=True)

+-------+-----+--------+------------+
|     id|  alg|run_time|   component|
+-------+-----+--------+------------+
|entry_1|alg_1|     3.6|214748364800|
|entry_2|alg_2|     3.7|214748364800|
|entry_3|alg_3|     1.9|214748364800|
|entry_4|alg_4|    0.85|214748364800|
|entry_5|alg_5|    0.85|214748364800|
|entry_6|alg_6|     2.2|214748364800|
|entry_7|alg_7|     0.4|214748364800|
|entry_8|alg_8|     0.3|214748364800|
+-------+-----+--------+------------+



In [15]:
# Run PageRank until convergence within the given tolerance, then list the
# vertices from highest to lowest score.
PageRankResults = g.pageRank(
    resetProbability=0.15,
    tol=0.01,
)
ranked_vertices = PageRankResults.vertices.orderBy("pagerank", ascending=False)
ranked_vertices.show()



+-------+-----+--------+------------------+
|     id|  alg|run_time|          pagerank|
+-------+-----+--------+------------------+
|entry_2|alg_2|     3.7|1.4311828823278607|
|entry_1|alg_1|     3.6|1.2711967122398973|
|entry_7|alg_7|     0.4|1.1995692587580498|
|entry_6|alg_6|     2.2| 1.073476575691486|
|entry_4|alg_4|    0.85|0.9902188955315402|
|entry_5|alg_5|    0.85|0.9061878374984652|
|entry_8|alg_8|     0.3|0.6629439872505565|
|entry_3|alg_3|     1.9|0.4652238507021449|
+-------+-----+--------+------------------+



In [16]:
# Print the edge table of the PageRank result graph.
PageRankResults.edges.show(n=20, truncate=True)



+-------+-------+----+------+
|    src|    dst|wait|weight|
+-------+-------+----+------+
|entry_1|entry_4|   9|   0.5|
|entry_1|entry_2|   1|   0.5|
|entry_6|entry_5|   5|   0.5|
|entry_6|entry_2|   2|   0.5|
|entry_5|entry_7|  10|   1.0|
|entry_3|entry_8|   8|   0.5|
|entry_3|entry_1|   2|   0.5|
|entry_2|entry_1|   2|   0.5|
|entry_2|entry_6|   3|   0.5|
+-------+-------+----+------+

