In [1]:
from pyspark import SparkConf
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from graphframes import *
# spark session and context setup
conf = SparkConf().setAppName('appName').setMaster('local')
sc = SparkContext.getOrCreate(conf)
spark = SparkSession(sc)
# graphframes basic example
nodes = spark.read.csv('cidades.csv', header=True)
rels = spark.read.csv('relations.csv', header=True)
g = GraphFrame(nodes, rels)

In [2]:
g.vertices.show()

+--------------------+---------+----------+---------+
|                  id| latitude| longitude|populacao|
+--------------------+---------+----------+---------+
|              Recife|-8.053889|-34.880833| 1645727 |
|              Olinda|-8.008889|   -34.855|   390771|
|             Caruaru|-8.282778|-35.975833|   361118|
|Santa Cruz do Cap...|-7.956944|   -36.205|   107937|
|           Petrolina|-9.392778|-40.507778|   349145|
|           Pesqueira|-8.361667|-36.694722|    66881|
+--------------------+---------+----------+---------+



In [3]:
g.edges.show()

+--------------------+--------------------+------------------+
|                 src|                 dst|      relationship|
+--------------------+--------------------+------------------+
|              Recife|              Olinda| 5.757491906294353|
|              Recife|             Caruaru|123.21975279682546|
|              Recife|Santa Cruz do Cap...|146.24953979065546|
|              Recife|           Petrolina| 636.2964802826075|
|              Recife|           Pesqueira|202.60471438298742|
|              Olinda|              Recife| 5.757491906294353|
|              Olinda|             Caruaru|127.11660109042127|
|              Olinda|Santa Cruz do Cap...|148.81728268257336|
|              Olinda|           Petrolina| 640.2782234019045|
|              Olinda|           Pesqueira|206.31276007928233|
|             Caruaru|              Recife|123.21975279682546|
|             Caruaru|              Olinda|127.11660109042127|
|             Caruaru|Santa Cruz do Cap...|44.162227354

In [4]:
far_from_capital_cities = g.bfs("id = 'Recife'", "id !=  'Recife'", edgeFilter="relationship > 100")
far_from_capital_cities.show()

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Caruaru,...|[Caruaru, -8.2827...|
|[Recife, -8.05388...|[Recife, Santa Cr...|[Santa Cruz do Ca...|
|[Recife, -8.05388...|[Recife, Petrolin...|[Petrolina, -9.39...|
|[Recife, -8.05388...|[Recife, Pesqueir...|[Pesqueira, -8.36...|
+--------------------+--------------------+--------------------+



In [5]:
really_far_from_capital_cities = g.bfs("id = 'Recife'", "id !=  'Recife'", edgeFilter="relationship > 400")
really_far_from_capital_cities.show()

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Petrolin...|[Petrolina, -9.39...|
+--------------------+--------------------+--------------------+



In [10]:
travel_to_low_populated_cities = g.bfs("id = 'Recife'", "populacao < 100000")
travel_to_low_populated_cities.show()

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Pesqueir...|[Pesqueira, -8.36...|
+--------------------+--------------------+--------------------+



In [12]:
travel_to_medium_populated_cities = g.bfs("id = 'Recife'", "populacao < 350000 and populacao > 100000")
travel_to_medium_populated_cities.show()

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Santa Cr...|[Santa Cruz do Ca...|
|[Recife, -8.05388...|[Recife, Petrolin...|[Petrolina, -9.39...|
+--------------------+--------------------+--------------------+



In [13]:
travel_to_high_populated_cities = g.bfs("id = 'Recife'", "populacao > 350000")
travel_to_high_populated_cities.show()

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Olinda, ...|[Olinda, -8.00888...|
|[Recife, -8.05388...|[Recife, Caruaru,...|[Caruaru, -8.2827...|
+--------------------+--------------------+--------------------+



In [18]:
find_path = g.find("(a)-[ab]->(b)")
find_path.filter("a.populacao > 300000").show()

+--------------------+--------------------+--------------------+
|                   a|                  ab|                   b|
+--------------------+--------------------+--------------------+
|[Olinda, -8.00888...|[Olinda, Recife, ...|[Recife, -8.05388...|
|[Olinda, -8.00888...|[Olinda, Caruaru,...|[Caruaru, -8.2827...|
|[Olinda, -8.00888...|[Olinda, Santa Cr...|[Santa Cruz do Ca...|
|[Olinda, -8.00888...|[Olinda, Petrolin...|[Petrolina, -9.39...|
|[Olinda, -8.00888...|[Olinda, Pesqueir...|[Pesqueira, -8.36...|
|[Caruaru, -8.2827...|[Caruaru, Recife,...|[Recife, -8.05388...|
|[Caruaru, -8.2827...|[Caruaru, Olinda,...|[Olinda, -8.00888...|
|[Caruaru, -8.2827...|[Caruaru, Santa C...|[Santa Cruz do Ca...|
|[Caruaru, -8.2827...|[Caruaru, Petroli...|[Petrolina, -9.39...|
|[Caruaru, -8.2827...|[Caruaru, Pesquei...|[Pesqueira, -8.36...|
|[Petrolina, -9.39...|[Petrolina, Recif...|[Recife, -8.05388...|
|[Petrolina, -9.39...|[Petrolina, Olind...|[Olinda, -8.00888...|
|[Petrolina, -9.39...|[Pe

In [35]:
find_path = g.find("(a)-[ab]->(b); (b)-[bc]->(c)")
find_path.filter("b.populacao > 350000 and b.populacao > 390000 and c.populacao < 350000").show()

+--------------------+--------------------+--------------------+--------------------+--------------------+
|                   a|                  ab|                   b|                  bc|                   c|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|[Recife, -8.05388...|[Recife, Olinda, ...|[Olinda, -8.00888...|[Olinda, Pesqueir...|[Pesqueira, -8.36...|
|[Recife, -8.05388...|[Recife, Olinda, ...|[Olinda, -8.00888...|[Olinda, Petrolin...|[Petrolina, -9.39...|
|[Recife, -8.05388...|[Recife, Olinda, ...|[Olinda, -8.00888...|[Olinda, Santa Cr...|[Santa Cruz do Ca...|
|[Caruaru, -8.2827...|[Caruaru, Olinda,...|[Olinda, -8.00888...|[Olinda, Pesqueir...|[Pesqueira, -8.36...|
|[Caruaru, -8.2827...|[Caruaru, Olinda,...|[Olinda, -8.00888...|[Olinda, Petrolin...|[Petrolina, -9.39...|
|[Caruaru, -8.2827...|[Caruaru, Olinda,...|[Olinda, -8.00888...|[Olinda, Santa Cr...|[Santa Cruz do Ca...|
|[Santa Cruz do Ca...|[Santa Cruz do 

In [38]:
big_cities = g.filterVertices("populacao > 300000")
big_cities.vertices.show()

+---------+---------+----------+---------+
|       id| latitude| longitude|populacao|
+---------+---------+----------+---------+
|   Olinda|-8.008889|   -34.855|   390771|
|  Caruaru|-8.282778|-35.975833|   361118|
|Petrolina|-9.392778|-40.507778|   349145|
+---------+---------+----------+---------+



In [40]:
little_cities = g.filterVertices("populacao < 300000")
little_cities.vertices.show()

+--------------------+---------+----------+---------+
|                  id| latitude| longitude|populacao|
+--------------------+---------+----------+---------+
|Santa Cruz do Cap...|-7.956944|   -36.205|   107937|
|           Pesqueira|-8.361667|-36.694722|    66881|
+--------------------+---------+----------+---------+

