# PySpark and GraphFrame

In [1]:
from pyspark import SparkConf
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from graphframes import *
from graphframes.examples import Graphs

In [2]:
# Spark session and context setup.
# The session is obtained through the documented `SparkSession.builder`
# entry point: `getOrCreate()` returns the existing session when one is
# already running (e.g. when this cell is re-executed) and otherwise creates
# one from `conf`.

conf = SparkConf().setAppName('app').setMaster('local')
spark = SparkSession.builder.config(conf=conf).getOrCreate()
sc = spark.sparkContext  # raw SparkContext, kept under its original name

In [47]:
# Read the vertex and edge CSV files and build the graph object.
# Both files carry a header row; it supplies the `id` column for the vertices
# and the `src` / `dst` columns for the edges.
#
# `inferSchema=True` lets Spark type numeric columns such as `populacao` as
# numbers instead of strings, so the population filters further down compare
# numbers rather than relying on an implicit string-to-number cast.
#
# NOTE(review): the Olinda row displays its longitude with literal quotes and
# a leading space, which suggests a stray space before the quoted field in
# cities.csv -- confirm and clean the file (or try ignoreLeadingWhiteSpace).

nodes = spark.read.csv('data/cities.csv', header=True, inferSchema=True)
rels = spark.read.csv('data/relationship.csv', header=True)
graph = GraphFrame(nodes, rels)

In [4]:
# Display the graph's vertices (at most the first 20 rows, the `show` default)

graph.vertices.show(n=20)

+---------+---------+-------------+---------+
|       id| latitude|    longitude|populacao|
+---------+---------+-------------+---------+
|   Recife|8.0522° S|   34.9286° W|  1645727|
|   Olinda|7.9907° S| "34.8416° W"|   392482|
|  Caruaru|8.2760° S|   35.9819° W|   361118|
|Tamandare|8.7579° S|   35.1051° W|    23388|
| Floresta|8.5969° S|   38.5744° W|    32873|
|Garanhuns|8.8829° S|   36.4969° W|   139788|
|  Gravata|8.2116° S|   35.5678° W|    84074|
|  Carpina|7.8450° S|   35.2437° W|    83641|
|   Buique|8.6211° S|   37.1572° W|    58378|
|  Cabrobo|8.5082° S|   39.3103° W|    34221|
+---------+---------+-------------+---------+



In [5]:
# Display the graph's edges (at most the first 20 rows, the `show` default)

graph.edges.show(n=20)

+---------+---------+------------+
|      src|      dst|relationship|
+---------+---------+------------+
|   Recife|   Olinda|    Favorito|
|   Recife|  Caruaru|    Favorito|
|   Recife|Tamandare|    Favorito|
|  Caruaru|Tamandare|       Barro|
|  Caruaru|  Gravata|    Perigoso|
|  Gravata|   Recife|       Barro|
|  Caruaru|   Olinda|       Barro|
|   Olinda|  Gravata|    Perigoso|
|  Cabrobo|   Recife|       Barro|
|  Cabrobo|  Gravata|    Perigoso|
|   Recife|Garanhuns|    Favorito|
|   Recife|  Gravata|       Barro|
|   Recife|  Cabrobo|    Favorito|
|  Cabrobo|   Olinda|    Favorito|
|  Carpina|   Olinda|       Barro|
|  Carpina|  Gravata|    Perigoso|
|Garanhuns|  Gravata|    Perigoso|
|Garanhuns|   Recife|    Favorito|
|Garanhuns|   Buique|    Favorito|
|   Buique| Floresta|    Perigoso|
+---------+---------+------------+
only showing top 20 rows



## Breadth First Search

In [37]:
# Breadth-first search starting at "Recife" and ending at any other city,
# following only edges whose relationship is 'Favorito'

paths = graph.bfs(
    fromExpr="id = 'Recife'",
    toExpr="id !=  'Recife'",
    edgeFilter="relationship = 'Favorito'",
)
paths.show(n=20)

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, 8.0522° ...|[Recife, Olinda, ...|[Olinda, 7.9907° ...|
|[Recife, 8.0522° ...|[Recife, Caruaru,...|[Caruaru, 8.2760°...|
|[Recife, 8.0522° ...|[Recife, Tamandar...|[Tamandare, 8.757...|
|[Recife, 8.0522° ...|[Recife, Garanhun...|[Garanhuns, 8.882...|
|[Recife, 8.0522° ...|[Recife, Cabrobo,...|[Cabrobo, 8.5082°...|
+--------------------+--------------------+--------------------+



In [7]:
# Search for the best (shortest) path from "Recife" to "Gravata"

paths = graph.bfs(
    fromExpr="id = 'Recife'",
    toExpr="id = 'Gravata'",
)
paths.show(n=20)

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, 8.0522° ...|[Recife, Gravata,...|[Gravata, 8.2116°...|
+--------------------+--------------------+--------------------+



In [8]:
# Search for a path from "Recife" to "Gravata" that avoids every edge
# marked 'Perigoso' (dangerous)

paths = graph.bfs(
    fromExpr="id = 'Recife'",
    toExpr="id = 'Gravata'",
    edgeFilter="relationship != 'Perigoso'",
)
paths.show(n=20)

+--------------------+--------------------+--------------------+
|                from|                  e0|                  to|
+--------------------+--------------------+--------------------+
|[Recife, 8.0522° ...|[Recife, Gravata,...|[Gravata, 8.2116°...|
+--------------------+--------------------+--------------------+



In [9]:
# Search for the best (shortest) path from "Olinda" to "Cabrobo"

paths = graph.bfs(
    fromExpr="id = 'Olinda'",
    toExpr="id = 'Cabrobo'",
)
paths.show(n=20)

+--------------------+--------------------+--------------------+--------------------+--------------------+
|                from|                  e0|                  v1|                  e1|                  to|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|[Olinda, 7.9907° ...|[Olinda, Tamandar...|[Tamandare, 8.757...|[Tamandare, Cabro...|[Cabrobo, 8.5082°...|
|[Olinda, 7.9907° ...|[Olinda, Buique, ...|[Buique, 8.6211° ...|[Buique, Cabrobo,...|[Cabrobo, 8.5082°...|
+--------------------+--------------------+--------------------+--------------------+--------------------+



In [10]:
# Search for the best (shortest) path from "Olinda" to "Floresta"

paths = graph.bfs(
    fromExpr="id = 'Olinda'",
    toExpr="id = 'Floresta'",
)
paths.show(n=20)

+--------------------+--------------------+--------------------+--------------------+--------------------+
|                from|                  e0|                  v1|                  e1|                  to|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|[Olinda, 7.9907° ...|[Olinda, Buique, ...|[Buique, 8.6211° ...|[Buique, Floresta...|[Floresta, 8.5969...|
+--------------------+--------------------+--------------------+--------------------+--------------------+



## Find Method

In [34]:
# Match every single edge a -> b, then keep only the matches whose
# destination city `b` has a population greater than 390000

find_path = graph.find(pattern="(a)-[ab]->(b)")

find_path.where("b.populacao > 390000").show(n=20)

+--------------------+--------------------+--------------------+
|                   a|                  ab|                   b|
+--------------------+--------------------+--------------------+
|[Recife, 8.0522° ...|[Recife, Olinda, ...|[Olinda, 7.9907° ...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|
|[Caruaru, 8.2760°...|[Caruaru, Olinda,...|[Olinda, 7.9907° ...|
|[Cabrobo, 8.5082°...|[Cabrobo, Recife,...|[Recife, 8.0522° ...|
|[Cabrobo, 8.5082°...|[Cabrobo, Olinda,...|[Olinda, 7.9907° ...|
|[Carpina, 7.8450°...|[Carpina, Olinda,...|[Olinda, 7.9907° ...|
|[Garanhuns, 8.882...|[Garanhuns, Recif...|[Recife, 8.0522° ...|
|[Floresta, 8.5969...|[Floresta, Recife...|[Recife, 8.0522° ...|
+--------------------+--------------------+--------------------+



In [40]:
# Match every two-edge chain a -> b -> c, then keep only the chains whose
# middle city `b` has a population greater than 1000000: the edges arriving
# at such a city together with the edges leaving it

find_path = graph.find(pattern="(a)-[ab]->(b); (b)-[bc]->(c)")

find_path.where("b.populacao > 1000000").show(n=20)

+--------------------+--------------------+--------------------+--------------------+--------------------+
|                   a|                  ab|                   b|                  bc|                   c|
+--------------------+--------------------+--------------------+--------------------+--------------------+
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Cabrobo,...|[Cabrobo, 8.5082°...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Gravata,...|[Gravata, 8.2116°...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Garanhun...|[Garanhuns, 8.882...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Tamandar...|[Tamandare, 8.757...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Caruaru,...|[Caruaru, 8.2760°...|
|[Gravata, 8.2116°...|[Gravata, Recife,...|[Recife, 8.0522° ...|[Recife, Olinda, ...|[Olinda, 7.9907° ...|
|[Cabrobo, 8.5082°...|[Cabrobo, Recif

## FilterVertices Method

In [48]:
# Keep only the cities with a population greater than 390000, then drop any
# vertex left without edges, and display the remaining vertices

filtered_graph = (
    graph
    .filterVertices("populacao > 390000")
    .dropIsolatedVertices()
)
filtered_graph.vertices.show(n=20)

+------+---------+-------------+---------+
|    id| latitude|    longitude|populacao|
+------+---------+-------------+---------+
|Recife|8.0522° S|   34.9286° W|  1645727|
|Olinda|7.9907° S| "34.8416° W"|   392482|
+------+---------+-------------+---------+



In [50]:
# Remove "Recife" from the graph, then drop any vertex left without edges,
# and display the remaining vertices

filtered_graph = (
    graph
    .filterVertices("id != 'Recife'")
    .dropIsolatedVertices()
)
filtered_graph.vertices.show(n=20)

+---------+---------+-------------+---------+
|       id| latitude|    longitude|populacao|
+---------+---------+-------------+---------+
|   Olinda|7.9907° S| "34.8416° W"|   392482|
|  Caruaru|8.2760° S|   35.9819° W|   361118|
|Tamandare|8.7579° S|   35.1051° W|    23388|
| Floresta|8.5969° S|   38.5744° W|    32873|
|Garanhuns|8.8829° S|   36.4969° W|   139788|
|  Gravata|8.2116° S|   35.5678° W|    84074|
|  Carpina|7.8450° S|   35.2437° W|    83641|
|   Buique|8.6211° S|   37.1572° W|    58378|
|  Cabrobo|8.5082° S|   39.3103° W|    34221|
+---------+---------+-------------+---------+

