In [None]:
from pyspark.sql import SparkSession

# Build (or reuse, via getOrCreate) the Spark session used by every cell below.
# A parenthesised chain keeps one option per line without backslash continuations.
spark = (
    SparkSession.builder
    .appName("GraphFramesExample")
    # Requests the GraphFrames package by its Maven coordinate.
    # NOTE(review): confirm this exact coordinate is published and that its
    # Spark/Scala suffix matches the cluster this notebook connects to.
    .config("spark.jars.packages", "graphframes:graphframes:0.8.3-spark3.1-s_2.12")
    # Connects to the master at host "spark-master", port 7077.
    .master("spark://spark-master:7077")
    .config("spark.submit.deployMode", "client")
    # Presumably the hostname under which the cluster can reach this notebook's
    # driver process -- verify against the deployment's network setup.
    .config("spark.driver.host", "jupyter")
    .getOrCreate()
)

In [None]:
from graphframes import GraphFrame

In [None]:
# Vertex table for the example graph: five people, each keyed by a
# one-letter "id" that the edge table refers to.
vertices = spark.createDataFrame(
    data=[
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
        ("d", "David", 29),
        ("e", "Esther", 32),
    ],
    schema=["id", "name", "age"],
)

# Print the rows as a quick sanity check.
vertices.show()

In [None]:
# Edge table for the example graph: each row is a directed link from the
# vertex id in "src" to the vertex id in "dst", labelled with a relationship.
edges = spark.createDataFrame(
    data=[
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow"),
        ("d", "a", "friend"),
        ("e", "d", "follow"),
    ],
    schema=["src", "dst", "relationship"],
)

# Print the rows as a quick sanity check.
edges.show()

In [None]:
# Combine the two DataFrames into a single graph object; the vertex frame is
# passed as `v` and the edge frame as `e`.
g = GraphFrame(v=vertices, e=edges)

In [None]:
# Print both halves of the graph, each under its own heading.
for heading, frame in (("Vertices:", g.vertices), ("Edges:", g.edges)):
    print(heading)
    frame.show()

In [None]:
# Run PageRank for a fixed number of iterations (5) with a reset
# probability of 0.15, then print the vertex table of the result.
print("PageRank:")
(
    g.pageRank(resetProbability=0.15, maxIter=5)
    .vertices
    .show()
)

In [None]:
# Breadth-first search from the vertex with id 'a' to the vertex with id 'c'.
# Both endpoints are given as SQL-style filter expressions over the vertex
# columns. With the edges defined in this notebook the expected route is
# a -> b -> c.
print("BFS from a to c:")
(
    g.bfs(
        fromExpr="id = 'a'",
        toExpr="id = 'c'",
    )
    .show()
)

In [None]:
# Cell 9: Stop the Spark session now that the example is finished.
# Any cell that uses `spark` will fail after this point until the first cell
# is re-run to build a new session.
spark.stop()