In [1]:
from pyspark.sql import SparkSession
from graphframes import GraphFrame

# Start a Spark session named "GraphAnalysis", or attach to an existing one.
spark = (
    SparkSession.builder
    .appName("GraphAnalysis")
    .getOrCreate()
)

# Smoke test: a minimal two-person graph, just to confirm that a GraphFrame
# can be built and displayed. Note that `vertices`, `edges` and `g` are
# rebound to the larger five-person graph in later cells.
vertices = spark.createDataFrame(
    [("1", "Alice"), ("2", "Bob")],
    schema=["id", "name"],
)
edges = spark.createDataFrame(
    [("1", "2", "friend")],
    schema=["src", "dst", "relationship"],
)

g = GraphFrame(vertices, edges)

# Show the vertex table first, then the edge table.
for frame in (g.vertices, g.edges):
    frame.show()



+---+-----+
| id| name|
+---+-----+
|  1|Alice|
|  2|  Bob|
+---+-----+

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  1|  2|      friend|
+---+---+------------+



In [2]:
from graphframes import GraphFrame

In [3]:
# Cell 3: Build the vertex table — one row per person, with a single-letter
# `id` plus `name` and `age` attributes.
vertices = spark.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30),
        ("d", "David", 29),
        ("e", "Esther", 32),
    ],
    schema=["id", "name", "age"],
)

vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
+---+-------+---+



In [4]:
# Cell 4: Build the edge table — one row per directed link from `src` to
# `dst`, labelled with the kind of relationship ("friend" or "follow").
# The src/dst values are the vertex ids defined in the vertices cell.
edges = spark.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow"),
        ("d", "a", "friend"),
        ("e", "d", "follow"),
    ],
    schema=["src", "dst", "relationship"],
)

edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  d|  a|      friend|
|  e|  d|      follow|
+---+---+------------+



In [5]:
# Cell 5: Combine the vertex and edge DataFrames into a single GraphFrame.
# This rebinds `g`, replacing the two-person graph built in the first cell;
# every later cell (display, PageRank, BFS) operates on this graph.
g = GraphFrame(vertices, edges)

In [6]:
# Cell 6: Print each component of the graph under its own heading,
# vertices first and edges second.
for label, frame in (("Vertices:", g.vertices), ("Edges:", g.edges)):
    print(label)
    frame.show()

Vertices:
+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
+---+-------+---+

Edges:
+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  d|  a|      friend|
|  e|  d|      follow|
+---+---+------------+



In [7]:
# Cell 7: Run PageRank with a reset probability of 0.15, capped at
# maxIter=5 iterations, then show the vertices of the result (the output
# carries an added `pagerank` column next to the original attributes).
print("PageRank:")
ranked = g.pageRank(resetProbability=0.15, maxIter=5)
ranked.vertices.show()

PageRank:




+---+-------+---+-------------------+
| id|   name|age|           pagerank|
+---+-------+---+-------------------+
|  e| Esther| 32|               0.15|
|  d|  David| 29|0.27749999999999997|
|  c|Charlie| 30| 1.8008009374999996|
|  b|    Bob| 36|       2.3858240625|
|  a|  Alice| 34|0.38587499999999997|
+---+-------+---+-------------------+



In [8]:
# Cell 8: Breadth-first search from the vertex with id 'a' to the vertex
# with id 'c'. Both endpoints are given as SQL-style filter expressions
# over the vertex columns; the resulting paths are shown as a table.
print("BFS from a to c:")
paths = g.bfs(fromExpr="id = 'a'", toExpr="id = 'c'")
paths.show()

BFS from a to c:
+--------------+--------------+------------+--------------+----------------+
|          from|            e0|          v1|            e1|              to|
+--------------+--------------+------------+--------------+----------------+
|{a, Alice, 34}|{a, b, friend}|{b, Bob, 36}|{b, c, follow}|{c, Charlie, 30}|
+--------------+--------------+------------+--------------+----------------+



In [9]:
# Cell 9: Stop the Spark session created in the first cell.
# Run this last: the cells above all use `spark` or DataFrames built from it,
# so to re-run them, start again from the session-creation cell.
spark.stop()