In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode, split, col, count

# Word-count example using the DataFrame API: builds a small in-memory
# DataFrame of text lines, splits each line into words, counts how often each
# word occurs, and prints the resulting table.

# Step 1: Create a SparkSession
spark = (
    SparkSession.builder
    .appName("WordCountDataFrame")
    .master("local[*]")
    .getOrCreate()
)

try:
    # Step 2: Create a DataFrame with text data (one single-column row per line)
    data = [("Hello world spark spark",), ("Hello again spark",), ("Welcome to the world of spark",)]
    columns = ["line"]
    df = spark.createDataFrame(data, columns)

    # Step 3: Split lines into words, explode to create one word per row.
    # Splitting on a whitespace run (rather than a single literal space) keeps
    # repeated spaces or tabs from producing empty "words"; the filter drops
    # the empty token that leading/trailing whitespace would still yield.
    words_df = (
        df.select(explode(split(col("line"), r"\s+")).alias("word"))
        .filter(col("word") != "")
    )

    # Step 4: Count occurrences of each word
    word_count_df = words_df.groupBy("word").agg(count("word").alias("count"))

    # Step 5: Show the result
    word_count_df.show()
finally:
    # Stop the SparkSession even if one of the steps above raised, so the
    # session is not left running after a failure.
    spark.stop()

+-------+-----+
|   word|count|
+-------+-----+
|  Hello|    2|
|  spark|    4|
|  world|    2|
|  again|    1|
|    the|    1|
|     of|    1|
|Welcome|    1|
|     to|    1|
+-------+-----+

