## Structured streaming example from https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html

In [1]:
from pyspark.sql import SparkSession
# Obtain the SparkSession for this notebook (reuses an existing one if
# present, otherwise creates it) under the application name 'wordCount'.
spark = (
    SparkSession.builder
    .appName('wordCount')
    .getOrCreate()
)

In [2]:
from pyspark.sql.functions import explode, split

# Streaming DataFrame fed by the text lines arriving on the socket at
# localhost:9999.
lines = (
    spark.readStream
    .format("socket")
    .option("host", "localhost")
    .option("port", 9999)
    .load()
)

# Break every incoming line on single spaces and emit one row per token,
# exposed under the column name "word".
words = lines.select(
    explode(split(lines.value, " ")).alias("word")
)

# Running tally: group identical words together and count the rows per group.
wordCounts = words.groupBy("word").count()

In [3]:
# Launch the streaming query: the running counts are written to the console
# sink using the "complete" output mode.
query = (
    wordCounts.writeStream
    .outputMode("complete")
    .format("console")
    .start()
)

# Optional: uncomment to make this cell wait on the query (with a timeout of
# 60) instead of returning immediately.
# query.awaitTermination(timeout=60)

In [4]:
# Stop the streaming query that was started in the previous cell.
query.stop()

## Input terminal

![input terminal](structured_streaming_input.png)

## Output terminal

![output terminal](structured_streaming_output.png)