### Define Schema for our Sample Streaming Data

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
# Schema of the incoming sample files: two string columns and one integer
# column, all declared nullable.
schema_defined = (
    StructType()
    .add("File", StringType(), nullable=True)
    .add("Shop", StringType(), nullable=True)
    .add("SaleCount", IntegerType(), nullable=True)
)

### Create the Folders in DBFS

In [0]:
# Start from a clean slate: first remove anything left over from a previous
# run (recursive delete), THEN (re)create the empty folders. Doing it in the
# opposite order would leave no folders behind, and the streaming reader
# below would be pointed at a source path that does not exist.
dbutils.fs.rm("/FileStore/tables/stream_checkpoint/", True)
dbutils.fs.rm("/FileStore/tables/stream_read/", True)
dbutils.fs.rm("/FileStore/tables/stream_write/", True)

dbutils.fs.mkdirs("/FileStore/tables/stream_checkpoint/")
dbutils.fs.mkdirs("/FileStore/tables/stream_read/")
dbutils.fs.mkdirs("/FileStore/tables/stream_write/")

Out[2]: True

### Read Streaming Data

- Upload files to the stream read path for rows to be displayed.
- The query keeps executing and doesn't stop unless it is interrupted.

In [0]:
# Streaming source: semicolon-separated CSV files with a header row, parsed
# with the explicit schema defined earlier in the notebook.
df_streamread = (
    spark.readStream
    .schema(schema_defined)
    .option("header", True)
    .option("sep", ";")
    .csv("/FileStore/tables/stream_read/")
)

# Running total of SaleCount per Shop, rendered as a live-updating table.
df1 = df_streamread.groupBy("Shop").sum("SaleCount")
display(df1)

Shop,sum(SaleCount)


### Write Streaming Data

- Upload files to the stream read location; the `writeStream` query below writes them out as Parquet.

In [0]:
# Start the streaming write and keep the query handle in `df4`.
# Chaining `.awaitTermination()` onto `.start()` in the assignment would bind
# `df4` to awaitTermination()'s return value instead of the query, so the
# handle would be lost and the query could not be inspected or stopped
# (e.g. `df4.stop()`) after the cell is interrupted.
df4 = (
    df_streamread.writeStream
    .format("parquet")
    .outputMode("append")
    .option("path", "/FileStore/tables/stream_write/")
    .option("checkpointLocation", "/FileStore/tables/stream_checkpoint/")
    .start()
)

# Block this cell until the query is stopped or fails.
df4.awaitTermination()

### Verify the written stream output data

In [0]:
# Batch-read the Parquet files produced by the streaming write and show them.
display(
    spark.read.parquet("/FileStore/tables/stream_write/*.parquet")
)