In [15]:
from delta import *
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [16]:
# Maven coordinate of the Kafka source connector, handed to the Delta helper
# below as an additional package alongside Delta itself.
kafka_connector = 'org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.1'

# Session settings: executor memory plus the Delta SQL extension and catalog.
session_conf = {
    "spark.executor.memory": "512m",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

builder = SparkSession.builder.appName("apply-window-aggregations")
builder = builder.master("spark://spark-master:7077")
for conf_key, conf_value in session_conf.items():
    builder = builder.config(conf_key, conf_value)

# Create (or reuse) the session, then keep only ERROR-level log output.
spark = configure_spark_with_delta_pip(builder, [kafka_connector]).getOrCreate()
spark.sparkContext.setLogLevel("ERROR")

In [17]:
# Kafka source settings: broker address, the topic to subscribe to, and
# where to start reading when the query has no prior offsets.
kafka_options = {
    "kafka.bootstrap.servers": "kafka:9092",
    "subscribe": "events",
    "startingOffsets": "earliest",
}

# Streaming DataFrame over the Kafka topic; nothing is read until a
# streaming query is started on it.
df = spark.readStream.format("kafka").options(**kafka_options).load()

In [18]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType

# Expected layout of the JSON payload carried in the Kafka `value` column.
# Both time fields are declared as strings here; every field is nullable.
schema = (StructType()
          .add('user_id', IntegerType(), True)
          .add('event_type', StringType(), True)
          .add('event_time', StringType(), True)
          .add('processing_time', StringType(), True))

# Replace the `value` column with the struct parsed from its string-cast JSON.
payload_json = col('value').cast("STRING")
df = df.withColumn('value', from_json(payload_json, schema))

In [19]:
from pyspark.sql.functions import col

# Pattern shared by both time fields in the payload.
timestamp_pattern = "MM/dd/yyyy, HH:mm:ss"

# Lift the four payload fields out of the `value` struct into top-level
# columns that keep the same names.
payload_fields = ("user_id", "event_type", "event_time", "processing_time")
df = df.select(*[col(f"value.{name}").alias(name) for name in payload_fields])

# Convert the two time columns from strings to timestamps in place.
for ts_col in ("event_time", "processing_time"):
    df = df.withColumn(ts_col, to_timestamp(col(ts_col), timestamp_pattern))

In [20]:
# 60-minute windows over event_time with a 60-minute slide, i.e. the
# window length and the slide interval are equal.
hourly_bucket = window(col("event_time"), "60 minute", "60 minute")

# count() tallies rows whose user_id is non-null; it does not de-duplicate
# users, despite the column name.
events_counted = count(col("user_id")).alias("NumberOfUsers")

# One output row per (window, event_type) pair.
df = df.groupBy(hourly_bucket, col("event_type")).agg(events_counted)

In [21]:
# Console sink in 'complete' mode with cell truncation disabled, so the
# window column prints in full.
console_writer = df.writeStream.format('console')
console_writer = console_writer.option("truncate", False).outputMode('complete')

# Launch the streaming query; keep the handle so it can be stopped later.
query = console_writer.start()

                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |4            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |4            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |5            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |4            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |1            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 6
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |3            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 7
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |4            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |6            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 9
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |7            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 10
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |6            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |7            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 11
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |6            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |8            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 12
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |6            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |6            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |8            |
+------------------------------------------+----------+-------------+



                                                                                

-------------------------------------------
Batch: 13
-------------------------------------------
+------------------------------------------+----------+-------------+
|window                                    |event_type|NumberOfUsers|
+------------------------------------------+----------+-------------+
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|like      |5            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|view      |7            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|share     |6            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|purchase  |2            |
|{2023-08-19 13:00:00, 2023-08-19 14:00:00}|click     |8            |
+------------------------------------------+----------+-------------+





In [23]:
# Stop the streaming query held in `query`. The ERROR lines logged after this
# call report a console micro-batch write aborting; presumably that is the
# batch that was in flight when the query was stopped.
query.stop()

23/08/19 13:39:23 ERROR WriteToDataSourceV2Exec: Data source write support MicroBatchWrite[epoch: 18, writer: ConsoleWriter[numRows=20, truncate=false]] is aborting.
23/08/19 13:39:24 ERROR WriteToDataSourceV2Exec: Data source write support MicroBatchWrite[epoch: 18, writer: ConsoleWriter[numRows=20, truncate=false]] aborted.


In [24]:
# Shut down the Spark session once the streaming query has been stopped.
spark.stop()