In [None]:
import os
# Submit arguments handed to PySpark through the environment. The two Kafka
# connector packages named here are the ones reported as "added as a
# dependency" in the Ivy resolution log of the captured output.
submit_args = '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.5.1,org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1 pyspark-shell'
os.environ['PYSPARK_SUBMIT_ARGS'] = submit_args

from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, window
from pyspark.sql.types import StructType, StringType, BooleanType, IntegerType, TimestampType, StructField, ArrayType

# Obtain the Spark session for this job under the application name
# "KafkaStructuredStreaming" (getOrCreate returns an existing session if any).
session_builder = SparkSession.builder.appName("KafkaStructuredStreaming")
spark = session_builder.getOrCreate()

# Schema of the nested "floodArea" object: five string fields.
# "riverOrSea" is optional in the payload, so its nullable flag is spelled
# out explicitly.
flood_area_schema = (
    StructType()
    .add("@id", StringType())
    .add("county", StringType())
    .add("notation", StringType())
    .add("polygon", StringType())
    .add("riverOrSea", StringType(), True)
)

# Schema of a single flood-warning record, listed as (field name, Spark type)
# pairs in payload order. "floodArea" is a nested struct described by
# flood_area_schema; the three time* fields are parsed as timestamps.
warning_fields = [
    ("@id", StringType()),
    ("description", StringType()),
    ("eaAreaName", StringType()),
    ("eaRegionName", StringType()),
    ("floodArea", flood_area_schema),
    ("floodAreaID", StringType()),
    ("isTidal", BooleanType()),
    ("message", StringType()),
    ("severity", StringType()),
    ("severityLevel", IntegerType()),
    ("timeMessageChanged", TimestampType()),
    ("timeRaised", TimestampType()),
    ("timeSeverityChanged", TimestampType()),
]
schema = StructType(
    [StructField(field_name, field_type) for field_name, field_type in warning_fields]
)

# Open a streaming source on the Kafka topic "uk-flood", using the broker at
# localhost:9092.
kafka_reader = spark.readStream.format("kafka")
kafka_reader = kafka_reader.option("kafka.bootstrap.servers", "localhost:9092")
kafka_reader = kafka_reader.option("subscribe", "uk-flood")
kafkaStreamDF = kafka_reader.load()

# Turn raw Kafka messages into one row per flood-warning record:
#   1. cast the message value to a string,
#   2. parse it as a JSON array of records matching `schema`,
#   3. explode the array so each element becomes its own row,
#   4. promote the record's fields to top-level columns.
raw_values = kafkaStreamDF.selectExpr("CAST(value AS STRING)")
decoded = raw_values.select(
    from_json(col("value"), ArrayType(schema)).alias("data")
)
exploded = decoded.selectExpr("explode(data) AS items")
parsedDF = exploded.select("items.*")

# Count records per (eaAreaName, severityLevel) pair and publish the tally
# under the column name "severityLevelCount".
# NOTE(review): in the captured console output every count grows by the same
# amount from one batch to the next (e.g. 16, 32, 48, ...), which looks like
# the same records being ingested repeatedly - confirm whether repeated
# messages are meant to be counted.
grouped = parsedDF.groupBy("eaAreaName", "severityLevel")
aggregatedDF = grouped.count().withColumnRenamed("count", "severityLevelCount")

# Stream the aggregated counts to the console. With "complete" output mode the
# whole result table is printed again on every batch.
query = (
    aggregatedDF.writeStream
    .outputMode("complete")
    .format("console")
    # Print full column values: with the default setting the console output
    # cuts long area names short (e.g. "Solent and South ..."), which makes
    # the grouping key unreadable.
    .option("truncate", "false")
    .start()
)

# start() above has already launched the query; this call only blocks the
# cell until the query terminates.
try:
    query.awaitTermination()
finally:
    # If the wait is interrupted (for example by a kernel interrupt), stop the
    # query so it does not keep running and printing in the background.
    query.stop()


24/05/10 06:40:21 WARN Utils: Your hostname, nasibaso-sambeldomba resolves to a loopback address: 127.0.1.1; using 192.168.0.131 instead (on interface wlo1)
24/05/10 06:40:21 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/home/nasibaso-sambeldomba/.local/lib/python3.10/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/nasibaso-sambeldomba/.ivy2/cache
The jars for the packages stored in: /home/nasibaso-sambeldomba/.ivy2/jars
org.apache.spark#spark-streaming-kafka-0-10_2.12 added as a dependency
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-cc9c4438-7c7e-4b80-a8c6-45497cc88afa;1.0
	confs: [default]
	found org.apache.spark#spark-streaming-kafka-0-10_2.12;3.5.1 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.5.1 in central
	found org.apache.kafka#kafka-clients;3.4.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.10.3 in central
	found org.slf4j#slf4j-api;2.0.7 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.4 in central
	found org.apache.hadoop#hadoop-client-api;3.3.4 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
	found org.apac

-------------------------------------------
Batch: 0
-------------------------------------------
+----------+-------------+------------------+
|eaAreaName|severityLevel|severityLevelCount|
+----------+-------------+------------------+
+----------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 1|
|Kent, South Londo...|            3|                 4|
|Hertfordshire and...|            3|                 3|
|Hertfordshire and...|            4|                 1|
|              Thames|            3|                16|
|         East Anglia|            3|                 1|
|Solent and South ...|            3|                12|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 2|
|Kent, South Londo...|            3|                 8|
|Hertfordshire and...|            3|                 6|
|Hertfordshire and...|            4|                 2|
|              Thames|            3|                32|
|         East Anglia|            3|                 2|
|Solent and South ...|            3|                24|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 3|
|Kent, South Londo...|            3|                12|
|Hertfordshire and...|            3|                 9|
|Hertfordshire and...|            4|                 3|
|              Thames|            3|                48|
|         East Anglia|            3|                 3|
|Solent and South ...|            3|                36|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 4|
|Kent, South Londo...|            3|                16|
|Hertfordshire and...|            3|                12|
|Hertfordshire and...|            4|                 4|
|              Thames|            3|                64|
|         East Anglia|            3|                 4|
|Solent and South ...|            3|                48|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 5|
|Kent, South Londo...|            3|                20|
|Hertfordshire and...|            3|                15|
|Hertfordshire and...|            4|                 5|
|              Thames|            3|                80|
|         East Anglia|            3|                 5|
|Solent and South ...|            3|                60|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 6
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 6|
|Kent, South Londo...|            3|                24|
|Hertfordshire and...|            3|                18|
|Hertfordshire and...|            4|                 6|
|              Thames|            3|                96|
|         East Anglia|            3|                 6|
|Solent and South ...|            3|                72|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 7
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 7|
|Kent, South Londo...|            3|                28|
|Hertfordshire and...|            3|                21|
|Hertfordshire and...|            4|                 7|
|              Thames|            3|               112|
|         East Anglia|            3|                 7|
|Solent and South ...|            3|                84|
+--------------------+-------------+------------------+



                                                                                

-------------------------------------------
Batch: 8
-------------------------------------------
+--------------------+-------------+------------------+
|          eaAreaName|severityLevel|severityLevelCount|
+--------------------+-------------+------------------+
|Solent and South ...|            4|                 8|
|Kent, South Londo...|            3|                32|
|Hertfordshire and...|            3|                24|
|Hertfordshire and...|            4|                 8|
|              Thames|            3|               128|
|         East Anglia|            3|                 8|
|Solent and South ...|            3|                96|
+--------------------+-------------+------------------+

