Skip to content

Commit 1fe331e

Browse files
Initial Commit
1 parent e1b41d2 commit 1fe331e

File tree

4 files changed

+31
-27
lines changed

4 files changed

+31
-27
lines changed
Lines changed: 20 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -1,55 +1,53 @@
11
from pyspark.sql import SparkSession
2-
from pyspark.sql.functions import from_json, col, to_timestamp, window
2+
from pyspark.sql.functions import from_json, col, to_timestamp, window, max
33
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
44

55
from lib.logger import Log4j
66

77
if __name__ == "__main__":
88
spark = SparkSession \
99
.builder \
10-
.appName("Sliding Window Demo") \
1110
.master("local[3]") \
11+
.appName("Sliding Window Demo") \
1212
.config("spark.streaming.stopGracefullyOnShutdown", "true") \
13+
.config("spark.sql.shuffle.partitions", 1) \
1314
.getOrCreate()
1415

1516
logger = Log4j(spark)
1617

1718
invoice_schema = StructType([
18-
StructField("InvoiceNumber", StringType()),
1919
StructField("CreatedTime", StringType()),
20-
StructField("StoreID", StringType()),
21-
StructField("TotalAmount", DoubleType())
20+
StructField("Reading", DoubleType())
2221
])
2322

24-
kafka_df = spark.readStream \
23+
kafka_source_df = spark \
24+
.readStream \
2525
.format("kafka") \
2626
.option("kafka.bootstrap.servers", "localhost:9092") \
27-
.option("subscribe", "invoices") \
27+
.option("subscribe", "sensor") \
2828
.option("startingOffsets", "earliest") \
2929
.load()
3030

31-
value_df = kafka_df.select(from_json(col("value").cast("string"), invoice_schema).alias("value"))
32-
33-
# value_df.printSchema()
34-
# value_df.show(truncate=False)
35-
36-
invoice_df = value_df.select("value.*") \
37-
.withColumn("CreatedTime", to_timestamp("CreatedTime", "yyyy-MM-dd HH:mm:ss"))
31+
value_df = kafka_source_df.select(col("key").cast("string").alias("SensorID"),
32+
from_json(col("value").cast("string"), invoice_schema).alias("value"))
3833

39-
count_df = invoice_df.groupBy("StoreID",
40-
window("CreatedTime", "5 minute", "1 minute")).count()
34+
sensor_df = value_df.select("SensorID", "value.*") \
35+
.withColumn("CreatedTime", to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss"))
4136

42-
# count_df.printSchema()
43-
# count_df.show(truncate=False)
37+
agg_df = sensor_df \
38+
.withWatermark("CreatedTime", "30 minute") \
39+
.groupBy(col("SensorID"),
40+
window(col("CreatedTime"), "15 minute", "5 minute")) \
41+
.agg(max("Reading").alias("MaxReading"))
4442

45-
output_df = count_df.select("StoreID", "window.start", "window.end", "count")
43+
output_df = agg_df.select("SensorID", "window.start", "window.end", "MaxReading")
4644

47-
windowQuery = output_df.writeStream \
45+
window_query = output_df.writeStream \
4846
.format("console") \
4947
.outputMode("update") \
5048
.option("checkpointLocation", "chk-point-dir") \
5149
.trigger(processingTime="1 minute") \
5250
.start()
5351

54-
logger.info("Counting Invoices")
55-
windowQuery.awaitTermination()
52+
logger.info("Waiting for Query")
53+
window_query.awaitTermination()
Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,9 @@
1-
STR1534:{"InvoiceNumber": 101,"CreatedTime": "2019-02-05 10:01:00","StoreID": "STR1534", "TotalAmount": 1920}
2-
STR1534:{"InvoiceNumber": 103,"CreatedTime": "2019-02-05 10:03:19","StoreID": "STR1534", "TotalAmount": 2400}
3-
STR1534:{"InvoiceNumber": 105,"CreatedTime": "2019-02-05 10:07:50","StoreID": "STR1534", "TotalAmount": 6375}
1+
SET41:{"CreatedTime": "2019-02-05 09:54:00","Reading": 36.2}
2+
SET41:{"CreatedTime": "2019-02-05 09:59:00","Reading": 36.5}
3+
SET41:{"CreatedTime": "2019-02-05 10:04:00","Reading": 36.8}
4+
SET41:{"CreatedTime": "2019-02-05 10:09:00","Reading": 36.2}
5+
SET41:{"CreatedTime": "2019-02-05 10:14:00","Reading": 36.5}
6+
SET41:{"CreatedTime": "2019-02-05 10:19:00","Reading": 36.3}
7+
SET41:{"CreatedTime": "2019-02-05 10:24:00","Reading": 37.7}
8+
SET41:{"CreatedTime": "2019-02-05 10:29:00","Reading": 37.2}
9+
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
%KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoices
1+
%KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic sensor
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
%KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic invoices --property "parse.key=true" --property "key.separator=:"
1+
%KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic sensor --property "parse.key=true" --property "key.separator=:"

0 commit comments

Comments (0)