In [None]:
# Spark Streaming

In [None]:
from pyspark.sql.types import StructType, StringType, DoubleType, TimestampType
import os
import json
import pyspark.sql.functions as F
import time

# --- Storage locations -------------------------------------------------------
# Directory watched for incoming JSON files, and the directory where the
# streaming query keeps its checkpoint data.
source_path = "Files/streaming/source"
checkpoint_path = "Files/streaming/checkpoint"

# --- Sink identification -----------------------------------------------------
# Combined later into the output path "Tables/<schema_name>/<table_name>".
schema_name = "streaming"
table_name = "temperature_stream"

# --- Input schema ------------------------------------------------------------
# Explicit layout of each incoming JSON record: a string id, a floating-point
# temperature reading, and the timestamp carried by the record.
file_schema = (
    StructType()
    .add("id", StringType())
    .add("temperature", DoubleType())
    .add("timestamp", TimestampType())
)

In [None]:
# Open a streaming reader over the JSON files in `source_path`, producing an
# unbounded DataFrame. The explicit `file_schema` is applied to every record,
# and `maxFilesPerTrigger=1` caps each micro-batch at one newly arrived file.
# NOTE: `spark` is not defined in this notebook; it is assumed to be the
# session object injected by the notebook runtime.
raw_stream_df = (
    spark.readStream
    .schema(file_schema)
    .option("maxFilesPerTrigger", 1)
    .json(source_path)
)

# Sample transformation: tag each row with a `processed_timestamp` column
# holding the value of `current_timestamp()` at processing time.
transformed_stream_df = raw_stream_df.withColumn(
    "processed_timestamp",
    F.current_timestamp(),
)

# Start the streaming query that writes the transformed rows, in append mode,
# to a Delta table located at "Tables/<schema_name>/<table_name>". Query
# progress is tracked under `checkpoint_path`. The returned handle is kept in
# `deltastream` so the running query can be referenced afterwards.
deltastream = (
    transformed_stream_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
    .start(f"Tables/{schema_name}/{table_name}")
)