In [0]:
from pyspark.sql.types import StructType, StringType, IntegerType

schema = StructType() \
    .add("order_id", StringType()) \
    .add("customer_id", StringType()) \
    .add("product", StringType()) \
    .add("quantity", IntegerType()) \
    .add("region", StringType())

initial_data = [
    ("1", "C101", "Laptop", 2, "South"),
    ("2", "C102", "Chair", 6, "North"),
    ("3", "C103", "Mobile", 1, "East"),
]

df = spark.createDataFrame(initial_data, schema)

df.write \
    .mode("overwrite") \
    .option("header", True) \
    .csv("dbfs:/tmp/stream/orders")


In [0]:
orders_stream = (
    spark.readStream
    .schema(schema)
    .option("header", True)
    .csv("dbfs:/tmp/stream/orders")
)


In [0]:
from pyspark.sql.functions import when

transformed_orders = orders_stream.withColumn(
    "bulk_order", when(orders_stream["quantity"] > 5, True).otherwise(False)
)


In [0]:
query = transformed_orders.writeStream \
    .outputMode("append") \
    .format("console") \
    .start()

query.awaitTermination()

query.stop()

com.databricks.backend.common.rpc.CommandCancelledException
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$5(SequenceExecutionState.scala:132)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3(SequenceExecutionState.scala:132)
	at com.databricks.spark.chauffeur.SequenceExecutionState.$anonfun$cancel$3$adapted(SequenceExecutionState.scala:129)
	at scala.collection.immutable.Range.foreach(Range.scala:158)
	at com.databricks.spark.chauffeur.SequenceExecutionState.cancel(SequenceExecutionState.scala:129)
	at com.databricks.spark.chauffeur.ExecContextState.cancelRunningSequence(ExecContextState.scala:715)
	at com.databricks.spark.chauffeur.ExecContextState.$anonfun$cancel$1(ExecContextState.scala:435)
	at scala.Option.getOrElse(Option.scala:189)
	at com.databricks.spark.chauffeur.ExecContextState.cancel(ExecContextState.scala:435)
	at com.databricks.spark.chauffeur.ExecutionContextManagerV1.can

In [0]:
query = (
    transformed_orders.writeStream
    .format("memory")
    .queryName("orders_live")
    .outputMode("append")
    .start()
)


In [0]:
rate_df = (
    spark.readStream
    .format("rate")
    .option("rowsPerSecond", 1)
    .load()
)


In [0]:
from pyspark.sql.functions import col
transformed_df = rate_df.withColumn("is_even", (col("value") % 2 == 0))
query = (
    transformed_df.writeStream
    .format("memory")           # Memory table for SQL querying
    .queryName("rate_table")    # Table name to query
    .outputMode("append")       # Append new rows only
    .start()
)



In [0]:
spark.sql("SELECT * FROM rate_table").show()


+--------------------+-----+-------+
|           timestamp|value|is_even|
+--------------------+-----+-------+
|2025-08-08 11:08:...|    0|   true|
|2025-08-08 11:08:...|    1|  false|
|2025-08-08 11:08:...|    2|   true|
|2025-08-08 11:08:...|    3|  false|
|2025-08-08 11:08:...|    4|   true|
|2025-08-08 11:08:...|    5|  false|
|2025-08-08 11:08:...|    6|   true|
|2025-08-08 11:08:...|    7|  false|
|2025-08-08 11:08:...|    8|   true|
|2025-08-08 11:08:...|    9|  false|
|2025-08-08 11:08:...|   10|   true|
|2025-08-08 11:08:...|   11|  false|
|2025-08-08 11:09:...|   12|   true|
|2025-08-08 11:09:...|   13|  false|
|2025-08-08 11:09:...|   14|   true|
|2025-08-08 11:09:...|   15|  false|
|2025-08-08 11:09:...|   16|   true|
|2025-08-08 11:09:...|   17|  false|
|2025-08-08 11:09:...|   18|   true|
|2025-08-08 11:09:...|   19|  false|
+--------------------+-----+-------+
only showing top 20 rows


In [0]:
from pyspark.sql.functions import col
transformed_df = rate_df.withColumn(
    "size_label",
    when(col("value") < 25, "Small").otherwise("Big")
)
query = (
    transformed_df.writeStream
    .format("memory")           
    .queryName("Size_table")    
    .outputMode("append")       
    .start()
)


In [0]:
spark.sql("SELECT * FROM Size_table").show()

+--------------------+-----+----------+
|           timestamp|value|size_label|
+--------------------+-----+----------+
|2025-08-08 11:22:...|    0|     Small|
|2025-08-08 11:22:...|    1|     Small|
|2025-08-08 11:22:...|    2|     Small|
|2025-08-08 11:22:...|    3|     Small|
|2025-08-08 11:22:...|    4|     Small|
|2025-08-08 11:22:...|    5|     Small|
|2025-08-08 11:22:...|    6|     Small|
|2025-08-08 11:22:...|    7|     Small|
|2025-08-08 11:22:...|    8|     Small|
|2025-08-08 11:22:...|    9|     Small|
|2025-08-08 11:22:...|   10|     Small|
|2025-08-08 11:22:...|   11|     Small|
|2025-08-08 11:22:...|   12|     Small|
|2025-08-08 11:22:...|   13|     Small|
|2025-08-08 11:22:...|   14|     Small|
|2025-08-08 11:22:...|   15|     Small|
|2025-08-08 11:22:...|   16|     Small|
|2025-08-08 11:22:...|   17|     Small|
|2025-08-08 11:22:...|   18|     Small|
|2025-08-08 11:22:...|   19|     Small|
+--------------------+-----+----------+
only showing top 20 rows
