In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, expr
from pyspark.sql.types import StructType, IntegerType, StringType, TimestampType

# Build (or reuse) the SparkSession for this job. The Kafka connector is
# requested via spark.jars.packages; master is set to local[*].
spark = (
    SparkSession.builder
    .appName("StreamStreamJoin")
    .master("local[*]")
    .config(
        "spark.jars.packages",
        "org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.0",
    )
    .getOrCreate()
)

# Data schemas used to parse the JSON payload of each Kafka message.

# An order: its id, the id of the user who placed it, an amount and an
# event timestamp.
orders_schema = (
    StructType()
    .add("order_id", IntegerType())
    .add("user_id", IntegerType())
    .add("amount", IntegerType())
    .add("timestamp", TimestampType())
)

# A user event: the user's id, name and an event timestamp.
users_schema = (
    StructType()
    .add("user_id", IntegerType())
    .add("user_name", StringType())
    .add("timestamp", TimestampType())
)

# Orders stream: subscribe to the orders topic, starting from the latest
# offsets.
orders = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "kafka_streaming_lab:9092")
    .option("subscribe", "spark-lab5-topic-orders")
    .option("startingOffsets", "latest")
    .load()
)

# Decode the Kafka `value` as a string, parse it as JSON with orders_schema,
# flatten the resulting struct into top-level columns, and declare a
# 30-second watermark on the `timestamp` column.
orders_df = (
    orders
    .selectExpr("CAST(value AS STRING)")
    .select(from_json(col("value"), orders_schema).alias("order"))
    .select("order.*")
    .withWatermark("timestamp", "30 seconds")
)

# Users stream: subscribe to the users topic, starting from the latest
# offsets.
users = (
    spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "kafka_streaming_lab:9092")
    .option("subscribe", "spark-lab5-topic-users")
    .option("startingOffsets", "latest")
    .load()
)

# Decode the Kafka `value` as a string, parse it as JSON with users_schema,
# flatten the resulting struct into top-level columns, and declare a
# 30-second watermark on the `timestamp` column.
users_df = (
    users
    .selectExpr("CAST(value AS STRING)")
    .select(from_json(col("value"), users_schema).alias("user"))
    .select("user.*")
    .withWatermark("timestamp", "30 seconds")
)

# Stream-stream join on user_id: an order matches a user event when both
# carry the same user_id and the order's timestamp lies within 15 seconds
# (either side) of the user event's timestamp.
#
# Both DataFrames are aliased explicitly. The SQL text inside expr() cannot
# see Python variable names, so the qualifiers `orders_df.` / `users_df.`
# only resolve once each side carries that alias. Qualification is required
# here because both sides have `user_id` and `timestamp` columns.
#
# The result keeps the columns of both sides, so `user_id` and `timestamp`
# each appear twice in the output.
joined = orders_df.alias("orders_df").join(
    users_df.alias("users_df"),
    expr("""
        orders_df.user_id = users_df.user_id AND
        orders_df.timestamp BETWEEN users_df.timestamp - interval 15 seconds AND users_df.timestamp + interval 15 seconds
    """)
)

# Print the joined rows to the console in append mode, without truncating
# long column values.
query = (
    joined.writeStream
    .outputMode("append")
    .format("console")
    .option("truncate", "false")
    .start()
)

# Block until the streaming query terminates.
query.awaitTermination()
