In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, DateType, StringType
from pyspark.sql.functions import to_date

# Obtain the Spark session for this notebook (an already-running one is reused).
spark = (
    SparkSession.builder
    .appName("ProductsTable")
    .getOrCreate()
)

# Schema of the Queue table: every column is declared nullable.
schema_queue = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in (
            ("person_id", IntegerType()),
            ("person_name", StringType()),
            ("weight", IntegerType()),
            ("turn", IntegerType()),
        )
    ]
)

# Seed rows, one tuple per person: (person_id, person_name, weight, turn).
# The rows are deliberately not listed in turn order.
data_queue = [
    (5, "Alice", 250, 1),
    (4, "Bob", 175, 5),
    (3, "Alex", 350, 2),
    (6, "John Cena", 400, 3),
    (1, "Winston", 500, 6),
    (2, "Marie", 200, 4),
]

# Materialise the rows as a DataFrame using the explicit schema above.
df_queue = spark.createDataFrame(data=data_queue, schema=schema_queue)

# Expose the DataFrame to Spark SQL under the view name "Queue".
df_queue.createOrReplaceTempView("Queue")

In [16]:
# Find the last person (by boarding turn) whose running total weight is still
# within the 1000 limit.
#
# The CTE computes, for each person, the sum of the weights of everyone whose
# turn is less than or equal to theirs. The outer query keeps the rows that
# fit under the limit and returns the one with the highest turn.
#
# The final ordering is on `turn`, not on `total_weight`: the schema allows
# NULL weights (and a weight of 0 is also possible), in which case consecutive
# people share the same running total and ordering by the total alone would
# leave the choice between them to chance.
#
# The column is referenced as `weight`, exactly as declared in the schema, so
# the query does not depend on case-insensitive identifier resolution.
spark.sql("""
    WITH cum_sum AS (
        SELECT person_id,
               person_name,
               turn,
               SUM(weight) OVER (
                   ORDER BY turn
                   RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
               ) AS total_weight
        FROM Queue
    )
    SELECT person_name
    FROM cum_sum
    WHERE total_weight <= 1000
    ORDER BY turn DESC
    LIMIT 1
""").show()

+-----------+
|person_name|
+-----------+
|  John Cena|
+-----------+



In [20]:
from pyspark.sql.functions import *
from pyspark.sql import Window
# Use an explicit alias for the Spark SQL functions so the aggregate below is
# unambiguously Spark's sum and not Python's built-in sum(), which a wildcard
# import only happens to shadow.
from pyspark.sql import functions as F

# Running-total frame: ordered by turn, spanning from the start of the data up
# to the current row. Because this is a RANGE frame, rows that share the same
# turn value are included in each other's totals.
windowSpec = Window.orderBy("turn").rangeBetween(Window.unboundedPreceding, Window.currentRow)

# Add the cumulative weight per person. The column is referenced as "weight",
# exactly as declared in schema_queue, so the lookup does not depend on
# case-insensitive column resolution.
cum_sum = df_queue.withColumn("cum_weight", F.sum(F.col("weight")).over(windowSpec))

In [26]:
# DataFrame equivalent of the SQL query: keep the people whose running total
# is within the 1000 limit and show the name of the one with the latest turn.
#
# Ordering is by `turn` rather than by `cum_weight`: people with a NULL or
# zero weight share the running total of the person before them, and ordering
# by the total alone would pick between them arbitrarily.
(
    cum_sum
    .filter(col("cum_weight") <= 1000)
    .orderBy(col("turn").desc())
    .select("person_name")
    .limit(1)
    .show()
)

+-----------+
|person_name|
+-----------+
|  John Cena|
+-----------+

