In [3]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType
from datetime import datetime
from pyspark.sql import SparkSession

from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DoubleType
# Reuse the active Spark session if one exists, otherwise start one.
spark = (
    SparkSession.builder
    .appName("PySparkTables")
    .getOrCreate()
)

# customers: one row per customer; both columns are declared non-nullable.
customers_schema = StructType(
    [
        StructField("id", IntegerType(), nullable=False),
        StructField("first_name", StringType(), nullable=False),
    ]
)

# customers data as (id, first_name) tuples, in schema column order
customers_data = [(1, "Alice"), (2, "Bob"), (3, "Carol")]

customers_df = spark.createDataFrame(customers_data, schema=customers_schema)
# Register the frame under the name "customers" so spark.sql() queries can reference it.
customers_df.createOrReplaceTempView("customers")
# orders: one row per order; every column is declared non-nullable.
orders_schema = StructType(
    [
        StructField("id", IntegerType(), nullable=False),
        StructField("cust_id", IntegerType(), nullable=False),
        StructField("order_date", DateType(), nullable=False),
        StructField("total_order_cost", DoubleType(), nullable=False),
    ]
)

# orders data as (id, cust_id, order date as "YYYY-MM-DD" text, total_order_cost)
orders_data = [
    (101, 1, "2019-02-05", 50.00),
    (102, 1, "2019-02-05", 70.00),
    (103, 2, "2019-02-05", 80.00),
    (104, 1, "2019-03-10", 120.00),
    (105, 3, "2019-04-20", 200.00),
    (106, 2, "2019-04-20", 150.00),
]

# Convert the textual dates into datetime.date objects so each tuple lines up
# with the DateType column declared in orders_schema.
orders_data_typed = [
    (oid, customer, datetime.strptime(date_text, "%Y-%m-%d").date(), amount)
    for oid, customer, date_text, amount in orders_data
]

orders_df = spark.createDataFrame(orders_data_typed, schema=orders_schema)
# Register the frame under the name "orders" so spark.sql() queries can reference it.
orders_df.createOrReplaceTempView("orders")



In [13]:
# Highest daily spender(s) for each order date from 2019-02-01 through 2019-05-01.
#
# daily_totals: sums total_order_cost per (cust_id, order_date). The date range is
#               applied in WHERE (not HAVING) because it is a row-level predicate,
#               so out-of-range orders are dropped before aggregation. Typed DATE
#               literals avoid relying on an implicit string-to-date cast.
# ranked:       ranks customers within each order_date by their daily total;
#               RANK() keeps every customer tied for the top total on a date.
# Final select: customer first name and daily total for rank-1 rows, ordered by
#               order_date, then first_name so tied rows come back in a stable order.
spark.sql("""
    WITH daily_totals AS (
        SELECT cust_id,
               order_date,
               SUM(total_order_cost) AS total_per_day_cost
        FROM orders
        WHERE order_date BETWEEN DATE '2019-02-01' AND DATE '2019-05-01'
        GROUP BY cust_id, order_date
    ),
    ranked AS (
        SELECT *,
               RANK() OVER (PARTITION BY order_date ORDER BY total_per_day_cost DESC) AS rnk
        FROM daily_totals
    )
    SELECT c.first_name, r.total_per_day_cost
    FROM ranked r
    JOIN customers c ON c.id = r.cust_id
    WHERE r.rnk = 1
    ORDER BY order_date, first_name
""").show()

+----------+------------------+
|first_name|total_per_day_cost|
+----------+------------------+
|     Alice|             120.0|
|     Alice|             120.0|
|     Carol|             200.0|
+----------+------------------+

