# Monthly order summary
For each customer, produce the following summary per month:
1. total orders
2. total items bought
3. total amount spent

In [0]:
# %sql
# select *
# from gizmobox.silver.orders
# limit 10;

import pyspark.sql.functions as F
import pyspark.sql.types as T

# Load the silver-layer orders table that the monthly summary is built from.
df_orders = spark.table("gizmobox.silver.py_orders")

In [0]:
# %sql
# select
#   customer_id,
#   date_format(transaction_timestamp, 'yyyy-MM') as order_month,
#   count(distinct order_id) as total_orders,
#   sum(quantity) as total_items_bought,
#   sum(price * quantity) as total_amount_spent
# from gizmobox.silver.orders
# group by customer_id, date_format(transaction_timestamp, 'yyyy-MM')
# order by order_month DESC, customer_id
# limit 10;

# Build one row per (customer, month):
#   * order_month        - transaction_timestamp formatted as 'yyyy-MM'
#   * total_orders       - number of distinct order_id values
#   * total_items_bought - sum of quantity
#   * total_amount_spent - sum of price * quantity
df_orders_monthly = (
    df_orders
    .withColumn(
        'order_month',
        F.date_format(F.col('transaction_timestamp'), 'yyyy-MM'),
    )
    .groupBy('order_month', 'customer_id')
    .agg(
        F.countDistinct('order_id').alias('total_orders'),
        F.sum('quantity').alias('total_items_bought'),
        F.sum(F.col('price') * F.col('quantity')).alias('total_amount_spent'),
    )
    # Put customer_id first so the column order mirrors the reference SQL.
    .select(
        'customer_id',
        'order_month',
        'total_orders',
        'total_items_bought',
        'total_amount_spent',
    )
    # Newest month first, then by customer within each month.
    .orderBy(F.desc('order_month'), 'customer_id')
)

display(df_orders_monthly)
                                      


In [0]:
# Persist the monthly summary to the gold layer, creating the table or
# replacing it if it already exists.
(
    df_orders_monthly
    .writeTo('gizmobox.gold.py_order_summary_monthly')
    .createOrReplace()
)

In [0]:
%sql
-- Preview up to 10 rows of the monthly order summary table.
SELECT *
FROM gizmobox.gold.py_order_summary_monthly
LIMIT 10;