## Best-selling products: identifying the top-selling items

In [0]:
# Rank products by the total quantity sold and persist the ranking.
df = spark.table("sales_transactions_silver")

# groupBy(...).sum("quantity") names its output column "sum(quantity)";
# rename it to something readable before ranking.
units_per_product = (
    df.groupBy("product")
    .sum("quantity")
    .withColumnRenamed("sum(quantity)", "no_of_products_sold")
)
result_df = units_per_product.sort(
    units_per_product["no_of_products_sold"].desc()
)

# Replace any previously saved ranking, then preview the rows.
result_df.write.mode("overwrite").saveAsTable("top_sold_products")
result_df.show()

+--------------------+-------------------+
|             product|no_of_products_sold|
+--------------------+-------------------+
|  Golden Gate Ginger|               3865|
|     Outback Oatmeal|               3733|
|Austin Almond Bis...|               3716|
|       Tokyo Tidbits|               3662|
|         Pearly Pies|               3595|
|       Orchard Oasis|               3586|
+--------------------+-------------------+



## Suppliers that provide ingredients to the most franchises

In [0]:
# Rank suppliers by how many rows of the franchises table reference them,
# and persist the ranking.
df = spark.table("sales_franchises_silver")

# count() yields one row per supplierID with a column literally named "count".
franchises_per_supplier = (
    df.groupBy("supplierID")
    .count()
    .withColumnRenamed("count", "no_of_franchises")
)
result_df = franchises_per_supplier.sort(
    franchises_per_supplier["no_of_franchises"].desc()
)

# Replace any previously saved ranking, then preview the rows.
result_df.write.mode("overwrite").saveAsTable("top_suppliers")
result_df.show()

+----------+----------------+
|supplierID|no_of_franchises|
+----------+----------------+
|   4000022|               1|
|   4000034|               1|
|   4000021|               1|
|   4000005|               1|
|   4000003|               1|
|   4000044|               1|
|   4000004|               1|
|   4000037|               1|
|   4000039|               1|
|   4000047|               1|
|   4000045|               1|
|   4000031|               1|
|   4000009|               1|
|   4000015|               1|
|   4000019|               1|
|   4000013|               1|
|   4000026|               1|
|   4000018|               1|
|   4000028|               1|
|   4000032|               1|
+----------+----------------+
only showing top 20 rows



## Total sales per month

In [0]:
from pyspark.sql.functions import month, sum, col, to_date, expr

# Load the transactions, cast totalPrice to double so it can be summed, and
# derive the month number from the dateTime column.
df = (
    spark.table("sales_transactions_silver")
    .withColumn("totalPrice", col("totalPrice").cast("double"))
    .withColumn("sales_month", expr("extract(month from dateTime)"))
)

# `sum` here is pyspark.sql.functions.sum (imported in this cell), not the
# Python builtin.
# NOTE(review): the grouping key is the month number only, so the same month
# from different years would be combined — confirm the data covers one year.
monthly_totals = df.groupBy("sales_month").agg(
    sum("totalPrice").alias("sales_amount")
)
result_df = monthly_totals.sort(monthly_totals["sales_amount"].desc())

# Replace any previously saved totals, then preview the rows.
result_df.write.mode("overwrite").saveAsTable("total_sales_per_month")
result_df.show()

+-----------+------------+
|sales_month|sales_amount|
+-----------+------------+
|          5|     66471.0|
+-----------+------------+

