In [0]:
from pyspark.sql import SparkSession
import pandas as pd
from datetime import datetime

In [0]:
# Obtain the SparkSession for this notebook: getOrCreate() returns the
# already-active session if there is one, otherwise it builds a new one.
spark = SparkSession.builder.getOrCreate()

# Sales by Customer
- To build this view, the `customers` table is joined with the `orders` table on `customerid`.
- The result is then joined with the `sales` table on `orderid`.
- Finally, the rows are grouped by `customerid` and `name` to get the total quantity sold per customer.
- The result of the query is loaded into the gold layer in Unity Catalog.

In [0]:
%sql
-- Gold table: total quantity sold per customer.
-- customers is the driving table and both joins are LEFT joins, so customers
-- without any orders (or whose orders have no sales rows) are still listed.
create or replace table md_project.gold.customer_sales using delta as (
  select
    c.customerid,
    c.name,
    -- sum() returns NULL when a customer has no matching sales rows;
    -- coalesce reports an explicit 0 for those customers instead.
    coalesce(sum(s.quantity), 0) as total_sales
  from md_project.silver.customers c
  left join md_project.silver.orders o
    on c.customerid = o.customerid
  left join md_project.silver.sales s
    on s.orderid = o.orderid
  group by c.customerid, c.name
)

# Sales by Geo-location
- To get the sales by country or region, the `sales` table is joined with the `orders` table on `orderid`.
- Then, the `orders` table is joined with the `customers` table on `customerid`.
- Finally, the `customers` table is joined with the `countries` table on `country`.
- The result of the query is loaded into the gold layer in Unity Catalog.

In [0]:
%sql
-- Gold table: each sales row with the customer's country attached, plus the
-- region and name columns looked up from the countries table.
-- Every join is a LEFT join starting from sales, so a sales row is kept even
-- when no matching order, customer or country is found.
CREATE OR REPLACE TABLE md_project.gold.location_sales USING DELTA AS (
  SELECT
    sale.orderid,
    sale.quantity,
    cust.country,
    geo.region,
    geo.name
  FROM md_project.silver.sales AS sale
  LEFT JOIN md_project.silver.orders AS ord
    ON sale.orderid = ord.orderid
  LEFT JOIN md_project.silver.customers AS cust
    ON cust.customerid = ord.customerid
  LEFT JOIN md_project.silver.countries AS geo
    ON cust.country = geo.country
)
