### Approach #1 - sale_by_date_city

In [2]:
## Load the three gold-layer tables consumed by the join/aggregate step:
## the sales fact table, then the date and city dimension tables.
df_fact_sal, df_dimension_date, df_dimension_city = (
    spark.read.table("wwi_gold.fact_sale"),
    spark.read.table("wwi_gold.dimension_date"),
    spark.read.table("wwi_gold.dimension_city"),
)

StatementMeta(, 6d637527-7580-4d0d-8213-f1fdeb2be625, 4, Finished, Available, Finished)

In [5]:
## Build the per-date, per-city sales aggregate from the fact and dimension data.
## Grouping keys: date attributes first, then city attributes.
_group_cols = ["date.Date", "date.CalendarMonthLabel", "date.Day", "date.ShortMonth",
               "date.CalendarYear", "city.City", "city.StateProvince", "city.SalesTerritory"]
## Fact measures that are summed within each group.
_measure_cols = ["sale.TotalExcludingTax", "sale.TaxAmount", "sale.TotalIncludingTax", "sale.Profit"]

sale_by_date_city = (
    df_fact_sal.alias("sale")
    # Inner joins: only fact rows with a matching date and a matching city are kept.
    .join(df_dimension_date.alias("date"), df_fact_sal.InvoiceDateKey == df_dimension_date.Date, 'inner')
    .join(df_dimension_city.alias("city"), df_fact_sal.CityKey == df_dimension_city.CityKey, 'inner')
    .select(*_group_cols, *_measure_cols)
    .groupBy(*_group_cols)
    .sum(*_measure_cols)
    # The aggregate columns come back named "sum(<column>)"; rename them to SumOf<column>.
    .withColumnRenamed("sum(TotalExcludingTax)", "SumOfTotalExcludingTax")
    .withColumnRenamed("sum(TaxAmount)", "SumOfTaxAmount")
    .withColumnRenamed("sum(TotalIncludingTax)", "SumOfTotalIncludingTax")
    .withColumnRenamed("sum(Profit)", "SumOfProfit")
    .orderBy("date.Date", "city.StateProvince", "city.City")
)

## Persist the aggregate as a Delta table, overwriting existing data and schema.
(
    sale_by_date_city.write
    .mode("overwrite")
    .format("delta")
    .option("overwriteSchema", 'true')
    .save("Tables/aggregate_sale_by_date_city")
)

StatementMeta(, 6d637527-7580-4d0d-8213-f1fdeb2be625, 7, Finished, Available, Finished)

### Approach #2 - sale_by_date_employee

In [7]:
%%sql
-- Sales totals (excluding tax, tax, including tax, profit) per calendar date and city.
-- NOTE(review): the view is named "..._employee" but it joins and groups by the
-- city dimension, not an employee dimension -- confirm which was intended.
create or replace view sale_by_date_employee as
select
    dd.Date,
    dd.CalendarMonthLabel,
    dd.Day,
    dd.ShortMonth,
    dd.CalendarYear,
    dc.City,
    dc.StateProvince,
    dc.SalesTerritory,
    sum(fs.TotalExcludingTax) as SumOfTotalExcludingTax,
    sum(fs.TaxAmount) as SumOfTaxAmount,
    sum(fs.TotalIncludingTax) as SumOfTotalIncludingTax,
    sum(fs.Profit) as SumOfProfit
from wwi_gold.fact_sale as fs
inner join wwi_gold.dimension_date as dd
    on fs.InvoiceDateKey = dd.Date
inner join wwi_gold.dimension_city as dc
    on fs.CityKey = dc.CityKey
group by
    dd.Date,
    dd.CalendarMonthLabel,
    dd.Day,
    dd.ShortMonth,
    dd.CalendarYear,
    dc.City,
    dc.StateProvince,
    dc.SalesTerritory
order by
    dd.Date,
    dc.StateProvince,
    dc.City

StatementMeta(, 6d637527-7580-4d0d-8213-f1fdeb2be625, 9, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [10]:
## Read every column of the sale_by_date_employee view into a DataFrame, then
## write it out as a Delta table, overwriting existing data and schema.
sale_by_date_employee = spark.sql("select * from sale_by_date_employee")
(
    sale_by_date_employee.write
    .mode("overwrite")
    .format("delta")
    .option("overwriteSchema", "true")
    .save("Tables/aggregate_sale_by_date_employee")
)

StatementMeta(, 6d637527-7580-4d0d-8213-f1fdeb2be625, 12, Finished, Available, Finished)