In [0]:
%sql
-- Register the `gizmobox` container of the `deacourseextdldemo` storage account as the
-- external location `dea_course_ext_dl_demo`, accessed through the storage credential
-- `dea_course_ext_sc`. IF NOT EXISTS keeps the cell from failing when the location already exists.

CREATE EXTERNAL LOCATION IF NOT EXISTS dea_course_ext_dl_demo
URL 'abfss://gizmobox@deacourseextdldemo.dfs.core.windows.net/'
WITH (STORAGE CREDENTIAL dea_course_ext_sc);

In [0]:
# List the folders and files at the root of the gizmobox datalake container.
# dbutils.fs.ls is the function behind the `%fs ls` shorthand; calling it directly keeps
# this an ordinary Python cell, and display() renders the listing as a table.

display(dbutils.fs.ls('abfss://gizmobox@deacourseextdldemo.dfs.core.windows.net'))

In [0]:
# Read the orders JSON files from the `orders/` folder of the gizmobox container.
# No schema is supplied, so the JSON reader infers one from the data on its own;
# `inferSchema` is a CSV reader option and has no effect on the JSON source.

orders_df = spark.read.format("json").load('abfss://gizmobox@deacourseextdldemo.dfs.core.windows.net/orders/')

In [0]:
# Load the payments CSV files from the `payments/` folder of the gizmobox container,
# letting Spark infer the column types. No `header` option is set, so the reader's
# default handling of the first line applies.

payments_df = (
    spark.read
    .format("csv")
    .option('inferSchema', True)
    .load('abfss://gizmobox@deacourseextdldemo.dfs.core.windows.net/payments/')
)

In [0]:
%sql
-- Register the `salesdemo` container of the `deacourseextdldemo` storage account as the
-- external location `dea_course_ext_dl_sales`, accessed through the storage credential
-- `dea_course_ext_sc`. IF NOT EXISTS keeps the cell from failing when the location already exists.

CREATE EXTERNAL LOCATION IF NOT EXISTS dea_course_ext_dl_sales
URL 'abfss://salesdemo@deacourseextdldemo.dfs.core.windows.net/'
WITH (STORAGE CREDENTIAL dea_course_ext_sc);

In [0]:
# Load sales.csv from the salesdemo container: the first line is used as the
# column header and the column types are inferred from the data.

sales_df = (
    spark.read
    .format('csv')
    .option('header', True)
    .option('inferSchema', True)
    .load('abfss://salesdemo@deacourseextdldemo.dfs.core.windows.net/sales.csv')
)

In [0]:
# Register sales_df as the temporary view `vw_sales` (replacing any existing view of
# that name) so the sales data can be queried by name from the %sql cells

sales_df.createOrReplaceTempView('vw_sales')

In [0]:
%sql

-- Preview the sales data: return every row and column of the vw_sales temporary view

SELECT * FROM vw_sales;

In [0]:
%sql

-- Total sales per calendar month (formatted yyyy-MM), rounded to 2 decimals,
-- listed from the earliest month to the latest

SELECT
  date_format(order_date, 'yyyy-MM') AS month,
  ROUND(SUM(sales_amount), 2)        AS total_sales
FROM
  vw_sales
GROUP BY
  month
ORDER BY
  month;

In [0]:
%sql

-- Month-on-month sales growth.
--   monthly_sales : total sales per calendar month (yyyy-MM), rounded to 2 decimals
--   sales         : each month paired with the previous month's total via LAG
-- Output columns: month, previous_month_sales, current_month_sales,
--                 sales_growth (absolute change), sales_growth_percent (relative change in %).
-- The first month has no predecessor, so both growth columns are reported as 0 for it.

WITH monthly_sales AS (
  SELECT date_format(order_date, 'yyyy-MM') AS month,
         ROUND(SUM(sales_amount), 2)        AS current_month_sales
  FROM vw_sales
  GROUP BY date_format(order_date, 'yyyy-MM')
),
sales AS (
  SELECT month,
         LAG(current_month_sales) OVER (ORDER BY month) AS previous_month_sales,
         current_month_sales
  FROM monthly_sales
)
SELECT month,
       previous_month_sales,
       current_month_sales,
       CASE
         WHEN previous_month_sales IS NULL THEN 0
         ELSE ROUND(current_month_sales - previous_month_sales, 2)
       END AS sales_growth,
       CASE
         WHEN previous_month_sales IS NULL THEN 0
         -- NULLIF turns a zero previous-month total into NULL, so the percentage is NULL
         -- instead of a divide-by-zero error when ANSI mode is enabled
         ELSE ROUND(((current_month_sales - previous_month_sales) / NULLIF(previous_month_sales, 0)) * 100, 2)
       END AS sales_growth_percent
FROM sales
-- Sort in the outermost query: an ORDER BY inside a CTE does not guarantee the final row order
ORDER BY month;