###Importing the modules

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

###Using catalog and schema for creating views

In [0]:
%sql
-- Set the session defaults so unqualified object names in the following cells
-- resolve to retail_analytics.gold.
USE CATALOG retail_analytics;
USE SCHEMA gold;

###Views for total_sales

In [0]:
# Single-row KPI view: the sum of sales_amount over every row of fact_sales.
# fact_sales is unqualified, so it resolves against the session's current
# catalog/schema.
total_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_total_sales AS
SELECT
  SUM(sales_amount) AS total_sales
FROM fact_sales;
"""
spark.sql(total_sales_view_sql)

DataFrame[]

###Views for daily_sales

In [0]:
# One row per dim_date.date holding the summed sales_amount for that date.
# fact_sales is linked to dim_date through the date_sk surrogate key, and the
# view definition itself orders the rows by date.
daily_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_daily_sales AS
SELECT
  d.date,
  SUM(f.sales_amount) AS daily_sales
FROM fact_sales f
JOIN dim_date d
ON f.date_sk = d.date_sk
GROUP BY d.date
ORDER BY d.date;
"""
spark.sql(daily_sales_view_sql)

DataFrame[]

###Views for total_returns

In [0]:
# Single-row KPI view: absolute value of the summed refund_amount in
# fact_returns, rounded to 2 decimals.
# - abs(): presumably refund_amount is stored as a negative number -- TODO confirm.
# - COALESCE(..., 0): SUM over an empty fact_returns yields NULL; report 0
#   instead, the same way vw_net_sales treats the refund total.
# Plain string instead of an f-string: the SQL has no placeholders.
spark.sql("""
    CREATE OR REPLACE VIEW vw_total_returns AS
SELECT
  COALESCE(abs(round(SUM(refund_amount), 2)), 0) AS total_returns
FROM fact_returns;
""")

DataFrame[]

###Views for store_performance

In [0]:
# Per-store revenue breakdown: store_name, city and summed sales_amount,
# highest-grossing store first.
# Named vw_store_performance (matching this section's heading) rather than
# vw_store_sales: a later cell in this notebook runs
# CREATE OR REPLACE VIEW vw_store_sales with a different, single-row definition,
# so sharing that name would let the later cell overwrite this breakdown on a
# full run.
# Plain string instead of an f-string: the SQL has no placeholders.
spark.sql("""
    CREATE OR REPLACE VIEW vw_store_performance AS
SELECT
  st.store_name,
  st.city,
  SUM(f.sales_amount) AS store_sales
FROM fact_sales f
JOIN dim_stores st
ON f.store_sk = st.store_sk
GROUP BY st.store_name, st.city
ORDER BY store_sales DESC;
""")

DataFrame[]

###Views for top_selling_products

In [0]:
# Top 10 products by revenue: units sold (SUM of quantity_sold) and revenue
# (SUM of sales_amount, rounded to 2 decimals) per product.
# description_en is projected as well as grouped on. Grouping by a column that
# is not selected could return several rows with the same product_id and no way
# to tell them apart; exposing it also matches the column layout of
# vw_least_category.
# Plain string instead of an f-string: the SQL has no placeholders.
spark.sql("""
    CREATE OR REPLACE VIEW vw_top_products AS
SELECT
  p.product_id,
  p.description_en,
  SUM(f.quantity_sold) AS units_sold,
  round(SUM(f.sales_amount), 2) AS revenue
FROM fact_sales f
JOIN dim_products p
ON f.product_sk = p.product_sk
GROUP BY p.product_id, p.description_en
ORDER BY revenue DESC
LIMIT 10;
""")

DataFrame[]

###Views for customer_lifetime_value

In [0]:
# The 20 customers with the largest lifetime_value, where lifetime_value is the
# summed sales_amount per (customer_id, name) pair. fact_sales is joined to
# dim_customers on customer_sk.
top_customers_view_sql = """
    CREATE OR REPLACE VIEW vw_top_customers AS
SELECT
  c.customer_id,
  c.name,
  SUM(f.sales_amount) AS lifetime_value
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_customers c
ON f.customer_sk = c.customer_sk
GROUP BY c.customer_id, c.name
ORDER BY lifetime_value DESC
LIMIT 20;
"""
spark.sql(top_customers_view_sql)

DataFrame[]

###Views for top_1_customer

In [0]:
# Same aggregation as the top-customers view (summed sales_amount per
# customer_id/name) but limited to the single highest row.
top_one_customer_view_sql = """
    CREATE OR REPLACE VIEW vw_top_one_customer AS
SELECT
  c.customer_id,
  c.name,
  SUM(f.sales_amount) AS lifetime_value
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_customers c
ON f.customer_sk = c.customer_sk
GROUP BY c.customer_id, c.name
ORDER BY lifetime_value DESC
LIMIT 1;
"""
spark.sql(top_one_customer_view_sql)

DataFrame[]

###Views for customer_by_country

In [0]:
# Number of dim_customers rows per country, largest count first.
# NOTE(review): COUNT(*) counts rows, so this equals the number of customers
# only if dim_customers holds one row per customer -- confirm.
customer_by_country_view_sql = """
    CREATE OR REPLACE VIEW vw_customer_by_country AS
SELECT
  country,
  COUNT(*) AS customers
FROM retail_analytics.gold.dim_customers
GROUP BY country
ORDER BY customers DESC;
"""
spark.sql(customer_by_country_view_sql)

DataFrame[]

###Views for sales_by_category (masculine, feminine, children only)

In [0]:
# Summed sales_amount per product category, restricted to categories whose
# lower-cased value is 'masculine', 'feminine' or 'children'.
# The filter is case-insensitive but the grouping uses the raw category value.
total_sales_category_view_sql = """
    CREATE OR REPLACE VIEW vw_total_sales_category AS
SELECT
  p.category,
  SUM(f.sales_amount) AS total_sales
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_products p
ON f.product_sk = p.product_sk
WHERE lower(p.category) IN ('masculine','feminine','children')
GROUP BY p.category
ORDER BY total_sales DESC;
"""
spark.sql(total_sales_category_view_sql)

DataFrame[]

###Views for total_customers

In [0]:
# Single-row KPI view: the row count of dim_customers.
total_customers_view_sql = """
    CREATE OR REPLACE VIEW vw_total_customers AS
SELECT COUNT(*) AS total_customers
FROM retail_analytics.gold.dim_customers;
"""
spark.sql(total_customers_view_sql)

DataFrame[]

###Views for return_rate (returned invoices as a percentage of sales invoices)

In [0]:
# Single-row KPI view: distinct invoice_ids in fact_returns as a percentage of
# distinct invoice_ids in fact_sales, rounded to 2 decimals.
# NULLIF(..., 0) guards the denominator: with no sales invoices the division
# would be by zero, which raises an error when ANSI mode is enabled. The view
# now yields NULL in that case (the same result as non-ANSI mode).
# NOTE(review): the ratio assumes returns and sales share the same invoice_id
# space -- confirm.
# Plain string instead of an f-string: the SQL has no placeholders.
spark.sql("""
    CREATE OR REPLACE VIEW vw_customer_return AS
SELECT
  ROUND(
    (SELECT COUNT(DISTINCT invoice_id) FROM retail_analytics.gold.fact_returns) * 100.0 /
    NULLIF((SELECT COUNT(DISTINCT invoice_id) FROM retail_analytics.gold.fact_sales), 0),
    2
  ) AS return_rate_percentage;
""")

DataFrame[]

###Views for total_store

In [0]:
# Single-row KPI view: the row count of dim_stores.
total_stores_view_sql = """
    CREATE OR REPLACE VIEW vw_total_stores AS
SELECT COUNT(*) AS total_stores
FROM retail_analytics.gold.dim_stores;
"""
spark.sql(total_stores_view_sql)

DataFrame[]

###Views for top_store

In [0]:
# Single-row view: the store_name with the highest summed sales_amount.
# Sales are grouped by store_name only (city is not part of the key here).
top_store_view_sql = """
    CREATE OR REPLACE VIEW vw_top_store AS
SELECT
  st.store_name AS top_store_name
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_stores st
ON f.store_sk = st.store_sk
GROUP BY st.store_name
ORDER BY SUM(f.sales_amount) DESC
LIMIT 1;
"""
spark.sql(top_store_view_sql)

DataFrame[]

###Views for least_selling_store

In [0]:
# Single-row view: the store_name with the lowest summed sales_amount.
# The join is an inner join, so only stores that have at least one matching
# fact_sales row can be returned.
least_store_view_sql = """
    CREATE OR REPLACE VIEW vw_least_store AS
SELECT
  st.store_name AS least_performance_store
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_stores st
ON f.store_sk = st.store_sk
GROUP BY st.store_name
ORDER BY SUM(f.sales_amount) ASC
LIMIT 1;
"""
spark.sql(least_store_view_sql)

DataFrame[]

###Views for product_category_sales

In [0]:
# Summed sales_amount per product category (all categories), largest first.
# fact_sales is joined to dim_products on product_sk.
category_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_category_sales AS
SELECT
  p.category,
  SUM(f.sales_amount) AS category_sales
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_products p
ON f.product_sk = p.product_sk
GROUP BY p.category
ORDER BY category_sales DESC;
"""
spark.sql(category_sales_view_sql)

DataFrame[]

###Views for least_selling_products (view name: vw_least_category)

In [0]:
# Despite the view name, the rows are products, not categories: the 10
# (product_id, description_en) pairs with the lowest summed sales_amount.
least_category_view_sql = """
    CREATE OR REPLACE VIEW vw_least_category AS
SELECT
  p.product_id,
  p.description_en,
  SUM(f.sales_amount) AS revenue
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_products p
ON f.product_sk = p.product_sk
GROUP BY p.product_id, p.description_en
ORDER BY revenue ASC
LIMIT 10;
"""
spark.sql(least_category_view_sql)

DataFrame[]

###Views for top_store_sales_value (view name: vw_store_sales)

In [0]:
# Single-row view: the largest per-store_name total of sales_amount.
# Only the value is exposed (top_store_sales_value); the store name is not.
store_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_store_sales AS
SELECT
  SUM(f.sales_amount) AS top_store_sales_value
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_stores st
ON f.store_sk = st.store_sk
GROUP BY st.store_name
ORDER BY top_store_sales_value DESC
LIMIT 1;
"""
spark.sql(store_sales_view_sql)

DataFrame[]

###Views for total_orders

In [0]:
# Single-row KPI view: number of distinct invoice_id values in fact_sales,
# i.e. an order is identified by its invoice.
total_orders_view_sql = """
    CREATE OR REPLACE VIEW vw_total_orders AS
SELECT COUNT(DISTINCT invoice_id) AS total_orders
FROM retail_analytics.gold.fact_sales;
"""
spark.sql(total_orders_view_sql)

DataFrame[]

###Views for avg_order_value

In [0]:
# Single-row KPI view: average order value, computed as total sales_amount
# divided by the number of distinct invoice_id values, rounded to 2 decimals.
avg_order_view_sql = """
    CREATE OR REPLACE VIEW vw_avg_order AS
SELECT
  ROUND(SUM(sales_amount) / COUNT(DISTINCT invoice_id), 2) AS avg_order_value
FROM retail_analytics.gold.fact_sales;
"""
spark.sql(avg_order_view_sql)

DataFrame[]

###Views for net_sales

In [0]:
# Single-row KPI view: total sales_amount minus the absolute refund total.
# - abs(): presumably refund_amount is stored as a negative number -- TODO confirm.
# - COALESCE(..., 0): a NULL refund total (e.g. no rows in fact_returns) is
#   treated as zero so it does not turn net_sales into NULL.
net_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_net_sales AS
SELECT
  (SELECT SUM(sales_amount) FROM retail_analytics.gold.fact_sales)
  -
  (SELECT COALESCE(abs(SUM(refund_amount)), 0) FROM retail_analytics.gold.fact_returns)
  AS net_sales;
"""
spark.sql(net_sales_view_sql)

DataFrame[]

###Views for yearly_sales

In [0]:
# One row per dim_date.year with the summed sales_amount for that year,
# ordered by year. fact_sales is joined to dim_date on date_sk.
yearly_sales_view_sql = """
    CREATE OR REPLACE VIEW vw_yearly_sales AS
SELECT
    d.year,
    SUM(f.sales_amount) AS yearly_sales
FROM retail_analytics.gold.fact_sales f
JOIN retail_analytics.gold.dim_date d
ON f.date_sk = d.date_sk
GROUP BY d.year
ORDER BY d.year;
"""
spark.sql(yearly_sales_view_sql)

DataFrame[]