### Importing the Needed Modules

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

import os
import sys

# Root of the project checkout. It must be on sys.path so that `src.paths`
# can be imported by the statement that follows.
# The location can be overridden through the RETAIL_PLATFORM_ROOT environment
# variable; when it is unset, the original hard-coded workspace path is used.
# NOTE(review): the default is a user-specific workspace path — consider moving
# it to shared configuration.
PROJECT_ROOT = os.environ.get(
    'RETAIL_PLATFORM_ROOT',
    '/Workspace/Users/mohammedthoufiq9360@gmail.com/Retail-And-Ecommerce-Analytics-Platform',
)

# Only append once, so re-running this cell in the same kernel does not pile
# up duplicate sys.path entries.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.paths import (
    FACT_SALES_PATH, FACT_RETURNS_PATH, DIM_CUSTOMERS_PATH, DIM_PRODUCTS_PATH, DIM_STORES_PATH, DIM_EMPLOYEES_PATH, DIM_DATES_PATH, DIM_DISCOUNTS_PATH,
    TOTAL_ORDERS_COUNT_PATH, TOTAL_REFUND_AMOUNT_PATH, TOTAL_UNITS_SOLD_PATH, MONTHLY_UNITS_SOLD_PATH, TOTAL_REVENUE_PATH, 
    RETURNS_RATE_PATH, MONTHLY_REVENUE_PATH, REVENUE_CATEGORY_PATH,
    REVENUE_SUB_CATEGORY_PATH, REVENUE_COUNTRY_PATH, TOP_CUSTOMERS_REVENUE_PATH, TOP_PRODUCTS_REVENUE_PATH, 
    MONTHLY_REFUND_AMOUNT_PATH, RETURN_RATE_CATEGORY_PATH, TOP_RETURNED_PRODUCT_PATH, REVENUE_DISCOUNT
)

### Creating View for Total Orders Count

In [0]:
# DDL for the total-orders view: one aggregate row holding the number of
# distinct invoice_id values found in the table at FACT_SALES_PATH.
total_orders_view_sql = f"""
    CREATE OR REPLACE VIEW {TOTAL_ORDERS_COUNT_PATH} AS
    SELECT
    COUNT(DISTINCT invoice_id) AS total_orders
    FROM
    {FACT_SALES_PATH}
"""

# Run the statement; being the last expression of the cell, the returned
# DataFrame is still echoed by the notebook.
spark.sql(total_orders_view_sql)

DataFrame[]

### Creating View for Total Refund Amount

In [0]:
# DDL for the total-refund view: sums refund_amount over the table at
# FACT_RETURNS_PATH, rounds the sum to 2 decimals and exposes its absolute
# value as total_refunds.
# NOTE(review): the abs() suggests refunds are stored as negative amounts —
# confirm against the returns fact table.
total_refund_view_sql = f"""
    CREATE OR REPLACE VIEW {TOTAL_REFUND_AMOUNT_PATH} AS
    SELECT
        abs(round(SUM(refund_amount), 2)) AS total_refunds
    FROM {FACT_RETURNS_PATH}
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(total_refund_view_sql)

DataFrame[]

### Creating View for Total Number of Units Sold

In [0]:
# DDL for the total-units-sold view: a single aggregate row with the sum of
# quantity across the table at FACT_SALES_PATH.
total_units_sold_view_sql = f"""
    CREATE OR REPLACE VIEW {TOTAL_UNITS_SOLD_PATH} AS
    SELECT
        sum(quantity) AS units_sold
    FROM {FACT_SALES_PATH};
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(total_units_sold_view_sql)

DataFrame[]

### Creating View for Total Revenue 

In [0]:
# DDL for the total-revenue view: sums line_total over the table at
# FACT_SALES_PATH and rounds the result to 2 decimals.
total_revenue_view_sql = f"""
    CREATE OR REPLACE VIEW {TOTAL_REVENUE_PATH} AS
SELECT
  round(SUM(line_total), 2) AS total_revenue
FROM {FACT_SALES_PATH}
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(total_revenue_view_sql)

DataFrame[]

### Creating View for Return Rate

In [0]:
# (Re)create the overall return-rate view.
#
# The view divides the total quantity_returned (table at FACT_RETURNS_PATH) by
# the total quantity sold (table at FACT_SALES_PATH), multiplies by 100, rounds
# to 2 decimals and appends a percent sign, so return_rate_pct is a STRING
# column such as '3.25%'.
#
# Hardening compared with the naive formula:
#   * COALESCE(total_returned, 0): SUM over an empty returns table is NULL;
#     report 0 instead, matching the per-category return-rate view.
#   * NULLIF(total_sold, 0): yields NULL rather than a divide-by-zero error
#     when ANSI mode is enabled and nothing has been sold.
#   * '%' is single-quoted so it is always parsed as a string literal, even
#     when double-quoted identifiers are enabled.
#   * Both CTEs return exactly one row, so the explicit CROSS JOIN produces a
#     single output row.
spark.sql(f"""
    CREATE OR REPLACE VIEW {RETURNS_RATE_PATH} AS
    WITH sold AS (
        SELECT SUM(quantity) AS total_sold
        FROM {FACT_SALES_PATH}
    ),
    returned AS (
        SELECT SUM(quantity_returned) AS total_returned
        FROM {FACT_RETURNS_PATH}
    )
    SELECT
        concat(
            ROUND(
                COALESCE(returned.total_returned, 0) * 100.0
                    / NULLIF(sold.total_sold, 0),
                2
            ),
            '%'
        ) AS return_rate_pct
    FROM sold
    CROSS JOIN returned
""")

DataFrame[]

### Creating View for Monthly Revenue

In [0]:
# DDL for the monthly-revenue view: joins the sales fact (FACT_SALES_PATH) to
# the date dimension (DIM_DATES_PATH) on date_sk, groups by year / month /
# month_name, and exposes line_total summed and rounded to 2 decimals.
# month_year is a display label built as "<year> <month_name>".
monthly_revenue_view_sql = f"""
    CREATE OR REPLACE VIEW {MONTHLY_REVENUE_PATH} AS
    SELECT
        d.year,
        d.month,
        d.month_name,
        concat(d.year,' ', d.month_name) AS month_year, 
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_DATES_PATH} d
        ON f.date_sk = d.date_sk
    GROUP BY d.year, d.month, d.month_name
    ORDER BY d.year, d.month;
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(monthly_revenue_view_sql)

DataFrame[]

### Creating View for Revenue by Category

In [0]:
# DDL for the revenue-by-category view: joins the sales fact to the product
# dimension on product_sk and sums line_total (rounded to 2 decimals) per
# category, highest revenue first.
revenue_by_category_view_sql = f"""
    CREATE OR REPLACE VIEW {REVENUE_CATEGORY_PATH} AS
    SELECT
        p.category,
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_PRODUCTS_PATH} p
        ON f.product_sk = p.product_sk
    GROUP BY p.category
    ORDER BY revenue DESC;
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(revenue_by_category_view_sql)

DataFrame[]

### Creating View for Revenue by Sub Category

In [0]:
# DDL for the revenue-by-sub-category view: same shape as the category view,
# but grouped on the product dimension's sub_category column.
revenue_by_sub_category_view_sql = f"""
    CREATE OR REPLACE VIEW {REVENUE_SUB_CATEGORY_PATH} AS
    SELECT
        p.sub_category,
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_PRODUCTS_PATH} p
        ON f.product_sk = p.product_sk
    GROUP BY p.sub_category
    ORDER BY revenue DESC;
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(revenue_by_sub_category_view_sql)

DataFrame[]

### Creating View for Revenue by Country

In [0]:
# DDL for the revenue-by-country view: joins the sales fact to the store
# dimension on store_sk and sums line_total (rounded to 2 decimals) per
# store country, highest revenue first.
revenue_by_country_view_sql = f"""
    CREATE OR REPLACE VIEW {REVENUE_COUNTRY_PATH} AS
    SELECT
        s.country,
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_STORES_PATH} s
        ON f.store_sk = s.store_sk
    GROUP BY s.country
    ORDER BY revenue DESC;
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(revenue_by_country_view_sql)

DataFrame[]

### Creating View for Total Number of Units Sold (Monthly)

In [0]:
# DDL for the monthly-units-sold view: joins the sales fact to the date
# dimension on date_sk and sums quantity per year / month / month_name.
# month_year is a display label built as "<year> <month_name>".
monthly_units_sold_view_sql = f"""
    CREATE OR REPLACE VIEW {MONTHLY_UNITS_SOLD_PATH} AS
    SELECT
        d.year,
        d.month,
        d.month_name,
        concat(d.year,' ', d.month_name) AS month_year, 
        SUM(f.quantity) AS units_sold
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_DATES_PATH} d
        ON f.date_sk = d.date_sk
    GROUP BY d.year, d.month, d.month_name
    ORDER BY d.year, d.month;
"""

# Execute the DDL (kept as the final expression so the cell output is unchanged).
spark.sql(monthly_units_sold_view_sql)

DataFrame[]

### Creating View for Top Customers by Revenue

In [0]:
# (Re)create the top-customers view: the 20 customers with the highest summed
# line_total, joining the sales fact to the customer dimension on customer_sk.
#
# Changes compared with the previous definition:
#   * total_revenue is rounded to 2 decimals, like every other revenue view in
#     this notebook.
#   * customer_id is added as a secondary sort key so that LIMIT 20 returns
#     the same rows on every evaluation when several customers tie on revenue.
#   * A stray tab after "DESC" was removed.
spark.sql(f"""
    CREATE OR REPLACE VIEW {TOP_CUSTOMERS_REVENUE_PATH} AS
    SELECT
        c.customer_id,
        c.name,
        round(SUM(f.line_total), 2) AS total_revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_CUSTOMERS_PATH} c
        ON f.customer_sk = c.customer_sk
    GROUP BY c.customer_id, c.name
    ORDER BY total_revenue DESC, c.customer_id
    LIMIT 20
""")

DataFrame[]

### Creating View for Top Products by Revenue

In [0]:
# (Re)create the top-products view: the 10 products with the highest summed
# line_total, joining the sales fact to the product dimension on product_sk.
#
# Changes compared with the previous definition:
#   * revenue is rounded to 2 decimals, like the other revenue views in this
#     notebook.
#   * product_id is added as a secondary sort key so that LIMIT 10 returns the
#     same rows on every evaluation when several products tie on revenue.
spark.sql(f"""
    CREATE OR REPLACE VIEW {TOP_PRODUCTS_REVENUE_PATH} AS
    SELECT
        p.product_id,
        p.category,
        p.sub_category,
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_PRODUCTS_PATH} p
        ON f.product_sk = p.product_sk
    GROUP BY p.product_id, p.category, p.sub_category
    ORDER BY revenue DESC, p.product_id
    LIMIT 10
""")

DataFrame[]

### Creating View for Refund Amount (Monthly)

In [0]:
# (Re)create the monthly-refund view: joins the returns fact to the date
# dimension on date_sk and, per year / month / month_name, exposes the
# absolute value of the summed refund_amount rounded to 2 decimals.
# NOTE(review): the abs() suggests refunds are stored as negative amounts —
# confirm against the returns fact table.
#
# The separator in month_year is now a single-quoted ' ' literal, matching the
# other monthly views; a double-quoted " " is parsed as an identifier when
# double-quoted identifiers are enabled.
spark.sql(f"""
    CREATE OR REPLACE VIEW {MONTHLY_REFUND_AMOUNT_PATH} AS
    SELECT
        d.year,
        d.month,
        d.month_name,
        concat(d.year, ' ', d.month_name) AS month_year,
        round(abs(SUM(r.refund_amount)), 2) AS refund_amount
    FROM {FACT_RETURNS_PATH} r
    JOIN {DIM_DATES_PATH} d
        ON r.date_sk = d.date_sk
    GROUP BY d.year, d.month, d.month_name
    ORDER BY d.year, d.month
""")

DataFrame[]

### Creating View for Return Rate by Category

In [0]:
# (Re)create the return-rate-by-category view.
#
#   sales_cat   - quantity sold per product category
#   returns_cat - quantity_returned per product category
#
# The final select LEFT JOINs returns onto sales, so categories without any
# returns are kept and report 0 (via COALESCE). The rate is
# qty_returned * 100 / qty_sold, rounded to 2 decimals, and stays numeric
# (no '%' suffix).
#
# NULLIF(s.qty_sold, 0) makes the rate NULL instead of raising a
# divide-by-zero error under ANSI mode when a category's sold quantity sums
# to zero.
spark.sql(f"""
    CREATE OR REPLACE VIEW {RETURN_RATE_CATEGORY_PATH} AS
    WITH sales_cat AS (
        SELECT
            p.category,
            SUM(s.quantity) AS qty_sold
        FROM {FACT_SALES_PATH} s
        JOIN {DIM_PRODUCTS_PATH} p
            ON s.product_sk = p.product_sk
        GROUP BY p.category
    ),
    returns_cat AS (
        SELECT
            p.category,
            SUM(r.quantity_returned) AS qty_returned
        FROM {FACT_RETURNS_PATH} r
        JOIN {DIM_PRODUCTS_PATH} p
            ON r.product_sk = p.product_sk
        GROUP BY p.category
    )
    SELECT
        s.category,
        ROUND(
            COALESCE(r.qty_returned, 0) * 100.0 / NULLIF(s.qty_sold, 0),
            2
        ) AS return_rate_pct
    FROM sales_cat s
    LEFT JOIN returns_cat r
        ON s.category = r.category
    ORDER BY return_rate_pct DESC
""")

DataFrame[]

### Creating View for Top Returned Products

In [0]:
# (Re)create the top-returned-products view: the 10 products with the highest
# summed quantity_returned, joining the returns fact to the product dimension
# on product_sk.
#
# product_id is added as a secondary sort key so that LIMIT 10 returns the
# same rows on every evaluation when several products tie on units_returned.
spark.sql(f"""
    CREATE OR REPLACE VIEW {TOP_RETURNED_PRODUCT_PATH} AS
    SELECT
        p.product_id,
        p.category,
        p.sub_category,
        SUM(r.quantity_returned) AS units_returned
    FROM {FACT_RETURNS_PATH} r
    JOIN {DIM_PRODUCTS_PATH} p
        ON r.product_sk = p.product_sk
    GROUP BY p.product_id, p.category, p.sub_category
    ORDER BY units_returned DESC, p.product_id
    LIMIT 10
""")

DataFrame[]

### Creating View for Discount Effectiveness

In [0]:
# (Re)create the revenue-by-discount view: joins the sales fact to the
# discount dimension on discount_sk and sums line_total (rounded to 2
# decimals) per discount value, ordered by discount.
#
# discount_pct is a STRING label built from discount * 100 plus a percent
# sign.
# NOTE(review): this presumes discount is stored as a fraction (e.g. 0.10 for
# 10%) — confirm against the discount dimension.
#
# Changes compared with the previous definition:
#   * discount is qualified as d.discount in the select list, matching the
#     GROUP BY / ORDER BY and avoiding an ambiguous-column error should the
#     fact table ever carry a column of the same name.
#   * '%' is single-quoted so it is always parsed as a string literal, even
#     when double-quoted identifiers are enabled.
spark.sql(f"""
    CREATE OR REPLACE VIEW {REVENUE_DISCOUNT} AS
    SELECT
        concat(d.discount * 100, '%') AS discount_pct,
        round(SUM(f.line_total), 2) AS revenue
    FROM {FACT_SALES_PATH} f
    JOIN {DIM_DISCOUNTS_PATH} d
        ON f.discount_sk = d.discount_sk
    GROUP BY d.discount
    ORDER BY d.discount
""")

DataFrame[]