# GOLD - SCD CUMULATIVE TABLE
# Purpose: CREATE A CUMULATIVE SCD OF ALL DATA WITH BUSINESS-LEVEL DATA QUALITY. ADDS A COLUMN THAT REFLECTS THE VALUE OF "AFFORDABLE" HOUSING FOR EACH QUARTILE IN THE US
# Source: silver.cummulative_property_owners (DELTA TABLE)
# Output: gold.cummulative_scd (DELTA TABLE)

## CONFIG/PARAMETERS

In [0]:
%sql
-- Set the session's current catalog so the schema-qualified names used in later cells
-- (e.g. silver.*, gold.*) resolve inside harris_county_catalog.
USE CATALOG harris_county_catalog

## SCD LOGIC

Important information:
- Business-level data quality: rows with a market value of 0 or with the invalid zip code "00000" are excluded.

In [0]:
%sql
-- Builds gold.cummulative_scd: one row per (dim_account_number, quartile, unbroken run of
-- that quartile), with the first and last dim_year_date of the run as start_date / end_date.
-- The runs are found with a gaps-and-islands pattern: flag each row whose quartile differs
-- from the previous row's, then running-sum the flags to number the islands.
--
-- NOTE(review): IF NOT EXISTS makes this statement a no-op when the table already exists,
-- so re-running the cell does not refresh the data. Consider CREATE OR REPLACE TABLE if a
-- full rebuild on every run is intended -- confirm before changing.
CREATE TABLE IF NOT EXISTS gold.cummulative_scd AS
-- Step 1: business-level data quality filter. Only the columns used downstream are kept.
-- Because of SQL three-valued logic, the <> comparisons also drop rows where
-- m_total_market_value or dim_zip_code is NULL.
WITH business_logic_data_quality_enforcement AS (
  SELECT
    dim_account_number
    , m_total_market_value
    , dim_year_date
  FROM silver.cummulative_property_owners
  WHERE m_total_market_value <> 0
    AND dim_zip_code <> '00000' -- 00000 is the only invalid zip code
)
-- Step 2: bucket each row's market value into a quartile label.
-- The thresholds are hard-coded as <constant> * 4; presumably these are quartile
-- boundaries scaled by an affordability multiple -- TODO confirm source and units.
, addition_of_quartile_column AS (
  SELECT
    dim_account_number
    , dim_year_date
    , CASE
        WHEN m_total_market_value < 41401 * 4 THEN '<25%'
        WHEN m_total_market_value < 83592 * 4 THEN '25-50%'
        WHEN m_total_market_value < 153000 * 4 THEN '50-75%'
        ELSE '75%>'
      END AS quartile
  FROM business_logic_data_quality_enforcement
)
-- Step 3: fetch the quartile of the preceding row for the same account
-- (NULL for the account's first row).
, lag_of_previous_quartile AS (
  SELECT
    dim_account_number
    , dim_year_date
    , quartile
    , LAG(quartile, 1) OVER (
        PARTITION BY dim_account_number
        ORDER BY dim_year_date
      ) AS previous_year_quartile
  FROM addition_of_quartile_column
)
-- Step 4: flag rows where the quartile changed. The first row of an account has a NULL
-- previous quartile, so the comparison is not true and it falls through to 0.
, change_of_quartile_indicator AS (
  SELECT
    dim_account_number
    , dim_year_date
    , quartile
    , CASE
        WHEN quartile <> previous_year_quartile THEN 1
        ELSE 0
      END AS change_indicators
  FROM lag_of_previous_quartile
)
-- Step 5: running count of changes = island id within the account.
-- An explicit ROWS frame is used: the default RANGE frame would give rows that tie on
-- dim_year_date the same running total, which can split an island incorrectly.
, categorization_by_changes_in_quartile AS (
  SELECT
    dim_account_number
    , dim_year_date
    , quartile
    , SUM(change_indicators) OVER (
        PARTITION BY dim_account_number
        ORDER BY dim_year_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
      ) AS sum_indicator
  FROM change_of_quartile_indicator
)
-- Step 6: collapse each island to a single SCD row.
-- No ORDER BY here: row order is not part of the stored table's contract, so sorting
-- before the write only adds cost.
, cumulative_scd_categorization_of_properties AS (
  SELECT
    dim_account_number
    , quartile
    , MIN(dim_year_date) AS start_date
    , MAX(dim_year_date) AS end_date
  FROM categorization_by_changes_in_quartile
  GROUP BY dim_account_number, quartile, sum_indicator
)
SELECT
  dim_account_number
  , quartile
  , start_date
  , end_date
FROM cumulative_scd_categorization_of_properties

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-7666628980231112>, line 1[0m
[0;32m----> 1[0m get_ipython()[38;5;241m.[39mrun_cell_magic([38;5;124m'[39m[38;5;124msql[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124m'[39m, [38;5;124m'[39m[38;5;124mCREATE TABLE gold.cummulative_scd AS[39m[38;5;130;01m\n[39;00m[38;5;124mWITH business_logic_data_quality_enforcement ([39m[38;5;130;01m\n[39;00m[38;5;124m  SELECT *[39m[38;5;130;01m\n[39;00m[38;5;124m  FROM silver.cummulative_property_owners[39m[38;5;130;01m\n[39;00m[38;5;124m  WHERE m_total_market_value <> 0 [39m[38;5;130;01m\n[39;00m[38;5;124m  AND dim_zip_code <> [39m[38;5;130;01m\'[39;00m[38;5;124m00000[39m[38;5;130;01m\'[39;00m[38;5;124m -- 00000 is the only invalid zip code[39m[38;5;130;01m\n[39;00m[38;5;124m)[39m[38;5;130;01m\n[39;00m[38;5;124