http://jira.lge.com/issue/browse/HEDATAPLFM-1020?attachmentSortBy=dateTime&attachmentOrder=asc

In [0]:
%sql
-- PO vs. activation comparison mart.
-- Produces one output row per row of kms_wee_tv (T101), left-joined to
--   * T102: the earliest activation record found for the same MAC, and
--   * T103: the earliest app-brand record found for the same MAC.
-- Rows without an activation / brand match keep NULLs in the activ_* columns.
CREATE OR REPLACE TABLE sandbox.t_weetv.po_activation_mart
USING DELTA
PARTITIONED BY (date_ym)
AS
WITH T101 AS (
    /* KMS (PO-side) rows; po_date becomes NULL when it cannot be cast to DATE */
    SELECT
      date_ym,
      mac,
      try_cast(po_date AS DATE) AS po_date,
      po_country_code,
      po_type,
      po_board_maker,
      po_odm,
      po_brand,
      po_country,
      po_year,
      po_platform
    FROM kic_data_ods.kms.kms_wee_tv
),
T102 AS (
    /* Activation: first record per MAC, restricted to MACs present in T101 */
    SELECT
        ranked.mac_addr,
        ranked.activ_country_code,
        ranked.activ_date,
        ranked.activ_platform_code,
        ranked.activ_product_code,
        ranked.activ_sales_model
    FROM (
        SELECT
            act.mac_addr,
            act.Cntry_CODE      AS activ_country_code,
            DATE(act.crt_date)  AS activ_date,
            act.Platform_code   AS activ_platform_code,
            act.Product_CODE    AS activ_product_code,
            act.Sales_Model     AS activ_sales_model,
            -- NULLS LAST: Spark sorts NULLs first for ASC by default, which
            -- would let a row with a missing crt_date win as the "earliest".
            ROW_NUMBER() OVER (
                PARTITION BY act.mac_addr
                ORDER BY
                    act.crt_date      ASC NULLS LAST,
                    act.last_chg_date ASC NULLS LAST,
                    act.he_etl_dt     ASC NULLS LAST
            ) AS rn
        FROM eic_data_ods.tlamp.activation_date AS act
        WHERE EXISTS (
            SELECT 1
            FROM T101
            WHERE T101.mac = act.mac_addr
        )
    ) AS ranked
    WHERE ranked.rn = 1
),
T103 AS (
    /* App brand: first record per MAC, restricted to MACs present in T101 */
    SELECT
        ranked.mac_addr,
        ranked.brand_name
    FROM (
        SELECT
            brd.mac_addr,
            brd.brand_name,
            -- NULLS LAST for the same reason as in T102.
            ROW_NUMBER() OVER (
                PARTITION BY brd.mac_addr
                ORDER BY
                    brd.src_file_date ASC NULLS LAST,
                    brd.he_etl_dt     ASC NULLS LAST
            ) AS rn
        FROM eic_data_ods.tlamp.apps_device_brand AS brd
        WHERE EXISTS (
            SELECT 1
            FROM T101
            WHERE T101.mac = brd.mac_addr
        )
    ) AS ranked
    WHERE ranked.rn = 1
)
SELECT
    -- Partition key
    T101.date_ym,

    -- PO-side columns
    T101.mac,
    T101.po_date,
    T101.po_country_code,
    T101.po_type,
    T101.po_board_maker,
    T101.po_odm,
    T101.po_brand,
    T101.po_country,
    T101.po_year,
    T101.po_platform,

    -- Activation-side columns
    T102.activ_date,
    T102.activ_country_code,
    T102.activ_platform_code,
    T102.activ_product_code,
    T102.activ_sales_model,

    -- brand_name split on '_' : 1st part = board maker, 2nd = ODM, 3rd = brand
    SPLIT_PART(T103.brand_name, '_', 1)   AS activ_board_maker,
    SPLIT_PART(T103.brand_name, '_', 2)   AS activ_odm,
    SPLIT_PART(T103.brand_name, '_', 3)   AS activ_brand,

    -- Comparison indicators
    -- NOTE(review): 'not_yep_activated' looks like a typo for
    -- 'not_yet_activated'; kept as-is because it is a stored value that
    -- downstream filters may already match on -- confirm before renaming.
    CASE
        WHEN T101.po_country_code IS NULL THEN 'po_country_code_is_null'
        WHEN T102.activ_country_code IS NULL THEN 'not_yep_activated'
        WHEN T101.po_country_code <> T102.activ_country_code THEN 'N'
        ELSE 'Y'
    END                                      AS is_same_country,
    -- Days from po_date to activ_date; NULL when either date is NULL.
    DATE_DIFF(T102.activ_date, T101.po_date) AS day_diff,

    -- Load timestamp of this build
    current_timestamp() AS he_etl_dt
FROM T101
LEFT JOIN T102
  ON T102.mac_addr = T101.mac
LEFT JOIN T103
  ON T103.mac_addr = T101.mac
;


In [0]:
%sql
-- Reverse-lookup view for X_User_Number: stacks the rbt_x_user_number_webos*
-- tables and returns the distinct (original value, hashed value) pairs.
-- Note the naming: the view column X_User_Number carries the HASHED value,
-- while origin_X_User_Number carries the un-hashed one.
CREATE OR REPLACE VIEW sandbox.z_yeswook_kim.v_rbt_x_user_number AS
WITH all_versions AS (
  SELECT
    X_User_Number        AS origin_X_User_Number,
    X_User_Number_hashed AS X_User_Number
  FROM eic_data_private.tlamp.rbt_x_user_number_webos22
  UNION ALL
  SELECT
    X_User_Number        AS origin_X_User_Number,
    X_User_Number_hashed AS X_User_Number
  FROM eic_data_private.tlamp.rbt_x_user_number_webos23
  UNION ALL
  SELECT
    X_User_Number        AS origin_X_User_Number,
    X_User_Number_hashed AS X_User_Number
  FROM eic_data_private.tlamp.rbt_x_user_number_webos24
  UNION ALL
  SELECT
    X_User_Number        AS origin_X_User_Number,
    X_User_Number_hashed AS X_User_Number
  FROM eic_data_private.tlamp.rbt_x_user_number_webos25
  UNION ALL
  SELECT
    X_User_Number        AS origin_X_User_Number,
    X_User_Number_hashed AS X_User_Number
  FROM eic_data_private.tlamp.rbt_x_user_number_webos60
)
SELECT DISTINCT
  origin_X_User_Number,
  X_User_Number
FROM all_versions


In [0]:
%sql
-- Reverse-lookup view for MAC addresses: stacks the rbt_mac_addr_webos*
-- tables plus the activation-derived lookup table and returns the distinct
-- (original MAC, hashed MAC) pairs.
-- Note the naming: the view column mac_addr carries the HASHED value,
-- while origin_mac_addr carries the un-hashed one.
CREATE OR REPLACE VIEW sandbox.z_yeswook_kim.v_rbt_mac_addr AS
WITH all_mac_sources AS (
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM eic_data_private.tlamp.rbt_mac_addr_webos22
  UNION ALL
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM eic_data_private.tlamp.rbt_mac_addr_webos23
  UNION ALL
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM eic_data_private.tlamp.rbt_mac_addr_webos24
  UNION ALL
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM eic_data_private.tlamp.rbt_mac_addr_webos25
  UNION ALL
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM eic_data_private.tlamp.rbt_mac_addr_webos60
  /* Also pick up MACs that exist only in the activation data */
  UNION ALL
  SELECT
    mac_addr        AS origin_mac_addr,
    mac_addr_hashed AS mac_addr
  FROM sandbox.z_yeswook_kim.rbt_mac_addr_activation_date
)
SELECT DISTINCT
  origin_mac_addr,
  mac_addr
FROM all_mac_sources


In [0]:
%sql
-- Ad-hoc inspection: every column of the private activation_date table.
SELECT src.*
FROM eic_data_private.tlamp.activation_date AS src

In [0]:
%sql
-- Row count of 'WEE 2.0%' mart rows whose PO-side board maker or ODM differs
-- from the activation-side value, after the left join to the MAC lookup view.
-- Rows where the compared activation-side value is NULL do not satisfy "<>"
-- and are therefore not counted.
SELECT
  COUNT(1)
FROM sandbox.t_weetv.po_activation_mart AS a
LEFT JOIN sandbox.z_yeswook_kim.v_rbt_mac_addr AS b
  ON a.mac = b.mac_addr
WHERE a.po_type LIKE 'WEE 2.0%'
  AND (
    a.po_board_maker <> a.activ_board_maker
    OR a.po_odm <> a.activ_odm
  )

In [0]:
%sql
-- Export page: rows 1 .. 800000 of the 'WEE 2.0%' board-maker / ODM mismatch
-- set, with the original (un-hashed) MAC looked up from v_rbt_mac_addr.
-- Each page is a separate query, so the numbering must be repeatable:
-- ordering by a.mac alone leaves rows that share a MAC in arbitrary order,
-- which can duplicate or drop rows across pages. The extra ORDER BY columns
-- only break ties within one MAC; the primary order is still a.mac.
SELECT
    b.origin_mac_addr,
    a.*
FROM sandbox.t_weetv.po_activation_mart AS a
LEFT JOIN sandbox.z_yeswook_kim.v_rbt_mac_addr AS b
  ON a.mac = b.mac_addr
WHERE a.po_type LIKE 'WEE 2.0%'
  AND (
    a.po_board_maker <> a.activ_board_maker
    OR a.po_odm <> a.activ_odm
  )
QUALIFY ROW_NUMBER() OVER (
    ORDER BY
        a.mac,
        a.date_ym,
        a.po_date,
        a.po_country_code,
        a.po_type,
        a.po_board_maker,
        a.po_odm,
        a.po_brand,
        a.po_country,
        a.po_year,
        a.po_platform,
        b.origin_mac_addr
) BETWEEN 1 AND 800000


In [0]:
%sql
-- Export page: rows 800001 .. 1600000 of the 'WEE 2.0%' board-maker / ODM
-- mismatch set, with the original (un-hashed) MAC looked up from
-- v_rbt_mac_addr.
-- Each page is a separate query, so the numbering must be repeatable:
-- ordering by a.mac alone leaves rows that share a MAC in arbitrary order,
-- which can duplicate or drop rows across pages. The extra ORDER BY columns
-- only break ties within one MAC; the primary order is still a.mac.
SELECT
    b.origin_mac_addr,
    a.*
FROM sandbox.t_weetv.po_activation_mart AS a
LEFT JOIN sandbox.z_yeswook_kim.v_rbt_mac_addr AS b
  ON a.mac = b.mac_addr
WHERE a.po_type LIKE 'WEE 2.0%'
  AND (
    a.po_board_maker <> a.activ_board_maker
    OR a.po_odm <> a.activ_odm
  )
QUALIFY ROW_NUMBER() OVER (
    ORDER BY
        a.mac,
        a.date_ym,
        a.po_date,
        a.po_country_code,
        a.po_type,
        a.po_board_maker,
        a.po_odm,
        a.po_brand,
        a.po_country,
        a.po_year,
        a.po_platform,
        b.origin_mac_addr
) BETWEEN 800001 AND 1600000

In [0]:
%sql
-- Export page: rows 1600001 .. 2400000 of the 'WEE 2.0%' board-maker / ODM
-- mismatch set, with the original (un-hashed) MAC looked up from
-- v_rbt_mac_addr.
-- Each page is a separate query, so the numbering must be repeatable:
-- ordering by a.mac alone leaves rows that share a MAC in arbitrary order,
-- which can duplicate or drop rows across pages. The extra ORDER BY columns
-- only break ties within one MAC; the primary order is still a.mac.
SELECT
    b.origin_mac_addr,
    a.*
FROM sandbox.t_weetv.po_activation_mart AS a
LEFT JOIN sandbox.z_yeswook_kim.v_rbt_mac_addr AS b
  ON a.mac = b.mac_addr
WHERE a.po_type LIKE 'WEE 2.0%'
  AND (
    a.po_board_maker <> a.activ_board_maker
    OR a.po_odm <> a.activ_odm
  )
QUALIFY ROW_NUMBER() OVER (
    ORDER BY
        a.mac,
        a.date_ym,
        a.po_date,
        a.po_country_code,
        a.po_type,
        a.po_board_maker,
        a.po_odm,
        a.po_brand,
        a.po_country,
        a.po_year,
        a.po_platform,
        b.origin_mac_addr
) BETWEEN 1600001 AND 2400000

In [0]:
%sql


In [0]:
%sql


In [0]:
%python

# Rebuild the hashed-MAC lookup table from the private activation_date table.
# For each distinct mac_addr the table stores:
#   mac_addr_hashed - sha2-256 of (mac_addr + salt); NULL / empty MACs are
#                     passed through unhashed
#   mac_addr        - the original value
#   he_etl_dt       - load timestamp
salt = dbutils.secrets.get(scope="admin", key="salt")

# NOTE(review): the salt is interpolated into the SQL text, so the literal
# value is part of the statement sent to Spark (and a quote character in the
# salt would break the statement) -- consider a bound parameter; confirm that
# the runtime accepts parameter markers in CREATE TABLE ... AS SELECT first.
create_lookup_sql = f"""
    CREATE OR REPLACE TABLE sandbox.z_yeswook_kim.rbt_mac_addr_activation_date AS
    SELECT
        distinct 
        CASE
            WHEN mac_addr IS NULL OR mac_addr = '' THEN mac_addr
            ELSE sha2(CONCAT(mac_addr, '{salt}'), 256)
        END AS mac_addr_hashed,
        mac_addr,
        current_timestamp() AS he_etl_dt
    FROM eic_data_private.tlamp.activation_date
"""
sdf = spark.sql(create_lookup_sql)

# Show the freshly written table.
display(spark.table("sandbox.z_yeswook_kim.rbt_mac_addr_activation_date"))

In [0]:
%sql
-- Ad-hoc inspection: the raw mac_addr column of the private activation table.
SELECT src.mac_addr
FROM eic_data_private.tlamp.activation_date AS src

In [0]:
%sql


In [0]:
%sql
-- Coverage check: 'WEE 2.0%' board-maker / ODM mismatch rows for which the
-- MAC lookup view returned no original MAC (origin_mac_addr IS NULL).
SELECT
  a.*,
  b.*
FROM sandbox.t_weetv.po_activation_mart AS a
LEFT JOIN sandbox.z_yeswook_kim.v_rbt_mac_addr AS b
  ON a.mac = b.mac_addr
WHERE a.po_type LIKE 'WEE 2.0%'
  AND (
    a.po_board_maker <> a.activ_board_maker
    OR a.po_odm <> a.activ_odm
  )
  AND b.origin_mac_addr IS NULL