###  app id / permission group 행을 하나의 컬럼으로 pair 짝 맞추기

In [0]:
# Notebook parameters: target region, reporting month, and platform versions.
region = 'AIC'
date_ym = '2025-10'

# Platform versions included in the analysis ('webOSTV 22' is currently left out).
list_pv = ['webOSTV 23', 'webOSTV 24', 'webOSTV 25']

# Normal-log source table per platform version; the table-name suffix is the
# numeric part of the platform version (e.g. 'webOSTV 23' -> normal_log_webos23).
pv_table = {
    pv: f'{region.lower()}_data_ods.tlamp.normal_log_webos{pv.split()[-1]}'
    for pv in list_pv
}

In [0]:
%python 
# Build `df`: one row per (app_id, permission_group) pair found in the
# NL_SSG_PAIRING normal logs, unioned over every platform version in `list_pv`.
#
# Per platform version the query:
#   1. raw_data  - selects distinct pairing log rows for `date_ym` from the
#                  platform's log table, dropping rows with missing/blank
#                  product, platform or country.
#   2. pair_data - attaches to each row the app_id of the immediately preceding
#                  log row of the same (country_code, mac_addr), ordered by
#                  log_create_time.
#   3. final     - keeps rows that have a preceding app_id and whose country
#                  belongs to `region` in the country_code dimension table.
#
# The window is partitioned by (country_code, mac_addr) instead of ordering the
# whole dataset in one global window: an unpartitioned window forces Spark to
# move all rows into a single partition. The explicit `mac_addr is not null`
# filter keeps rows with a NULL mac_addr excluded, as the former
# `mac_addr = mac_addr_shifted` comparison did.
#
# Resulting columns: region_name, platform_version, platform_code,
# country_code, mac_addr, app_id, permission_group.
# NOTE(review): permission_group is taken from the current row as-is; it is not
# checked for NULL - confirm that every row following an app_id row carries it.
df = None
for pv in list_pv:

    df_t = spark.sql(f'''
        with raw_data as (

            SELECT distinct
                X_device_product as `platform_version`,-- 필수 열
                X_Device_Country as `country_code`,    -- 필수/조건 열
                X_device_platform as `platform_code`,  -- 필수 열
                mac_addr,
                log_create_time,
                normal_log:app_id as app_id,
                normal_log:permission_group as permission_group
            FROM {pv_table[pv]}
            WHERE 1=1
                -- AND X_device_country = 'US' -- sample
                AND  date_ym = '{date_ym}'
                AND  context_name = 'com.webos.service.secondscreen.gateway'
                AND  message_id = 'NL_SSG_PAIRING'
                AND  X_device_product = '{pv}'
                and  X_device_product is not null
                and  X_device_platform is not null
                and  X_Device_Country is not null
                and  replace(X_device_product, ' ', '') != ''
                and  replace(X_Device_Country, ' ', '') != ''

        ), pair_data as (

            -- country, mac_addr, log_create_time 으로 정렬했을때
            -- app_id 바로 뒤에 오는 permission_group 을 pair 한다.
            select *,
                LAG(app_id, 1) OVER (
                    PARTITION BY country_code, mac_addr
                    ORDER BY log_create_time
                ) as app_id_shifted
            from   raw_data

        )

        select
            '{region}' as region_name
            , platform_version
            , platform_code
            , country_code
            , mac_addr
            , app_id_shifted as app_id
            , permission_group
        from   pair_data p
        where  1=1
        and  mac_addr is not null
        and  app_id_shifted is not null
        and  exists (
            select 1
            from   {region.lower()}_data_dimension.common.country_code cc
            where  1=1
                and  cc.region = '{region}'
                and  p.country_code = cc.country_code
        )
        order by country_code, mac_addr, log_create_time
    ''')

    # Identity check: `== None` relies on object equality and is not the
    # idiomatic way to test for the "no frame yet" sentinel.
    if df is None:
        df = df_t
    else:
        df = df.union(df_t)

# Row counts per (platform_version, country_code, app_id, permission_group),
# restricted to the 'webOSTV 24' platform version.
webos24_pairs = df.where("platform_version = 'webOSTV 24'")
webos24_pair_counts = webos24_pairs.groupby(
    "platform_version", "country_code", "app_id", "permission_group"
).count()
webos24_pair_counts.display()

In [0]:
# Row counts per (platform_version, country_code, app_id, permission_group)
# across every platform version contained in `df`.
pair_counts = df.groupby(
    "platform_version", "country_code", "app_id", "permission_group"
).count()
pair_counts.display()

### 기기 사용 모수 관련 - region/플랫폼버전별

In [0]:
# Register `df` as the temp view "df" so later spark.sql() queries can reference it by name.
df.createOrReplaceTempView("df")

In [0]:
# Number of distinct devices (mac_addr) that emitted the NL_SSG_PAIRING normal
# log, per region and platform version.

from pyspark.sql.functions import countDistinct

distinct_devices = countDistinct("mac_addr").alias("count_ud")
pairing_device_counts = df.groupby("region_name", "platform_version").agg(distinct_devices)
pairing_device_counts.display()

In [0]:
# Total number of distinct devices in the master table, per platform version.
#
# platform_version is resolved through the platform_code values present in the
# "df" temp view. The join goes against the DISTINCT
# (platform_version, platform_code) mapping rather than the raw view: "df"
# holds one row per pairing event, so joining it directly multiplies every
# master row by the number of pairing rows that share its platform_code before
# COUNT(DISTINCT ...) collapses them again. The counts are unchanged.
#
# Only master rows whose country belongs to `region` in the country_code
# dimension table are counted. The count column is named count_ud, matching
# the other device-count cells.
spark.sql(f'''
    SELECT pv.platform_version, COUNT(DISTINCT m.mac_addr) AS count_ud
    FROM {region.lower()}_data_mart.master_tables.mac_user_master m
    INNER JOIN (
        SELECT DISTINCT platform_version, platform_code
        FROM df
    ) pv
      ON m.platform_code = pv.platform_code
    WHERE EXISTS (
        SELECT 1
        FROM {region.lower()}_data_dimension.common.country_code cc
        WHERE cc.region = '{region}'
        AND m.country_code = cc.country_code
    )
    GROUP BY pv.platform_version
''').display()


In [0]:
# Total number of distinct devices per platform version, where the platform
# version is derived from platform_code by a fixed CASE mapping
# (rows with a NULL platform_code are excluded; unmapped codes group under NULL).
# NOTE(review): this reads `use_mac_user_master`, while the preceding cell reads
# `mac_user_master` - confirm the table name is intended.
master_platform_sql = f''' 
    with master_pv as (
        select CASE
                    WHEN platform_code in ('W21O','W21U','W21P','W21A','W21K','N21D') then 'webOS6.0'
                    WHEN platform_code in ('W22O','W22H','W22P','W22A','W22K','W22L','N22D') then 'webOS22'
                    WHEN platform_code in ('W23O','W23H','W23P','W23A','W23M','W23K','W23L','W23T') then 'webOS23'
                    WHEN platform_code in ('W24O','W24H','W24K','W24P','W24G','N24D') then 'webOS24'
                    WHEN platform_code in ('W25H','W25P','W25O','W25G') then 'webOS25'
                    WHEN platform_code in ('W26H','W26O','W26P') then 'webOS26'
                    ELSE NULL END AS platform_version, 
                mac_addr 
        from   {region.lower()}_data_mart.master_tables.use_mac_user_master m
        where  platform_code is not null
    )
    select platform_version, count(distinct mac_addr) as count_ud
    from   master_pv
    group by platform_version
'''
master_platform_counts = spark.sql(master_platform_sql)
master_platform_counts.display()


In [0]:
# Count the devices that received the Re:New 25 update, per platform version.
#
# For each platform version in `list_pv`, counts the distinct mac_addr values in
# that platform's log table among 'tvpowerd' log rows whose
# X_Device_SDK_VERSION starts with '10.'. The per-platform results are unioned
# into `df2` (columns: pv, distinct device count) and displayed.
# NOTE(review): unlike the pairing query, this one has no date_ym filter, so it
# is not limited to the reporting month - confirm that is intended.
df2 = None 
for pv in list_pv:

    df2_t = spark.sql(f''' 
        select  '{pv}' as pv, count(distinct mac_addr)
        from    {pv_table[pv]}
        where   context_name = 'tvpowerd' -- 모든 mac_addr 탐지 가능 
          and   X_Device_SDK_VERSION like '10.%'
    ''')

    # Identity check against the "no frame yet" sentinel (PEP 8: compare to
    # None with `is`, not `==`).
    if df2 is None:
        df2 = df2_t
    else:
        df2 = df2.union(df2_t)
df2.display()