In [2]:
# 데이터 불러오기
from dotenv import load_dotenv
import pandas as pd
import os 
import numpy as np
from sqlalchemy import create_engine

# .env 파일에서 환경 부르기
load_dotenv()

# 환경 변수 사용
database = os.getenv("CON_STR")


# # csv 파일 경로
# csv_file_path = os.getenv("CSV_FILE_PATH")
# print(csv_file_path)

engine = create_engine(database)


In [3]:

# ALL Raw Data
sql1= ''' 
select * from cu.agg_CU_TEREA_Taste_Sourcing
order by YYYYMM, id
'''

gg = pd.read_sql_query(sql=sql1, con=engine)


In [4]:
gg.to_clipboard()

# TEREA Taste 3types Pivot 작업

In [5]:
# - TEREA_flaXtar_ from 202211
sql2 = ''' 
select  
	t.YYYYMM,
	t.FLAVORSEG_type,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) flavorXtar,
	count(distinct case when b.cigatype ='CC' then t.id end) CC,
	count(distinct case when b.cigatype ='HnB' then t.id end) HnB
from  cu.agg_CU_TEREA_Taste_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND b.cigatype != 'CSV'  
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1=1 
group BY 
	t.YYYYMM,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT), 
	t.FLAVORSEG_type
;
'''

data2 = pd.read_sql_query(sql=sql2, con=engine)

pivot_flavor_tar = data2.pivot_table(index=['YYYYMM','FLAVORSEG_type'], 
                                     columns='flavorXtar',
                                     values=['HnB', 'CC' ]
                                    )
print(pivot_flavor_tar)



                            CC                                            \
flavorXtar            Fresh X  Fresh X 1MG Fresh X Below 1MG Fresh X LTS   
YYYYMM FLAVORSEG_type                                                      
202401 Fresh               0.0        90.0               NaN        96.0   
       New Taste           0.0       168.0               NaN       209.0   
       Regular             0.0        65.0               NaN        74.0   
202402 Fresh               0.0       118.0               NaN       105.0   
       New Taste           0.0       178.0               NaN       161.0   
       Regular             0.0        54.0               NaN        47.0   
202403 Fresh               0.0       115.0               NaN       124.0   
       New Taste           0.0       162.0               NaN       199.0   
       Regular             0.0        64.0               NaN        52.0   
202404 Fresh               0.0       104.0               NaN       112.0   
       New T

In [6]:
pivot_flavor_tar.to_clipboard()

In [7]:
# - Terea_user_past_type_M1
sql = ''' 
select  
	t.YYYYMM, 
	t.id,
	t.FLAVORSEG_type,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from cu.agg_CU_TEREA_Taste_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1 =1 
group BY 	    
	t.YYYYMM, 
	t.id,
	t.FLAVORSEG_type
'''

data = pd.read_sql_query(sql=sql, con=engine)
# Past Type 
def categorize(row):
    categories = []
    if row['IQOS_Purchased'] == 1:
        categories.append('PMK HnB')
    if row['CC_Purchased'] == 1:
        categories.append('CC')
    if row['CompHnB_Purchased'] == 1:
        categories.append('Comp. HnB')
    return ','.join(categories)

data['retype'] = data.apply(categorize, axis=1)
result = data.groupby(['YYYYMM','FLAVORSEG_type', 'retype']).size().reset_index(name='N')

pivot_past_type = result.pivot_table( 
                            index=['YYYYMM', 'FLAVORSEG_type'],
                            columns='retype',
                            values='N'
                            )
print(pivot_past_type)



retype                     CC  CC,Comp. HnB  Comp. HnB  PMK HnB  PMK HnB,CC  \
YYYYMM FLAVORSEG_type                                                         
202401 Fresh           1158.0         212.0       98.0    443.0       443.0   
       New Taste       2356.0         434.0      230.0    498.0       502.0   
       Regular         1044.0         171.0       67.0    444.0       460.0   
202402 Fresh           1214.0         206.0      128.0    445.0       430.0   
       New Taste       2440.0         489.0      224.0    492.0       484.0   
       Regular         1062.0         176.0       82.0    340.0       357.0   
202403 Fresh           1340.0         203.0      122.0    434.0       470.0   
       New Taste       2663.0         522.0      256.0    538.0       527.0   
       Regular         1137.0         174.0       89.0    353.0       331.0   
202404 Fresh           1224.0         222.0      144.0    613.0       498.0   
       New Taste       2676.0         585.0      311

In [None]:
pivot_past_type.to_clipboard()

In [8]:
sql3 = ''' 
with temp as (
select  
	t.YYYYMM, 
	t.id,
	t.FLAVORSEG_type,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from  cu.agg_CU_TEREA_Taste_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM = t.YYYYMM
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1=1 	
group BY 	    	
	t.YYYYMM, 
	t.id,
	t.FLAVORSEG_type
)
select YYYYMM, 	FLAVORSEG_type,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
     as Cigatype,
    count(*) purchaser_cnt
from temp
group by YYYYMM, FLAVORSEG_type,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
'''

data3 = pd.read_sql_query(sql=sql3, con=engine)

pivot_current_type = data3.pivot_table(index=['YYYYMM', 'FLAVORSEG_type'],
                                       columns='Cigatype',
                                       values='purchaser_cnt',
                                       )

print(pivot_current_type)


Cigatype                 IQOS  IQOS + CC  IQOS + Comp. HnB  \
YYYYMM FLAVORSEG_type                                        
202401 Fresh           1047.0     1133.0             118.0   
       New Taste       1503.0     2042.0             238.0   
       Regular          919.0     1122.0              88.0   
202402 Fresh           1005.0     1225.0             135.0   
       New Taste       1546.0     2131.0             230.0   
       Regular          807.0     1057.0             102.0   
202403 Fresh           1056.0     1307.0             149.0   
       New Taste       1614.0     2360.0             277.0   
       Regular          802.0     1119.0             114.0   
202404 Fresh           1144.0     1333.0             146.0   
       New Taste       1950.0     2557.0             312.0   
       Regular          915.0     1102.0             122.0   
202405 Fresh           1467.0     1492.0             179.0   
       New Taste       1989.0     2661.0             312.0   
       R

In [9]:
pivot_current_type.to_clipboard()

# TEREA by SKU Pivot 작업

In [6]:
# - TEREA_flaXtar_ from 202211
sql2 = ''' 
select  
	t.YYYYMM,
	t.engname,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) flavorXtar,
	count(distinct case when b.cigatype ='CC' then t.id end) CC,
	count(distinct case when b.cigatype ='HnB' then t.id end) HnB
from  cu.agg_CU_TEREA_SKU_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND b.cigatype != 'CSV'  
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1=1 
group BY 
	t.YYYYMM,
	t.engname,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) 
'''

data2 = pd.read_sql_query(sql=sql2, con=engine)

pivot_flavor_tar = data2.pivot_table(index=['YYYYMM','engname'], 
                                     columns='flavorXtar',
                                     values=['HnB', 'CC' ]
                                    ).sort_values(['engname', 'YYYYMM'])
print(pivot_flavor_tar)



                         CC                                            \
flavorXtar         Fresh X  Fresh X 1MG Fresh X Below 1MG Fresh X LTS   
YYYYMM engname                                                          
202302 TEREA AMBER      0.0        19.0               NaN        16.0   
202303 TEREA AMBER      0.0        28.0               NaN        36.0   
202304 TEREA AMBER      0.0        34.0               NaN        44.0   
202305 TEREA AMBER      0.0        35.0               NaN        53.0   
202306 TEREA AMBER      0.0        23.0               NaN        31.0   
...                     ...         ...               ...         ...   
202402 TEREA YUGEN      0.0        28.0               NaN        17.0   
202403 TEREA YUGEN      0.0        15.0               NaN        15.0   
202404 TEREA YUGEN      0.0        11.0               1.0        19.0   
202405 TEREA YUGEN      0.0        13.0               NaN        21.0   
202406 TEREA YUGEN      0.0        13.0            

In [7]:
pivot_flavor_tar.to_clipboard()

In [5]:
# - Terea_user_past_type_M1
sql = ''' 
select  
	t.YYYYMM, 
	t.engname,
	t.id,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from cu.agg_CU_TEREA_SKU_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1=1 
group BY 	    
	t.YYYYMM, 
	t.id,
	t.engname
'''

data = pd.read_sql_query(sql=sql, con=engine)
# Past Type 
def categorize(row):
    categories = []
    if row['IQOS_Purchased'] == 1:
        categories.append('PMK HnB')
    if row['CC_Purchased'] == 1:
        categories.append('CC')
    if row['CompHnB_Purchased'] == 1:
        categories.append('Comp HnB')
    return ','.join(categories)

data['retype'] = data.apply(categorize, axis=1)
result = data.groupby(['YYYYMM','engname', 'retype']).size().reset_index(name='N')
total = result['N'].sum()

pivot_past_type = result.pivot_table( 
                            index=['YYYYMM', 'engname'],
                            columns='retype',
                            values='N'
                            ).sort_values(['engname', 'YYYYMM'])
print(pivot_past_type)
pivot_past_type.to_clipboard()


retype                 CC  CC,Comp HnB  Comp HnB  PMK HnB  PMK HnB,CC  \
YYYYMM engname                                                          
202302 TEREA AMBER  357.0         52.0      41.0    199.0       130.0   
202303 TEREA AMBER  583.0        118.0      58.0    372.0       275.0   
202304 TEREA AMBER  515.0         98.0      49.0    267.0       310.0   
202305 TEREA AMBER  603.0        106.0      55.0    278.0       276.0   
202306 TEREA AMBER  443.0         89.0      38.0    223.0       255.0   
...                   ...          ...       ...      ...         ...   
202402 TEREA YUGEN  162.0         39.0      15.0     97.0       117.0   
202403 TEREA YUGEN  155.0         34.0      23.0     97.0       155.0   
202404 TEREA YUGEN  151.0         33.0      26.0    113.0       135.0   
202405 TEREA YUGEN  146.0         26.0      21.0    116.0       132.0   
202406 TEREA YUGEN  135.0         36.0      20.0     94.0       129.0   

retype              PMK HnB,CC,Comp HnB  PMK HnB,C

In [6]:
# - Terea_user_Current_type_M1

sql3 = ''' 
with temp as (
select  
	t.YYYYMM,  
	t.id,
	t.engname,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from  cu.agg_CU_TEREA_SKU_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM = t.YYYYMM
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
where 1=1 	
group BY 	    	
	t.YYYYMM, 
	t.id,
	t.engname
)
select YYYYMM, engname,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
     as Cigatype,
    count(*) purchaser_cnt
from temp
group by YYYYMM, engname,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
'''

data3 = pd.read_sql_query(sql=sql3, con=engine)

pivot_current_type = data3.pivot_table(index=['YYYYMM', 'engname'],
                                       columns='Cigatype',
                                       values='purchaser_cnt',
                                       ).sort_values(['engname', 'YYYYMM'])

print(pivot_current_type)
pivot_current_type.to_clipboard()

Cigatype             IQOS  IQOS + CC  IQOS + Comp. HnB  IQOS + Comp. HnB + CC
YYYYMM engname                                                               
202302 TEREA AMBER  314.0      407.0              29.0                   53.0
202303 TEREA AMBER  659.0      661.0              69.0                  111.0
202304 TEREA AMBER  585.0      595.0              67.0                   93.0
202305 TEREA AMBER  550.0      681.0              69.0                  112.0
202306 TEREA AMBER  458.0      542.0              60.0                   71.0
...                   ...        ...               ...                    ...
202402 TEREA YUGEN  209.0      198.0              25.0                   24.0
202403 TEREA YUGEN  225.0      215.0              31.0                   27.0
202404 TEREA YUGEN  219.0      233.0              24.0                   23.0
202405 TEREA YUGEN  223.0      207.0              21.0                   34.0
202406 TEREA YUGEN  197.0      203.0              20.0          

# Pivot concat 해보기

In [14]:
# 필터링된 데이터 생성
filtered_data = data[data['ProductFamilyCode'] == 'IQOS']
 
# 피벗 테이블 생성
pivot_table4 = filtered_data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                         columns=['engname'], values=['qty'],
                                         aggfunc='sum')
 
# HnB_taste 컬럼 생성 및 피벗 테이블 생성
result = data[data['productSubFamilyCode'].isin(['AIIM', 'FIIT', 'HEETS', 'MIIX', 'NEO', 'NEOSTICKS', 'TEREA'])]
data['HnB_taste'] = result['productSubFamilyCode'] + ' ' + data['New_FLAVORSEG']

pivot_table3 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['HnB_taste'], values=['qty'],
                                aggfunc='sum')
 
# CC_taste 컬럼 생성 및 피벗 테이블 생성
data['CC_taste'] = data['cigatype'] + ' ' + data['New_FLAVORSEG']

pivot_table2 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['CC_taste'], values=['qty'],
                                aggfunc='sum')
 
# 기본 피벗 테이블 생성
pivot_table1 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['company'], values=['qty'],
                                aggfunc='sum')

# 피벗 테이블들을 하나의 데이터프레임으로 병합
concatenated_pivot = pd.concat([ pivot_table1, pivot_table2, pivot_table3, pivot_table4], axis=1)

# 결과 출력
# final = pd.merge(result, concatenated_pivot, how='outer')
concatenated_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,BAT,JTI,KTG,PMK,CC Fresh,CC New Taste,CC Regular,HnB Fresh,HnB New Taste,HnB Regular,...,HEETS BRONZE LABEL,HEETS GOLD SELECTION,HEETS GREEN LABEL,HEETS GREEN ZING,HEETS PURPLE LABEL,HEETS SATIN WAVE,HEETS SILVER LABEL,HEETS SUMMER BREEZE,HEETS TURQUOISE LABEL,HEETS YUGEN
YYYYMM,id,gender,age,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,32.0,,,,,,1.0,,,31.0,...,,,,,,,,,,
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,,,,4.0,,,,3.0,1.0,,...,,,,,,,,1.0,,
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,,,,7.0,,,,,1.0,6.0,...,6.0,,,,,,,,,
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,,,9.0,,,9.0,,,,,...,,,,,,,,,,
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,,,2.0,13.0,,1.0,,13.0,1.0,,...,,,,,,,,,,
202211,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,,,,10.0,,,,2.0,,8.0,...,8.0,,,,,,,,,
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,,,1.0,,,,,1.0,,,...,,,,,,,,,,
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,,,4.0,,4.0,,,,,,...,,,,,,,,,,
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,,,2.0,,,,2.0,,,,...,,,,,,,,,,


In [16]:
total_id_count = data.groupby('YYYYMM')['id'].count().reset_index(name='total_id_count')

gender_age_count = data.groupby(['YYYYMM', 'gender', 'age']).size().reset_index(name='count')

gender_pivot = gender_age_count.pivot_table(
    index ='YYYYMM',
    columns= ['gender', 'age'],
    values='count'
)

res = total_id_count.set_index('YYYYMM').join(gender_pivot)

res

MergeError: Not allowed to merge between different levels. (1 levels on the left, 2 on the right)

In [42]:
# -- Gr Region 집계 user_current_type_M1

gr_sql1= ''' 
with temp as (
select  
	t.YYYYMM, 
	gr_cd,
	t.id,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from  cu.agg_CU_TEREA_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM = t.YYYYMM
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
group BY 	    	
	t.YYYYMM, 
	gr_cd,
	t.id
)
select YYYYMM, gr_cd,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
     as Cigatype,
    count(*) purchaser_cnt
from temp
group by YYYYMM, gr_cd,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
;
'''


gr_data = pd.read_sql_query(sql=gr_sql1, con=engine)

In [49]:
pivot_current_type = gr_data.pivot_table(index=['YYYYMM','gr_cd'],
                                       columns='Cigatype',
                                       values='purchaser_cnt',
                                       ).sort_values(['YYYYMM', 'gr_cd'], ascending=[True, False])

print(pivot_current_type)
pivot_current_type.to_clipboard()

Cigatype        IQOS  IQOS + CC  IQOS + Comp. HnB  IQOS + Comp. HnB + CC
YYYYMM gr_cd                                                            
202401 서울      885.0     1274.0             135.0                  190.0
       부산      337.0      430.0              44.0                   90.0
       대전      247.0      373.0              34.0                   68.0
       대구      168.0      283.0              28.0                   47.0
       광주      279.0      347.0              46.0                   74.0
       Other   254.0      385.0              50.0                   66.0
202402 서울      855.0     1280.0             142.0                  184.0
       부산      345.0      493.0              55.0                   68.0
       대전      257.0      405.0              47.0                   64.0
       대구      214.0      261.0              27.0                   45.0
       광주      246.0      406.0              46.0                   59.0
       Other   248.0      441.0              35.0  

In [43]:
gr_sql2 = ''' 
select  
	t.YYYYMM, 
	gr_cd,
	t.id,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from cu.agg_CU_TEREA_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
group BY 	    
	t.YYYYMM, 
	gr_cd, 
	t.id
;
'''

gr_data2 = pd.read_sql_query(sql=gr_sql2, con=engine)

In [52]:
# Past Type 
def categorize(row):
    categories = []
    if row['IQOS_Purchased'] == 1:
        categories.append('PMK HnB')
    if row['CC_Purchased'] == 1:
        categories.append('CC')
    if row['CompHnB_Purchased'] == 1:
        categories.append('Comp HnB')
    return ','.join(categories)

gr_data2['retype'] = gr_data2.apply(categorize, axis=1)
result = gr_data2.groupby(['YYYYMM','gr_cd', 'retype']).size().reset_index(name='N')
total = result['N'].sum()

print(result)

     YYYYMM  gr_cd               retype    N
0    202401  Other                   CC  467
1    202401  Other          CC,Comp HnB  102
2    202401  Other             Comp HnB   28
3    202401  Other              PMK HnB   81
4    202401  Other           PMK HnB,CC   54
..      ...    ...                  ...  ...
247  202406     서울             Comp HnB  214
248  202406     서울              PMK HnB  330
249  202406     서울           PMK HnB,CC  222
250  202406     서울  PMK HnB,CC,Comp HnB   32
251  202406     서울     PMK HnB,Comp HnB   25

[252 rows x 4 columns]


In [56]:
pivot_past_type = result.pivot_table( 
                            index=['YYYYMM', 'gr_cd'],
                            columns='retype',
                            values='N'
                            ).sort_values(['YYYYMM', 'gr_cd'], ascending=[True, False])
print(pivot_past_type)
pivot_past_type.to_clipboard()

retype            CC  CC,Comp HnB  Comp HnB  PMK HnB  PMK HnB,CC  \
YYYYMM gr_cd                                                       
202401 서울     1535.0        267.0     141.0    296.0       202.0   
       부산      565.0        105.0      49.0    103.0        64.0   
       대전      484.0         66.0      43.0     75.0        48.0   
       대구      342.0         64.0      40.0     41.0        33.0   
       광주      468.0         84.0      40.0     86.0        58.0   
       Other   467.0        102.0      28.0     81.0        54.0   
202402 서울     1548.0        258.0     164.0    275.0       177.0   
       부산      605.0        102.0      59.0    102.0        74.0   
       대전      504.0        108.0      35.0     61.0        47.0   
       대구      346.0         62.0      38.0     59.0        36.0   
       광주      492.0         89.0      42.0     78.0        46.0   
       Other   518.0        110.0      39.0     74.0        54.0   
202403 서울     1717.0        283.0     172.0    2

In [58]:
# -- TEREA flavorXtar from 202211

gr_sql3 = ''' 
select  
	t.YYYYMM,
	gr_cd,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) flavorXtar,
	count(distinct case when b.cigatype ='CC' then t.id end) CC,
	count(distinct case when b.cigatype ='HnB' then t.id end) HnB
from  cu.agg_CU_TEREA_Sourcing t
	join cu.Fct_BGFR_PMI_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cu.dim_product_master b on a.ITEM_CD = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND b.cigatype != 'CSV'  
	join cu.dim_Regional_area c on t.SIDO_nm = c.sido_nm
group BY 
	t.YYYYMM,
	gr_cd,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) 
;
'''

gr_data3 = pd.read_sql_query(sql=gr_sql3, con=engine)

In [60]:
pivot_flavor_tar = gr_data3.pivot_table(index=['YYYYMM', 'gr_cd'], 
                                     columns='flavorXtar',
                                     values=['HnB', 'CC' ]
                                    ).sort_values(['YYYYMM', 'gr_cd'], ascending=[True,True])
print(pivot_flavor_tar)
pivot_flavor_tar.to_clipboard()

                   CC                                                        \
flavorXtar   Fresh X  Fresh X 1MG Fresh X Below 1MG Fresh X LTS Fresh X ULT   
YYYYMM gr_cd                                                                  
202401 Other      0.0        35.0               NaN        41.0        15.0   
       광주         0.0        24.0               NaN        26.0         5.0   
       대구         0.0        16.0               NaN        17.0         6.0   
       대전         0.0        20.0               NaN        27.0         8.0   
       부산         0.0        28.0               NaN        24.0         2.0   
       서울         0.0       117.0               NaN       128.0        18.0   
202402 Other      0.0        36.0               NaN        44.0         8.0   
       광주         0.0        31.0               NaN        26.0         3.0   
       대구         0.0        19.0               NaN        12.0         4.0   
       대전         0.0        34.0               NaN 