In [1]:
# 데이터 불러오기
from dotenv import load_dotenv
import pandas as pd
import os 
import numpy as np
from sqlalchemy import create_engine

# Load environment variables from the .env file.
load_dotenv()

# Database connection string, read from the CON_STR environment variable.
database = os.getenv("CON_STR")
if not database:
    # os.getenv returns None when the variable is missing; fail here with an
    # actionable message instead of handing None (or '') to create_engine.
    raise RuntimeError(
        "CON_STR is not set; define it in the environment or in the .env file."
    )


# # path to the CSV file
# csv_file_path = os.getenv("CSV_FILE_PATH")
# print(csv_file_path)

# Engine shared by every pd.read_sql_query call in this notebook.
engine = create_engine(database)


In [2]:

# ALL Raw Data
# Selects every column and row of cx.agg_LPoint_TEREA_SKU_Sourcing,
# ordered by YYYYMM and then id.
sql1= ''' 
select * from cx.agg_LPoint_TEREA_SKU_Sourcing
order by YYYYMM, id
'''

# Run the query through the notebook-level `engine` and load the result
# into the DataFrame `gg`.
gg = pd.read_sql_query(sql=sql1, con=engine)


        YYYYMM                                                 id  \
0       202211  003DE9124299CC396950FEF717EFE5020EFCB079E8DECB...   
1       202211  007BE5AEC24991C378435D650C80E7C168971700CB8EDF...   
2       202211  00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDE...   
3       202211  00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDE...   
4       202211  00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B3...   
...        ...                                                ...   
123457  202407  FFC06F306A0DDA1B0607490AAE316DDE2D7BDC64F5BE1A...   
123458  202407  FFC820F653AE73D27A89A23EF11572A811666DA444D4B9...   
123459  202407  FFDBFA83BF7A87212EC6F7CBBF725A42481E7EC572D0B7...   
123460  202407  FFFB3F6F4064FA5CE1BA43DADF370056664B8916E3B673...   
123461  202407  FFFBB0C05EBD84A70B65B8075656BAB53B2BE9AAFE84F2...   

                     engname gender  age cigatype      company  \
0               TEREA SILVER      남  40대    Mixed          BAT   
1                 TEREA BLUE      남  40

In [6]:
# Copy the whole raw frame `gg` to the system clipboard.
# NOTE(review): the displayed output of `gg` runs to row index 123461, so this
# places the full table on the clipboard - confirm that is intended.
gg.to_clipboard()

# TEREA L.Point SKU Sourcing Pivot

In [3]:
# - TEREA_flaXtar_ from 202211
# For every sourcing row t, join that id's cx.fct_K7_Monthly rows whose YYYYMM
# falls in the three months before t.YYYYMM (month-3 through month-1), then join
# cx.product_master restricted to CIGADEVICE = 'CIGARETTES' and cigatype != 'CSV'.
# Result: one row per (YYYYMM, engname, flavorXtar) with the number of distinct
# ids that have a joined 'CC' product (CC) and a joined 'HnB' product (HnB).
# flavorXtar is FLAVORSEG_type3 + ' X ' + New_TARSEGMENTAT.
# NOTE(review): FLAVORSEG_type3 and New_TARSEGMENTAT are not table-qualified -
# presumably they come from cx.product_master; confirm.
# NOTE(review): t.YYYYMM+'01' assumes YYYYMM is a character column - TODO confirm.
# NOTE(review): the query itself has no month filter (`where 1=1`); the
# "from 202211" in the title is not enforced here.
sql2 = ''' 
select  
	t.YYYYMM,
	t.engname,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) flavorXtar,
	count(distinct case when b.cigatype ='CC' then t.id end) CC,
	count(distinct case when b.cigatype ='HnB' then t.id end) HnB
from  cx.agg_LPoint_TEREA_SKU_Sourcing t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cx.product_master b on a.Product_code = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND b.cigatype != 'CSV'  
where 1=1 
group BY 
	t.YYYYMM,
	t.engname,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) 
'''

# Load the aggregated counts into `data2`.
data2 = pd.read_sql_query(sql=sql2, con=engine)

# Reshape the long counts into one row per (YYYYMM, engname); the columns become
# a two-level index of measure (CC / HnB) by flavorXtar. Rows are then ordered
# by SKU name and month.
pivot_flavor_tar = (
    pd.pivot_table(
        data2,
        values=['HnB', 'CC'],
        index=['YYYYMM', 'engname'],
        columns='flavorXtar',
    )
    .sort_values(by=['engname', 'YYYYMM'])
)
print(pivot_flavor_tar)



                         CC                                            \
flavorXtar         Fresh X  Fresh X 1MG Fresh X Below 1MG Fresh X LTS   
YYYYMM engname                                                          
202211 TEREA AMBER      0.0         6.0               NaN         6.0   
202212 TEREA AMBER      0.0         6.0               NaN         9.0   
202301 TEREA AMBER      0.0         9.0               NaN        10.0   
202302 TEREA AMBER      0.0        12.0               NaN        12.0   
202303 TEREA AMBER      0.0        13.0               NaN        20.0   
...                     ...         ...               ...         ...   
202403 TEREA YUGEN      0.0         5.0               NaN        10.0   
202404 TEREA YUGEN      0.0         4.0               NaN         6.0   
202405 TEREA YUGEN      0.0         4.0               NaN        13.0   
202406 TEREA YUGEN      0.0         5.0               NaN         7.0   
202407 TEREA YUGEN      0.0         6.0            

In [7]:
# Copy the flavor X tar pivot to the system clipboard.
pivot_flavor_tar.to_clipboard()

In [4]:
# - Terea_user_past_type_M1
# One row per (YYYYMM, id, engname) with three 0/1 flags, computed over that id's
# cx.fct_K7_Monthly rows in the three months before t.YYYYMM (month-3 through
# month-1), joined to cx.product_master with CIGADEVICE = 'CIGARETTES' and
# cigatype != 'CSV':
#   IQOS_Purchased    - any joined product with cigatype 'HnB' and company 'PMK'
#   CC_Purchased      - any joined product with cigatype 'CC'
#   CompHnB_Purchased - any joined product with cigatype 'HnB' and company != 'PMK'
# Both joins are inner joins, so an id with no matching purchase in the window
# produces no row for that month.
# NOTE(review): CIGADEVICE is not table-qualified here, while the flavor X tar
# query in this notebook writes b.CIGADEVICE for the same join.
# NOTE(review): t.YYYYMM+'01' assumes YYYYMM is a character column - TODO confirm.
sql = ''' 
select  
	t.YYYYMM, 
	t.id,
	t.engname,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from cx.agg_LPoint_TEREA_SKU_Sourcing t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cx.product_master b on a.Product_code = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
where 1=1 
group BY 	    
	t.YYYYMM, 
	t.id,
	t.engname
'''

# Load the per-purchaser flags into `data`.
data = pd.read_sql_query(sql=sql, con=engine)
# Past Type 
def categorize(row):
    """Build the comma-separated past-purchase label for one purchaser row.

    `row` is indexed by column name (a DataFrame row or any mapping). Each flag
    column whose value equals 1 contributes its label, always in the order
    'PMK HnB', 'CC', 'Comp HnB'. Returns '' when no flag equals 1.
    """
    flag_labels = (
        ('IQOS_Purchased', 'PMK HnB'),
        ('CC_Purchased', 'CC'),
        ('CompHnB_Purchased', 'Comp HnB'),
    )
    return ','.join(label for column, label in flag_labels if row[column] == 1)

# Label every purchaser row (adds the `retype` column to `data` in place), then
# count rows per (YYYYMM, engname, retype) into the long table `result`.
data['retype'] = data.apply(categorize, axis=1)
result = (
    data.groupby(['YYYYMM', 'engname', 'retype'])
    .size()
    .rename('N')
    .reset_index()
)
# Grand total of labelled rows; not referenced again in the cells shown here.
total = result['N'].sum()

# Wide view: one row per (YYYYMM, engname), one column per retype label,
# ordered by SKU name and month.
pivot_past_type = (
    pd.pivot_table(
        result,
        values='N',
        index=['YYYYMM', 'engname'],
        columns='retype',
    )
    .sort_values(by=['engname', 'YYYYMM'])
)
print(pivot_past_type)



retype                 CC  CC,Comp HnB  Comp HnB  PMK HnB  PMK HnB,CC  \
YYYYMM engname                                                          
202211 TEREA AMBER   62.0         14.0      17.0     67.0        47.0   
202212 TEREA AMBER   91.0         18.0      16.0     93.0        57.0   
202301 TEREA AMBER   79.0          9.0      11.0     69.0        64.0   
202302 TEREA AMBER  124.0         29.0      24.0     97.0        69.0   
202303 TEREA AMBER  191.0         32.0      22.0    140.0       105.0   
...                   ...          ...       ...      ...         ...   
202403 TEREA YUGEN   66.0         12.0      16.0     58.0        44.0   
202404 TEREA YUGEN   71.0         20.0      12.0     54.0        36.0   
202405 TEREA YUGEN   63.0         16.0      12.0     75.0        43.0   
202406 TEREA YUGEN   57.0         15.0      11.0     58.0        46.0   
202407 TEREA YUGEN   54.0         12.0       8.0     57.0        35.0   

retype              PMK HnB,CC,Comp HnB  PMK HnB,C

In [8]:
# Copy the past-type pivot to the system clipboard.
pivot_past_type.to_clipboard()

In [5]:
# - Terea_user_Current_type_M1
# CTE `temp`: one row per (YYYYMM, id, engname) with the same three 0/1 purchase
# flags as the past-type query, but computed over that id's cx.fct_K7_Monthly
# rows in the SAME month as the sourcing row (a.YYYYMM = t.YYYYMM).
# Outer select: builds a label that always starts with 'IQOS', appends
# ' + Comp. HnB' when CompHnB_Purchased = 1 and ' + CC' when CC_Purchased = 1,
# and counts the `temp` rows per (YYYYMM, engname, label) as purchaser_cnt.
# NOTE(review): IQOS_Purchased is computed in the CTE but never read by the
# outer select - the 'IQOS' prefix is unconditional. Confirm that is intended.
# NOTE(review): CIGADEVICE is not table-qualified here, while the flavor X tar
# query in this notebook writes b.CIGADEVICE for the same join.

sql3 = ''' 
with temp as (
select  
	t.YYYYMM,  
	t.id,
	t.engname,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from  cx.agg_LPoint_TEREA_SKU_Sourcing t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM = t.YYYYMM
	join cx.product_master b on a.Product_code = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
where 1=1 	
group BY 	    	
	t.YYYYMM, 
	t.id,
	t.engname
)
select YYYYMM, engname,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
     as Cigatype,
    count(*) purchaser_cnt
from temp
group by YYYYMM, engname,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
'''

# Load the per-label purchaser counts into `data3`.
data3 = pd.read_sql_query(sql=sql3, con=engine)

# Wide view of the current-month mix: one row per (YYYYMM, engname), one column
# per Cigatype label holding purchaser_cnt, ordered by SKU name and month.
pivot_current_type = (
    pd.pivot_table(
        data3,
        values='purchaser_cnt',
        index=['YYYYMM', 'engname'],
        columns='Cigatype',
    )
    .sort_values(by=['engname', 'YYYYMM'])
)

print(pivot_current_type)


Cigatype             IQOS  IQOS + CC  IQOS + Comp. HnB  IQOS + Comp. HnB + CC
YYYYMM engname                                                               
202211 TEREA AMBER  113.0       75.0              16.0                   13.0
202212 TEREA AMBER  180.0       95.0              14.0                   18.0
202301 TEREA AMBER  151.0       89.0               6.0                   11.0
202302 TEREA AMBER  200.0      132.0              21.0                   24.0
202303 TEREA AMBER  298.0      187.0              25.0                   30.0
...                   ...        ...               ...                    ...
202403 TEREA YUGEN  104.0       76.0              15.0                   12.0
202404 TEREA YUGEN   99.0       84.0              11.0                   12.0
202405 TEREA YUGEN  120.0       75.0              15.0                   13.0
202406 TEREA YUGEN   97.0       78.0              15.0                   10.0
202407 TEREA YUGEN   86.0       68.0              12.0          

In [9]:
# Copy the current-type pivot to the system clipboard.
pivot_current_type.to_clipboard()

# 연습

In [10]:
# -- Arbor_sourcing_M1 work
# NOTE(review): this cell reads 'company', 'gender', 'age' and 'New_FLAVORSEG'
# from `data`, but the `data` frame built by the past-type query earlier in this
# notebook only holds YYYYMM, id, engname, the three purchase flags and retype.
# The saved output therefore presumably came from a different `data` left in the
# kernel - confirm the intended source frame before relying on Restart & Run All.

# Strip leading/trailing whitespace from every company value (overwrites the
# column on `data` in place; raises if a value is not a string).
data['company'] = data['company'].apply(lambda x : x.strip())

# Per (YYYYMM, id, gender, age), collect the distinct company values and the
# distinct New_FLAVORSEG values as arrays.
# Note: this rebinds `result`, which the past-type cell also assigns.
result = data.groupby(['YYYYMM', 'id', 'gender', 'age']).agg({
    'company': lambda x: x.unique(),
    'New_FLAVORSEG': lambda x: x.unique()
})


# result.set_index(['YYYYMM', 'id', 'gender', 'age'])
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,company,New_FLAVORSEG
YYYYMM,id,gender,age,Unnamed: 4_level_1,Unnamed: 5_level_1
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,[BAT],[Regular]
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,[PMK],"[Fresh, New Taste]"
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,[PMK],"[New Taste, Regular]"
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,[KTG],[New Taste]
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,"[KTG, PMK]","[New Taste, Fresh]"
202211,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,[PMK],"[Fresh, Regular]"
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,[KTG],[Fresh]
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,[KTG],[Fresh]
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,[KTG],[Regular]
