# L.Point 구매자 집계

In [49]:
import pandas as pd
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv

load_dotenv()

database = os.getenv('CON_STR')

engine = create_engine(database)


In [2]:
# 1. 첨부파일의 제품들의 출시 첫 달부터 purchaser 수
sql = '''
with temp as ( 
	-- 매월 2팩이상 구매자
	select
		t.YYYYMM, 
		t.id
	from cx.seven11_user_3month_list t
		join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
		join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
	where t.YYYYMM >= '202201'
	group by t.YYYYMM, t.id
	having sum(a.Pack_qty ) > 1
)
select engname,
	FLAVORSEG_type6,
	NPL_YN ,
	t.YYYYMM,
	count(distinct t.id) 'Purchasers'
from temp t
	join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
	join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
group by engname, FLAVORSEG_type6, NPL_YN , t.YYYYMM
'''

df = pd.read_sql_query(sql, engine)

pivot_df = df.pivot_table(index=['engname', 'FLAVORSEG_type6', 'NPL_YN'],
							columns='YYYYMM',
							values='Purchasers').reset_index()

pivot_df

YYYYMM,engname,FLAVORSEG_type6,NPL_YN,202201,202202,202203,202204,202205,202206,202207,...,202310,202311,202312,202401,202402,202403,202404,202405,202406,202407
0,88 ICE GOLD,Regular to Fresh,N,,,,,,,,...,2.0,,,,,,,,,
1,88 RETURNS,Regular,N,287.0,265.0,271.0,303.0,314.0,286.0,305.0,...,,,,,,,,,,
2,BOHEM CIGAR CARIBE,Regular to New Taste,Y,927.0,835.0,827.0,881.0,928.0,944.0,978.0,...,1270.0,1233.0,1166.0,1151.0,1111.0,1245.0,1308.0,1292.0,1344.0,1392.0
3,BOHEM CIGAR CUBANA DOUBLE,Fresh to New Taste,N,632.0,625.0,705.0,818.0,864.0,888.0,985.0,...,1076.0,998.0,989.0,888.0,914.0,1001.0,1093.0,1132.0,1108.0,1162.0
4,BOHEM CIGAR ICE FIT,Regular to New Taste,Y,,,1651.0,1216.0,966.0,912.0,877.0,...,999.0,993.0,972.0,922.0,934.0,984.0,1095.0,1212.0,1131.0,1205.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,TIMELESS TIME KS BOX 20,Regular,N,212.0,220.0,220.0,227.0,221.0,238.0,245.0,...,2.0,,,,,,,,,
223,VS S. BLUE,Regular,N,136.0,132.0,144.0,162.0,164.0,154.0,180.0,...,169.0,165.0,164.0,168.0,170.0,175.0,180.0,188.0,173.0,179.0
224,VS S. GOLD,Regular,N,937.0,800.0,934.0,1059.0,975.0,1041.0,1011.0,...,1168.0,1070.0,1086.0,1085.0,979.0,1081.0,1097.0,1145.0,1111.0,1134.0
225,VS S. ONE,Regular,N,415.0,423.0,442.0,436.0,480.0,445.0,459.0,...,474.0,474.0,466.0,461.0,436.0,445.0,467.0,481.0,483.0,502.0


In [3]:
pivot_df.to_clipboard(index=False)

In [4]:
#  2. 첨부파일의 제품들의 출시 첫 달부터 재구매 purchaser 수 (해당 제품 2팩이상 구매자)
sql2 = '''
with temp as ( 
	-- 매월 2팩이상 구매자
	select
		t.YYYYMM, 
		t.id
	from cx.seven11_user_3month_list t
		join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
		join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
	where t.YYYYMM >= '202201'
	group by t.YYYYMM, t.id
	having sum(a.Pack_qty ) > 1
)
select engname,
	FLAVORSEG_type6,
	NPL_YN ,
	t.YYYYMM,
	count(distinct t.id) 'Purchasers'
from temp t
	join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
	join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
group by engname, FLAVORSEG_type6, NPL_YN , t.YYYYMM
having sum(a.Pack_qty ) > 1
'''

df2 = pd.read_sql_query(sql2, engine)
df2['first_purchased'] = df2.groupby(['engname','FLAVORSEG_type6', 'NPL_YN'])['YYYYMM'].transform(min)


pivot_df2 = df2.pivot_table(index=['engname', 'FLAVORSEG_type6', 'NPL_YN'],
							columns='YYYYMM',
							values='Purchasers').reset_index()


pivot_df2

  df2['first_purchased'] = df2.groupby(['engname','FLAVORSEG_type6', 'NPL_YN'])['YYYYMM'].transform(min)


YYYYMM,engname,FLAVORSEG_type6,NPL_YN,202201,202202,202203,202204,202205,202206,202207,...,202310,202311,202312,202401,202402,202403,202404,202405,202406,202407
0,88 ICE GOLD,Regular to Fresh,N,,,,,,,,...,2.0,,,,,,,,,
1,88 RETURNS,Regular,N,287.0,265.0,271.0,303.0,314.0,286.0,305.0,...,,,,,,,,,,
2,BOHEM CIGAR CARIBE,Regular to New Taste,Y,927.0,835.0,827.0,881.0,928.0,944.0,978.0,...,1270.0,1233.0,1166.0,1151.0,1111.0,1245.0,1308.0,1292.0,1344.0,1392.0
3,BOHEM CIGAR CUBANA DOUBLE,Fresh to New Taste,N,632.0,625.0,705.0,818.0,864.0,888.0,985.0,...,1076.0,998.0,989.0,888.0,914.0,1001.0,1093.0,1132.0,1108.0,1162.0
4,BOHEM CIGAR ICE FIT,Regular to New Taste,Y,,,1651.0,1216.0,966.0,912.0,877.0,...,999.0,993.0,972.0,922.0,934.0,984.0,1095.0,1212.0,1131.0,1205.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,TIME MID,Regular,N,285.0,264.0,289.0,285.0,286.0,299.0,337.0,...,444.0,444.0,411.0,441.0,421.0,399.0,435.0,436.0,464.0,450.0
217,TIMELESS TIME KS BOX 20,Regular,N,212.0,220.0,220.0,227.0,221.0,238.0,245.0,...,2.0,,,,,,,,,
218,VS S. BLUE,Regular,N,136.0,132.0,144.0,162.0,164.0,154.0,180.0,...,169.0,165.0,164.0,168.0,170.0,175.0,180.0,188.0,173.0,179.0
219,VS S. GOLD,Regular,N,937.0,800.0,934.0,1059.0,975.0,1041.0,1011.0,...,1168.0,1070.0,1086.0,1085.0,979.0,1081.0,1097.0,1145.0,1111.0,1134.0


In [7]:
pivot_df2.to_clipboard(index=False)

In [31]:
#  3. 첨부파일의 제품들의 출시 첫 달부터 Pack 수
sql3 = '''
select engname, 
	b.FLAVORSEG_type3,
	t.YYYYMM, 
	 sum(a.Pack_qty) Pack_Qty
from cx.v_user_3month_list t
	join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
	join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
where t.YYYYMM >= '202201'	
group by engname, b.FLAVORSEG_type3, t.YYYYMM
'''

df3 = pd.read_sql_query(sql3, engine)

pivot_df3 = df3.pivot_table(index=['engname', 'FLAVORSEG_type3'],
							columns='YYYYMM',
							values='Pack_Qty').reset_index()

pivot_df3

YYYYMM,engname,FLAVORSEG_type3,202201,202202,202203,202204,202205,202206,202207,202208,...,202310,202311,202312,202401,202402,202403,202404,202405,202406,202407
0,88 ICE GOLD,Fresh,,,,,,,,,...,15.0,,,,,,,,,
1,88 RETURNS,Regular,1092.0,959.0,1060.0,1063.0,1283.0,1201.0,1212.0,1286.0,...,,,,,,,,,,
2,BOHEM CIGAR CARIBE,New Taste,3597.0,3174.0,3594.0,3676.0,3764.0,3774.0,4030.0,3941.0,...,4952.0,4943.0,4521.0,4719.0,4180.0,4976.0,5208.0,5405.0,5622.0,5979.0
3,BOHEM CIGAR CUBANA DOUBLE,New Taste,2619.0,2351.0,2789.0,3213.0,3443.0,3326.0,3665.0,3452.0,...,4120.0,3991.0,3794.0,3632.0,3571.0,3933.0,4254.0,4463.0,4469.0,4789.0
4,BOHEM CIGAR ICE FIT,New Taste,,,3733.0,3806.0,3554.0,3506.0,3329.0,3170.0,...,4045.0,4070.0,4132.0,4068.0,3681.0,4141.0,4517.0,4981.0,5044.0,5375.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223,TIMELESS TIME KS BOX 20,Regular,995.0,972.0,1057.0,1044.0,1079.0,1142.0,1128.0,1012.0,...,4.0,,,,,,,,,
224,VS S. BLUE,Regular,572.0,594.0,679.0,704.0,695.0,688.0,830.0,781.0,...,716.0,640.0,729.0,859.0,758.0,741.0,851.0,789.0,731.0,805.0
225,VS S. GOLD,Regular,4089.0,3426.0,4192.0,4411.0,4532.0,4629.0,4773.0,4586.0,...,5462.0,5058.0,4832.0,4716.0,4249.0,5254.0,5341.0,5458.0,5318.0,5340.0
226,VS S. ONE,Regular,1804.0,1807.0,2050.0,1757.0,1930.0,1920.0,2039.0,2110.0,...,2339.0,2168.0,2181.0,2175.0,2116.0,2286.0,2236.0,2342.0,2289.0,2346.0


In [34]:
pivot_df3.to_clipboard(index=False)

In [32]:
#  4. 첨부파일의 제품들의 출시 첫 달부터 재구매 Pack 수 (해당 제품 2팩이상 구매자)
sql4 = '''
with temp as ( 
	-- 해당 제품 2팩이상 구매자
	select engname,
		b.FLAVORSEG_type3,
		t.YYYYMM, 
		t.id,
		sum(a.Pack_qty) pack
	from cx.v_user_3month_list t
		join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
		join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC'
	where t.YYYYMM >= '202201'
	group by engname, b.FLAVORSEG_type3, t.YYYYMM, t.id
	having sum(a.Pack_qty ) > 1
)
select engname,
	FLAVORSEG_type3,
	YYYYMM,
	 sum(pack) Pack_Qty
from temp
group by engname, FLAVORSEG_type3, YYYYMM
'''

df4 = pd.read_sql_query(sql4, engine)

pivot_df4 = df4.pivot_table(index=['engname','FLAVORSEG_type3'],
							columns='YYYYMM',
							values='Pack_Qty').reset_index()

pivot_df4

YYYYMM,engname,FLAVORSEG_type3,202201,202202,202203,202204,202205,202206,202207,202208,...,202310,202311,202312,202401,202402,202403,202404,202405,202406,202407
0,88 ICE GOLD,Fresh,,,,,,,,,...,15.0,,,,,,,,,
1,88 RETURNS,Regular,917.0,786.0,899.0,874.0,1116.0,1034.0,1039.0,1124.0,...,,,,,,,,,,
2,BOHEM CIGAR CARIBE,New Taste,3063.0,2711.0,3095.0,3196.0,3217.0,3245.0,3459.0,3380.0,...,4258.0,4218.0,3875.0,4094.0,3567.0,4308.0,4454.0,4699.0,4841.0,5204.0
3,BOHEM CIGAR CUBANA DOUBLE,New Taste,2171.0,1964.0,2351.0,2692.0,2923.0,2768.0,3018.0,2790.0,...,3458.0,3387.0,3153.0,3056.0,3030.0,3321.0,3584.0,3828.0,3760.0,4055.0
4,BOHEM CIGAR ICE FIT,New Taste,,,2399.0,3008.0,2950.0,3010.0,2813.0,2668.0,...,3491.0,3518.0,3605.0,3519.0,3184.0,3602.0,3938.0,4349.0,4427.0,4752.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,TIME MID,Regular,1082.0,975.0,1242.0,1416.0,1435.0,1561.0,1574.0,1474.0,...,2262.0,2355.0,2018.0,2180.0,1991.0,1973.0,2004.0,2094.0,2164.0,2247.0
217,TIMELESS TIME KS BOX 20,Regular,913.0,901.0,966.0,958.0,995.0,1039.0,1038.0,927.0,...,3.0,,,,,,,,,
218,VS S. BLUE,Regular,509.0,537.0,620.0,646.0,621.0,630.0,757.0,708.0,...,643.0,578.0,660.0,787.0,682.0,677.0,785.0,722.0,661.0,717.0
219,VS S. GOLD,Regular,3723.0,3108.0,3803.0,4017.0,4123.0,4183.0,4358.0,4142.0,...,5004.0,4638.0,4409.0,4279.0,3850.0,4845.0,4926.0,4977.0,4851.0,4875.0


In [35]:
pivot_df4.to_clipboard(index=False)

# 제품 출시일 기준 m1, m2, m3, m4 Pivot

In [50]:
#  5. M1/M2/M3/M4 제품구매 추출 CC NPL 'Regular to New Taste'
sql5 = '''
with purchase as (
	select b.engname, 
		b.FLAVORSEG_type6, 
		t.YYYYMM, 
		t.id,
		first_purchase ,
		DATEDIFF(MONTH, CAST(first_purchase +'01' as date), CAST(t.YYYYMM +'01' as date) ) cohort,
		dense_rank() over(partition by b.engname, b.FLAVORSEG_type6 order by  first_purchase ) rn
	from cx.seven11_user_3month_list t 
		join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
		join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC' and NPL_YN ='Y' and  FLAVORSEG_type6 ='Regular to New Taste'
		left join cx.first_purchaser x on t.id = x.id and x.engname = b.engname
	where 1=1 -- b.engname= 'BOHEM CIGAR ICE FIT'
	group by b.engname, b.FLAVORSEG_type6, t.YYYYMM, t.id, first_purchase
)
select *  
from purchase
where cohort between 0 and 3
and rn between 1 and 4
'''

df5 = pd.read_sql_query(sql5, engine)

pivot_df5 = df5.pivot_table(index= ['engname', 'FLAVORSEG_type6', 'first_purchase' ],
                            columns='cohort',
                            values='id',
                            aggfunc='count')
pivot_df5

Unnamed: 0_level_0,Unnamed: 1_level_0,cohort,0,1,2,3
engname,FLAVORSEG_type6,first_purchase,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
BOHEM CIGAR CARIBE,Regular to New Taste,202201,134.0,38.0,32.0,28.0
BOHEM CIGAR CARIBE,Regular to New Taste,202202,104.0,24.0,20.0,18.0
BOHEM CIGAR CARIBE,Regular to New Taste,202203,127.0,35.0,30.0,19.0
BOHEM CIGAR ICE FIT,Regular to New Taste,202203,1013.0,316.0,209.0,198.0
BOHEM CIGAR ICE FIT,Regular to New Taste,202204,399.0,125.0,105.0,95.0
BOHEM CIGAR ICE FIT,Regular to New Taste,202205,214.0,73.0,55.0,40.0
BOHEM MINI ROAST KS RCB 20 SSL,Regular to New Taste,202208,82.0,17.0,9.0,4.0
BOHEM MINI ROAST KS RCB 20 SSL,Regular to New Taste,202209,691.0,156.0,109.0,92.0
BOHEM MINI ROAST KS RCB 20 SSL,Regular to New Taste,202210,359.0,101.0,72.0,64.0
BOHEM PIPE BRITON,Regular to New Taste,202404,475.0,80.0,40.0,27.0


In [54]:
pivot_df5.to_clipboard()

In [56]:
#  6. CC NPL 'Fresh to New Taste'
sql6 = '''
with purchase as (
	select b.engname, 
		b.FLAVORSEG_type6, 
		t.YYYYMM, 
		t.id,
		first_purchase ,
		DATEDIFF(MONTH, CAST(first_purchase +'01' as date), CAST(t.YYYYMM +'01' as date) ) cohort,
		dense_rank() over(partition by b.engname, b.FLAVORSEG_type6 order by  first_purchase ) rn
	from cx.seven11_user_3month_list t 
		join cx.fct_K7_Monthly a on a.id = t.id and a.YYYYMM = t.YYYYMM 
		join cx.product_master b on a.product_code = b.PROD_ID  and CIGADEVICE ='CIGARETTES' and CIGATYPE = 'CC' and NPL_YN ='Y' and  FLAVORSEG_type6 ='Fresh to New Taste'
		left join cx.first_purchaser x on t.id = x.id and x.engname = b.engname
	where 1=1 -- b.engname= 'BOHEM CIGAR ICE FIT'
	group by b.engname, b.FLAVORSEG_type6, t.YYYYMM, t.id, first_purchase
)
select *  
from purchase
where cohort between 0 and 3
and rn between 1 and 4
'''

df6 = pd.read_sql_query(sql6, engine)

pivot_df6 = df6.pivot_table(index= ['engname', 'FLAVORSEG_type6', 'first_purchase' ],
                            columns='cohort',
                            values='id',
                            aggfunc='count')
pivot_df6

ValueError: Grouper for 'cohort' not 1-dimensional

In [55]:
pivot_df6.to_clipboard()