# Yearly pivot: purchaser count and total pack count

In [7]:
import pandas as pd
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process
# environment so the connection string is never written into the notebook.
load_dotenv()

# SQLAlchemy connection URL, read from the CON_STR environment variable.
database = os.getenv('CON_STR')
if not database:
    # os.getenv returns None when the variable is missing; fail here with an
    # actionable message instead of letting create_engine reject the value
    # with a less obvious error about its URL argument.
    raise RuntimeError(
        "Environment variable CON_STR is not set; define it in the environment or in a .env file."
    )

# NOTE: despite its name, `conn` is a SQLAlchemy Engine rather than an open
# connection; it is what the pandas read_sql_query calls receive as `con=`.
conn = create_engine(database)

In [3]:
# Yearly purchaser / pack totals per flavor segment and cigatype.
# The query counts distinct t.id and sums a.pack_qty for products with
# CIGADEVICE = 'CIGARETTES' and cigatype != 'CSV', restricted to 2021 and 2022,
# and returns one row per (FLAVORSEG_type3, cigatype, year).
sql = '''
select 
	b.FLAVORSEG_type3,
	b.cigatype,
	left(a.yyyymm,4) year,
	COUNT(distinct t.id ) Purchaser_Cnt,
	sum(  a.pack_qty) as Total_Pack_Cnt
FROM 
	cx.seven11_user_3month_list t
		join cx.fct_K7_Monthly a on t.id = a.id and t.YYYYMM = a.YYYYMM
    	join cx.product_master b on a.product_code = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV'
where 1=1
   	and left(t.YYYYMM, 4) in ('2021', '2022')
GROUP BY 
	b.FLAVORSEG_type3, b.cigatype, left(a.YYYYMM, 4)

'''

purchaser_pf = pd.read_sql_query(sql=sql, con=conn)

# The SQL GROUP BY already yields exactly one row per pivot cell, so this is a
# pure reshape.  aggfunc is spelled out because the pivot_table default is
# 'mean', which reports the already-aggregated counts as floats (23840.0);
# summing the single row per cell gives the same values without that cast.
pivot_df = purchaser_pf.pivot_table(
    index=['cigatype', 'FLAVORSEG_type3'],
    columns='year',
    values=['Purchaser_Cnt', 'Total_Pack_Cnt'],
    aggfunc='sum',
)

pivot_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchaser_Cnt,Purchaser_Cnt,Total_Pack_Cnt,Total_Pack_Cnt
Unnamed: 0_level_1,year,2021,2022,2021,2022
cigatype,FLAVORSEG_type3,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CC,Fresh,23840.0,59054.0,128710.0,540875.0
CC,New Taste,103124.0,250016.0,621950.0,2866578.0
CC,Regular,117902.0,262278.0,889697.0,3852953.0
HnB,Fresh,21272.0,54464.0,135419.0,593992.0
HnB,New Taste,37505.0,108127.0,231117.0,1142730.0
HnB,Regular,8548.0,20820.0,57481.0,236112.0


In [5]:
# Copy the yearly flavor-segment pivot to the system clipboard so it can be pasted elsewhere (e.g. a spreadsheet).
pivot_df.to_clipboard()

In [4]:
# Yearly purchaser / pack totals per cigatype only (no flavor segment).
# This is queried separately rather than summed from the flavor-level result
# because COUNT(distinct t.id) is not additive: the same id can appear in
# several flavor segments, so segment counts add up to more than the
# cigatype-level distinct count.
sql2 = '''
select 
	b.cigatype,
	left(a.yyyymm,4) year,
	COUNT(distinct t.id ) Purchaser_Cnt,
	sum(a.pack_qty) as Total_Pack_Cnt
FROM 
	cx.seven11_user_3month_list t
		join cx.fct_K7_Monthly a on t.id = a.id and t.YYYYMM = a.YYYYMM
    	join cx.product_master b on a.product_code = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV'
where 1=1
   	and left(a.YYYYMM, 4) in ('2021', '2022')
GROUP BY 
	 b.cigatype, left(a.YYYYMM, 4)
order by cigatype, [year]

'''

purchaser_pf2 = pd.read_sql_query(sql=sql2, con=conn)

# One row per (cigatype, year) comes back from SQL, so the pivot only reshapes.
# aggfunc is explicit because the pivot_table default ('mean') reports the
# counts as floats; summing the single row per cell leaves the values as-is.
pivot_df2 = purchaser_pf2.pivot_table(
    index=['cigatype'],
    columns='year',
    values=['Purchaser_Cnt', 'Total_Pack_Cnt'],
    aggfunc='sum',
)

pivot_df2

Unnamed: 0_level_0,Purchaser_Cnt,Purchaser_Cnt,Total_Pack_Cnt,Total_Pack_Cnt
year,2021,2022,2021,2022
cigatype,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
CC,206910.0,444274.0,1640357.0,7260406.0
HnB,58695.0,150891.0,424017.0,1972834.0


In [6]:
# Copy the yearly cigatype-level pivot to the system clipboard so it can be pasted elsewhere (e.g. a spreadsheet).
pivot_df2.to_clipboard()

# Quarterly pivot: purchaser count and total pack count

In [9]:
# Quarterly purchaser / pack totals per flavor segment and cigatype.
# Same measures as the yearly query (distinct t.id, summed a.pack_qty, products
# with CIGADEVICE = 'CIGARETTES' and cigatype != 'CSV'), but bucketed by
# c.quarterly from the calendar dimension and limited to quarters '20234' and
# '20241' (t.YYYYMM 202310 through 202403).
sql3 = '''
select 
	b.FLAVORSEG_type3,
	b.cigatype,
	c.quarterly,
	COUNT(distinct t.id ) Purchaser_Cnt,
	sum(  a.pack_qty) as Total_Pack_Cnt
FROM 
	cx.seven11_user_3month_list t
		join cx.fct_K7_Monthly a on t.id = a.id and t.YYYYMM = a.YYYYMM
    	join cx.product_master b on a.product_code = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV'
    	join cx.dim_calendar c on a.de_dt = c.dt and c.quarterly in ('20234', '20241')
where 1=1
   	and t.YYYYMM in ('202310', '202311', '202312', '202401', '202402', '202403')
GROUP BY 
	b.FLAVORSEG_type3, b.cigatype, c.quarterly
'''

purchaser_pf3 = pd.read_sql_query(sql=sql3, con=conn)

# One row per (cigatype, FLAVORSEG_type3, quarterly) comes back from SQL, so
# the pivot only reshapes.  aggfunc is explicit because the pivot_table default
# ('mean') reports the counts as floats; summing the single row per cell leaves
# the values unchanged.
pivot_df3 = purchaser_pf3.pivot_table(
    index=['cigatype', 'FLAVORSEG_type3'],
    columns='quarterly',
    values=['Purchaser_Cnt', 'Total_Pack_Cnt'],
    aggfunc='sum',
)

pivot_df3

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchaser_Cnt,Purchaser_Cnt,Total_Pack_Cnt,Total_Pack_Cnt
Unnamed: 0_level_1,quarterly,20234,20241,20234,20241
cigatype,FLAVORSEG_type3,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
CC,Fresh,26560.0,24748.0,152650.0,142948.0
CC,New Taste,151722.0,144743.0,960531.0,915150.0
CC,Regular,148723.0,142229.0,1143448.0,1092283.0
HnB,Fresh,30238.0,29264.0,194754.0,191550.0
HnB,New Taste,69070.0,70340.0,444992.0,460635.0
HnB,Regular,11813.0,11481.0,80838.0,81730.0


In [10]:
# Copy the quarterly pivot to the system clipboard so it can be pasted elsewhere (e.g. a spreadsheet).
pivot_df3.to_clipboard()

In [13]:
# Split the exported user list CSV into two files of (nearly) equal row count.
# NOTE(review): the input path is absolute and user-specific, so this cell only
# runs on the original author's machine; the outputs land in the current
# working directory.
df = pd.read_csv(r'C:\Users\schoi43\OneDrive - Philip Morris International\Documents\cu_user_3month_list.csv')

row_cnt = len(df)
# Floor division: with an odd number of rows the second part gets the extra row.
mid_index = row_cnt // 2

df_part1, df_part2 = df.iloc[:mid_index], df.iloc[mid_index:]

# Write each half without the positional index column.
for part, out_name in ((df_part1, 'output1.csv'), (df_part2, 'output2.csv')):
    part.to_csv(out_name, index=False)
