In [1]:
# 데이터 불러오기
from dotenv import load_dotenv
import pandas as pd
import os 
import numpy as np
from sqlalchemy import create_engine


# .env 파일에서 환경 부르기
load_dotenv()

# 환경 변수 사용
database = os.getenv("CON_STR")


# # csv 파일 경로
# csv_file_path = os.getenv("CSV_FILE_PATH")
# print(csv_file_path)

engine = create_engine(database)

In [2]:
# -- 엑셀 시트 데이터 반영 작업
# --into cx.agg_LPoint_TEREA_Total_Sourcing
# --into cx.agg_LPoint_MIIX_Total_Sourcing
# --into cx.agg_LPoint_FIIT_Total_Sourcing
# --into cx.agg_LPoint_NEO_Total_Sourcing
# ALL Raw Data
sql1= ''' 
select * from cx.agg_LPoint_TEREA_Total_Sourcing2
order by YYYYMM, id
'''

gg = pd.read_sql_query(sql=sql1, con=engine)

In [3]:
gg.to_clipboard()

# L.Point 대상 TEREA/MIIX/FIIT/NEO Total Sourcing 피벗

In [4]:
# - flaXtar_ from 202211
sql2 = ''' 
select  
	t.YYYYMM,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT) flavorXtar,
	count(distinct case when b.cigatype ='CC' then t.id end) CC,
	count(distinct case when b.cigatype ='HnB' then t.id end) HnB
from cx.agg_LPoint_TEREA_Total_Sourcing2  t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cx.product_master b on a.product_code = b.PROD_ID and b.CIGADEVICE =  'CIGARETTES' AND b.cigatype != 'CSV'  
group BY 
	t.YYYYMM,
	concat(FLAVORSEG_type3,' X ', New_TARSEGMENTAT)
'''

data2 = pd.read_sql_query(sql=sql2, con=engine)

pivot_flavor_tar = data2.pivot_table(index=['YYYYMM'], 
                                     columns='flavorXtar',
                                     values=['HnB', 'CC' ]
                                    ).sort_values([ 'YYYYMM'])
print(pivot_flavor_tar)



                 CC                                                        \
flavorXtar Fresh X  Fresh X 1MG Fresh X Below 1MG Fresh X LTS Fresh X ULT   
YYYYMM                                                                      
202211          0.0        58.0               NaN        60.0        11.0   
202212          0.0        79.0               NaN       111.0        17.0   
202301          0.0        76.0               NaN        86.0         7.0   
202302          0.0       108.0               NaN       117.0        16.0   
202303          0.0       128.0               NaN       169.0        24.0   
202304          0.0       132.0               NaN       149.0        14.0   
202305          0.0       114.0               NaN       167.0        30.0   
202306          0.0       110.0               NaN       140.0        13.0   
202307          0.0       122.0               NaN       126.0        16.0   
202308          0.0       102.0               NaN       106.0        19.0   

In [8]:
pivot_flavor_tar.to_clipboard()

In [5]:
# - User_past_type_M1
sql = ''' 
select  
	t.YYYYMM, 
	t.id,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from cx.agg_LPoint_TEREA_Total_Sourcing2  t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM BETWEEN CONVERT(NVARCHAR(6), DATEADD(MONTH, -3, t.YYYYMM+'01'), 112)
				 	     AND CONVERT(NVARCHAR(6), DATEADD(MONTH, -1, t.YYYYMM+'01'), 112)	
	join cx.product_master b on a.product_code = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
group BY 	    
	t.YYYYMM, 
	t.id
'''

data = pd.read_sql_query(sql=sql, con=engine)


# Past Type 
def categorize(row):
    categories = []
    if row['IQOS_Purchased'] == 1:
        categories.append('PMK HnB')
    if row['CC_Purchased'] == 1:
        categories.append('CC')
    if row['CompHnB_Purchased'] == 1:
        categories.append('Comp HnB')
    return ','.join(categories)

data['retype'] = data.apply(categorize, axis=1)
result = data.groupby(['YYYYMM', 'retype']).size().reset_index(name='N')
total = result['N'].sum()

pivot_past_type = result.pivot_table( 
                            index=['YYYYMM'],
                            columns='retype',
                            values='N'
                            ).sort_values(['YYYYMM'])
print(pivot_past_type)



retype      CC  CC,Comp HnB  Comp HnB  PMK HnB  PMK HnB,CC  \
YYYYMM                                                       
202211   322.0         91.0      59.0    547.0       305.0   
202212   511.0        118.0     101.0    741.0       376.0   
202301   577.0        111.0     106.0    560.0       296.0   
202302   845.0        235.0     166.0    704.0       323.0   
202303  1426.0        289.0     248.0    930.0       507.0   
202304  1328.0        269.0     249.0    738.0       368.0   
202305  1599.0        349.0     241.0    682.0       386.0   
202306  1308.0        243.0     237.0    601.0       322.0   
202307  1360.0        253.0     205.0    497.0       255.0   
202308  1287.0        281.0     194.0    436.0       244.0   
202309  1500.0        302.0     222.0    463.0       254.0   
202310  1422.0        318.0     215.0    458.0       243.0   
202311  1483.0        267.0     222.0    396.0       202.0   
202312  1419.0        250.0     195.0    359.0       193.0   
202401  

In [9]:
pivot_past_type.to_clipboard()

# MIIX/FIIT/NEO 구분 잘하기

In [6]:
# - user_Current_type_M1

sql3 = ''' 
with temp as (
select  
	t.YYYYMM, 
	t.id,
	max(case when b.cigatype='HnB' and b.company = 'PMK' then 1 else 0 end) IQOS_Purchased,
	max(case when b.cigatype='CC' then 1 else 0 end) CC_Purchased,
	max(case when b.cigatype='HnB' and b.company != 'PMK' then 1 else 0 end) CompHnB_Purchased
from  cx.agg_LPoint_TEREA_Total_Sourcing2  t
	join cx.fct_K7_Monthly a on t.id = a.id 
		and a.YYYYMM = t.YYYYMM
	join cx.product_master b on a.product_code = b.PROD_ID and CIGADEVICE =  'CIGARETTES' AND  b.cigatype != 'CSV' 
group BY 	    	
	t.YYYYMM, 
	t.id
)
select YYYYMM,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
     as Cigatype,
    count(*) purchaser_cnt
from temp
group by YYYYMM,
    'IQOS' +
    CASE WHEN CompHnB_Purchased = 1 THEN ' + Comp. HnB' ELSE '' END + 
    CASE WHEN CC_Purchased = 1 THEN ' + CC' ELSE '' END 
'''

data3 = pd.read_sql_query(sql=sql3, con=engine)

pivot_current_type = data3.pivot_table(index=['YYYYMM'],
                                       columns='Cigatype',
                                       values='purchaser_cnt',
                                       ).sort_values(['YYYYMM'])

print(pivot_current_type)


Cigatype    IQOS  IQOS + CC  IQOS + Comp. HnB  IQOS + Comp. HnB + CC
YYYYMM                                                              
202211     836.0      427.0              84.0                   71.0
202212    1239.0      545.0             103.0                   86.0
202301    1045.0      557.0              85.0                   72.0
202302    1296.0      810.0             155.0                  149.0
202303    1960.0     1226.0             178.0                  200.0
202304    1521.0     1181.0             204.0                  183.0
202305    1554.0     1404.0             190.0                  231.0
202306    1376.0     1085.0             182.0                  172.0
202307    1214.0     1078.0             137.0                  193.0
202308    1076.0     1086.0             161.0                  187.0
202309    1191.0     1240.0             165.0                  208.0
202310    1206.0     1140.0             173.0                  194.0
202311    1175.0     1085.0       

In [10]:
pivot_current_type.to_clipboard()

# 연습

In [10]:
# -- Arbor_sourcing_M1 작업

data['company'] = data['company'].apply(lambda x : x.strip())

result = data.groupby(['YYYYMM', 'id', 'gender', 'age']).agg({
    'company': lambda x: x.unique(),
    'New_FLAVORSEG': lambda x: x.unique()
})


# result.set_index(['YYYYMM', 'id', 'gender', 'age'])
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,company,New_FLAVORSEG
YYYYMM,id,gender,age,Unnamed: 4_level_1,Unnamed: 5_level_1
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,[BAT],[Regular]
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,[PMK],"[Fresh, New Taste]"
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,[PMK],"[New Taste, Regular]"
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,[KTG],[New Taste]
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,"[KTG, PMK]","[New Taste, Fresh]"
202211,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,[PMK],"[Fresh, Regular]"
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,[KTG],[Fresh]
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,[KTG],[Fresh]
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,[KTG],[Regular]


In [13]:
pivot_table1 = data.pivot_table(
    index=['YYYYMM', 'id', 'gender', 'age'] ,           # 행 인덱스
    columns=['company'],
    values=['qty'],
    aggfunc='sum'            
)

pivot_table1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qty,qty,qty,qty
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,company,BAT,JTI,KTG,PMK
YYYYMM,id,gender,age,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,32.0,,,
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,,,,4.0
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,,,,7.0
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,,,9.0,
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,,,2.0,13.0
202211,...,...,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,,,,10.0
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,,,1.0,
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,,,4.0,
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,,,2.0,


In [12]:
data['CC_taste'] = data['cigatype'] + ' ' + data['New_FLAVORSEG']

pivot_table2 = data.pivot_table(
    index=['YYYYMM', 'id', 'gender', 'age'] ,
    columns=['CC_taste'],
    values=['qty'],  
    aggfunc='sum'           
)

pivot_table2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qty,qty,qty,qty,qty,qty
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,CC_taste,CC Fresh,CC New Taste,CC Regular,HnB Fresh,HnB New Taste,HnB Regular
YYYYMM,id,gender,age,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,,,1.0,,,31.0
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,,,,3.0,1.0,
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,,,,,1.0,6.0
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,,9.0,,,,
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,,1.0,,13.0,1.0,
202211,...,...,...,...,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,,,,2.0,,8.0
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,,,,1.0,,
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,4.0,,,,,
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,,,2.0,,,


In [17]:

result =data[data['productSubFamilyCode'].isin(['AIIM', 'FIIT', 'HEETS', 'MIIX', 'NEO', 'NEOSTICKS', 'TEREA'])]

data['HnB_taste'] = result['productSubFamilyCode'] + ' ' + data['New_FLAVORSEG']

pivot_table3 = data.pivot_table(
    index=['YYYYMM', 'id', 'gender', 'age'] ,            
    columns=['HnB_taste'],
    values=['qty'],  
    aggfunc='sum'            
)

pivot_table3

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,HnB_taste,FIIT Fresh,FIIT New Taste,FIIT Regular,HEETS Fresh,HEETS New Taste,HEETS Regular,MIIX Fresh,MIIX New Taste,MIIX Regular,NEOSTICKS Fresh,NEOSTICKS New Taste,NEOSTICKS Regular
YYYYMM,id,gender,age,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,,,,,,,,,,,,31.0
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,,,,3.0,1.0,,,,,,,
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,,,,,1.0,6.0,,,,,,
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,,1.0,,13.0,,,,,,,,
202211,018BDE68F71093A6A0E7C55C8FC9F17632BD1BB32EA6A5C7C53933F10602C397,남,40대,,,,,13.0,,,,,,,
202211,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202211,FF6D169BC2951D794CF9AF897DDA79D48423425D5053220EF01E5D6A612A5C39,남,50대,,,,,,,2.0,2.0,,,,
202211,FF8008FF518BE7144371D9250B3EEF6A021601FC869E9AB836673331314FF0DF,남,40대,,,,9.0,,,,,,,,
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,,,,2.0,,8.0,,,,,,
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,,,,,,,1.0,,,,,


In [None]:
filtered_data = data[data['ProductFamilyCode'] == 'IQOS'][['engname']]
filtered_data['engname'].unique()

In [None]:
filtered_data = data[data['ProductFamilyCode'] == 'IQOS']

pivot_table4 = filtered_data.pivot_table(
    index=['YYYYMM', 'id', 'gender', 'age'] ,           
    columns=['engname'],
    values=['qty'],   
    aggfunc='sum'           
)

pivot_table4

In [18]:
df2 = pd.DataFrame(result).set_index(['YYYYMM', 'id', 'gender', 'age'])

combined = pd.concat([result, pivot_table1, pivot_table2, pivot_table3, pivot_table4], axis=1, keys=['YYYYMM', 'id', 'gender', 'age'] )

combined.sort_index(level='id')
# combined.to_excel('finish.xlsx')
combined

  combined = pd.concat([result, pivot_table1, pivot_table2, pivot_table3, pivot_table4], axis=1, keys=['YYYYMM', 'id', 'gender', 'age'] )


AssertionError: Cannot concat indices that do not have the same number of levels

In [14]:
# 필터링된 데이터 생성
filtered_data = data[data['ProductFamilyCode'] == 'IQOS']
 
# 피벗 테이블 생성
pivot_table4 = filtered_data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                         columns=['engname'], values=['qty'],
                                         aggfunc='sum')
 
# HnB_taste 컬럼 생성 및 피벗 테이블 생성
result = data[data['productSubFamilyCode'].isin(['AIIM', 'FIIT', 'HEETS', 'MIIX', 'NEO', 'NEOSTICKS', 'TEREA'])]
data['HnB_taste'] = result['productSubFamilyCode'] + ' ' + data['New_FLAVORSEG']

pivot_table3 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['HnB_taste'], values=['qty'],
                                aggfunc='sum')
 
# CC_taste 컬럼 생성 및 피벗 테이블 생성
data['CC_taste'] = data['cigatype'] + ' ' + data['New_FLAVORSEG']

pivot_table2 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['CC_taste'], values=['qty'],
                                aggfunc='sum')
 
# 기본 피벗 테이블 생성
pivot_table1 = data.pivot_table(index=['YYYYMM', 'id', 'gender', 'age'],
                                columns=['company'], values=['qty'],
                                aggfunc='sum')

# 피벗 테이블들을 하나의 데이터프레임으로 병합
concatenated_pivot = pd.concat([ pivot_table1, pivot_table2, pivot_table3, pivot_table4], axis=1)

# 결과 출력
# final = pd.merge(result, concatenated_pivot, how='outer')
concatenated_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty,qty
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,BAT,JTI,KTG,PMK,CC Fresh,CC New Taste,CC Regular,HnB Fresh,HnB New Taste,HnB Regular,...,HEETS BRONZE LABEL,HEETS GOLD SELECTION,HEETS GREEN LABEL,HEETS GREEN ZING,HEETS PURPLE LABEL,HEETS SATIN WAVE,HEETS SILVER LABEL,HEETS SUMMER BREEZE,HEETS TURQUOISE LABEL,HEETS YUGEN
YYYYMM,id,gender,age,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
202211,003DE9124299CC396950FEF717EFE5020EFCB079E8DECB8D78A8F04D69D5AB62,남,40대,32.0,,,,,,1.0,,,31.0,...,,,,,,,,,,
202211,007BE5AEC24991C378435D650C80E7C168971700CB8EDFD037194D71CA0B453F,남,40대,,,,4.0,,,,3.0,1.0,,...,,,,,,,,1.0,,
202211,00851229FF4A0026F2682594CEDABB0AE1B73FF85E6CDED060ED4FB00B37ECC9,남,50대,,,,7.0,,,,,1.0,6.0,...,6.0,,,,,,,,,
202211,00FA9CFB5FFA3F6E3C00DA693D6862019C5A42D6A6D0B352C7B0BC3CB692C7E2,남,30대,,,9.0,,,9.0,,,,,...,,,,,,,,,,
202211,01117EF5EFBB1B6D53108D3EEBF53FAFBECFA132F880CFB169DA724CDC567C92,남,30대,,,2.0,13.0,,1.0,,13.0,1.0,,...,,,,,,,,,,
202211,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202211,FF83E54787EB82C6293ADA53E369F68215B39162BD33E3E5702C075F12FEF043,남,50대,,,,10.0,,,,2.0,,8.0,...,8.0,,,,,,,,,
202211,FFC50D9BE8D22E040EDAE5B54B00FA3856C2240DE21BE4C4782923710C85F0FB,남,20대,,,1.0,,,,,1.0,,,...,,,,,,,,,,
202211,FFC7C4D03CF065384860A5176E832123A07E70B6E185396323A8155F3D01AED8,여,50대,,,4.0,,4.0,,,,,,...,,,,,,,,,,
202211,FFD0E8F349A14622CD91B697DA35E0E9D4FFAF7D9D4C4EA966F09E5A94555746,남,40대,,,2.0,,,,2.0,,,,...,,,,,,,,,,


In [16]:
total_id_count = data.groupby('YYYYMM')['id'].count().reset_index(name='total_id_count')

gender_age_count = data.groupby(['YYYYMM', 'gender', 'age']).size().reset_index(name='count')

gender_pivot = gender_age_count.pivot_table(
    index ='YYYYMM',
    columns= ['gender', 'age'],
    values='count'
)

res = total_id_count.set_index('YYYYMM').join(gender_pivot)

res

MergeError: Not allowed to merge between different levels. (1 levels on the left, 2 on the right)