In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Notebook configuration: relative folder locations plus input / output file names.
config = {
    "project_path": "..",
    "data_folder_path": "regional_data",
    "temp_folder_path": "regional_temp",

    # input files
    "sellout_input_files": [
        "DW_Fact_Sellout_NoneSplit_2017-2019_20191219.csv",
        'DW_Fact_Sellout_NoneSplit_202001-03.csv',
        'DW_Fact_Sellout_NoneSplit_202004-05.csv',
        'DW_Fact_Sellout_NoneSplit_202006.csv',
    ],
    "sellout_20191112_input_files": "DW_Fact_Sellout_NoneSplit_20191112_20200120.csv",
    "product_file": "DW_DIM_Product_20200609.csv",
    "customer_input_file": ["2020 MDM.xlsx"],
    "grade_channel_file": "grade_channel.xlsx",
    "customer_acct_file": "customer_code_account_name.xlsx",

    # output files
    "sell_out_file": "sellout_202005_YTD.xlsx",
}

In [None]:
# Column layout of the raw sell-out CSV extracts, in file order
# (the files are read without a header row and labelled with this list).
sellout_columns = [
    'YearMonth', 'Data_Source', 'Bill_Date', 'Bill_Type',
    'Distributor_Code_DMS', 'Distributor_Code',
    'Client_Code', 'Client_Name', 'EA_Platform',
    'Ship_To_Code', 'Inventory_location',
    'Customer_Code', 'Product_Code',
    'Sellout_Qty', 'Sellout_SP_Value', 'Sellout_Channel_Value',
    'ETL_DateTime', 'Data_CreationTime',
]

# Subset of the raw columns that the rest of the notebook works with.
select_columns = [
    'YearMonth',
    'Distributor_Code',
    'Customer_Code',
    'Product_Code',
    'Sellout_SP_Value',
]

# Empty accumulator that the loading cells extend file by file.
sellout_df = pd.DataFrame(columns=select_columns)

In [None]:
# Load every sell-out extract listed in the config and stack the selected
# columns onto sellout_df.
# DataFrame.append no longer exists in pandas 2.x, so the per-file frames are
# collected in a list and concatenated once.
sellout_frames = []
for sellout_input_file in config["sellout_input_files"]:
    input_file_path = "/".join([config["project_path"],
                                config["data_folder_path"],
                                sellout_input_file])
    # Everything is read as text; missing cells become the string '0'.
    input_df = pd.read_csv(input_file_path, header=None, dtype=str).fillna('0')
    input_df.columns = sellout_columns
    # Some extracts start with a header record: drop the first row when it
    # simply repeats the column names.
    if not input_df.empty and list(input_df.iloc[0]) == list(sellout_columns):
        input_df = input_df.iloc[1:]
    sellout_frames.append(input_df[select_columns])

sellout_df = pd.concat([sellout_df] + sellout_frames, ignore_index=True)

In [None]:
# The files in sellout_input_files only hold incomplete 201912 data,
# so every 201912 row is discarded here.
is_partial_201912 = sellout_df["YearMonth"] == "201912"
sellout_df = sellout_df.loc[~is_partial_201912]

In [None]:
# Add the complete 201912 data from a separate file.

input_file_path = "/".join([config["project_path"],
                            config["data_folder_path"],
                            config["sellout_20191112_input_files"]])

# Read as text; missing cells become the string '0'.
input_df = pd.read_csv(input_file_path, header=None, dtype=str).fillna('0')
input_df.columns = sellout_columns
# Drop the first record when it is a header row repeating the column names.
if not input_df.empty and list(input_df.iloc[0]) == list(sellout_columns):
    input_df = input_df.iloc[1:]

# Keep only the 201912 rows and the columns used downstream.
input_df = input_df.loc[input_df["YearMonth"] == "201912", select_columns]

# pd.concat replaces DataFrame.append, which is not available in pandas 2.x.
sellout_df = pd.concat([sellout_df, input_df], ignore_index=True)

In [None]:
def remove_starting_zero(customer_code):
    """Strip a single leading "0" from a customer code.

    Only the first character is removed, so "00123" becomes "0123".
    Values that are not strings (for example NaN coming from an empty
    spreadsheet cell, or None) are returned unchanged instead of raising
    AttributeError.
    """
    if isinstance(customer_code, str) and customer_code.startswith("0"):
        return customer_code[1:]
    return customer_code

In [None]:
# Sell-out value becomes numeric; the key columns are forced to plain strings.
sellout_df = sellout_df.astype({
    "Sellout_SP_Value": "float",
    "Product_Code": "str",
    "Customer_Code": "str",
    "Distributor_Code": "str",
    "YearMonth": "str",
})

# Normalise customer codes with remove_starting_zero.
sellout_df["Customer_Code"] = sellout_df["Customer_Code"].apply(remove_starting_zero)

# Read product codes and their brand-stage and brand information

In [None]:
# Build the Product_Code -> Brand_Stage / Brand lookup from the product file.
product_brand_columns = ["Product_Code", "Product_Short_NameEN"]

input_file_path = "/".join([config["project_path"],
                            config["data_folder_path"],
                            config["product_file"]])

product_brand_input_df = pd.read_csv(input_file_path, header=0, dtype=str)[product_brand_columns]

product_brand_output_df = (
    product_brand_input_df
    .drop_duplicates()
    .reset_index(drop=True)
    # Rename by label instead of overwriting .columns positionally.
    .rename(columns={"Product_Short_NameEN": "Brand_Stage"})
    .dropna()
    # Brand is the first two characters of the brand-stage name.
    .assign(Brand=lambda df: df["Brand_Stage"].str.slice(0, 2))
)

# Add special product codes for brands and all brands.
# Built as one frame and concatenated once: DataFrame.append is not
# available in pandas 2.x.
special_products_df = pd.DataFrame([
    {"Product_Code": "10332781", "Brand_Stage": "NC", "Brand": "NC"},
    {"Product_Code": "10332872", "Brand_Stage": "AC", "Brand": "AC"},
    {"Product_Code": "10332873", "Brand_Stage": "AP", "Brand": "AP"},
    {"Product_Code": "10332778", "Brand_Stage": "ELN", "Brand": "ELN"},
])

product_brand_output_df = pd.concat(
    [product_brand_output_df, special_products_df], ignore_index=True)

In [None]:
# Attach brand information to every sell-out row (left join on Product_Code),
# then keep only the rows whose Brand is AC, NC or AP.
kept_brands = ["AC", "NC", "AP"]
sellout_with_brand = sellout_df.merge(product_brand_output_df, how="left", on="Product_Code")
sellout_with_brand = sellout_with_brand.loc[sellout_with_brand["Brand"].isin(kept_brands)]

In [None]:
# Total sell-out value per month / distributor / customer / brand.
sellout_with_brand_agg = (
    sellout_with_brand
    .groupby(by=['YearMonth', 'Distributor_Code', 'Customer_Code', 'Brand'])['Sellout_SP_Value']
    .sum()
    .reset_index()
)

# YearMonth is a YYYYMM string, so plain string comparison orders the months.
agg_month = sellout_with_brand_agg['YearMonth']

# "2019" slice: 201810 through 201912 inclusive (it also carries the last
# quarter of 2018).
in_2019_window = (agg_month <= '201912') & (agg_month >= '201810')
sellout_2019 = sellout_with_brand_agg.loc[in_2019_window].copy()

# "2020" slice: 202001 onwards.
sellout_2020 = sellout_with_brand_agg.loc[agg_month >= '202001'].copy()

# Read Customer information

In [None]:
# Read customer information

# Columns needed from the customer file(s).
# Note: this rebinds select_columns, which earlier held the sell-out column subset.
select_columns = [
    'YearMonth', 'Customer_Code', 'Grade_Code',
    'region_name', 'subregion_name', 'province_name',
]

# Empty accumulator extended by the customer loading loop.
customer_input_df = pd.DataFrame(columns=select_columns)

In [None]:
# Read every customer workbook listed in the config (all cells as text) and
# stack the selected columns onto customer_input_df.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
customer_frames = []
for customer_input_file in config["customer_input_file"]:
    input_file_path = "/".join([config["project_path"],
                                config["data_folder_path"],
                                customer_input_file])
    input_df = pd.read_excel(input_file_path, dtype=str)[select_columns]
    customer_frames.append(input_df)

customer_input_df = pd.concat([customer_input_df] + customer_frames)

In [None]:
# Give the customer columns the names used by the rest of the notebook.
# rename() works by label and returns a new frame, so customer_input_df is no
# longer relabelled in place through an alias, and a different column order
# can no longer mislabel the data.
customer_df = customer_input_df.rename(columns={
    'Grade_Code': 'Grade',
    'region_name': 'Region_Name',
    'subregion_name': 'Subregion_Name',
    'province_name': 'Province_Name',
})
customer_df = customer_df[['YearMonth', 'Customer_Code', 'Grade',
                           'Region_Name', 'Subregion_Name', 'Province_Name']]

# Customer attributes used for the pre-2020 sell-out rows: the 202001 records,
# without the month column so they can be joined on Customer_Code alone.
customer_2019_df = customer_df.loc[customer_df['YearMonth'] == '202001'].copy()
customer_2019_df = customer_2019_df.drop(columns=['YearMonth'])

In [None]:
# Pre-2020 sell-out is joined to customer_2019_df on Customer_Code only;
# 2020 sell-out is joined to the customer record of the same month.
sellout_2019_cust = pd.merge(sellout_2019, customer_2019_df, on=['Customer_Code'], how='left')

sellout_2020_cust = pd.merge(sellout_2020, customer_df, on=['YearMonth', 'Customer_Code'], how='left')

# Stack both periods. pd.concat replaces DataFrame.append, which is not
# available in pandas 2.x; the original row labels are kept as before.
sellout_with_cust = pd.concat([sellout_2019_cust, sellout_2020_cust])

In [None]:
# Load the grade/channel workbook with every cell as text.
input_file_path = "/".join([config["project_path"],
                            config["data_folder_path"],
                            config["grade_channel_file"]])

grade_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Attach the grade attributes (left join on Grade); rows that end up without
# a Channel are labelled 'Unassigned'.
sellout_with_grade = (
    sellout_with_cust
    .merge(grade_df, how='left', on=['Grade'])
    .fillna({'Channel': 'Unassigned'})
)

# Display the rows that did not get a channel.
has_no_channel = sellout_with_grade['Channel'] == 'Unassigned'
sellout_with_grade.loc[has_no_channel]

In [None]:
# Total sell-out value carried by the rows whose Channel is 'Unassigned'.
sellout_with_grade.loc[sellout_with_grade['Channel'] == 'Unassigned', ['Sellout_SP_Value']].sum()

In [None]:
# Load the customer-code / account-name workbook with every cell as text and
# normalise its customer codes with remove_starting_zero.
input_file_path = "/".join([config["project_path"],
                            config["data_folder_path"],
                            config["customer_acct_file"]])

acct_df = pd.read_excel(input_file_path, dtype=str)
acct_df['Customer_Code'] = acct_df['Customer_Code'].apply(remove_starting_zero)

In [None]:
# Attach the account attributes (left join on Customer_Code); rows without an
# account name are labelled 'Unassigned'.
sellout_with_acct = (
    sellout_with_grade
    .merge(acct_df, how='left', on=['Customer_Code'])
    .fillna({'Account_Name': 'Unassigned'})
)

In [None]:
# Display the rows that did not get an account name.
sellout_with_acct.loc[sellout_with_acct['Account_Name'] == 'Unassigned']

# GT-DC Logic

In [None]:
def change_region_name(row):
    """Return "GT-DC" when the row's Channel is "GT-DC", else its Region_Name."""
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Region_Name"]

# Rows in the GT-DC channel get "GT-DC" as their region name; all other rows
# keep their value. Series.mask does this in one vectorised step instead of a
# Python call per row, with the same result as change_region_name.
sellout_with_acct["Region_Name"] = sellout_with_acct["Region_Name"].mask(
    sellout_with_acct["Channel"] == "GT-DC", "GT-DC")


def change_subregion_name(row):
    """Return "GT-DC" when the row's Channel is "GT-DC", else its Subregion_Name."""
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Subregion_Name"]

# Rows in the GT-DC channel get "GT-DC" as their subregion name; all other
# rows keep their value. Series.mask does this in one vectorised step instead
# of a Python call per row, with the same result as change_subregion_name.
sellout_with_acct["Subregion_Name"] = sellout_with_acct["Subregion_Name"].mask(
    sellout_with_acct["Channel"] == "GT-DC", "GT-DC")


def change_province_Name(row):
    """Return "GT-DC" when the row's Channel is "GT-DC", else its Province_Name."""
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Province_Name"]

# Rows in the GT-DC channel get "GT-DC" as their province name; all other
# rows keep their value. Series.mask does this in one vectorised step instead
# of a Python call per row, with the same result as change_province_Name.
sellout_with_acct["Province_Name"] = sellout_with_acct["Province_Name"].mask(
    sellout_with_acct["Channel"] == "GT-DC", "GT-DC")

In [None]:
# Export the enriched sell-out table to the temp folder.
output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'sellout_2019_2020.xlsx'])

sellout_with_acct.to_excel(output_file_path, index=False, header=True)

# P3M by store

In [None]:
def get_past_3m(yearmonth):
    """Return the three months before `yearmonth`, most recent first.

    `yearmonth` is a YYYYMM value that int() accepts (e.g. "202001").
    The result is a list of three YYYYMM strings,
    e.g. ["201912", "201911", "201910"].
    """
    year, month = divmod(int(yearmonth), 100)
    previous_months = []
    for _ in range(3):
        month -= 1
        if month == 0:
            # Stepped back past January: continue from December of the prior year.
            year -= 1
            month = 12
        previous_months.append(str(year * 100 + month))
    return previous_months

In [None]:
# Column layout that seeds the per-store P3M accumulator frame.
p3m_columns = [
    'YearMonth', 'Distributor_Code', 'Customer_Code', 'Brand',
    'Sellout_SP_Value',
    'Grade', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Channel',
    'P3M_Sellout',
]

p3m_df = pd.DataFrame(columns=p3m_columns)

In [None]:
# For every reporting month, attach to each sell-out row the store's average
# monthly sell-out (per brand) over the three preceding months.
# Months are processed in the original order: 202001-202006, then 201901-201912.
# The monthly frames are collected and concatenated once because
# DataFrame.append is not available in pandas 2.x.
p3m_frames = []
for year, last_month in ((2020, 6), (2019, 12)):
    for month in range(1, last_month + 1):
        year_month = str(year) + str(month).zfill(2)
        p3m_list = get_past_3m(year_month)

        current_sellout = sellout_with_acct.loc[sellout_with_acct['YearMonth'] == year_month].copy()
        p3m_sellout = sellout_with_acct.loc[sellout_with_acct['YearMonth'].isin(p3m_list)].copy()

        # Monthly total per brand and store, then the mean of those monthly
        # totals. Months in which a store has no rows are not counted as zero.
        sellout_temp = p3m_sellout.groupby(by=['YearMonth', 'Brand', 'Customer_Code'])['Sellout_SP_Value'].sum().reset_index()
        sellout_temp = sellout_temp.groupby(by=['Brand', 'Customer_Code'])['Sellout_SP_Value'].mean().reset_index()
        sellout_temp["Brand"] = sellout_temp["Brand"].astype("str")
        sellout_temp["Customer_Code"] = sellout_temp["Customer_Code"].astype("str")
        sellout_temp = sellout_temp.rename(columns={'Sellout_SP_Value': 'P3M_Sellout'})

        # Stores without any sell-out in the look-back window get P3M_Sellout = 0.
        p3m_temp = pd.merge(current_sellout, sellout_temp,
                            on=['Brand', 'Customer_Code'], how='left').fillna({'P3M_Sellout': 0})

        p3m_frames.append(p3m_temp)

p3m_df = pd.concat([p3m_df] + p3m_frames)

In [None]:
# NOTE(review): this removes the P3M_Sellout column computed in the previous
# cell before the frame is exported as 'sellout_P3M_2019_2020.xlsx' — confirm
# that dropping it is intended.
p3m_df = p3m_df.drop('P3M_Sellout', axis=1)

In [None]:
# Export the per-store P3M table to the temp folder.
output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'sellout_P3M_2019_2020.xlsx'])

p3m_df.to_excel(output_file_path, index=False, header=True)

# GT Segment

In [None]:
# Month label in YYYYMM form.
# NOTE(review): the month loops in the following cells assign year_month again
# before reading it, so this value looks unused — confirm.
year_month = "202006"

In [None]:
def get_gt_segment(sellout):
    """Classify a store's sell-out value into a GT segment label.

    Returns:
        "0-5K"   for 0 < sellout <= 5000
        "5K-10K" for 5000 < sellout <= 10000
        ">10K"   for sellout > 10000
        "Others" for zero, negative or NaN values

    The positivity check comes first: previously zero and negative values
    failed the first test and were then caught by the `<= 10000` branch, so
    they were labelled "5K-10K" and "Others" was only reachable for NaN.
    """
    if not sellout > 0:
        # Covers zero, negatives and NaN (every comparison with NaN is False).
        return "Others"
    if sellout <= 5000:
        return "0-5K"
    if sellout <= 10000:
        return "5K-10K"
    return ">10K"

In [None]:
# Column layout that seeds the GT segment accumulator frame.
gt_seg_columns = [
    'YearMonth',
    'Customer_Code',
    'Sellout_SP_Value',
    'Segment',
]

gt_seg_df = pd.DataFrame(columns=gt_seg_columns)

In [None]:
# For every reporting month, total the sell-out of each GT / GT-DC store and
# label it with its segment.
# Months are processed in the original order: 202001-202006, then 201901-201912.
# The monthly frames are collected and concatenated once because
# DataFrame.append is not available in pandas 2.x.
gt_seg_frames = []
for year, last_month in ((2020, 6), (2019, 12)):
    for month in range(1, last_month + 1):
        year_month = str(year) + str(month).zfill(2)

        in_month = sellout_with_acct['YearMonth'] == year_month
        in_gt_channel = sellout_with_acct['Channel'].isin(['GT', 'GT-DC'])
        GT_sellout = sellout_with_acct.loc[in_month & in_gt_channel].copy()

        GT_sellout = GT_sellout.groupby(by=['YearMonth', 'Customer_Code'])['Sellout_SP_Value'].sum().reset_index()
        GT_sellout['Segment'] = GT_sellout['Sellout_SP_Value'].apply(get_gt_segment)

        gt_seg_frames.append(GT_sellout)

gt_seg_df = pd.concat([gt_seg_df] + gt_seg_frames)

In [None]:
# Export the per-store GT segment table to the temp folder.
output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_segment_2019_2020.xlsx'])

gt_seg_df.to_excel(output_file_path, index=False, header=True)

# Monthly Sellout

In [None]:
# Column layout that seeds the monthly sell-out accumulator frame.
mth_columns = [
    'YearMonth', 'Brand',
    'Geo_Name', 'Region_Name',
    'Channel', 'Grade', 'Account_Name',
    'Sellout_SP_Value',
]
mth_df = pd.DataFrame(columns=mth_columns)

In [None]:
# Monthly sell-out roll-ups. Eight aggregations are stacked into mth_df:
#   geography : national ('全国'), then per Region_Name
#   detail    : per account, then per grade (Account_Name is set to the Grade)
#   brand     : per brand, then all brands together (Brand = 'TTL')
# The loops reproduce the original order of the eight blocks.
# groupby drops rows whose key columns are NaN, so rows without e.g. a Grade
# do not contribute to these sums.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
monthly_frames = []
for geo_keys in ([], ['Region_Name']):
    for detail_keys in (['Channel', 'Grade', 'Account_Name'], ['Channel', 'Grade']):
        for split_by_brand in (True, False):
            group_keys = (['YearMonth']
                          + (['Brand'] if split_by_brand else [])
                          + geo_keys
                          + detail_keys)
            sellout_temp = sellout_with_acct.groupby(by=group_keys)['Sellout_SP_Value'].sum().reset_index()

            if not split_by_brand:
                sellout_temp['Brand'] = 'TTL'

            if geo_keys:
                sellout_temp['Geo_Name'] = sellout_temp['Region_Name']
            else:
                sellout_temp['Geo_Name'] = '全国'
                sellout_temp['Region_Name'] = '全国'

            if 'Account_Name' not in detail_keys:
                # Grade-level rows reuse the grade as their account label.
                sellout_temp['Account_Name'] = sellout_temp['Grade']

            monthly_frames.append(sellout_temp)

mth_df = pd.concat([mth_df] + monthly_frames)

In [None]:
# Export the monthly roll-up table to the temp folder.
output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'monthly_KA_sellout_2019_2020.xlsx'])

mth_df.to_excel(output_file_path, index=False, header=True)

# P3M

# NOTE(review): in this export the code below follows the "# P3M" heading
# without a cell marker of its own — confirm whether it is still executed.
# Keep the monthly roll-up rows of 202003, 202004 and 202005.
p3m_df = mth_df.loc[mth_df['YearMonth'].isin(['202003', '202004', '202005'])]

# Sum (not average) the sell-out value of those three months per
# brand / geography / channel / grade / account.
p3m_df = p3m_df.groupby(by = ['Brand', 'Geo_Name', 'Region_Name', 'Channel', 'Grade', 'Account_Name'])['Sellout_SP_Value'].sum().reset_index()

output_file_path = config["project_path"] \
                        + "/" + config["temp_folder_path"] \
                        + "/" + 'p3m_KA_sellout_202006.xlsx'

p3m_df.to_excel(output_file_path, header=True, index=False)

# GT Logic

### Active GT store

In [None]:
# Monthly sell-out per GT store and geography (Channel == 'GT' only).
gt_store_keys = ['YearMonth', 'Customer_Code', 'Region_Name', 'Subregion_Name', 'Province_Name']
GT_sellout = (
    sellout_with_acct
    .loc[sellout_with_acct['Channel'] == 'GT']
    .copy()
    .groupby(by=gt_store_keys)['Sellout_SP_Value']
    .sum()
    .reset_index()
)

# Stores whose monthly sell-out is strictly positive.
active_GT = GT_sellout.loc[GT_sellout['Sellout_SP_Value'] > 0].copy()

In [None]:
# Column layout that seeds the active-store accumulator frame;
# Customer_Code will hold the store counts.
mth_columns = [
    'YearMonth', 'Geo_Name',
    'Region_Name', 'Subregion_Name', 'Province_Name',
    'Customer_Code',
]
mth_df = pd.DataFrame(columns=mth_columns)

In [None]:
# Number of active GT stores per month at four geography levels:
# national ('全国'), region, subregion and province.
# The counts are taken from active_GT (stores with sell-out > 0). Previously
# GT_sellout was counted, so stores with zero or negative monthly sell-out
# were included and active_GT was never used.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
geo_levels = [
    [],
    ['Region_Name'],
    ['Region_Name', 'Subregion_Name'],
    ['Region_Name', 'Subregion_Name', 'Province_Name'],
]

active_frames = []
for geo_keys in geo_levels:
    sellout_temp = active_GT.groupby(by=['YearMonth'] + geo_keys)['Customer_Code'].count().reset_index()

    if geo_keys:
        # Geo_Name is the name of the most detailed level in this aggregation.
        sellout_temp['Geo_Name'] = sellout_temp[geo_keys[-1]]
    else:
        sellout_temp['Geo_Name'] = '全国'
        sellout_temp['Region_Name'] = '全国'

    # Levels below the aggregation level are left blank.
    for blank_col in ('Subregion_Name', 'Province_Name'):
        if blank_col not in geo_keys:
            sellout_temp[blank_col] = ''

    active_frames.append(sellout_temp)

mth_df = pd.concat([mth_df] + active_frames)

In [None]:
# The Customer_Code column holds store counts at this point: rename it by
# label and fix the column order explicitly. Overwriting .columns by position
# would silently mislabel the data if the column order ever differed.
mth_df = mth_df.rename(columns={'Customer_Code': 'Active_Store'})
mth_df = mth_df[['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Active_Store']]

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_active_2019_2020.xlsx'])

mth_df.to_excel(output_file_path, header=True, index=False)

In [None]:
# Start the combined GT result from the monthly active-store table.
# This binds the same object as mth_df; no copy is made.
gt_res_df = mth_df

### P3M Active GT store

In [None]:
# Column layout that seeds the P3M active-store accumulator frame.
p3m_columns = [
    'YearMonth', 'Geo_Name',
    'Region_Name', 'Subregion_Name', 'Province_Name',
    'Active_Store',
]

p3m_df = pd.DataFrame(columns=p3m_columns)

In [None]:
# For every reporting month, average the monthly active-store counts of the
# three preceding months per geography and round to a whole number.
# The mean is taken over the months present in mth_df for that geography.
# Months are processed in the original order: 202001-202006, then 201901-201912.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
p3m_geo_keys = ['Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name']

p3m_frames = []
for year, last_month in ((2020, 6), (2019, 12)):
    for month in range(1, last_month + 1):
        year_month = str(year) + str(month).zfill(2)
        p3m_list = get_past_3m(year_month)

        p3m_sellout = mth_df.loc[mth_df['YearMonth'].isin(p3m_list)].copy()
        p3m_sellout["Active_Store"] = p3m_sellout["Active_Store"].astype("int")

        sellout_temp = p3m_sellout.groupby(by=p3m_geo_keys)['Active_Store'].mean().reset_index()
        sellout_temp['Active_Store'] = sellout_temp["Active_Store"].round(0)
        # The average is reported against the month it precedes.
        sellout_temp['YearMonth'] = year_month

        p3m_frames.append(sellout_temp)

p3m_df = pd.concat([p3m_df] + p3m_frames)

In [None]:
# Rename the averaged count by label and fix the column order explicitly.
# Overwriting .columns by position would silently mislabel the data if the
# column order ever differed.
p3m_df = p3m_df.rename(columns={'Active_Store': 'P3M_Active_Store'})
p3m_df = p3m_df[['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'P3M_Active_Store']]

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'P3M_GT_active_2019_2020.xlsx'])

p3m_df.to_excel(output_file_path, header=True, index=False)

In [None]:
# Combine the monthly and P3M active-store tables; the outer join keeps
# geography/month combinations that appear in only one of them.
gt_active_keys = ['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name']
gt_res_df = gt_res_df.merge(p3m_df, how='outer', on=gt_active_keys)

In [None]:
# Values missing after the outer join become 0.
gt_res_df = gt_res_df.fillna(0)

# Same file name as the monthly active-store export written earlier in the
# notebook, so that file is overwritten with the combined table.
output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_active_2019_2020.xlsx'])

gt_res_df.to_excel(output_file_path, index=False, header=True)

# GT by Segment Logic

### Active GT store by segment

In [None]:
# Monthly sell-out per GT store and geography (Channel == 'GT' only),
# labelled with the store's segment.
gt_store_keys = ['YearMonth', 'Customer_Code', 'Region_Name', 'Subregion_Name', 'Province_Name']
GT_sellout = (
    sellout_with_acct
    .loc[sellout_with_acct['Channel'] == 'GT']
    .copy()
    .groupby(by=gt_store_keys)['Sellout_SP_Value']
    .sum()
    .reset_index()
)
GT_sellout['Segment'] = GT_sellout['Sellout_SP_Value'].apply(get_gt_segment)

In [None]:
# Column layout that seeds the per-segment store-count accumulator frame;
# Customer_Code will hold the store counts.
mth_columns = [
    'YearMonth', 'Geo_Name',
    'Region_Name', 'Subregion_Name', 'Province_Name',
    'Segment', 'Customer_Code',
]
mth_df = pd.DataFrame(columns=mth_columns)

In [None]:
# Number of GT stores per month and segment at four geography levels:
# national ('全国'), region, subregion and province.
# Here the national rows carry an empty Region_Name.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
geo_levels = [
    [],
    ['Region_Name'],
    ['Region_Name', 'Subregion_Name'],
    ['Region_Name', 'Subregion_Name', 'Province_Name'],
]

segment_frames = []
for geo_keys in geo_levels:
    group_keys = ['YearMonth'] + geo_keys + ['Segment']
    sellout_temp = GT_sellout.groupby(by=group_keys)['Customer_Code'].count().reset_index()

    # Geo_Name is the name of the most detailed level in this aggregation.
    sellout_temp['Geo_Name'] = sellout_temp[geo_keys[-1]] if geo_keys else '全国'

    # Levels that are not part of the aggregation are left blank.
    for blank_col in ('Region_Name', 'Subregion_Name', 'Province_Name'):
        if blank_col not in geo_keys:
            sellout_temp[blank_col] = ''

    segment_frames.append(sellout_temp)

mth_df = pd.concat([mth_df] + segment_frames)

In [None]:
# The Customer_Code column holds store counts at this point: rename it by
# label and fix the column order explicitly. Overwriting .columns by position
# would silently mislabel the data if the column order ever differed.
mth_df = mth_df.rename(columns={'Customer_Code': 'Store_Number'})
mth_df = mth_df[['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Segment', 'Store_Number']]

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_active_by_segment_2019_2020.xlsx'])

mth_df.to_excel(output_file_path, header=True, index=False)

In [None]:
# Restart the combined GT result from the per-segment store-count table.
# This binds the same object as mth_df; no copy is made.
gt_res_df = mth_df

### P3M Active GT store by segment

In [None]:
# Column layout that seeds the P3M per-segment store-count accumulator frame.
p3m_columns = [
    'YearMonth', 'Geo_Name',
    'Region_Name', 'Subregion_Name', 'Province_Name',
    'Segment', 'Store_Number',
]

p3m_df = pd.DataFrame(columns=p3m_columns)

In [None]:
# For every reporting month, average the monthly store counts of the three
# preceding months per geography and segment, rounded to a whole number.
# The mean is taken over the months present in mth_df for that group.
# Months are processed in the original order: 202001-202006, then 201901-201912.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
p3m_segment_keys = ['Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Segment']

p3m_frames = []
for year, last_month in ((2020, 6), (2019, 12)):
    for month in range(1, last_month + 1):
        year_month = str(year) + str(month).zfill(2)
        p3m_list = get_past_3m(year_month)

        p3m_sellout = mth_df.loc[mth_df['YearMonth'].isin(p3m_list)].copy()
        p3m_sellout["Store_Number"] = p3m_sellout["Store_Number"].astype("int")

        sellout_temp = p3m_sellout.groupby(by=p3m_segment_keys)['Store_Number'].mean().reset_index()
        sellout_temp['Store_Number'] = sellout_temp["Store_Number"].round(0)
        # The average is reported against the month it precedes.
        sellout_temp['YearMonth'] = year_month

        p3m_frames.append(sellout_temp)

p3m_df = pd.concat([p3m_df] + p3m_frames)

In [None]:
# Rename the averaged count by label and fix the column order explicitly.
# Overwriting .columns by position would silently mislabel the data if the
# column order ever differed.
p3m_df = p3m_df.rename(columns={'Store_Number': 'P3M_Store_Number'})
p3m_df = p3m_df[['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Segment', 'P3M_Store_Number']]

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'P3M_GT_active_by_segment_2019_2020.xlsx'])

p3m_df.to_excel(output_file_path, header=True, index=False)

In [None]:
# Combine the monthly and P3M per-segment store counts; the outer join keeps
# combinations that appear in only one of the two tables.
gt_segment_keys = ['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Segment']
gt_res_df = gt_res_df.merge(p3m_df, how='outer', on=gt_segment_keys)

### GT store sellout ratio by segment

In [None]:
# Column layout that seeds the segment sell-out ratio accumulator frame.
ratio_columns = [
    'YearMonth', 'Geo_Name',
    'Region_Name', 'Subregion_Name', 'Province_Name',
    'Segment',
    'Segment_sellout', 'Total_Sellout', 'Ratio',
]
ratio_df = pd.DataFrame(columns=ratio_columns)

In [None]:
# Share of each segment in the GT sell-out, per month, at four geography
# levels: national ('全国'), region, subregion and province.
# Ratio = Segment_sellout / Total_Sellout; a zero total yields inf or NaN.
# The frames are concatenated once because DataFrame.append is not available
# in pandas 2.x.
geo_levels = [
    [],
    ['Region_Name'],
    ['Region_Name', 'Subregion_Name'],
    ['Region_Name', 'Subregion_Name', 'Province_Name'],
]

ratio_frames = []
for geo_keys in geo_levels:
    total_keys = ['YearMonth'] + geo_keys

    # Sell-out per segment within the geography ...
    sellout_seg_temp = GT_sellout.groupby(by=total_keys + ['Segment'])['Sellout_SP_Value'].sum().reset_index()
    sellout_seg_temp = sellout_seg_temp.rename(columns={'Sellout_SP_Value': 'Segment_sellout'})
    # ... and the geography total it is compared against.
    sellout_ttl_temp = GT_sellout.groupby(by=total_keys)['Sellout_SP_Value'].sum().reset_index()
    sellout_ttl_temp = sellout_ttl_temp.rename(columns={'Sellout_SP_Value': 'Total_Sellout'})

    sellout_temp = pd.merge(sellout_ttl_temp, sellout_seg_temp, on=total_keys, how='left').fillna(0)
    sellout_temp['Ratio'] = sellout_temp['Segment_sellout'] / sellout_temp['Total_Sellout']

    # Geo_Name is the name of the most detailed level in this aggregation.
    sellout_temp['Geo_Name'] = sellout_temp[geo_keys[-1]] if geo_keys else '全国'

    # Levels that are not part of the aggregation are left blank.
    for blank_col in ('Region_Name', 'Subregion_Name', 'Province_Name'):
        if blank_col not in geo_keys:
            sellout_temp[blank_col] = ''

    ratio_frames.append(sellout_temp)

ratio_df = pd.concat([ratio_df] + ratio_frames)

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_segment_sellout_ratio_2019_2020.xlsx'])

ratio_df.to_excel(output_file_path, header=True, index=False)

In [None]:
# Add the segment sell-out ratios to the combined per-segment table; the
# outer join keeps combinations that appear in only one of the two tables.
gt_segment_keys = ['YearMonth', 'Geo_Name', 'Region_Name', 'Subregion_Name', 'Province_Name', 'Segment']
gt_res_df = gt_res_df.merge(ratio_df, how='outer', on=gt_segment_keys)

In [None]:
# Values missing after the outer joins become 0, then the combined
# per-segment table is exported to the temp folder.
gt_res_df = gt_res_df.fillna(0)

output_file_path = "/".join([config["project_path"],
                             config["temp_folder_path"],
                             'GT_by_segment_2019_2020.xlsx'])

gt_res_df.to_excel(output_file_path, index=False, header=True)