In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Folder settings used by the later cells to build input and output file paths.
config = {
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",
}

In [None]:
# Tracking workbook location: <project>/<data folder>/<file name>.
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    "LTC ANP Tracking 202003-TTL.XLSX",
])

# Load the 'raw data' sheet with dtype=str; numeric columns are converted
# explicitly in a later cell.
input_df = pd.read_excel(input_file_path, dtype=str, sheet_name='raw data')

In [None]:
# Source header -> working column name. The two Chinese headers are the
# month ('月份') and the province ('省份').
column_names = {
    'CHANNEL': 'Channel',
    '月份': 'YearMonth',
    '省份': 'Province_Name',
    'Premium': 'Premium',
    'Activity': 'Activity',
    'Others': 'Others',
}

# Keep only the mapped columns (in mapping order), then relabel them.
input_df = input_df[list(column_names)]
input_df.columns = list(column_names.values())

In [None]:
def to_yearmonth(gt_time):
    """Convert a 'YYYY/MM' string into the compact 'YYYYMM' form.

    The value is parsed by ``time.strptime`` with the ``%Y/%m`` pattern, so a
    string that does not match that pattern raises ``ValueError``.
    """
    parsed = time.strptime(gt_time, "%Y/%m")
    return time.strftime("%Y%m", parsed)

# Rewrite every YearMonth value from 'YYYY/MM' to 'YYYYMM'.
input_df["YearMonth"] = input_df["YearMonth"].apply(to_yearmonth)

In [None]:
def _single_cost_frame(cost_type):
    """Return the key columns plus one cost column of ``input_df`` in long form.

    The chosen cost column is relabelled 'Spending' and its original name is
    recorded in a new 'Cost_Type_3' column.
    """
    frame = input_df[['Channel', 'YearMonth', 'Province_Name', cost_type]].copy()
    frame.columns = ['Channel', 'YearMonth', 'Province_Name', 'Spending']
    frame['Cost_Type_3'] = cost_type
    return frame


# One long-format frame per cost column of the tracking sheet.
prem_df = _single_cost_frame('Premium')
acty_df = _single_cost_frame('Activity')
othe_df = _single_cost_frame('Others')

In [None]:
# Stack the three per-cost-type frames into one long table with a fresh
# 0..n-1 index. DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0; pd.concat produces the same result on every pandas version.
cost_df = pd.concat([prem_df, acty_df, othe_df], ignore_index=True)

# The workbook was loaded with dtype=str, so Spending must be converted to a
# number before it can be summed or multiplied.
cost_df['Spending'] = cost_df['Spending'].astype("float")

In [None]:
# Province lookup workbook: <project>/<data folder>/Province_Mapping.xlsx.
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    "Province_Mapping.xlsx",
])
province_df = pd.read_excel(input_file_path, dtype=str)

# Left join on Province_Name: every cost row is kept, and rows whose province
# is absent from the mapping get NaN in the mapping's columns.
cost_df = cost_df.merge(province_df, how='left', on='Province_Name')

In [None]:
# Split the cost rows by channel: RKMBS on one side, every other channel on
# the other (the latter is exported later under 'Sales GT').
is_rkmbs = cost_df['Channel'] == 'RKMBS'
group_keys = ['YearMonth', 'Province_Code', 'Cost_Type_3']

# Total spending per month / province / cost type for each side.
gt_df = cost_df[~is_rkmbs].groupby(by=group_keys)['Spending'].sum().reset_index()
rkmbs_df = cost_df[is_rkmbs].groupby(by=group_keys)['Spending'].sum().reset_index()

In [None]:
# Split-ratio workbook: <project>/<temp folder>/202001-05_province_brand_split.xlsx.
input_file_path = "/".join([
    config["project_path"],
    config["temp_folder_path"],
    "202001-05_province_brand_split.xlsx",
])
prov_brand_split = pd.read_excel(input_file_path, dtype=str)

# Everything is loaded as text; only the ratio is needed as a number.
split_ratio = prov_brand_split['Split_Ratio'].astype("float")
prov_brand_split['Split_Ratio'] = split_ratio

In [None]:
def format_yearmonth(year_month):
    """Render a 'YYYYMM' string as a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date falls on the first of the
    month. A string that does not match ``%Y%m`` raises ``ValueError``.
    """
    parsed = time.strptime(year_month, "%Y%m")
    return time.strftime("%Y/%m/%d", parsed)

def get_brand_name(brand):
    """Translate a brand code into its brand name.

    'NC' maps to 'Nutrilon'; 'AC' and 'AP' both map to 'Aptamil'; any other
    value (including a missing one) maps to the empty string.
    """
    if brand == 'NC':
        return 'Nutrilon'
    if brand in ('AC', 'AP'):
        return "Aptamil"
    return ''

In [None]:
# Attach the split-ratio rows to the GT spending totals; a left join keeps
# every spending row even when no ratio exists for its month and province.
gt_brand_df = pd.merge(gt_df, prov_brand_split, on=['YearMonth', 'Province_Code'], how='left')

# Display the rows that found no split ratio. NaN never compares equal to
# anything (itself included), so an `== np.NAN` filter is always empty and
# hides unmatched rows; the np.NAN alias is also gone in NumPy 2.0.
# isna() is the correct missing-value test.
gt_brand_df[gt_brand_df['Split_Ratio'].isna()]

In [None]:
# Allocate each spending total across SKUs according to its split ratio.
gt_brand_df['Spending_Value'] = gt_brand_df['Spending'] * gt_brand_df['Split_Ratio']
gt_brand_df = gt_brand_df[['YearMonth', 'Province_Code', 'Cost_Type_3', 'SKU', 'Spending_Value']]

# Sum over provinces and label the result 'National'.
gt_nati_df = gt_brand_df.groupby(by=['YearMonth', 'Cost_Type_3', 'SKU'])['Spending_Value'].sum().reset_index()
gt_nati_df['Province_Code'] = 'National'

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the province rows and the national rows the same way.
gt_brand_df = pd.concat([gt_brand_df, gt_nati_df], ignore_index=True)

# Derived columns: 'YYYYMM' -> 'YYYY/MM/DD' date string, SKU code -> brand name.
gt_brand_df["Date"] = gt_brand_df['YearMonth'].apply(format_yearmonth)
gt_brand_df["Brand_Name"] = gt_brand_df['SKU'].apply(get_brand_name)

# Constant classification columns for the GT output.
gt_brand_df['Cost_Type_1'] = 'Sales'
gt_brand_df['Cost_Type_2'] = 'Sales GT'
gt_brand_df['Cost_Type_4'] = ''
gt_brand_df['AIP'] = 'P'

# Final column order of the exported sheet.
gt_brand_df = gt_brand_df[['Date', 'Province_Code', 'Cost_Type_1', 'Cost_Type_2', 'Cost_Type_3', 'Cost_Type_4', 'AIP', 'Brand_Name', 'SKU', 'Spending_Value']]

In [None]:
# GT result workbook, written to <project>/<temp folder>/.
# NOTE(review): "2020003" has seven digits - possibly meant "202003". Left
# unchanged here; confirm with downstream consumers before renaming.
output_file_path = "/".join([
    config["project_path"],
    config["temp_folder_path"],
    "2020003_GT_Cost.xlsx",
])

# index=False keeps the DataFrame index out of the sheet.
gt_brand_df.to_excel(output_file_path, index=False)

In [None]:
# Attach the split-ratio rows to the RKMBS spending totals; a left join keeps
# every spending row even when no ratio exists for its month and province.
rkmbs_brand_df = pd.merge(rkmbs_df, prov_brand_split, on=['YearMonth', 'Province_Code'], how='left')

# Display the rows that found no split ratio. NaN never compares equal to
# anything (itself included), so an `== np.NAN` filter is always empty and
# hides unmatched rows; the np.NAN alias is also gone in NumPy 2.0.
# isna() is the correct missing-value test.
rkmbs_brand_df[rkmbs_brand_df['Split_Ratio'].isna()]

In [None]:
# Allocate each spending total across SKUs according to its split ratio.
rkmbs_brand_df['Spending_Value'] = rkmbs_brand_df['Spending'] * rkmbs_brand_df['Split_Ratio']
rkmbs_brand_df = rkmbs_brand_df[['YearMonth', 'Province_Code', 'Cost_Type_3', 'SKU', 'Spending_Value']]

# Sum over provinces and label the result 'National'.
rkmbs_nati_df = rkmbs_brand_df.groupby(by=['YearMonth', 'Cost_Type_3', 'SKU'])['Spending_Value'].sum().reset_index()
rkmbs_nati_df['Province_Code'] = 'National'

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the province rows and the national rows the same way.
rkmbs_brand_df = pd.concat([rkmbs_brand_df, rkmbs_nati_df], ignore_index=True)

# Derived columns: 'YYYYMM' -> 'YYYY/MM/DD' date string, SKU code -> brand name.
rkmbs_brand_df["Date"] = rkmbs_brand_df['YearMonth'].apply(format_yearmonth)
rkmbs_brand_df["Brand_Name"] = rkmbs_brand_df['SKU'].apply(get_brand_name)

# Constant classification columns for the RKMBS output.
rkmbs_brand_df['Cost_Type_1'] = 'Sales'
rkmbs_brand_df['Cost_Type_2'] = 'Sales RKMBS'
rkmbs_brand_df['Cost_Type_4'] = ''
rkmbs_brand_df['AIP'] = 'P'

# Final column order of the exported sheet.
rkmbs_brand_df = rkmbs_brand_df[['Date', 'Province_Code', 'Cost_Type_1', 'Cost_Type_2', 'Cost_Type_3', 'Cost_Type_4', 'AIP', 'Brand_Name', 'SKU', 'Spending_Value']]

In [None]:
# RKMBS result workbook, written to <project>/<temp folder>/.
# NOTE(review): "2020003" has seven digits - possibly meant "202003". Left
# unchanged here; confirm with downstream consumers before renaming.
output_file_path = "/".join([
    config["project_path"],
    config["temp_folder_path"],
    "2020003_RKMBS_Cost.xlsx",
])

# index=False keeps the DataFrame index out of the sheet.
rkmbs_brand_df.to_excel(output_file_path, index=False)