In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Central configuration: folder locations and workbook names used by the cells below.
config = {
    # folders (the project root is addressed relative to this notebook)
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'MDM_mapping': 'MDM_mapping.xlsx',
    'grade_mapping': 'PC_grade_mapping.xlsx',
    'date_mapping': 'PC_date_format_mapping.xlsx',
    'pc_input': 'PC Cost 202001-04.xlsx',
    'province_to_province_brand_file': '202001-05_province_to_province_brand_file.xlsx',

    # output files
    'pc_output': 'PC cost split 202001-04.xlsx',
}

## Read PC Cost Input

In [None]:
# Locate the PC cost workbook inside the data folder.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['pc_input'],
])

# Every column is read as text; the '金额' (amount) column is then copied into a
# numeric 'Spending Value' column so it can be summed later.
pc_input_df = pd.read_excel(input_file_path, dtype=str)
pc_input_df['Spending Value'] = pc_input_df['金额'].astype(float)

In [None]:
# Locate the date-format mapping workbook inside the data folder.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['date_mapping'],
])

# Read every column as text so the merge keys keep their original formatting.
date_mapping_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Locate the grade mapping workbook inside the data folder.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['grade_mapping'],
])

# Read every column as text so the merge keys keep their original formatting.
grade_mapping_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Locate the MDM province mapping workbook inside the data folder.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['MDM_mapping'],
])

# Read every column as text so the merge keys keep their original formatting.
mdm_input_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Enrich the raw PC cost rows with the three mapping tables. Left joins keep every
# input row, so a row without a match shows up with nulls in the mapped columns.
pc_df = (
    pc_input_df
    .merge(date_mapping_df, on='月份', how='left')      # join on '月份' (month)
    .merge(grade_mapping_df, on='Grade', how='left')
    .merge(mdm_input_df, left_on='省份', right_on='Province_name2', how='left')  # '省份' (province)
)

### Check for unmapped records

In [None]:
# Rows whose 'Date' is null after the merges (presumably no match in the date mapping).
pc_df.loc[pc_df['Date'].isna()]

In [None]:
# Rows whose 'Cost_type' is null after the merges (presumably no match in the grade mapping).
pc_df.loc[pc_df['Cost_type'].isna()]

In [None]:
# Rows whose 'MDM_province_code' is null after the merges (presumably no match in the MDM mapping).
pc_df.loc[pc_df['MDM_province_code'].isna()]

In [None]:
# Total spending per (Date, province code, cost type).
# Note: groupby drops rows with a null key by default, so unmapped rows are excluded here.
pc_monthly_df = (
    pc_df
    .groupby(by=['Date', 'MDM_province_code', 'Cost_type'])['Spending Value']
    .sum()
    .reset_index()
)

## Split to Brand

In [None]:
# Locate the province -> province/brand split workbook inside the temp folder.
# The file name is taken from the config cell (like every other input) instead of
# being repeated here, so it only has to be changed in one place.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['province_to_province_brand_file'],
])

# Read every column as text so the merge keys keep their original formatting.
prov_brand_split = pd.read_excel(input_file_path, dtype=str)

# The ratio was read as text; convert it to a number before it is used in arithmetic.
prov_brand_split['Split_Ratio'] = prov_brand_split['Split_Ratio'].astype('float')

In [None]:
# Expose 'Date' under the name 'YearMonth' so it can serve as a merge key with the split table.
pc_monthly_df = pc_monthly_df.assign(YearMonth=pc_monthly_df['Date'])

In [None]:
# Attach the split rows to each monthly province total. The left join keeps
# province/months that have no split row (their 'Split_Ratio' will be null).
pc_brand_df = pc_monthly_df.merge(
    prov_brand_split,
    how='left',
    on=['YearMonth', 'MDM_province_code'],
)

### Check for unmapped records

In [None]:
# Rows that found no split ratio in the province/brand split table.
pc_brand_df.loc[pc_brand_df['Split_Ratio'].isna()]

In [None]:
# Allocate each province total using its split ratio (a null ratio yields a null value).
pc_brand_df['Spending_Value'] = pc_brand_df['Spending Value'].mul(pc_brand_df['Split_Ratio'])

In [None]:
# Keep only the columns needed downstream, in this order.
pc_brand_df = pc_brand_df.loc[
    :,
    ['YearMonth', 'MDM_province_code', 'Cost_type', 'Brand_Name', 'Spending_Value'],
]

In [None]:
# Roll the province-level rows up across provinces and label the result as
# the pseudo-province 'National'.
pc_all_df = (
    pc_brand_df
    .groupby(by=['YearMonth', 'Cost_type', 'Brand_Name'])['Spending_Value']
    .sum()
    .reset_index()
    .assign(MDM_province_code='National')
)

In [None]:
# Stack the province-level rows on top of the 'National' roll-up.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; column order still follows the first frame (pc_brand_df).
pc_all_df = pd.concat([pc_brand_df, pc_all_df], ignore_index=True)

### Check for unmapped records

In [None]:
# Rows with a missing province code. NaN never compares equal to anything
# (including itself), so an `== np.NAN` test can never select a row; isnull()
# is the correct missing-value check (and np.NAN no longer exists in NumPy 2.0).
pc_all_df[pc_all_df['MDM_province_code'].isnull()]

In [None]:
# Levels 5, 4 and 3 all carry the same cost-type label; levels 2 and 1 are fixed totals.
# Columns are created in the order Level 5 -> Level 1.
for level_name in ('Level 5', 'Level 4', 'Level 3'):
    pc_all_df[level_name] = 'PC - ' + pc_all_df['Cost_type']
pc_all_df['Level 2'] = 'Total PC'
pc_all_df['Level 1'] = 'Total A&P'

#### Create all year-month, province, and KPI combinations for 2020

In [None]:
# Build one row per (month of 2020) x (distinct province / brand / level combination).
# The twelve monthly copies are created with assign() and stacked with a single
# pd.concat: DataFrame.append was removed in pandas 2.0, growing a frame inside a
# loop is quadratic, and assign() leaves `all_types` itself unmodified.
calendar_year = "2020"
key_columns = ['YearMonth', 'MDM_province_code', 'Brand_Name',
               'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']

# Distinct combinations of every key except the month.
all_types = pc_all_df[key_columns[1:]].drop_duplicates()

all_dates_map = pd.concat(
    [
        all_types.assign(YearMonth=calendar_year + str(month).zfill(2))
        for month in range(1, 13)
    ],
    ignore_index=True,
)[key_columns]  # put 'YearMonth' first

In [None]:
# Left-join the actual values onto the full month/province/brand/level grid, then
# replace every remaining null (in any column) with 0.
merge_keys = ['YearMonth', 'MDM_province_code', 'Brand_Name',
              'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']
output_df = (
    all_dates_map
    .merge(pc_all_df, on=merge_keys, how='left')
    .fillna(0)
)

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date falls on the first day of
    the month (the default `time.strptime` uses for a missing day).

    Args:
        year_month: text in '%Y%m' format, e.g. '202001'.

    Returns:
        The date formatted as '%Y/%m/%d', e.g. '2020/01/01'.

    Raises:
        ValueError: if `year_month` does not match the '%Y%m' format.
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Derive the display date from the 'YYYYMM' key, one value at a time.
output_df['Date'] = output_df['YearMonth'].apply(format_yearmonth)

In [None]:
# Select the report columns in their final order, then give three of them
# their report-facing names.
output_df = output_df[
    ['Date', 'Brand_Name', 'MDM_province_code',
     'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5',
     'Spending_Value']
].rename(columns={
    'Brand_Name': 'Brand',
    'MDM_province_code': 'Province',
    'Spending_Value': 'Value',
})

In [None]:
# The result workbook is written to the temp folder under the configured output name.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['pc_output'],
])

# index=False keeps the DataFrame's row index out of the sheet.
output_df.to_excel(output_file_path, index=False)