In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Central place for every path and file name used by this notebook.
config = {
    # folder layout, relative to the notebook
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'MDM_mapping': 'MDM_mapping.xlsx',
    'grade_mapping': 'PC_grade_mapping.xlsx',
    'date_mapping': 'PC_date_format_mapping.xlsx',
    'pc_input': 'PC Cost 202001-04.xlsx',
    'province_to_province_brand_file': '202001-05_province_to_province_brand_file.xlsx',

    # output files
    'spending_PC': 'spending PC 202001-04.xlsx',
}

## Read PC Cost Input

In [None]:
# Load the raw PC cost workbook from the data folder.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['pc_input'],
])

# Every column is read as text; the amount column ('金额') is then converted
# to a numeric 'Spending Value' column so it can be summed later.
pc_input_df = pd.read_excel(input_file_path, dtype=str)
pc_input_df['Spending Value'] = pc_input_df['金额'].astype(float)

In [None]:
# Load the date-format mapping table (all columns as text).
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['date_mapping'],
])

date_mapping_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Load the grade mapping table (all columns as text).
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['grade_mapping'],
])

grade_mapping_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Load the MDM province mapping table (all columns as text).
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['MDM_mapping'],
])

mdm_input_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Enrich the raw cost rows with the three mapping tables. All joins are left
# joins, so every input row is kept; rows without a match get NaN in the
# mapped columns.
pc_df = (
    pc_input_df
    # month column ('月份') -> date mapping
    .merge(date_mapping_df, on='月份', how='left')
    # grade -> grade mapping
    .merge(grade_mapping_df, on='Grade', how='left')
    # province column ('省份') -> MDM mapping, matched on Province_name2
    .merge(mdm_input_df, left_on='省份', right_on='Province_name2', how='left')
)

### Check for un-mapped records

In [None]:
# Rows whose month found no match in the date mapping.
pc_df.loc[pc_df['Date'].isna()]

In [None]:
# Rows that have no Cost_type after the mapping merges.
pc_df.loc[pc_df['Cost_type'].isna()]

In [None]:
# Rows that have no MDM_province_code after the mapping merges.
pc_df.loc[pc_df['MDM_province_code'].isna()]

In [None]:
# Total spending per date / province / cost type.
# NOTE: groupby drops rows whose key is NaN by default, so any record that is
# still un-mapped at this point is silently left out of these totals.
pc_monthly_df = (
    pc_df
    .groupby(['Date', 'MDM_province_code', 'Cost_type'])['Spending Value']
    .sum()
    .reset_index()
)

## Split to Brand

In [None]:
# Load the province -> brand split ratios from the temp folder.
# The file name is taken from the config cell rather than repeated here, so
# switching to another reporting period needs only one edit.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['province_to_province_brand_file'],
])

prov_brand_split = pd.read_excel(input_file_path, dtype=str)

# Everything is read as text; the ratio has to be numeric to scale spending.
prov_brand_split['Split_Ratio'] = prov_brand_split['Split_Ratio'].astype('float')

In [None]:
# Expose 'Date' under the name 'YearMonth' as well.
pc_monthly_df = pc_monthly_df.assign(YearMonth=pc_monthly_df['Date'])

In [None]:
# Attach the brand split rows to each monthly province total (left join, so
# totals without a matching split row are kept with NaN in the split columns).
pc_brand_df = pc_monthly_df.merge(
    prov_brand_split,
    on=['YearMonth', 'MDM_province_code'],
    how='left',
)

### Check for un-mapped records

In [None]:
# Monthly province totals that found no split ratio.
pc_brand_df.loc[pc_brand_df['Split_Ratio'].isna()]

In [None]:
# Scale each total by its split ratio; rows without a ratio yield NaN.
pc_brand_df['Spending_Value'] = pc_brand_df['Spending Value'].mul(pc_brand_df['Split_Ratio'])

In [None]:
# Keep only the key columns and the brand-level spending value.
pc_brand_df = pc_brand_df.loc[
    :,
    ['YearMonth', 'MDM_province_code', 'Cost_type', 'Brand', 'Spending_Value'],
]

In [None]:
# Sum over all provinces per month / cost type / brand, and label the result
# with the pseudo province code 'National'.
pc_all_df = (
    pc_brand_df
    .groupby(['YearMonth', 'Cost_type', 'Brand'])['Spending_Value']
    .sum()
    .reset_index()
    .assign(MDM_province_code='National')
)

In [None]:
# Stack the province-level rows and the 'National' totals into one table.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; with ignore_index=True the result gets a fresh
# 0..n-1 index, as before.
output_df = pd.concat([pc_brand_df, pc_all_df], ignore_index=True)

### Check for un-mapped records

In [None]:
# Rows with a missing province code. isnull() is required here: an equality
# comparison against NaN is always False, so it would never select a row.
output_df[output_df['MDM_province_code'].isnull()]

In [None]:
# Add the constant classification columns. Cost_type_2 is the existing
# Cost_type prefixed with 'PC - '; Cost_type_3 and Cost_type_4 are left blank.
output_df = output_df.assign(
    Cost_type_3='',
    Cost_type_4='',
    Cost_type_2='PC - ' + output_df['Cost_type'],
    Cost_type_1='PC',
    AIP='P',
)

In [None]:
# Put the columns in their final order, then give three of them their
# output names; the remaining columns keep their names.
output_df = (
    output_df[[
        'YearMonth', 'MDM_province_code',
        'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4',
        'AIP', 'Brand', 'Spending_Value',
    ]]
    .rename(columns={
        'YearMonth': 'Date',
        'MDM_province_code': 'Province',
        'Spending_Value': 'Spending_value',
    })
)

In [None]:
# Write the final table to the temp folder, without the index column.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_PC'],
])

output_df.to_excel(output_file_path, index=False)