In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Folder layout and file names used by the notebook.
# Paths are assembled as <project_path>/<data or temp folder>/<file name>.
config = {
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'BHT input': 'CRM_data_0713.xlsx',
    'MDM_mapping': 'MDM_mapping.xlsx',

    # output files
    'CRM cost output': 'CRM cost 202001-04.xlsx',
    'CRM performance output': 'CRM performance 202001-04.xlsx',
}

## Read CRM

In [None]:
# Read the 'spending' sheet of the CRM workbook; every column is loaded as text.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['BHT input'],
])

spending_input_df = pd.read_excel(
    input_file_path,
    sheet_name='spending',
    dtype=str,
)
# Numeric copy of the spending amount; the raw text column is left in place.
spending_input_df['Value'] = spending_input_df['Spending_value'].astype('float')

In [None]:
# Read the 'KPI' sheet of the same CRM workbook; every column is loaded as text.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['BHT input'],
])

perform_input_df = pd.read_excel(
    input_file_path,
    sheet_name='KPI',
    dtype=str,
)
# Numeric copy of the KPI value; the raw text column is left in place.
perform_input_df['Value'] = perform_input_df['KPI_value'].astype('float')

In [None]:
def get_brand_name(brand):
    """Expand a brand code to its full brand name.

    'NC' becomes 'Nutrilon' and 'AN' becomes 'Aptamil'; any other value is
    returned unchanged.
    """
    full_names = {'NC': 'Nutrilon', 'AN': 'Aptamil'}
    return full_names.get(brand, brand)

# Replace brand codes with full brand names in both input tables.
spending_input_df['Brand'] = spending_input_df['Brand'].apply(get_brand_name)

perform_input_df['Brand'] = perform_input_df['Brand'].apply(get_brand_name)

In [None]:
# Read the MDM province mapping workbook; every column is loaded as text.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['MDM_mapping'],
])

mdm_input_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
def regulate_shannxi_shaanxi(province_code):
    """Rewrite the province code 'shaanxi' as 'shannxi'.

    Every other value is passed through untouched.
    """
    return 'shannxi' if province_code == 'shaanxi' else province_code

In [None]:
# Harmonise the province spelling, then attach the MDM mapping columns.
# The left join keeps every spending row, matched or not.
spending_input_df['Province'] = spending_input_df['Province'].apply(regulate_shannxi_shaanxi)
spending_df = spending_input_df.merge(
    mdm_input_df,
    how='left',
    left_on='Province',
    right_on='Province_code',
)

In [None]:
# Harmonise the province spelling, then attach the MDM mapping columns.
# The left join keeps every KPI row, matched or not.
perform_input_df['Province'] = perform_input_df['Province'].apply(regulate_shannxi_shaanxi)
perform_df = perform_input_df.merge(
    mdm_input_df,
    how='left',
    left_on='Province',
    right_on='Province_code',
)

### Check for un-mapped records

In [None]:
# Spending rows with no MDM province code after the merge (expected to be empty).
spending_df[spending_df['MDM_province_code'].isna()]

In [None]:
# KPI rows with no MDM province code after the merge (expected to be empty).
perform_df[perform_df['MDM_province_code'].isna()]

## Process CRM Investment

In [None]:
def get_aip_cost_type(aip):
    """Return the cost-type label for an AIP code.

    'A', 'I' and 'P' map to 'Total A Cost', 'Total I Cost' and 'Total P Cost';
    any other value yields 'Total AD/Non-working Cost'.
    """
    cost_labels = {
        'A': 'Total A Cost',
        'I': 'Total I Cost',
        'P': 'Total P Cost',
    }
    return cost_labels.get(aip, 'Total AD/Non-working Cost')

def get_aip_crm_type(aip):
    """Return the CRM-type label for an AIP code.

    'A', 'I' and 'P' map to 'Total A CRM', 'Total I CRM' and 'Total P CRM';
    any other value yields 'Total AD/Non-working Media'.
    """
    crm_labels = {
        'A': 'Total A CRM',
        'I': 'Total I CRM',
        'P': 'Total P CRM',
    }
    return crm_labels.get(aip, 'Total AD/Non-working Media')

In [None]:
# Build the five-level cost hierarchy: levels 1-2 are constants, levels 3-4
# are derived from the AIP code, level 5 is the Cost_type_2 column.
spending_df['Level 1'] = 'Total A&P'
spending_df['Level 2'] = 'Total CRM'
spending_df['Level 3'] = spending_df['AIP'].apply(get_aip_cost_type)
spending_df['Level 4'] = spending_df['AIP'].apply(get_aip_crm_type)
spending_df['Level 5'] = spending_df['Cost_type_2']
# From here on 'Province' holds the MDM province code instead of the input value.
spending_df['Province'] = spending_df['MDM_province_code']

In [None]:
# Keep only the columns that go into the cost output, in output order.
spending_df = spending_df.loc[:, [
    'Date', 'Brand', 'Province',
    'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5',
    'Value',
]]

#### Create all month, brand, province and cost-level combinations for 2020

In [None]:
# Build the full 2020 grid: every distinct (brand, province, cost levels)
# combination found in the spending data, repeated for each month 202001..202012.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the growing frame on every pass. Build the
# twelve monthly frames first and concatenate them once instead.
cost_key_columns = ['Date', 'Brand', 'Province', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']
all_types = spending_df[cost_key_columns[1:]].drop_duplicates()
all_dates_map = pd.concat(
    # assign() returns a new frame, so all_types itself is never mutated.
    [all_types.assign(Date='2020' + str(month).zfill(2)) for month in range(1, 13)],
    ignore_index=True,
)[cost_key_columns]  # put 'Date' first, matching the original column order

In [None]:
# Left-join the actual spending onto the full 2020 grid, then replace every
# missing cell (e.g. combinations with no spending record) with 0.
output_df = all_dates_map.merge(
    spending_df,
    how='left',
    on=['Date', 'Brand', 'Province', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5'],
).fillna(0)

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date falls on day 1 of the month:
    '202003' becomes '2020/03/01'.
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Rewrite the 'YYYYMM' month keys as 'YYYY/MM/DD' date strings.
output_df['Date'] = output_df['Date'].apply(format_yearmonth)

In [None]:
# Write the CRM cost table to the temp folder, without the index column.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['CRM cost output'],
])

output_df.to_excel(output_file_path, index=False)

## Process CRM Performance

In [None]:
# Keep only the key columns and the raw KPI value for the performance output.
perform_df = perform_df.loc[:, [
    'Date', 'Brand', 'MDM_province_code',
    'KPI', 'KPI_value',
]]

#### Create all month, brand, province and KPI combinations for 2020

In [None]:
# Build the full 2020 grid: every distinct (brand, province, KPI) combination
# found in the performance data, repeated for each month 202001..202012.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending inside a loop copies the growing frame on every pass. Build the
# twelve monthly frames first and concatenate them once instead.
kpi_key_columns = ['Date', 'Brand', 'MDM_province_code', 'KPI']
all_types = perform_df[kpi_key_columns[1:]].drop_duplicates()
all_dates_map = pd.concat(
    # assign() returns a new frame, so all_types itself is never mutated.
    [all_types.assign(Date='2020' + str(month).zfill(2)) for month in range(1, 13)],
    ignore_index=True,
)[kpi_key_columns]  # put 'Date' first, matching the original column order

In [None]:
# Left-join the actual KPI values onto the full 2020 grid, then replace every
# missing cell (e.g. combinations with no KPI record) with 0.
output_df = all_dates_map.merge(
    perform_df,
    how='left',
    on=['Date', 'Brand', 'MDM_province_code', 'KPI'],
).fillna(0)

In [None]:
# The performance output uses the same column layout as the cost output.
# A KPI has no hierarchy here, so its name is repeated on all five levels.
output_df['Level 1'] = output_df['KPI']
output_df['Level 2'] = output_df['KPI']
output_df['Level 3'] = output_df['KPI']
output_df['Level 4'] = output_df['KPI']
output_df['Level 5'] = output_df['KPI']
output_df['Province'] = output_df['MDM_province_code']
# 'KPI_value' was read from Excel as text (dtype=str) and its gaps were filled
# with the integer 0, so the column mixes strings and ints. Cast to float so
# 'Value' is exported as a number, like 'Value' in the CRM cost output.
output_df['Value'] = output_df['KPI_value'].astype('float')

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date falls on day 1 of the month:
    '202003' becomes '2020/03/01'.
    """
    # NOTE(review): same definition as in the CRM investment section; this
    # re-definition simply shadows it with identical behavior.
    month_start = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', month_start)

# Rewrite the 'YYYYMM' month keys as 'YYYY/MM/DD' date strings.
output_df['Date'] = output_df['Date'].apply(format_yearmonth)

In [None]:
# Keep only the output columns, in the same order as the cost output.
output_df = output_df.loc[:, [
    'Date', 'Brand', 'Province',
    'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5',
    'Value',
]]

In [None]:
# Write the CRM performance table to the temp folder, without the index column.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['CRM performance output'],
])

output_df.to_excel(output_file_path, index=False)