In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Folder and file names used by the load and export cells of this notebook.
config = {
    # folders (data/temp are joined underneath project_path when paths are built)
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'MDM_mapping': 'MDM_mapping.xlsx',
    'media_input': 'MediaData_0707.xlsx',

    # output files
    'media_output': 'Media cost split 2020Q1.xlsx',
}

## Read Input

In [None]:
# Path of the media spend workbook: <project>/<data folder>/<media input file>.
input_file_path = '/'.join((
    config['project_path'],
    config['data_folder_path'],
    config['media_input'],
))

# Read every column as text, then lower-case Province and convert
# Spending_value to float so it can be summed later.
media_input_df = pd.read_excel(input_file_path, dtype=str)
media_input_df = media_input_df.assign(
    Province=media_input_df['Province'].str.lower(),
    Spending_value=media_input_df['Spending_value'].astype('float'),
)

In [None]:
# Path of the MDM mapping workbook: <project>/<data folder>/<mapping file>.
input_file_path = '/'.join((
    config['project_path'],
    config['data_folder_path'],
    config['MDM_mapping'],
))

# All columns are read as text (dtype=str).
mdm_input_df = pd.read_excel(input_file_path, dtype=str)

## Process input

In [None]:
# Left join: keep every media row and attach the mapping columns where
# Province (media side) equals Province_code (mapping side).
media_df = media_input_df.merge(
    mdm_input_df,
    how='left',
    left_on='Province',
    right_on='Province_code',
)

In [None]:
# Display rows whose MDM_province_code is null after the merge
# (presumably provinces with no match in the mapping file — verify).
media_df.loc[media_df['MDM_province_code'].isna()]

In [None]:
# Cost_Type is "<AIP> - <Cost_type_2>".
cost_type = media_df['AIP'] + ' - ' + media_df['Cost_type_2']
media_df = media_df.assign(Cost_Type=cost_type)

In [None]:
# Total spend per (Date, province code, cost type, AIP).
# NOTE: groupby drops rows with a null in any key by default (dropna=True),
# so rows without an MDM_province_code or Cost_Type are excluded from here on.
group_keys = ['Date', 'MDM_province_code', 'Cost_Type', 'AIP']
spend_by_group = media_df.groupby(by=group_keys)['Spending_value'].sum()
media_df = spend_by_group.reset_index()

In [None]:
# Switch to the output naming; 'Date' and 'AIP' keep their names.
media_df = media_df.rename(columns={
    'MDM_province_code': 'Province Code',
    'Cost_Type': 'Level 5',
    'Spending_value': 'Value',
})

In [None]:
def get_aip_cost_type(aip):
    """Return the cost-type label for an AIP code.

    'A', 'I' and 'P' map to 'Total A Cost', 'Total I Cost' and
    'Total P Cost' respectively; any other value yields ''.
    """
    labels = (
        ('A', 'Total A Cost'),
        ('I', 'Total I Cost'),
        ('P', 'Total P Cost'),
    )
    for code, label in labels:
        if aip == code:
            return label
    return ''

def get_aip_media_type(aip):
    """Return the media-type label for an AIP code.

    'A' maps to 'Total A Media' and 'I' to 'Total I Media'; every other
    value (including 'P') yields 'Total Media'.
    """
    # NOTE(review): 'P' has no dedicated label and falls through to the
    # default — confirm this is intended.
    labels = (
        ('A', 'Total A Media'),
        ('I', 'Total I Media'),
    )
    for code, label in labels:
        if aip == code:
            return label
    return 'Total Media'

# Add the hierarchy columns: Levels 1-2 and Brand are constants,
# Levels 3-4 are derived from each row's AIP code.
media_df = media_df.assign(**{
    'Level 1': "Total A&P",
    'Level 2': "Total Media",
    'Level 3': media_df['AIP'].apply(get_aip_cost_type),
    'Level 4': media_df['AIP'].apply(get_aip_media_type),
    'Brand': 'Aptamil',
})

In [None]:
# National totals: sum Value across provinces for each date/brand/level
# combination and label the result with the pseudo-province 'National'.
national_keys = ['Date', 'Brand', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']
media_nati_df = (
    media_df
    .groupby(by=national_keys)['Value']
    .sum()
    .reset_index()
    .assign(**{'Province Code': 'National'})
)

In [None]:
# Province-level rows, restricted to the output columns (drops 'AIP').
media_res = media_df[['Date', 'Brand', 'Province Code', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5', 'Value']]

# Stack the national totals underneath the province rows.
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
# (columns are aligned by name, row index renumbered from 0).
media_res = pd.concat([media_res, media_nati_df], ignore_index=True)

#### Create every month, province, and KPI combination for 2020

In [None]:
# Build the full grid of (month of 2020) x (distinct brand/province/level
# combination) found in media_res.
grid_columns = ['Date', 'Brand', 'Province Code', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']
all_types = media_res[['Brand', 'Province Code', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']].drop_duplicates()

# One copy of the distinct combinations per month, stamped with its
# 'YYYYMM' key ("202001" .. "202012").
monthly_frames = [
    all_types.assign(Date="2020" + str(month).zfill(2))
    for month in range(1, 13)
]

# DataFrame.append was removed in pandas 2.0 (and re-copied the growing frame
# on every loop iteration); concatenate once instead. The explicit column
# selection keeps 'Date' first, which later positional renaming relies on.
all_dates_map = pd.concat(monthly_frames, ignore_index=True)[grid_columns]

In [None]:
# Attach the observed values to the full grid; combinations with no matching
# row get NaN from the left join, which is then replaced by 0.
merge_keys = ['Date', 'Brand', 'Province Code', 'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5']
output_df = all_dates_map.merge(media_res, how='left', on=merge_keys).fillna(0)

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string to 'YYYY/MM/DD'.

    The input carries no day, so the parsed date falls on the first of the
    month (e.g. '202003' -> '2020/03/01').
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Reformat the 'YYYYMM' keys as 'YYYY/MM/DD' dates.
output_df['Date'] = output_df['Date'].apply(format_yearmonth)

# Final header: only 'Province Code' changes name (to 'Province').
output_df = output_df.rename(columns={'Province Code': 'Province'})

In [None]:
# Destination workbook: <project>/<temp folder>/<media output file>.
output_file_path = '/'.join((
    config['project_path'],
    config['temp_folder_path'],
    config['media_output'],
))

# Write the result without the row index.
output_df.to_excel(output_file_path, index=False)