In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Notebook settings: folder names plus the input/output workbook names that the
# cells below join into file paths.
config = {
    # folder layout
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",

    # input files
    'nielsen_input': 'Nielsen 202001-05.xlsx',
    'performance_offtake': 'Performance Danone offtake 202001-05.xlsx',
    'performance_search_index': 'performance search index 202001-04.xlsx',
    'BHT input': 'BHT result.xlsx',
    'spending_trade_support': 'spending trade support 202001-04.xlsx',
    'spending_CRM': 'spending CRM 202001-04.xlsx',
    'spending_media': 'spending media 2020Q1.xlsx',
    'spending_channel': 'Spending Channel AnP 202001-04.xlsx',
    'spending_PC': 'spending PC 202001-04.xlsx',

    # output files
    'view_2_output': '2020 Province ROI.xlsx',
}

In [None]:
def get_brand_name(SKU):
    """Translate a SKU code into its brand name.

    'AC', 'AP' and 'AN' map to 'Aptamil' and 'NC' maps to 'Nutrilon'.
    Any other value is returned unchanged.
    """
    aptamil_codes = ('AC', 'AP', 'AN')
    if SKU in aptamil_codes:
        return 'Aptamil'
    return 'Nutrilon' if SKU == 'NC' else SKU

In [None]:
# Column layout shared by every source that is added to the view-2 frame.
view2_columns = [
    'Date', 'Brand', 'Province',
    'Type 1', 'Type 2', 'Type 3',
    'Value',
]
# Start with an empty frame; each section below adds its own rows.
view2_df = pd.DataFrame(columns=view2_columns)

## Nielsen

In [None]:
# Read the Nielsen workbook from the data folder. Every column is read as text
# (dtype=str); only the KPI value is converted to a float.
input_file_path = config['project_path'] + '/' \
                        + config['data_folder_path'] + '/' \
                        + config['nielsen_input']

input_df = pd.read_excel(input_file_path, dtype=str)
input_df['KPI_value'] = input_df['KPI_value'].astype('float')
# Drop the rows whose Province is 'National'.
input_df = input_df[input_df['Province'] != 'National']

# WTD rows: the KPI label gets the brand appended, e.g. "WTD - RI (<Brand>)".
wtd_df = input_df[input_df['KPI'].isin(['WTD - RI', 'WTD - MBS'])].copy()
wtd_df['KPI'] = wtd_df['KPI'] + ' (' + wtd_df['Brand'] + ')'
# Market share rows: the KPI label becomes "Market Share - <Brand>".
share_df = input_df[input_df['KPI'] == 'Nielsen Value Market Share'].copy()
share_df['KPI'] = 'Market Share - ' + share_df['Brand']

# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
# (share rows first, then WTD rows, with a fresh 0..n-1 index).
nielsen_df = pd.concat([share_df, wtd_df], ignore_index=True)

In [None]:
# Shape the Nielsen rows into the view-2 layout: the KPI label is repeated at
# all three type levels and the brand code is mapped to a brand name.
nielsen_df = nielsen_df.assign(
    Value=nielsen_df['KPI_value'].astype('float'),
    Brand=nielsen_df['Brand'].apply(get_brand_name),
    **{level: nielsen_df['KPI'] for level in ('Type 1', 'Type 2', 'Type 3')},
)[view2_columns]

In [None]:
# Add the Nielsen rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, nielsen_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
nielsen_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## Danone Offtake

In [None]:
# Read the offtake workbook from the temp folder with every column as text,
# then convert the KPI value to a float.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['performance_offtake'],
])

input_df = pd.read_excel(input_file_path, dtype=str).assign(
    KPI_value=lambda frame: frame['KPI_value'].astype('float')
)

In [None]:
# Keep the non-'National' rows and map brand codes to brand names.
offtake_df = input_df.loc[input_df['Province'] != 'National'].copy()
offtake_df['Brand'] = offtake_df['Brand'].apply(get_brand_name)

# Sum the KPI value per date, province, brand and KPI.
offtake_df = (
    offtake_df
    .groupby(by=['Date', 'Province', 'Brand', 'KPI'])['KPI_value']
    .sum()
    .reset_index()
)

In [None]:
# Shape the offtake rows into the view-2 layout; the KPI label fills all three
# type levels.
offtake_df = offtake_df.assign(
    Value=offtake_df['KPI_value'].astype('float'),
    **{level: offtake_df['KPI'] for level in ('Type 1', 'Type 2', 'Type 3')},
)[view2_columns]

In [None]:
# Add the offtake rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, offtake_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
offtake_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## BHT

In [None]:
# Read the BHT result workbook from the data folder with every column as text,
# then convert the KPI value to a float.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['BHT input'],
])
input_df = pd.read_excel(input_file_path, dtype=str).assign(
    KPI_value=lambda frame: frame['KPI_value'].astype('float')
)

# Keep the non-'National' rows and map brand codes to brand names.
bht_df = input_df.loc[input_df['Province'] != 'National'].copy()
bht_df['Brand'] = bht_df['Brand'].apply(get_brand_name)

In [None]:
# Shape the BHT rows into the view-2 layout; the KPI label fills all three
# type levels.
bht_df = bht_df.assign(
    Value=bht_df['KPI_value'].astype('float'),
    **{level: bht_df['KPI'] for level in ('Type 1', 'Type 2', 'Type 3')},
)[view2_columns]

In [None]:
# Add the BHT rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, bht_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
bht_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## Trade Support

In [None]:
# Read the trade support spending workbook from the temp folder with every
# column as text; the numeric spending is stored in a new 'Value' column.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_trade_support'],
])
input_df = pd.read_excel(input_file_path, dtype=str).assign(
    Value=lambda frame: frame['Spending_value'].astype('float')
)

In [None]:
# Keep the non-'National' rows and map brand codes to brand names.
ts_df = input_df.loc[input_df['Province'] != 'National'].copy()
ts_df['Brand'] = ts_df['Brand'].apply(get_brand_name)

# Sum the spending per date, province, cost type, AIP and brand.
ts_df = (
    ts_df
    .groupby(by=['Date', 'Province', 'Cost_type_1', 'AIP', 'Brand'])['Value']
    .sum()
    .reset_index()
)

In [None]:
# Shape the trade support rows into the view-2 layout: only the first type
# level is populated, the other two are empty strings.
ts_df = ts_df.assign(**{
    'Type 1': ts_df['Cost_type_1'],
    'Type 2': '',
    'Type 3': '',
})[view2_columns]

In [None]:
# Add the trade support rows to the combined view. DataFrame.append was removed
# in pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, ts_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
ts_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## PC

In [None]:
# Read the PC spending workbook from the temp folder with every column as
# text; the numeric spending is stored in a new 'Value' column.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_PC'],
])
input_df = pd.read_excel(input_file_path, dtype=str).assign(
    Value=lambda frame: frame['Spending_value'].astype('float')
)

In [None]:
# Keep the non-'National' rows and map brand codes to brand names.
pc_df = input_df.loc[input_df['Province'] != 'National'].copy()
pc_df['Brand'] = pc_df['Brand'].apply(get_brand_name)

# Sum the spending per date, province, both cost types, AIP and brand.
pc_df = (
    pc_df
    .groupby(by=['Date', 'Province', 'Cost_type_1', 'Cost_type_2', 'AIP', 'Brand'])['Value']
    .sum()
    .reset_index()
)

In [None]:
# Shape the PC rows into the view-2 layout: fixed labels for the first two
# type levels, the second cost type as the third level.
pc_df = pc_df.assign(**{
    'Type 1': 'Total A&P',
    'Type 2': 'Total PC',
    'Type 3': pc_df['Cost_type_2'],
})[view2_columns]

In [None]:
# Add the PC rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, pc_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
pc_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## CRM

In [None]:
# Read the CRM spending workbook from the temp folder with every column as
# text; the numeric spending is stored in a new 'Value' column.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_CRM'],
])

input_df = pd.read_excel(input_file_path, dtype=str).assign(
    Value=lambda frame: frame['Spending_value'].astype('float')
)

In [None]:
# Keep the non-'National' rows and map brand codes to brand names.
spen_crm_df = input_df.loc[input_df['Province'] != 'National'].copy()
spen_crm_df = spen_crm_df.assign(
    Brand=spen_crm_df['Brand'].apply(get_brand_name)
)

In [None]:
# Shape the CRM rows into the view-2 layout: fixed labels for the first two
# type levels, the second cost type as the third level.
spen_crm_df = spen_crm_df.assign(**{
    'Type 1': 'Total A&P',
    'Type 2': 'Total CRM',
    'Type 3': spen_crm_df['Cost_type_2'],
})[view2_columns]

In [None]:
# Add the CRM rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, spen_crm_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
spen_crm_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## Media

In [None]:
# Read the media spending workbook from the temp folder with every column as
# text; the numeric spending is stored in a new 'Value' column.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_media'],
])

input_df = pd.read_excel(input_file_path, dtype=str).assign(
    Value=lambda frame: frame['Spending_value'].astype('float')
)

# Keep the non-'National' rows, map brand codes to brand names and prefix the
# second cost type with the AIP value ("<AIP> - <Cost_type_2>").
spen_media_df = input_df.loc[input_df['Province'] != 'National'].copy()
spen_media_df = spen_media_df.assign(
    Brand=spen_media_df['Brand'].apply(get_brand_name),
    Cost_type_2=spen_media_df['AIP'] + ' - ' + spen_media_df['Cost_type_2'],
)

In [None]:
# Sum the media spending per date, province, both cost types and brand.
spen_media_df = (
    spen_media_df
    .groupby(by=['Date', 'Province', 'Cost_type_1', 'Cost_type_2', 'Brand'])['Value']
    .sum()
    .reset_index()
)

In [None]:
# Shape the media rows into the view-2 layout: fixed labels for the first two
# type levels, the second cost type as the third level.
spen_media_df = spen_media_df.assign(**{
    'Type 1': 'Total A&P',
    'Type 2': 'Total Media',
    'Type 3': spen_media_df['Cost_type_2'],
})[view2_columns]

In [None]:
# Add the media rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, spen_media_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
spen_media_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## Channel

In [None]:
# Read the channel spending workbook from the temp folder with every column
# as text.
input_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['spending_channel'],
])

input_df = pd.read_excel(input_file_path, dtype=str)

# Keep the non-'National' rows, store the numeric spending in 'Value' and map
# brand codes to brand names.
channel_df = input_df.loc[input_df['Province'] != 'National'].copy()
channel_df = channel_df.assign(
    Value=channel_df['Spending_value'].astype('float'),
    Brand=channel_df['Brand'].apply(get_brand_name),
)

In [None]:
# Sum the channel spending per date, province, both cost types and brand.
channel_df = (
    channel_df
    .groupby(by=['Date', 'Province', 'Cost_type_1', 'Cost_type_2', 'Brand'])['Value']
    .sum()
    .reset_index()
)

In [None]:
# Shape the channel rows into the view-2 layout: fixed labels for the first
# two type levels, the second cost type as the third level.
channel_df = channel_df.assign(**{
    'Type 1': 'Total A&P',
    'Type 2': 'Total Channel',
    'Type 3': channel_df['Cost_type_2'],
})[view2_columns]

In [None]:
# Add the channel rows to the combined view. DataFrame.append was removed in
# pandas 2.0, so pd.concat is used instead.
view2_df = pd.concat([view2_df, channel_df], ignore_index=True)
# Cell output: the distinct type labels contributed by this source.
channel_df[['Type 1','Type 2','Type 3']].drop_duplicates()

## Create view 2 output

In [None]:
# Replace every missing value with an empty string. This applies to all
# columns, 'Value' included.
# NOTE(review): a missing 'Value' therefore becomes '' rather than 0 in the
# final output - confirm that this is intended.
view2_df = view2_df.fillna('')

# Distinct (Province, Type 1, Type 2, Type 3) combinations seen for each of
# the two brands; rows of any other brand are not carried over.
an_df = view2_df[view2_df['Brand'] == 'Aptamil'][['Province', 'Type 1', 'Type 2', 'Type 3']].drop_duplicates()
nc_df = view2_df[view2_df['Brand'] == 'Nutrilon'][['Province', 'Type 1', 'Type 2', 'Type 3']].drop_duplicates()
an_df['Brand'] = 'Aptamil'
nc_df['Brand'] = 'Nutrilon'
# DataFrame.append was removed in pandas 2.0; pd.concat keeps the same row
# order (Aptamil first, then Nutrilon) with a fresh index.
all_df = pd.concat([an_df, nc_df], ignore_index=True)

In [None]:
# Repeat every brand/province/type combination for each month of 2020
# ('202001' .. '202012'). One concat over per-month copies replaces the removed
# DataFrame.append call inside a loop, and all_df itself is no longer modified.
all_dates_map = pd.concat(
    [all_df.assign(Date="2020" + str(month).zfill(2)) for month in range(1, 13)],
    ignore_index=True,
)[['Date', 'Brand', 'Province', 'Type 1', 'Type 2', 'Type 3']]

In [None]:
# Left-join the collected values onto the full month grid; combinations with
# no matching row get a Value of 0.
output_df = all_dates_map.merge(
    view2_df,
    how='left',
    on=['Date', 'Brand', 'Province', 'Type 1', 'Type 2', 'Type 3'],
).fillna({'Value': 0})

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date uses day 1 and the result
    always ends in '/01'.
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Rewrite the 'YYYYMM' dates as 'YYYY/MM/DD' and fix the output column order.
output_df = output_df.assign(
    Date=output_df['Date'].apply(format_yearmonth)
)[['Date', 'Brand', 'Province', 'Type 1', 'Type 2', 'Type 3', 'Value']]

In [None]:
# Write the finished view to the temp folder as an Excel workbook, without the
# index column.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['view_2_output'],
])

output_df.to_excel(output_file_path, index=False)