In [1]:
import pandas as pd
import operator
import time
import numpy as np

In [2]:
# Central configuration: folder layout plus the workbook file names used by
# the cells below. Paths are assembled later as project/<folder>/<file>.
config = {
    # folders
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",

    # input files
    'nielsen_input': 'Nielsen 202001-05.xlsx',
    'performance_offtake': 'Performance Danone offtake 202001-05.xlsx',
    'performance_CRM': 'performance CRM 202001-04.xlsx',
    'performance_search_index': 'performance search index 202001-04.xlsx',
    'BHT input': 'BHT result.xlsx',
    'spending_CRM': 'spending CRM 202001-04.xlsx',
    'spending_media': 'spending media 2020Q1.xlsx',
    'spending_channel': 'Spending Channel AnP 202001-04.xlsx',

    # output files
    'view_3_output': 'province_investment&Performance.xlsx',
}

In [3]:
def get_brand_name(SKU):
    """Translate an SKU code into its brand name.

    'AC', 'AP' and 'AN' map to 'Aptamil' and 'NC' maps to 'Nutrilon'.
    Any other value is returned unchanged.
    """
    aptamil_codes = ('AC', 'AP', 'AN')
    if SKU in aptamil_codes:
        return 'Aptamil'
    if SKU == 'NC':
        return 'Nutrilon'
    # Unknown codes (including values that are already brand names) pass through.
    return SKU

In [4]:
# Column layout shared by every frame that is stacked into view3_df.
view3_columns = [
    'Date', 'Province',
    'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4',
    'AIP', 'Brand', 'SKU', 'Spending_value',
]
# Empty accumulator; each section below adds its rows to it.
view3_df = pd.DataFrame(columns=view3_columns)

## Nielsen

In [5]:
# Location of the Nielsen workbook: <project>/<data folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['data_folder_path'],
                            config['nielsen_input']))

# Read every column as text, then convert the KPI values to floats.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['KPI_value'] = input_df['KPI_value'].astype('float')
# Keep the original Brand value under SKU; Brand is re-derived from it later.
input_df['SKU'] = input_df['Brand']

# Only two KPIs are kept; the market-share KPI is relabelled 'Market Share'.
iffo_df = input_df[input_df['KPI'] == 'IFFO Share'].copy()
share_df = input_df[input_df['KPI'] == 'Nielsen Value Market Share'].copy()
share_df['KPI'] = 'Market Share'

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
nielsen_df = pd.concat([iffo_df, share_df], ignore_index=True)

In [6]:
# Derive the brand from the SKU code, then total KPI_value per
# Date / Province / Brand / KPI combination.
nielsen_df['Brand'] = nielsen_df['SKU'].apply(get_brand_name)
nielsen_group_keys = ['Date', 'Province', 'Brand', 'KPI']
nielsen_totals = nielsen_df.groupby(by=nielsen_group_keys)['KPI_value'].sum()
nielsen_df = nielsen_totals.reset_index()

In [7]:
# Reshape into the view3 layout: the KPI value goes into Spending_value and
# the KPI name is repeated across all four Cost_type columns.
nielsen_df['Spending_value'] = nielsen_df['KPI_value'].astype('float')
for cost_col in ('Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4'):
    nielsen_df[cost_col] = nielsen_df['KPI']
# AIP and SKU are left blank for these rows.
for blank_col in ('AIP', 'SKU'):
    nielsen_df[blank_col] = ''

nielsen_df = nielsen_df[view3_columns]

In [8]:
# Add the Nielsen rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, nielsen_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
nielsen_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,IFFO Share,IFFO Share,IFFO Share,IFFO Share
1,Market Share,Market Share,Market Share,Market Share


## Danone Offtake

In [9]:
# Location of the Danone offtake workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['performance_offtake']))

# Read every column as text, convert KPI_value to float and keep the
# original Brand value under SKU.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df = input_df.assign(
    KPI_value=input_df['KPI_value'].astype('float'),
    SKU=input_df['Brand'],
)

In [10]:
# Derive the brand from the SKU code, then total KPI_value per
# Date / Province / Brand / KPI combination.
input_df['Brand'] = input_df['SKU'].apply(get_brand_name)
offtake_group_keys = ['Date', 'Province', 'Brand', 'KPI']
offtake_totals = input_df.groupby(by=offtake_group_keys)['KPI_value'].sum()
offtake_df = offtake_totals.reset_index()

In [11]:
# Reshape into the view3 layout: the KPI value goes into Spending_value and
# the KPI name is repeated across all four Cost_type columns.
offtake_df['Spending_value'] = offtake_df['KPI_value'].astype('float')
for cost_col in ('Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4'):
    offtake_df[cost_col] = offtake_df['KPI']
# AIP and SKU are left blank for these rows.
for blank_col in ('AIP', 'SKU'):
    offtake_df[blank_col] = ''

offtake_df = offtake_df[view3_columns]

In [12]:
# Add the offtake rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, offtake_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
offtake_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,Danone Offtake,Danone Offtake,Danone Offtake,Danone Offtake


## CRM Performance

In [13]:
# Location of the CRM performance workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['performance_CRM']))

# Read every column as text, convert KPI_value to float and keep the
# original Brand value under SKU.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['KPI_value'] = input_df['KPI_value'].astype('float')
input_df['SKU'] = input_df['Brand']

# Keep only the CRM KPIs of interest.
crm_kpis = [
    'CRM Leads Offline',
    'CRM Leads Online',
    'CRM NU Offline',
    'CRM NU Online',
    'Active Leads',
]
perf_crm_df = input_df[input_df['KPI'].isin(crm_kpis)].copy()

In [14]:
# Combined 'CRM NU' rows: offline + online NU summed per Date / Province / SKU.
nu_df = input_df[input_df['KPI'].isin(['CRM NU Offline', 'CRM NU Online'])].copy()
nu_df = nu_df.groupby(by = ['Date', 'Province', 'SKU'])['KPI_value'].sum().reset_index()
nu_df['KPI'] = 'CRM NU'

# Combined 'CRM Leads' rows: offline + online leads summed the same way.
leads_df = input_df[input_df['KPI'].isin(['CRM Leads Offline', 'CRM Leads Online'])].copy()
leads_df = leads_df.groupby(by = ['Date', 'Province', 'SKU'])['KPI_value'].sum().reset_index()
leads_df['KPI'] = 'CRM Leads'

# Stack the combined rows under the per-channel rows. pd.concat replaces the
# chained DataFrame.append calls (append was removed in pandas 2.0); the
# original row indexes are kept, as before.
perf_crm_df = pd.concat([perf_crm_df, nu_df, leads_df])

In [15]:
# Derive the brand from the SKU code, then total KPI_value per
# Date / Province / Brand / KPI combination.
perf_crm_df['Brand'] = perf_crm_df['SKU'].apply(get_brand_name)
perf_crm_group_keys = ['Date', 'Province', 'Brand', 'KPI']
perf_crm_totals = perf_crm_df.groupby(by=perf_crm_group_keys)['KPI_value'].sum()
perf_crm_df = perf_crm_totals.reset_index()

In [16]:
# Reshape into the view3 layout: the KPI value goes into Spending_value and
# the KPI name is repeated across all four Cost_type columns.
perf_crm_df['Spending_value'] = perf_crm_df['KPI_value'].astype('float')
for cost_col in ('Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4'):
    perf_crm_df[cost_col] = perf_crm_df['KPI']
# AIP and SKU are left blank for these rows.
for blank_col in ('AIP', 'SKU'):
    perf_crm_df[blank_col] = ''

perf_crm_df = perf_crm_df[view3_columns]

In [17]:
# Add the CRM performance rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, perf_crm_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
perf_crm_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,CRM Leads,CRM Leads,CRM Leads,CRM Leads
1,CRM Leads Offline,CRM Leads Offline,CRM Leads Offline,CRM Leads Offline
2,CRM NU,CRM NU,CRM NU,CRM NU
3,CRM NU Offline,CRM NU Offline,CRM NU Offline,CRM NU Offline
72,Active Leads,Active Leads,Active Leads,Active Leads
75,CRM Leads Online,CRM Leads Online,CRM Leads Online,CRM Leads Online
78,CRM NU Online,CRM NU Online,CRM NU Online,CRM NU Online


## Search Index

In [18]:
# Location of the search-index workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['performance_search_index']))

# Read every column as text, convert KPI_value to float and keep the
# original Brand value under SKU.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['KPI_value'] = input_df['KPI_value'].astype('float')
input_df['SKU'] = input_df['Brand']

# Derive the brand from the SKU code, then total KPI_value per
# Date / Province / Brand / KPI combination.
input_df['Brand'] = input_df['SKU'].apply(get_brand_name)
search_group_keys = ['Date', 'Province', 'Brand', 'KPI']
search_totals = input_df.groupby(by=search_group_keys)['KPI_value'].sum()
perf_si_df = search_totals.reset_index()

In [19]:
# Reshape into the view3 layout: the KPI value goes into Spending_value and
# the KPI name is repeated across all four Cost_type columns.
perf_si_df['Spending_value'] = perf_si_df['KPI_value'].astype('float')
for cost_col in ('Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4'):
    perf_si_df[cost_col] = perf_si_df['KPI']
# AIP and SKU are left blank for these rows.
for blank_col in ('AIP', 'SKU'):
    perf_si_df[blank_col] = ''

perf_si_df = perf_si_df[view3_columns]

In [20]:
# Add the search-index rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, perf_si_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
perf_si_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,Ali Search,Ali Search,Ali Search,Ali Search
1,Baidu Search,Baidu Search,Baidu Search,Baidu Search


## BHT

In [21]:
# Location of the BHT workbook: <project>/<data folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['data_folder_path'],
                            config['BHT input']))

# Read every column as text, convert KPI_value to float and keep the
# original Brand value under SKU.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['KPI_value'] = input_df['KPI_value'].astype('float')
input_df['SKU'] = input_df['Brand']

# bht_df is the same object as input_df (no copy); no aggregation is applied
# to this source, only the SKU -> brand mapping.
bht_df = input_df
bht_df['Brand'] = bht_df['SKU'].apply(get_brand_name)

In [22]:
# Reshape into the view3 layout: the KPI value goes into Spending_value and
# the KPI name is repeated across all four Cost_type columns.
bht_df['Spending_value'] = bht_df['KPI_value'].astype('float')
for cost_col in ('Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4'):
    bht_df[cost_col] = bht_df['KPI']
# AIP and SKU are left blank for these rows.
for blank_col in ('AIP', 'SKU'):
    bht_df[blank_col] = ''

bht_df = bht_df[view3_columns]

In [23]:
# Add the BHT rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, bht_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
bht_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,Total Brand Awareness,Total Brand Awareness,Total Brand Awareness,Total Brand Awareness
4,Deep Interest,Deep Interest,Deep Interest,Deep Interest


## CRM Investment

In [24]:
# Location of the CRM spending workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['spending_CRM']))

# Read every column as text, convert Spending_value to float and keep the
# original Brand value under SKU.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df = input_df.assign(
    Spending_value=input_df['Spending_value'].astype('float'),
    SKU=input_df['Brand'],
)

In [25]:
# Keep only rows whose AIP flag is 'I' or 'P', map SKU codes to brand names,
# and reduce to the view3 column layout.
spen_crm_df = input_df[input_df['AIP'].isin(['I', 'P'])].copy()
spen_crm_df['Brand'] = spen_crm_df['SKU'].apply(get_brand_name)
spen_crm_df = spen_crm_df[view3_columns]

In [26]:
# Keys used to total spending when offline and online rows are combined.
crm_spend_keys = ['Date', 'Province', 'Cost_type_1', 'AIP', 'Brand', 'SKU']

# Combined 'CRM NU' spending: offline + online rows summed per key.
nu_mask = spen_crm_df['Cost_type_2'].isin(['CRM NU Offline', 'CRM NU Online'])
nu_df = spen_crm_df[nu_mask].copy()
nu_df = nu_df.groupby(by=crm_spend_keys)['Spending_value'].sum().reset_index()
nu_df['Cost_type_2'] = 'CRM NU'
nu_df['Cost_type_3'] = ''
nu_df['Cost_type_4'] = ''

# Combined 'CRM Leads' spending: offline + online rows summed per key.
leads_mask = spen_crm_df['Cost_type_2'].isin(['CRM Leads Offline', 'CRM Leads Online'])
leads_df = spen_crm_df[leads_mask].copy()
leads_df = leads_df.groupby(by=crm_spend_keys)['Spending_value'].sum().reset_index()
leads_df['Cost_type_2'] = 'CRM Leads'
leads_df['Cost_type_3'] = ''
leads_df['Cost_type_4'] = ''

In [27]:
# Stack the combined NU and Leads rows under the per-channel rows.
# pd.concat replaces the two DataFrame.append calls (append was removed in
# pandas 2.0); the original row indexes are kept, as before.
spen_crm_df = pd.concat([spen_crm_df, nu_df, leads_df])

In [28]:
# Add the CRM spending rows to the accumulator. pd.concat is used because
# DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, spen_crm_df], ignore_index=True)
# Display the distinct cost-type / AIP combinations just added.
spen_crm_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4', 'AIP']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4,AIP
0,CRM,CRM Leads Offline,-,-,I
90,CRM,CRM NU Offline,-,-,P
180,CRM,CRM others,-,-,I
194,CRM,CRM NU Online,-,-,P
197,CRM,CRM Leads Online,-,-,I
0,CRM,CRM NU,,,P
0,CRM,CRM Leads,,,I


## Media Investment

In [29]:
# Location of the media spending workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['spending_media']))

# Read every column as text and convert Spending_value to float.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['Spending_value'] = input_df['Spending_value'].astype('float')
# Keep the original Brand value under SKU, then replace Brand with the
# brand name derived from that code.
raw_brand_codes = input_df['Brand']
input_df['SKU'] = raw_brand_codes
input_df['Brand'] = input_df['SKU'].apply(get_brand_name)

In [30]:
# Total media spending per Date / Province / cost type / AIP / Brand.
media_group_keys = ['Date', 'Province', 'Cost_type_1', 'Cost_type_2', 'AIP', 'Brand']
media_totals = input_df.groupby(by=media_group_keys)['Spending_value'].sum()
spen_media_df = media_totals.reset_index()

In [31]:
# The aggregated media rows have no level-3/4 cost type and no SKU:
# fill those view3 columns with empty strings.
for blank_col in ('Cost_type_3', 'Cost_type_4', 'SKU'):
    spen_media_df[blank_col] = ''

In [32]:
# Add the media spending rows to the accumulator (columns are aligned by
# name). pd.concat is used because DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, spen_media_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
spen_media_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,Media,Digital,,
12,Media,TV,,


## Channel Investment

In [33]:
# Location of the channel spending workbook: <project>/<temp folder>/<file name>.
input_file_path = '/'.join((config['project_path'],
                            config['temp_folder_path'],
                            config['spending_channel']))

# Read every column as text and convert Spending_value to float.
input_df = pd.read_excel(input_file_path, dtype=str)
input_df['Spending_value'] = input_df['Spending_value'].astype('float')
# Keep the original Brand value under SKU, then replace Brand with the
# brand name derived from that code.
raw_brand_codes = input_df['Brand']
input_df['SKU'] = raw_brand_codes
input_df['Brand'] = input_df['SKU'].apply(get_brand_name)

In [34]:
# Add the channel spending rows to the accumulator as loaded (no aggregation).
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
view3_df = pd.concat([view3_df, input_df], ignore_index=True)
# Display the distinct cost-type combinations just added.
input_df[['Cost_type_1','Cost_type_2','Cost_type_3','Cost_type_4']].drop_duplicates()

Unnamed: 0,Cost_type_1,Cost_type_2,Cost_type_3,Cost_type_4
0,Channel,KA,,
17,Channel,EC,,
162,Channel,GT,,
164,Channel,RKMBS,,


## Create view 3 output

In [35]:
# Replace missing values with empty strings so they compare equal below.
view3_df = view3_df.fillna('')

# Distinct Province / cost-type / AIP combinations present in the data.
# Two independent frames are needed because each is tagged with a
# different brand in the next cell.
combination_columns = ['Province', 'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4', 'AIP']
an_df = view3_df[combination_columns].drop_duplicates()
nc_df = an_df.copy()

In [36]:
# Tag each copy of the combination table with one brand, then stack them so
# every combination exists for both brands.
an_df['Brand'] = 'Aptamil'
nc_df['Brand'] = 'Nutrilon'
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_df = pd.concat([an_df, nc_df], ignore_index=True)

In [37]:
# Build the full scaffold: every brand / province / cost-type combination
# repeated for each month 202001 .. 202012.
date_map_columns = ['Date', 'Brand', 'Province', 'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4', 'AIP']

# One frame per month, collected in a list and concatenated once. This avoids
# DataFrame.append (removed in pandas 2.0) and re-copying the accumulated
# frame on every iteration. assign() returns a new frame, so all_df itself is
# not modified and the cell can be re-run safely.
monthly_frames = [
    all_df.assign(Date="2020" + str(month).zfill(2))
    for month in range(1, 13)
]
# Select the columns explicitly to keep the same column order as before.
all_dates_map = pd.concat(monthly_frames, ignore_index=True)[date_map_columns]

In [38]:
# Left-join the collected values onto the full scaffold so that every
# date / brand / province / cost-type combination appears in the output,
# even when no value was recorded for it.
merge_keys = [
    'Date', 'Brand', 'Province',
    'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4',
    'AIP',
]
output_df = all_dates_map.merge(view3_df, on=merge_keys, how='left')

In [39]:
# Scaffold rows without a match in the left join have no value: report them as 0.
# Only Spending_value is filled; other columns keep their missing values.
missing_value_defaults = {'Spending_value': 0}
output_df = output_df.fillna(value=missing_value_defaults)

In [40]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into a 'YYYY/MM/DD' date string.

    The input carries no day, so the parsed date falls on the first of
    the month (e.g. '202003' -> '2020/03/01').
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Reformat the YYYYMM dates as YYYY/MM/DD strings.
output_df['Date'] = output_df['Date'].apply(format_yearmonth)

# Fix the output column order, then rename Spending_value to the
# capitalised header used in the exported workbook.
export_columns = ['Date', 'Province', 'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4', 'AIP', 'Brand', 'SKU', 'Spending_value']
output_df = output_df[export_columns]
output_df = output_df.rename(columns={'Spending_value': 'Spending_Value'})

In [41]:
# Destination of the view 3 workbook: <project>/<temp folder>/<file name>.
output_file_path = '/'.join((config['project_path'],
                             config['temp_folder_path'],
                             config['view_3_output']))

# Write the final table without the DataFrame index column.
output_df.to_excel(output_file_path, index=False)