In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Central place for every path fragment and file name the notebook uses.
config = {
    # folder layout, relative to where the notebook is run
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",

    # input files
    "nielsen_input": "Nielsen 202001-05.xlsx",

    # output files
    "nielsen_output": "Nielsen formatted 202001-05.xlsx",
}

## Read Nielsen

In [None]:
# Assemble <project>/<data folder>/<input workbook> from the config entries.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['nielsen_input'],
])

# Every column is read as text (dtype=str); a float version of 'KPI_Value'
# is then added as a new 'Value' column for the numeric work further down.
input_df = (
    pd.read_excel(input_file_path, dtype=str)
    .assign(Value=lambda frame: frame['KPI_Value'].astype('float'))
)

## Market Share

In [None]:
# Keep only the market-share KPI rows, then total 'Value' for each
# Date / Brand / Province / KPI combination.
is_mkt_share = input_df['KPI'] == 'Nielsen Value Market Share'
mkt_share_df = (
    input_df[is_mkt_share]
    .groupby(by=['Date', 'Brand', 'Province', 'KPI'])['Value']
    .sum()
    .reset_index()
)

## WTD

In [None]:
# Rows for either of the two WTD KPIs; copied so later column edits
# do not touch input_df.
wtd_df = input_df[input_df['KPI'].isin(['WTD - RI', 'WTD - MBS'])].copy()

In [None]:
def format_wtd(row):
    """Return the KPI label for a row, tagged with its SKU when relevant.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with "SKU" and "KPI" entries.

    Returns:
        The KPI text followed by ' (AC)' or ' (AP)' when the SKU is exactly
        'AC' or 'AP'; otherwise the KPI text unchanged.
    """
    sku = str(row["SKU"])
    kpi_label = str(row["KPI"])

    # Only these two SKU codes are appended to the label.
    if sku in ('AC', 'AP'):
        return kpi_label + ' (' + sku + ')'
    return kpi_label

In [None]:
# Replace each row's KPI with its SKU-tagged label, then keep only the
# columns shared with the market-share table.
wtd_df = wtd_df.assign(KPI=wtd_df.apply(format_wtd, axis=1))
wtd_df = wtd_df.loc[:, ['Date', 'Brand', 'Province', 'KPI', 'Value']]

In [None]:
# Stack the market-share rows and the WTD rows into one long table with a
# fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
nielsen_all = pd.concat([mkt_share_df, wtd_df], ignore_index=True)

#### Create all month, brand, province, and KPI combinations for 2020

In [None]:
# Build the full grid of (Date, Brand, Province, KPI) keys for 2020: every
# distinct Brand/Province/KPI combination seen in nielsen_all, repeated once
# per month with Date set to '202001' .. '202012'.
key_columns = ['Date', 'Brand', 'Province', 'KPI']
all_types = nielsen_all[['Brand', 'Province', 'KPI']].drop_duplicates()

# One frame per month, combined in a single pd.concat call. DataFrame.append
# was removed in pandas 2.0, and appending inside the loop re-copied the
# growing frame on every iteration. .assign leaves all_types itself unmodified.
monthly_frames = [
    all_types.assign(Date="2020" + str(month).zfill(2))
    for month in range(1, 13)
]
all_dates_map = pd.concat(monthly_frames, ignore_index=True)[key_columns]

In [None]:
# Left-join the observed values onto the full key grid; keys with no
# observation come back as NaN and are replaced with 0.
output_df = (
    all_dates_map
    .merge(nielsen_all, on=['Date', 'Brand', 'Province', 'KPI'], how='left')
    .fillna(0)
)

## Output result

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into 'YYYY/MM/DD' text.

    The input carries no day, so time.strptime fills in day 1 and the
    result always ends in '/01'.

    Args:
        year_month: Year and month as a string, e.g. '202003'.

    Returns:
        The formatted date string, e.g. '2020/03/01'.

    Raises:
        ValueError: If year_month does not match the '%Y%m' format.
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Rewrite the 'YYYYMM' keys as 'YYYY/MM/DD' text.
output_df['Date'] = output_df['Date'].apply(format_yearmonth)

# 'Level 1' .. 'Level 5' all carry the same value: the row's KPI label.
for level in range(1, 6):
    output_df['Level ' + str(level)] = output_df['KPI']

In [None]:
# Final column layout for the export; any column not listed here (such as
# 'KPI') is dropped.
output_columns = [
    'Date', 'Brand', 'Province',
    'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5',
    'Value',
]
output_df = output_df.loc[:, output_columns]

In [None]:
# Assemble <project>/<temp folder>/<output workbook> from the config entries.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['nielsen_output'],
])

# Write the formatted table without the DataFrame index column.
output_df.to_excel(output_file_path, index=False)