In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Folder names and file names used by the cells that read inputs and write
# outputs.
config = {
    # folders
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'offtake_input1': 'POS-202001-202002.csv',
    'offtake_input2': 'POS-202003-202005.csv',
    'MDM_mapping': 'MDM_mapping.xlsx',

    # output files
    'offtake_output': 'Offtake IFFO rate 202001-05.xlsx',
    'nation_province_by_brand_file': '202001-05_nation_province_by_brand.xlsx',
    'nation_to_province_brand_file': '202001-05_nation_to_province_brand.xlsx',
    'brand_to_province_brand_file': '202001-05_brand_to_province_brand.xlsx',
    'province_to_province_brand_file': '202001-05_province_to_province_brand_file.xlsx',
}

## Read Offtake

In [None]:
# Schema of the offtake input files; only these columns are kept.
offtake_columns = ['Year','Month','Channel_Code','Grade_Group_Code',
                   'Grade_Code','region_name','SubRegion_Name','Province_Name',
                   'xcity_name','Account_Name','Customer_Code',
                   'Customer_Name','validity','Brand','SKU','Offtake_Qty',
                   'Offtake_Qty_Factor','Offtake_SP_Value','Offtake_SP_Value_Factor']

# NOTE(review): no other cell shown in this notebook reads `select_columns`,
# and most of its names are not in `offtake_columns`. Kept unchanged in case
# other code depends on it - confirm and remove if it is dead.
select_columns = ['YearMonth','Distributor_Code','Customer_Code',
                  'Product_Code','Sellout_SP_Value']

# Read each offtake extract as text, keep the schema columns, and stack the
# pieces once with pd.concat (DataFrame.append was removed in pandas 2.0).
offtake_frames = []
for input_key in ('offtake_input1', 'offtake_input2'):
    input_file_path = config['project_path'] + '/' \
                            + config['data_folder_path'] + '/' \
                            + config[input_key]
    input_df = pd.read_csv(input_file_path, dtype=str)
    # Selecting by name raises KeyError if an extract lacks a schema column.
    input_df = input_df[offtake_columns]
    offtake_frames.append(input_df)

offtake_df = pd.concat(offtake_frames, ignore_index=True)

In [None]:
# Build a 6-character 'YYYYMM' key. Year and Month are text (read with
# dtype=str), so the month is left-padded to two digits instead of always
# prefixing '0', which produced 7-character keys for months 10-12.
offtake_df['YearMonth'] = offtake_df['Year'] + offtake_df['Month'].str.zfill(2)

In [None]:
# Convert the text amounts to float. Surrounding whitespace is stripped and
# blank cells become NaN first; Series.replace(' ', '') only touched cells that
# were exactly one space and left an empty string that float conversion
# rejects. Any other non-numeric text still raises, so bad data stays visible.
sp_value_text = offtake_df['Offtake_SP_Value'].str.strip()
offtake_df['Offtake_SP_Value'] = sp_value_text.replace('', np.nan).astype('float')

In [None]:
# Path of the MDM mapping workbook inside the data folder.
input_file_path = '/'.join([config['project_path'],
                            config['data_folder_path'],
                            config['MDM_mapping']])

# Every column of the mapping is read as text.
province_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Left join: every offtake row is kept and receives the mapping columns of the
# row whose MDM_province_name equals its Province_Name.
offtake_df = offtake_df.merge(
    province_df,
    how='left',
    left_on='Province_Name',
    right_on='MDM_province_name',
)

#### Merge Mengdong and Mengxi into Neimenggu

In [None]:
def merge_neimenggu(MDM_province_code):
    """Collapse the 'Mengxi' and 'Mengdong' codes into 'Neimenggu'.

    Any other value is returned unchanged.
    """
    return 'Neimenggu' if MDM_province_code in ('Mengxi', 'Mengdong') else MDM_province_code

# Rewrite each province code through merge_neimenggu.
offtake_df['MDM_province_code'] = offtake_df['MDM_province_code'].apply(merge_neimenggu)

#### Check unmapped records

In [None]:
# Rows whose province found no match in the MDM mapping. NaN never compares
# equal to anything (itself included), so `== np.NAN` always returned an empty
# frame; isna() is the correct test.
offtake_df[offtake_df['MDM_province_code'].isna()]

## Keep Offline Only

In [None]:
# Drop the rows whose channel code is 'EC'.
offtake_df = offtake_df.loc[offtake_df['Channel_Code'].ne('EC')]

In [None]:
# Offtake value summed per month, province code, brand and SKU.
# NOTE(review): with pandas' default groupby settings, rows whose key is
# missing (e.g. an unmapped province) are left out of the result - confirm
# that is intended.
monthly_offtake = (
    offtake_df
    .groupby(by=['YearMonth', 'MDM_province_code', 'Brand', 'SKU'])['Offtake_SP_Value']
    .sum()
    .reset_index()
)

## Distribute HQ offtake to each province

In [None]:
# Province rows, i.e. everything whose province code is not 'HQ'.
nonhq_offtake = monthly_offtake.loc[monthly_offtake['MDM_province_code'] != 'HQ']

# Non-HQ total per month, brand and SKU.
non_hq_all = (
    nonhq_offtake
    .groupby(by=['YearMonth', 'Brand', 'SKU'])['Offtake_SP_Value']
    .sum()
    .reset_index()
    .rename(columns={'Offtake_SP_Value': 'all_offtake'})
)

# Share of each province in that total; only the keys and the ratio are kept.
nonhq_offtake_split = nonhq_offtake.merge(non_hq_all, how='left', on=['YearMonth', 'Brand', 'SKU'])
nonhq_offtake_split['split_ratio'] = (
    nonhq_offtake_split['Offtake_SP_Value'] / nonhq_offtake_split['all_offtake']
)
nonhq_offtake_split = nonhq_offtake_split.drop(columns=['Offtake_SP_Value', 'all_offtake'])

In [None]:
# HQ rows only; their province column is dropped because the split table
# supplies the target province.
hq_offtake = monthly_offtake.loc[monthly_offtake['MDM_province_code'] == 'HQ']
hq_offtake = hq_offtake.drop(columns='MDM_province_code')

# Pair every HQ row with the province ratios of the same month, brand and SKU,
# then scale the HQ value by each ratio.
hq_offtake = hq_offtake.merge(nonhq_offtake_split, how='left', on=['YearMonth', 'Brand', 'SKU'])
hq_offtake['hq_split_value'] = hq_offtake['Offtake_SP_Value'].mul(hq_offtake['split_ratio'])
hq_offtake = hq_offtake.drop(columns=['Offtake_SP_Value', 'split_ratio'])

In [None]:
# Attach the HQ allocation to each province row (left join on the full key).
# NOTE(review): HQ rows that found no province ratio carry a missing province
# code and match nothing here, so that HQ value is not distributed - confirm.
all_offtake = nonhq_offtake.merge(
    hq_offtake,
    how='left',
    on=['YearMonth', 'MDM_province_code', 'Brand', 'SKU'],
)

In [None]:
# Province offtake plus its HQ allocation. Rows with no HQ allocation have a
# missing hq_split_value after the left merge; it is counted as 0 so the
# province's own offtake is not turned into NaN and skipped by later sums.
all_offtake['Offtake_Value'] = (
    all_offtake['Offtake_SP_Value'] + all_offtake['hq_split_value'].fillna(0)
)

## Add National Offtake

In [None]:
# Sum over provinces: offtake per month, brand and SKU.
all_offtake_nati = (
    all_offtake
    .groupby(by=['YearMonth', 'Brand', 'SKU'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Label the totals as province 'National' and put the columns in the same
# order as the province-level key columns.
all_offtake_nati = all_offtake_nati.assign(MDM_province_code='National')
all_offtake_nati = all_offtake_nati.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand', 'SKU', 'Offtake_Value']
]

In [None]:
# Province rows followed by the 'National' rows. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0; columns missing from the
# national frame are filled with NaN, as before.
all_offtake_full = pd.concat([all_offtake, all_offtake_nati], ignore_index=True)

## Use Aptamil for AP and AC

In [None]:
def get_brand_name(brand):
    """Translate a brand code into its reporting brand name.

    'NC' maps to 'Nutrilon'; 'AC' and 'AP' both map to 'Aptamil'; any other
    value maps to the empty string.
    """
    brand_names = {'NC': 'Nutrilon', 'AC': 'Aptamil', 'AP': 'Aptamil'}
    return brand_names.get(brand, '')

# Brand name for each row, derived from its brand code.
all_offtake_full['Brand_Name'] = all_offtake_full['Brand'].apply(get_brand_name)

In [None]:
def get_iffo_gum(sku):
    """Classify a SKU string by its last character.

    Returns 'IFFO' when the SKU ends in '1' or '2', 'GUM' when it ends in
    '3' or '4', and the empty string otherwise.
    """
    if sku.endswith(('1', '2')):
        return 'IFFO'
    if sku.endswith(('3', '4')):
        return 'GUM'
    return ''

# IFFO / GUM class for each row, derived from its SKU.
all_offtake_full['iffo_gum'] = all_offtake_full['SKU'].apply(get_iffo_gum)

In [None]:
# Total offtake per month, province and brand name.
all_offtake_brand = all_offtake_full.groupby(by = ['YearMonth', 'MDM_province_code', 'Brand_Name'])['Offtake_Value'].sum().reset_index()

# IFFO part of that offtake. The rows selected are the 'IFFO' ones (not
# 'GUM'), so the result agrees with the iffo_* names and with the
# 'IFFO Ratio' KPI that is derived from it.
all_offtake_iffo = all_offtake_full[all_offtake_full['iffo_gum'] == 'IFFO']

all_offtake_iffo = all_offtake_iffo.groupby(by = ['YearMonth', 'MDM_province_code', 'Brand_Name'])['Offtake_Value'].sum().reset_index()

all_offtake_iffo.columns = ['YearMonth', 'MDM_province_code', 'Brand_Name', 'iffo_offtake_value']

In [None]:
# Put the brand total and its IFFO part side by side; brand rows without an
# IFFO match keep a missing iffo_offtake_value.
all_offtake_res = all_offtake_brand.merge(
    all_offtake_iffo,
    how='left',
    on=['YearMonth', 'MDM_province_code', 'Brand_Name'],
)

# IFFO value as a fraction of the brand total.
all_offtake_res['iffo_ratio'] = all_offtake_res['iffo_offtake_value'].div(
    all_offtake_res['Offtake_Value']
)

# Prepare for output

In [None]:
# KPI rows for the total offtake value, in the common
# (YearMonth, Province, Brand, Value, Level 1) layout.
danone_offtake = all_offtake_res[['YearMonth', 'MDM_province_code', 'Brand_Name', 'Offtake_Value']].copy()
danone_offtake['Level 1'] = 'Danone Offtake'
danone_offtake.columns = ['YearMonth', 'Province', 'Brand', 'Value', 'Level 1']

# KPI rows for the IFFO ratio, in the same layout.
iffo_ratio = all_offtake_res[['YearMonth', 'MDM_province_code', 'Brand_Name', 'iffo_ratio']].copy()
iffo_ratio['Level 1'] = 'IFFO Ratio'
iffo_ratio.columns = ['YearMonth', 'Province', 'Brand', 'Value', 'Level 1']

In [None]:
# Stack both KPI tables. pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0.
output_df = pd.concat([danone_offtake, iffo_ratio], ignore_index=True)

#### Create all YearMonth, province, brand and KPI combinations for 2020

In [None]:
# Distinct (province, brand, KPI) combinations present in the output.
all_types = output_df[['Province', 'Brand', 'Level 1']].drop_duplicates()

# One copy of those combinations for each of the 12 months of 2020, stacked in
# a single pd.concat. DataFrame.append was removed in pandas 2.0, and growing
# a frame inside a loop copies it on every iteration.
all_dates_map = pd.concat(
    [all_types.assign(YearMonth="2020" + str(i).zfill(2)) for i in range(1, 13)],
    ignore_index=True,
)
all_dates_map = all_dates_map[['YearMonth', 'Province', 'Brand', 'Level 1']]

In [None]:
# Align the KPI values onto the full month grid; combinations without a value
# (and any other missing cell) are set to 0.
output_df = all_dates_map.merge(
    output_df,
    how='left',
    on=['YearMonth', 'Province', 'Brand', 'Level 1'],
).fillna(0)

In [None]:
def format_yearmonth(year_month):
    """Convert a 'YYYYMM' string into 'YYYY/MM/DD' text.

    The input carries no day, so the formatted day is always '01'.
    """
    parsed = time.strptime(year_month, '%Y%m')
    return time.strftime('%Y/%m/%d', parsed)

# Date text for every row, derived from its YearMonth.
output_df['Date'] = output_df['YearMonth'].apply(format_yearmonth)

# Levels 2 to 5 repeat the Level 1 label.
for level_col in ('Level 2', 'Level 3', 'Level 4', 'Level 5'):
    output_df[level_col] = output_df['Level 1']

In [None]:
# Keep only the export columns, in export order (YearMonth is dropped here).
output_df = output_df.loc[
    :,
    ['Date', 'Brand', 'Province',
     'Level 1', 'Level 2', 'Level 3', 'Level 4', 'Level 5',
     'Value'],
]

## Save offtake and IFFO Ratio

In [None]:
# Destination of the offtake / IFFO ratio workbook inside the temp folder.
output_file_path = '/'.join([config['project_path'],
                             config['temp_folder_path'],
                             config['offtake_output']])

# Written without the row index.
output_df.to_excel(output_file_path, index=False)

## Create Offtake Split Ratios

## From Nation brand to province brand

In [None]:
# 'National' rows totalled per month and brand name; this is the denominator
# of the split ratio.
all_offtake_nati = (
    all_offtake_full
    .loc[all_offtake_full['MDM_province_code'] == 'National']
    .groupby(by=['YearMonth', 'Brand_Name'])['Offtake_Value']
    .sum()
    .reset_index()
    .rename(columns={'Offtake_Value': 'National_Offtake_Value'})
)

In [None]:
# Every row except the 'National' ones, totalled per month, brand name and
# province.
all_offtake_prov = (
    all_offtake_full
    .loc[all_offtake_full['MDM_province_code'] != 'National']
    .groupby(by=['YearMonth', 'Brand_Name', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Pair each national brand total with the province totals of the same month
# and brand, then express every province as a fraction of the national figure.
nati_to_prov = all_offtake_nati.merge(all_offtake_prov, how='left', on=['YearMonth', 'Brand_Name'])
nati_to_prov['Split_Ratio'] = nati_to_prov['Offtake_Value'].div(nati_to_prov['National_Offtake_Value'])

In [None]:
# Only the keys and the ratio are exported.
nati_to_prov = nati_to_prov.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand_Name', 'Split_Ratio']
]

In [None]:
# Destination workbook for this split table, inside the temp folder.
output_file_path = '/'.join([config['project_path'],
                             config['temp_folder_path'],
                             config['nation_province_by_brand_file']])

# Written without the row index.
nati_to_prov.to_excel(output_file_path, index=False)

## From brand to province brand

In [None]:
# Brand-level base: the 'National' rows totalled per month and brand name.
all_offtake_nati = (
    all_offtake_full[all_offtake_full['MDM_province_code'].eq('National')]
    .groupby(by=['YearMonth', 'Brand_Name'])['Offtake_Value']
    .sum()
    .reset_index()
)
all_offtake_nati = all_offtake_nati.rename(columns={'Offtake_Value': 'Brand_Offtake_Value'})

In [None]:
# Province-level rows (no 'National'), totalled per month, brand name and
# province.
all_offtake_prov = (
    all_offtake_full[all_offtake_full['MDM_province_code'].ne('National')]
    .groupby(by=['YearMonth', 'Brand_Name', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Province share of the brand total for the same month and brand.
# NOTE(review): apart from the base column's name, this is the same
# computation as the preceding national-by-brand split - confirm both output
# files are meant to hold the same ratios.
nati_to_prov = all_offtake_nati.merge(
    all_offtake_prov,
    how='left',
    on=['YearMonth', 'Brand_Name'],
)
nati_to_prov['Split_Ratio'] = (
    nati_to_prov['Offtake_Value'] / nati_to_prov['Brand_Offtake_Value']
)

In [None]:
# Reduce the table to its keys and the ratio.
export_columns = ['YearMonth', 'MDM_province_code', 'Brand_Name', 'Split_Ratio']
nati_to_prov = nati_to_prov[export_columns]

In [None]:
# Destination workbook for the brand-to-province-brand ratios.
output_file_path = '/'.join([config['project_path'],
                             config['temp_folder_path'],
                             config['brand_to_province_brand_file']])

# Written without the row index.
nati_to_prov.to_excel(output_file_path, index=False)

## From Nation to province brand

In [None]:
# National base across all brands: the 'National' rows totalled per month.
all_offtake_nati = (
    all_offtake_full
    .loc[all_offtake_full['MDM_province_code'] == 'National']
    .groupby(by=['YearMonth'])['Offtake_Value']
    .sum()
    .reset_index()
    .rename(columns={'Offtake_Value': 'National_Offtake_Value'})
)

In [None]:
# Non-'National' rows, totalled per month, brand name and province.
is_province_row = all_offtake_full['MDM_province_code'] != 'National'
all_offtake_prov = (
    all_offtake_full[is_province_row]
    .groupby(by=['YearMonth', 'Brand_Name', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Each province/brand total as a fraction of the month's national total.
nati_to_prov = all_offtake_nati.merge(all_offtake_prov, how='left', on=['YearMonth'])
nati_to_prov['Split_Ratio'] = nati_to_prov['Offtake_Value'].div(nati_to_prov['National_Offtake_Value'])

In [None]:
# Export only the keys and the ratio.
nati_to_prov = nati_to_prov.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand_Name', 'Split_Ratio']
]

In [None]:
# Destination workbook for the nation-to-province-brand ratios.
output_file_path = '/'.join([config['project_path'],
                             config['temp_folder_path'],
                             config['nation_to_province_brand_file']])

# Written without the row index.
nati_to_prov.to_excel(output_file_path, index=False)

## From province to province brand

In [None]:
# Province-level rows (no 'National'), totalled per month, brand name and
# province.
all_offtake_prov = (
    all_offtake_full
    .loc[all_offtake_full['MDM_province_code'].ne('National')]
    .groupby(by=['YearMonth', 'Brand_Name', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Province base: all brands of a province summed per month.
all_offtake_prov_base = (
    all_offtake_prov
    .groupby(by=['YearMonth', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
    .rename(columns={'Offtake_Value': 'All_Offtake_Value'})
)

In [None]:
# Each brand's share of its province total for the month.
prov_to_provbrand = all_offtake_prov_base.merge(
    all_offtake_prov,
    how='left',
    on=['YearMonth', 'MDM_province_code'],
)
prov_to_provbrand['Split_Ratio'] = (
    prov_to_provbrand['Offtake_Value'] / prov_to_provbrand['All_Offtake_Value']
)

In [None]:
# Export only the keys and the ratio.
prov_to_provbrand = prov_to_provbrand.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand_Name', 'Split_Ratio']
]

In [None]:
# Destination workbook for the province-to-province-brand ratios.
output_file_path = '/'.join([config['project_path'],
                             config['temp_folder_path'],
                             config['province_to_province_brand_file']])

# Written without the row index.
prov_to_provbrand.to_excel(output_file_path, index=False)