In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Notebook-wide settings: folder layout plus the names of every input and
# output file. Paths are assembled later as project_path/<folder>/<file>.
config = {
    'project_path': '..',
    'data_folder_path': 'data',
    'temp_folder_path': 'temp',

    # input files
    'offtake_input1': 'POS-202001-202002.csv',
    'offtake_input2': 'POS-202003-202005.csv',
    'MDM_mapping': 'MDM_mapping.xlsx',

    # output files
    'performance_offtake': 'Performance Danone offtake 202001-05.xlsx',
    'nation_province_by_brand_file': '202001-05_nation_province_by_brand.xlsx',
    'nation_to_province_brand_file': '202001-05_nation_to_province_brand.xlsx',
    'brand_to_province_brand_file': '202001-05_brand_to_province_brand.xlsx',
    'province_to_province_brand_file': '202001-05_province_to_province_brand_file.xlsx',
}

## Read Offtake

In [None]:
# Schema of the input files: the columns kept from each POS extract, in this order.
offtake_columns = ['Year','Month','Channel_Code','Grade_Group_Code',
                   'Grade_Code','region_name','SubRegion_Name','Province_Name',
                   'xcity_name','Account_Name','Customer_Code',
                   'Customer_Name','validity','Brand','SKU','Offtake_Qty',
                   'Offtake_Qty_Factor','Offtake_SP_Value','Offtake_SP_Value_Factor']

# NOTE(review): select_columns is not referenced by any visible cell and lists
# columns that are not in offtake_columns. It is kept only so the name stays
# defined -- confirm whether it can be removed.
select_columns = ['YearMonth','Distributor_Code','Customer_Code',
                  'Product_Code','Sellout_SP_Value']

# Read every POS extract as text (dtype=str), keep the schema columns, and
# stack the frames once with pd.concat. DataFrame.append was deprecated in
# pandas 1.4 and removed in pandas 2.0, so it is no longer used here.
input_frames = []
for input_key in ('offtake_input1', 'offtake_input2'):
    input_file_path = config['project_path'] + '/' \
                            + config['data_folder_path'] + '/' \
                            + config[input_key]
    input_df = pd.read_csv(input_file_path, dtype=str)
    input_df = input_df[offtake_columns]
    input_frames.append(input_df)

offtake_df = pd.concat(input_frames, ignore_index=True)

In [None]:
# Build the period key as Year followed by a two-digit month. Zero-padding with
# zfill handles '1' -> '01', leaves '10'..'12' and an already padded '01'
# untouched; a hard-coded '0' separator would turn month '10' into '<year>010'.
offtake_df['YearMonth'] = offtake_df['Year'] + offtake_df['Month'].str.zfill(2)

In [None]:
# Convert the sales value from text to float.
# Series.replace(' ', '') only rewrites cells that are exactly one space, and
# the resulting '' cannot be cast to float. Instead, remove spaces inside every
# value with the string accessor and turn blank cells into NaN before casting.
# astype(str) keeps the cell re-runnable once the column is already numeric.
offtake_value_text = offtake_df['Offtake_SP_Value'].astype(str).str.replace(' ', '', regex=False)
offtake_df['Offtake_SP_Value'] = offtake_value_text.replace('', np.nan).astype('float')

In [None]:
# Load the MDM mapping workbook as text and keep one row per distinct
# (province name, province code) pair.
input_file_path = '/'.join([
    config['project_path'],
    config['data_folder_path'],
    config['MDM_mapping'],
])

province_df = pd.read_excel(input_file_path, dtype=str)
province_df = province_df.loc[:, ['MDM_province_name', 'MDM_province_code']].drop_duplicates()

In [None]:
# Attach the MDM province columns to every offtake row by matching
# Province_Name against MDM_province_name; rows without a match keep NaN.
offtake_df = offtake_df.merge(
    province_df,
    how='left',
    left_on='Province_Name',
    right_on='MDM_province_name',
)

#### Merge Mengdong and Mengxi into Neimenggu

In [None]:
def merge_neimenggu(MDM_province_code):
    """Return 'Neimenggu' for the codes 'Mengxi' and 'Mengdong'.

    Any other value (including missing values) is returned unchanged.
    """
    is_meng_code = MDM_province_code in ('Mengxi', 'Mengdong')
    return 'Neimenggu' if is_meng_code else MDM_province_code

# Rewrite the province code column element by element with merge_neimenggu.
offtake_df['MDM_province_code'] = offtake_df['MDM_province_code'].apply(merge_neimenggu)

#### Check unmapped records

In [None]:
# Show the rows that received no MDM province code from the mapping merge.
# NaN never compares equal to anything (including itself), so an `== np.NAN`
# filter always returns an empty frame; isna() is the correct missing-value test.
offtake_df[offtake_df['MDM_province_code'].isna()]

## Separate Online (EC) and Offline Offtake

In [None]:
# Split the rows by channel: Channel_Code 'EC' versus everything else.
is_ec_channel = offtake_df['Channel_Code'] == 'EC'
offtake_ec_df = offtake_df[is_ec_channel]
offline_df = offtake_df[~is_ec_channel]

In [None]:
# Total offline offtake value per period, province code and brand.
monthly_offline = (
    offline_df
    .groupby(by=['YearMonth', 'MDM_province_code', 'Brand'])['Offtake_SP_Value']
    .sum()
    .reset_index()
)

## Distribute HQ offtake to each province

In [None]:
# Rows of every province code other than 'HQ'.
nonhq_offline = monthly_offline[~(monthly_offline['MDM_province_code'] == 'HQ')]

# Non-HQ total per period and brand, used as the denominator of the split ratio.
non_hq_all = (
    nonhq_offline
    .groupby(by=['YearMonth', 'Brand'])['Offtake_SP_Value']
    .sum()
    .reset_index()
)
non_hq_all.columns = ['YearMonth', 'Brand', 'all_offtake']

# split_ratio = a province's value divided by the non-HQ total of the same
# period and brand; the value columns are dropped once the ratio is computed.
nonhq_offline_split = nonhq_offline.merge(non_hq_all, how='left', on=['YearMonth', 'Brand'])
nonhq_offline_split['split_ratio'] = (
    nonhq_offline_split['Offtake_SP_Value'] / nonhq_offline_split['all_offtake']
)
nonhq_offline_split = nonhq_offline_split.drop(['Offtake_SP_Value', 'all_offtake'], axis=1)

In [None]:
# Take the 'HQ' rows and drop their province code so they can be fanned out
# to the provinces through the split ratios.
hq_offline = (
    monthly_offline[monthly_offline['MDM_province_code'] == 'HQ']
    .drop('MDM_province_code', axis=1)
)

# One row per (period, brand, province): HQ value times that province's ratio.
hq_offline = hq_offline.merge(nonhq_offline_split, how='left', on=['YearMonth', 'Brand'])
hq_offline['hq_split_value'] = hq_offline['Offtake_SP_Value'] * hq_offline['split_ratio']
hq_offline = hq_offline.drop(['Offtake_SP_Value', 'split_ratio'], axis=1)

In [None]:
# Put each province's distributed HQ share next to its own offtake value.
all_offtake = nonhq_offline.merge(
    hq_offline,
    how='left',
    on=['YearMonth', 'MDM_province_code', 'Brand'],
)

In [None]:
# Province offtake = own value + distributed HQ share.
# The HQ share comes from a left merge, so it is NaN for every period/brand
# that has no HQ row. Treat a missing share as 0; otherwise the sum becomes NaN
# and the province's own offtake disappears from every later total.
all_offtake['Offtake_Value'] = all_offtake['Offtake_SP_Value'] + all_offtake['hq_split_value'].fillna(0)
all_offtake = all_offtake[['YearMonth', 'MDM_province_code', 'Brand', 'Offtake_Value']]

## Add National Offline Offtake

In [None]:
# Sum the province-level offline offtake per period and brand.
offline_offtake_nati = (
    all_offtake
    .groupby(by=['YearMonth', 'Brand'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Label the totals with the pseudo province code 'National' and use the same
# column order as the province-level frame.
offline_offtake_nati = offline_offtake_nati.assign(MDM_province_code='National')
offline_offtake_nati = offline_offtake_nati.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand', 'Offtake_Value']
]

In [None]:
# Province-level offline rows followed by the 'National' offline rows.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_offline_offtake = pd.concat([all_offtake, offline_offtake_nati], ignore_index=True)

## Add EC offtake back

In [None]:
# Total EC offtake value per period and brand.
ec_tmp_df = (
    offtake_ec_df
    .groupby(by=['YearMonth', 'Brand'])['Offtake_SP_Value']
    .sum()
    .reset_index()
)

In [None]:
# Rename the value column to match the offline frames and label the EC totals
# with the pseudo province code 'National'.
ec_tmp_df.columns = ['YearMonth', 'Brand', 'Offtake_Value']
ec_tmp_df = ec_tmp_df.assign(MDM_province_code='National')

In [None]:
# Stack the EC totals on top of the offline 'National' totals, then add them
# up per period, province code and brand.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
all_offtake_nati = pd.concat([ec_tmp_df, offline_offtake_nati], ignore_index=True)
all_offtake_nati = all_offtake_nati.groupby(by = ['YearMonth', 'MDM_province_code', 'Brand'])['Offtake_Value'].sum().reset_index()

In [None]:
# Province-level offline rows followed by the 'National' rows (EC + offline).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
output_df = pd.concat([all_offtake, all_offtake_nati], ignore_index=True)

# Save output

In [None]:
# Shape the result for export: constant KPI label, Date copied from YearMonth,
# then the five output columns under their export names.
output_df = output_df.assign(KPI='Danone Offtake', Date=output_df['YearMonth'])
output_df = output_df.loc[:, ['Date', 'MDM_province_code', 'Brand', 'KPI', 'Offtake_Value']]
output_df.columns = ['Date', 'Province', 'Brand', 'KPI', 'KPI_value']

In [None]:
# Write the offtake KPI table to the temp folder, without the index column.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['performance_offtake'],
])

output_df.to_excel(output_file_path, index=False)

## Create Offtake Split Ratios

## From Nation brand to province brand

In [None]:
# Denominator: the offline 'National' value per period and brand.
all_offtake_nati = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] == 'National']
    .groupby(by=['YearMonth', 'Brand'])['Offtake_Value']
    .sum()
    .reset_index()
)
all_offtake_nati.columns = ['YearMonth', 'Brand', 'National_Offtake_Value']

In [None]:
# Numerator: the value of every non-'National' row per period, brand and province code.
all_offtake_prov = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] != 'National']
    .groupby(by=['YearMonth', 'Brand', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Split_Ratio = province value divided by the 'National' value of the same
# period and brand.
nati_to_prov = all_offtake_nati.merge(all_offtake_prov, how='left', on=['YearMonth', 'Brand'])
nati_to_prov['Split_Ratio'] = (
    nati_to_prov['Offtake_Value'] / nati_to_prov['National_Offtake_Value']
)

In [None]:
# Keep only the key columns and the ratio for export.
nati_to_prov = nati_to_prov.loc[:, ['YearMonth', 'MDM_province_code', 'Brand', 'Split_Ratio']]

In [None]:
# Write the ratios to the nation_province_by_brand workbook in the temp folder.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['nation_province_by_brand_file'],
])

nati_to_prov.to_excel(output_file_path, index=False)

## From brand (national total per brand) to province brand

In [None]:
# Denominator: the offline 'National' value per period and brand, stored as
# Brand_Offtake_Value.
all_offtake_nati = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] == 'National']
    .groupby(by=['YearMonth', 'Brand'])['Offtake_Value']
    .sum()
    .reset_index()
)
all_offtake_nati.columns = ['YearMonth', 'Brand', 'Brand_Offtake_Value']

In [None]:
# Numerator: the value of every non-'National' row per period, brand and province code.
all_offtake_prov = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] != 'National']
    .groupby(by=['YearMonth', 'Brand', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Split_Ratio = province value divided by the brand total of the same period.
nati_to_prov = all_offtake_nati.merge(all_offtake_prov, how='left', on=['YearMonth', 'Brand'])
nati_to_prov['Split_Ratio'] = (
    nati_to_prov['Offtake_Value'] / nati_to_prov['Brand_Offtake_Value']
)

In [None]:
# Keep only the key columns and the ratio for export.
nati_to_prov = nati_to_prov.loc[:, ['YearMonth', 'MDM_province_code', 'Brand', 'Split_Ratio']]

In [None]:
# Write the ratios to the brand_to_province_brand workbook in the temp folder.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['brand_to_province_brand_file'],
])

nati_to_prov.to_excel(output_file_path, index=False)

## From Nation (national total across brands) to province brand

In [None]:
# Denominator: the offline 'National' value per period, summed over all brands.
all_offtake_nati = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] == 'National']
    .groupby(by=['YearMonth'])['Offtake_Value']
    .sum()
    .reset_index()
)
all_offtake_nati.columns = ['YearMonth', 'National_Offtake_Value']

In [None]:
# Numerator: the value of every non-'National' row per period, brand and province code.
all_offtake_prov = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] != 'National']
    .groupby(by=['YearMonth', 'Brand', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Split_Ratio = province/brand value divided by the period's national total.
nati_to_prov = all_offtake_nati.merge(all_offtake_prov, how='left', on=['YearMonth'])
nati_to_prov['Split_Ratio'] = (
    nati_to_prov['Offtake_Value'] / nati_to_prov['National_Offtake_Value']
)

In [None]:
# Keep only the key columns and the ratio for export.
nati_to_prov = nati_to_prov.loc[:, ['YearMonth', 'MDM_province_code', 'Brand', 'Split_Ratio']]

In [None]:
# Write the ratios to the nation_to_province_brand workbook in the temp folder.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['nation_to_province_brand_file'],
])

nati_to_prov.to_excel(output_file_path, index=False)

## From province to province brand

In [None]:
# Numerator: the value of every non-'National' row per period, brand and province code.
all_offtake_prov = (
    all_offline_offtake[all_offline_offtake['MDM_province_code'] != 'National']
    .groupby(by=['YearMonth', 'Brand', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Denominator: each province's value per period, summed over all brands.
all_offtake_prov_base = (
    all_offtake_prov
    .groupby(by=['YearMonth', 'MDM_province_code'])['Offtake_Value']
    .sum()
    .reset_index()
)
all_offtake_prov_base.columns = ['YearMonth', 'MDM_province_code', 'All_Offtake_Value']

In [None]:
# Split_Ratio = brand value within a province divided by that province's
# total for the same period.
prov_to_provbrand = all_offtake_prov_base.merge(
    all_offtake_prov,
    how='left',
    on=['YearMonth', 'MDM_province_code'],
)
prov_to_provbrand['Split_Ratio'] = (
    prov_to_provbrand['Offtake_Value'] / prov_to_provbrand['All_Offtake_Value']
)

In [None]:
# Keep only the key columns and the ratio for export.
prov_to_provbrand = prov_to_provbrand.loc[
    :, ['YearMonth', 'MDM_province_code', 'Brand', 'Split_Ratio']
]

In [None]:
# Write the ratios to the province_to_province_brand workbook in the temp folder.
output_file_path = '/'.join([
    config['project_path'],
    config['temp_folder_path'],
    config['province_to_province_brand_file'],
])

prov_to_provbrand.to_excel(output_file_path, index=False)