In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Notebook-wide settings. project_path and data_folder_path are joined with "/"
# further down to build the input file paths.
config = {
    "year_list": [2018, 2019],
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",
}

## Read Offtake

In [None]:
# Columns kept from each input file, in this order.
offtake_columns = ['Year','Month','Channel_Code','Grade_Group_Code',
                   'Grade_Code','region_name','SubRegion_Name','Province_Name',
                   'xcity_name','Account_Name','Customer_Code',
                   'Customer_Name','validity','Brand','SKU','Offtake_Qty',
                   'Offtake_Qty_Factor','Offtake_SP_Value','Offtake_SP_Value_Factor']

# Only below columns from input file are needed
# NOTE(review): select_columns is not referenced by the visible cells and its names
# do not appear in offtake_columns -- confirm whether it is still required.
select_columns = ['YearMonth','Distributor_Code','Customer_Code',
                  'Product_Code','Sellout_SP_Value']

# POS extracts to load; each is read from <project_path>/<data_folder_path>/.
input_file_names = ["POS-202001-202002.csv", "POS-202003-202005.csv"]

input_frames = []
for input_file_name in input_file_names:
    input_file_path = config["project_path"] + "/" \
                        + config["data_folder_path"] + "/" \
                        + input_file_name
    # dtype=str reads every column as text; numeric columns are converted later.
    input_df = pd.read_csv(input_file_path, dtype=str)
    input_frames.append(input_df[offtake_columns])

# Stack all files in one pass. DataFrame.append was removed in pandas 2.0,
# and a single concat avoids re-copying the accumulated frame for every file.
offtake_df = pd.concat(input_frames, ignore_index=True)

In [None]:
# Build the "YYYYMM" key parsed later with "%Y%m". Zero-pad the month to two digits
# instead of always prefixing "0": a fixed prefix yields an invalid 7-character key
# for months 10-12 or for months that are already padded ("01").
offtake_df["YearMonth"] = offtake_df["Year"] + offtake_df["Month"].str.zfill(2)

In [None]:
# Convert the sales value column (read as text) to float.
# Series.replace(' ', '') only rewrites cells that are exactly one space into '',
# which float conversion then rejects with a ValueError. Treat empty or
# whitespace-only cells as missing (NaN) instead; any other non-numeric text
# still raises, so bad data is not silently hidden.
offtake_df["Offtake_SP_Value"] = (
    offtake_df["Offtake_SP_Value"]
    .replace(r"^\s*$", np.nan, regex=True)
    .astype("float")
)

In [None]:
# Path of the province lookup workbook inside the configured data folder.
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    "Province_Mapping.xlsx",
])

# Read every column as text.
province_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Attach the province mapping columns to every offtake row, matching on Province_Name.
# Left join: rows whose Province_Name has no match keep NaN in the mapping columns.
# NOTE(review): presumably the mapping supplies Province_Code, which later cells group by -- confirm.
offtake_df = offtake_df.merge(province_df, how='left', on='Province_Name')

## Keep Offline Only

In [None]:
# Drop rows whose Channel_Code is "EC"; every other channel is kept.
offtake_df = offtake_df.loc[offtake_df["Channel_Code"].ne("EC")]

In [None]:
# Total offtake value per month, province and brand, as a flat frame.
monthly_offtake = (
    offtake_df
    .groupby(['YearMonth', 'Province_Code', 'Brand'])['Offtake_SP_Value']
    .sum()
    .reset_index()
)

## Distribute HQ offtake to each province

In [None]:
# Province-level rows only (everything except the "HQ" pseudo-province).
nonhq_offtake = monthly_offtake.loc[monthly_offtake["Province_Code"].ne("HQ")]

# Non-HQ total per month and brand: the denominator of each province's share.
non_hq_all = (
    nonhq_offtake
    .groupby(['YearMonth', 'Brand'])['Offtake_SP_Value']
    .sum()
    .reset_index()
    .rename(columns={'Offtake_SP_Value': 'all_offtake'})
)

# Share of each province within its month/brand total; only the ratio is kept.
nonhq_offtake_split = nonhq_offtake.merge(non_hq_all, how='left', on=['YearMonth', 'Brand'])
nonhq_offtake_split["split_ratio"] = (
    nonhq_offtake_split["Offtake_SP_Value"].div(nonhq_offtake_split['all_offtake'])
)
nonhq_offtake_split = nonhq_offtake_split.drop(columns=["Offtake_SP_Value", 'all_offtake'])

In [None]:
# Take the HQ rows, drop their province label, and fan each one out to every
# province that has a split ratio for the same month and brand.
hq_offtake = (
    monthly_offtake
    .loc[monthly_offtake["Province_Code"].eq("HQ")]
    .drop(columns='Province_Code')
    .merge(nonhq_offtake_split, how='left', on=['YearMonth', 'Brand'])
)

# Each province receives HQ value x its share; the inputs are then discarded.
hq_offtake["hq_split_value"] = hq_offtake["Offtake_SP_Value"].mul(hq_offtake["split_ratio"])
hq_offtake = hq_offtake.drop(columns=["Offtake_SP_Value", 'split_ratio'])

In [None]:
# Put each province's allocated HQ share next to its own offtake.
# Left join: provinces without an HQ allocation keep NaN in hq_split_value.
all_offtake = nonhq_offtake.merge(
    hq_offtake,
    how='left',
    on=['YearMonth', 'Province_Code', 'Brand'],
)

In [None]:
# Province total = own offtake + allocated share of HQ offtake.
# hq_split_value is NaN wherever the left merge found no HQ row for that
# YearMonth/Brand (or the ratio was undefined). Count it as 0 so the province's
# own offtake is kept instead of becoming NaN and dropping out of later sums.
all_offtake["Offtake_Value"] = (
    all_offtake["Offtake_SP_Value"] + all_offtake["hq_split_value"].fillna(0)
)

## Add National Offtake

In [None]:
# National total per month and brand: sum of Offtake_Value over all provinces.
all_offtake_nati = (
    all_offtake
    .groupby(['YearMonth', 'Brand'])['Offtake_Value']
    .sum()
    .reset_index()
)

In [None]:
# Label the national rows with the pseudo-province "National" and order the
# columns as YearMonth, Province_Code, Brand, Offtake_Value.
all_offtake_nati = all_offtake_nati.assign(Province_Code="National")
all_offtake_nati = all_offtake_nati.loc[:, ["YearMonth", "Province_Code", "Brand", "Offtake_Value"]]

In [None]:
# Stack province rows and national rows into one result table.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; columns
# present only in all_offtake are NaN on the national rows, as before.
all_offtake_res = pd.concat([all_offtake, all_offtake_nati], ignore_index=True)

# Prepare for output

In [None]:
def format_yearmonth(year_month):
    """Reformat a "YYYYMM" string as "YYYY/MM/DD".

    The input is parsed with the "%Y%m" pattern; no day is parsed, so the
    day component of the result is always "01".
    """
    parsed = time.strptime(year_month, "%Y%m")
    return time.strftime("%Y/%m/%d", parsed)

In [None]:
# Derive a "YYYY/MM/DD" Date column from the YearMonth key.
all_offtake_res["Date"] = all_offtake_res['YearMonth'].apply(format_yearmonth)

In [None]:
def get_brand_name(brand):
    """Map a brand code to its display name.

    Parameters
    ----------
    brand : brand code; 'NC', 'AC' and 'AP' are recognised.

    Returns
    -------
    str
        'Nutrilon' for 'NC', 'Aptamil' for 'AC' or 'AP', and '' for any
        other value.
    """
    if brand == 'NC':
        return 'Nutrilon'
    # Membership test instead of combining two comparisons with bitwise `|`.
    if brand in ('AC', 'AP'):
        return "Aptamil"
    return ''

In [None]:
# Human-readable brand name for each brand code ('' for unrecognised codes).
all_offtake_res["Brand_Name"] = all_offtake_res['Brand'].apply(get_brand_name)

# Every row in this table reports the same KPI.
all_offtake_res['KPI'] = 'Danone Offtake'

In [None]:
# Keep only the output columns and give them their export names.
# The brand name is exported as "Brand" and the raw brand code as "SKU".
all_offtake_res = (
    all_offtake_res
    .loc[:, ['Date', 'Province_Code', 'Brand_Name', 'Brand', 'KPI', 'Offtake_Value']]
    .rename(columns={
        'Province_Code': 'Province',
        'Brand_Name': 'Brand',
        'Brand': 'SKU',
        'Offtake_Value': 'KPI_Value',
    })
)

## Save offtake KPI

In [None]:
# Write the KPI table into the configured temp folder rather than a hard-coded
# "../temp" literal, so the output location follows `config` the same way the
# input paths do. With the current config the path is unchanged.
output_file_path = config["project_path"] + "/" \
                        + config["temp_folder_path"] + "/" \
                        + "202001-05-offtake.xlsx"
all_offtake_res.to_excel(output_file_path, index=False)

## Create Offtake Split to Province Ratio

In [None]:
# National totals per month and brand, renamed so they can sit next to the
# province-level Offtake_Value after a merge.
nati_tmp = (
    all_offtake_nati[["YearMonth", "Brand", "Offtake_Value"]]
    .rename(columns={"Offtake_Value": "Total_Offtake"})
)

In [None]:
# Share of each province in the national offtake of the same month and brand.
nati_to_prov = all_offtake.merge(nati_tmp, how='left', on=["YearMonth", "Brand"])
nati_to_prov['Split_Ratio'] = nati_to_prov['Offtake_Value'].div(nati_to_prov['Total_Offtake'])

In [None]:
# Keep the key columns plus the ratio; the brand code is exported as "SKU".
nati_to_prov = (
    nati_to_prov[["YearMonth", "Province_Code", "Brand", "Split_Ratio"]]
    .rename(columns={"Brand": "SKU"})
)

In [None]:
# Write the split table into the configured temp folder rather than a hard-coded
# "../temp" literal, so the output location follows `config` the same way the
# input paths do. With the current config the path is unchanged.
output_file_path = config["project_path"] + "/" \
                        + config["temp_folder_path"] + "/" \
                        + "202001-05_brand_province_split.xlsx"
nati_to_prov.to_excel(output_file_path, index=False)

In [None]:
# National offtake per month across all brands and provinces.
nati_tmp = (
    all_offtake
    .groupby(['YearMonth'])['Offtake_Value']
    .sum()
    .reset_index()
    .rename(columns={"Offtake_Value": "Total_Offtake"})
)

# Share of each province/brand combination in that monthly national total.
nati_to_provbrand = all_offtake.merge(nati_tmp, how='left', on=["YearMonth"])
nati_to_provbrand['Split_Ratio'] = (
    nati_to_provbrand['Offtake_Value'].div(nati_to_provbrand['Total_Offtake'])
)

# Keep the key columns plus the ratio; the brand code is exported as "SKU".
nati_to_provbrand = (
    nati_to_provbrand[["YearMonth", "Province_Code", "Brand", "Split_Ratio"]]
    .rename(columns={"Brand": "SKU"})
)


In [None]:
# Write the split table into the configured temp folder rather than a hard-coded
# "../temp" literal, so the output location follows `config` the same way the
# input paths do. With the current config the path is unchanged.
output_file_path = config["project_path"] + "/" \
                        + config["temp_folder_path"] + "/" \
                        + "202001-05_national_provincebrand_split.xlsx"
nati_to_provbrand.to_excel(output_file_path, index=False)

In [None]:
# Offtake per month and province across all brands.
nati_tmp = (
    all_offtake
    .groupby(['YearMonth', 'Province_Code'])['Offtake_Value']
    .sum()
    .reset_index()
    .rename(columns={"Offtake_Value": "Total_Offtake"})
)

# Share of each brand within its province for the month.
prov_to_brand = all_offtake.merge(nati_tmp, how='left', on=["YearMonth", 'Province_Code'])
prov_to_brand['Split_Ratio'] = prov_to_brand['Offtake_Value'].div(prov_to_brand['Total_Offtake'])

# Keep the key columns plus the ratio; the brand code is exported as "SKU".
prov_to_brand = (
    prov_to_brand[["YearMonth", "Province_Code", "Brand", "Split_Ratio"]]
    .rename(columns={"Brand": "SKU"})
)


In [None]:
# Write the split table into the configured temp folder rather than a hard-coded
# "../temp" literal, so the output location follows `config` the same way the
# input paths do. With the current config the path is unchanged.
output_file_path = config["project_path"] + "/" \
                        + config["temp_folder_path"] + "/" \
                        + "202001-05_province_brand_split.xlsx"
prov_to_brand.to_excel(output_file_path, index=False)