In [1]:
import pandas as pd
import operator
import time
import numpy as np

In [2]:
# Run configuration: folder layout plus the names of the input and output files.
config = {
    "year_list": [2018, 2019],
    "project_path": "..",
    "data_folder_path": "data",
    "temp_folder_path": "temp",

    # input files
    "sellout_input_files": ["DW_Fact_Sellout_NoneSplit_2017-2019_20191219.csv"],
    "sellout_20191112_input_files": "DW_Fact_Sellout_NoneSplit_20191112_20200120.csv",
    "product_file": "DW_DIM_Product_20191125.csv",

    # output files ({0} is a placeholder left for str.format)
    "sell_out_file": "sellout_{0}.csv",
}

In [3]:
# Column names of the raw sell-out extract, in file order. The extracts are read
# with header=None, so these names are assigned positionally after loading.
sellout_columns = [
    'YearMonth',
    'Data_Source',
    'Bill_Date',
    'Bill_Type',
    'Distributor_Code_DMS',
    'Distributor_Code',
    'Client_Code',
    'Client_Name',
    'EA_Platform',
    'Ship_To_Code',
    'Inventory_location',
    'Customer_Code',
    'Product_Code',
    'Sellout_Qty',
    'Sellout_SP_Value',
    'Sellout_Channel_Value',
    'ETL_DateTime',
    'Data_CreationTime',
]

# Subset of the raw columns that the rest of the notebook actually uses.
select_columns = [
    'YearMonth',
    'Distributor_Code',
    'Customer_Code',
    'Product_Code',
    'Sellout_SP_Value',
]

# Empty accumulator with the selected columns; the loading cells fill it.
sellout_df = pd.DataFrame(columns=select_columns)

In [4]:
# Load every raw sell-out extract listed in config["sellout_input_files"] and
# stack them into sellout_df, keeping only the columns named in select_columns.
#
# The per-file frames are collected in a list and concatenated once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside the loop re-copies all accumulated rows on every iteration.
# Rebuilding sellout_df from this list also makes the cell safe to re-run
# without duplicating rows.
sellout_frames = []
for sellout_input_file in config["sellout_input_files"]:
    input_file_path = "/".join(
        [config["project_path"], config["data_folder_path"], sellout_input_file]
    )
    # Read without a header row and with every column as str; missing cells become '0'.
    input_df = pd.read_csv(input_file_path, header=None, dtype=str).fillna('0')
    input_df.columns = sellout_columns
    # If the first row is just the column names (the file carried its own
    # header line), drop it so it is not treated as data.
    if not input_df.empty and list(input_df.iloc[0]) == list(sellout_columns):
        input_df = input_df.iloc[1:]
    input_df = input_df[select_columns]
    sellout_frames.append(input_df)

if sellout_frames:
    sellout_df = pd.concat(sellout_frames, ignore_index=True)
else:
    # No input files configured: keep an empty frame with the expected columns.
    sellout_df = pd.DataFrame(columns=select_columns)

In [5]:
# The files in sellout_input_files contain only incomplete 201912 data,
# so every row of that month is removed here.
sellout_df = sellout_df[sellout_df["YearMonth"].ne("201912")]

In [6]:
# Add the complete 201912 data from a separate file.
#
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in 2.0.

input_file_path = "/".join(
    [
        config["project_path"],
        config["data_folder_path"],
        config["sellout_20191112_input_files"],
    ]
)

# Read without a header row and with every column as str; missing cells become '0'.
input_df = pd.read_csv(input_file_path, header=None, dtype=str).fillna('0')
input_df.columns = sellout_columns
# If the first row is just the column names, drop it so it is not treated as data.
if not input_df.empty and list(input_df.iloc[0]) == list(sellout_columns):
    input_df = input_df.iloc[1:]

# Keep only the needed columns and only the 201912 rows of this file.
input_df = input_df.loc[input_df["YearMonth"] == "201912", select_columns]

# Any 201912 rows already present are dropped before adding the complete month,
# so re-running this cell does not append December a second time.
sellout_df = pd.concat(
    [sellout_df[sellout_df["YearMonth"] != "201912"], input_df],
    ignore_index=True,
)

In [7]:
# Everything was loaded as text: cast the value column to float for the
# aggregations and make the key columns explicit strings.
sellout_df = sellout_df.astype(
    {
        "Sellout_SP_Value": "float",
        "Product_Code": "str",
        "Customer_Code": "str",
        "Distributor_Code": "str",
        "YearMonth": "str",
    }
)

In [8]:
# Read product codes and the related brand-stage / brand information.
product_brand_columns = ["Product_Code", "Product_Short_NameEN"]

input_file_path = "/".join(
    [config["project_path"], config["data_folder_path"], config["product_file"]]
)

product_brand_input_df = pd.read_csv(input_file_path, header=0, dtype=str)[product_brand_columns]

# De-duplicate, rename the short-name column to Brand_Stage, and drop rows
# where either the product code or the brand stage is missing.
product_brand_output_df = (
    product_brand_input_df
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={"Product_Short_NameEN": "Brand_Stage"})
    .dropna()
)

# Brand is the first two characters of Brand_Stage.
product_brand_output_df["Brand"] = product_brand_output_df["Brand_Stage"].str.slice(0, 2)

# Add special product codes for brands and all brands.
# They are added with a single pd.concat: DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0.
# NOTE(review): if one of these codes already exists in the product file, the
# later left merge on Product_Code would duplicate sell-out rows — confirm.
special_product_rows = pd.DataFrame(
    [
        {"Product_Code": "10332781", "Brand_Stage": "NC", "Brand": "NC"},
        {"Product_Code": "10332872", "Brand_Stage": "AC", "Brand": "AC"},
        {"Product_Code": "10332873", "Brand_Stage": "AP", "Brand": "AP"},
        {"Product_Code": "10332778", "Brand_Stage": "ELN", "Brand": "ELN"},
    ],
    columns=["Product_Code", "Brand_Stage", "Brand"],
)

product_brand_output_df = pd.concat(
    [product_brand_output_df, special_product_rows], ignore_index=True
)

In [9]:
# Attach brand information to every sell-out row (left join on Product_Code),
# then keep only the rows whose Brand is AC, NC or AP.
sellout_with_brand = sellout_df.merge(product_brand_output_df, how="left", on="Product_Code")
sellout_with_brand = sellout_with_brand.loc[
    sellout_with_brand["Brand"].isin(["AC", "NC", "AP"])
]

In [10]:
# Total Sellout_SP_Value per (YearMonth, Brand), exposed as 'sellout_by_brand'.
sellout_with_brand_agg = (
    sellout_with_brand
    .groupby(['YearMonth', 'Brand'])['Sellout_SP_Value']
    .sum()
    .reset_index()
    .rename(columns={'Sellout_SP_Value': 'sellout_by_brand'})
)

In [11]:
# Total Sellout_SP_Value per YearMonth over the retained brands, exposed as 'sellout_all'.
sellout_monthly = (
    sellout_with_brand
    .groupby(['YearMonth'])['Sellout_SP_Value']
    .sum()
    .reset_index()
    .rename(columns={'Sellout_SP_Value': 'sellout_all'})
)

In [12]:
# Put the monthly total next to each per-brand total (one row per YearMonth/Brand).
sellout_split = sellout_monthly.merge(sellout_with_brand_agg, how="left", on="YearMonth")

In [13]:
# Share of each brand in the month's total sell-out value.
sellout_split["split_ratio"] = sellout_split["sellout_by_brand"].div(sellout_split["sellout_all"])

In [14]:
# Export the per-brand split table; the file name has no folder, so it is
# written relative to the current working directory.
sellout_split.to_excel(excel_writer="sellout_all_by_brand_split.xlsx")

In [15]:
# Monthly split ratio of brand AC.
ac_sellout = (
    sellout_split
    .loc[sellout_split["Brand"] == "AC", ["YearMonth", "split_ratio"]]
    .rename(columns={"split_ratio": "AC_ratio"})
)

# Monthly split ratio of brand AP.
ap_sellout = (
    sellout_split
    .loc[sellout_split["Brand"] == "AP", ["YearMonth", "split_ratio"]]
    .rename(columns={"split_ratio": "AP_ratio"})
)

# Outer join keeps months where only one of the two brands is present;
# the missing ratio is then treated as 0.
an_sellout = ac_sellout.merge(ap_sellout, how="outer", on="YearMonth").fillna(0)

# AN ratio = AC ratio + AP ratio; only the month and the combined ratio are kept.
an_sellout = an_sellout.assign(
    AN_ratio=an_sellout["AC_ratio"] + an_sellout["AP_ratio"]
)[["YearMonth", "AN_ratio"]]

In [16]:
# Load the investment workbook from the data folder; missing Spending_value
# cells are filled with 0 (other columns are left as read).
input_file_path = "/".join(
    [config["project_path"], config["data_folder_path"], "Natioanl all investment.xlsx"]
)

input_cost_df = pd.read_excel(input_file_path)
input_cost_df = input_cost_df.fillna({"Spending_value": 0})

In [17]:
# Derive the YYYYMM key from Date, in the same text format as the sell-out YearMonth.
input_cost_df["YearMonth"] = input_cost_df["Date"].map(lambda day: day.strftime("%Y%m"))

In [18]:
# Attach the monthly AN ratio to every cost row; months without a ratio get NaN.
an_cost_df = input_cost_df.merge(an_sellout, how="left", on="YearMonth")

In [19]:
# Portion of each spending amount attributed to AN: spending value times the month's AN ratio.
an_cost_df["an_value"] = an_cost_df["Spending_value"].mul(an_cost_df["AN_ratio"])

In [20]:
# Label every row with the constant Brand / SKU values, then keep only rows
# before 202001 (YearMonth is YYYYMM text, so string comparison orders by date).
an_cost_df = an_cost_df.assign(Brand="Aptamil", SKU="AN")
an_cost_df = an_cost_df[an_cost_df["YearMonth"].lt("202001")]

In [21]:
# Keep the output columns in their final order and publish the allocated
# amount (an_value) under the name 'Spending_Value'.
output_columns = [
    'Date', 'Province',
    'Cost_type_1', 'Cost_type_2', 'Cost_type_3', 'Cost_type_4',
    'AIP', 'Brand', 'SKU', 'an_value',
]
an_cost_df = an_cost_df[output_columns].rename(columns={'an_value': 'Spending_Value'})

In [23]:
# Write the allocated AN cost table to the temp folder, without the index column.
# The path is built from config (project_path / temp_folder_path) like every
# other path in this notebook instead of hard-coding "../temp"; with the current
# config it resolves to the same location as before.
output_file_path = "/".join(
    [
        config["project_path"],
        config["temp_folder_path"],
        "2018 2019 Natioanl all investment workbook.xlsx",
    ]
)
an_cost_df.to_excel(output_file_path, index=False)