In [None]:
import pandas as pd
import operator
import time
import numpy as np

In [None]:
# Notebook settings: folder layout plus the input and output file names.
config = {
    # folders (joined with "/" wherever a file path is built)
    "project_path": "..",
    "data_folder_path": "regional_data",
    "temp_folder_path": "regional_temp",

    # input files
    # alternative sell-out extract, kept for reference:
    #   'DW_Fact_Sellout_NoneSplit_202004-05.csv'
    "sellout_input_files": 'DW_Fact_Sellout_NoneSplit_202006.csv',
    "product_file": "DW_DIM_Product_20200609.csv",
    "customer_input_file": ["2020 MDM.xlsx"],  # a list: one entry per workbook
    "grade_channel_file": "grade_channel.xlsx",
    "customer_acct_file": "customer_code_account_name.xlsx",

    # output files
    "mtd_out_file": "20200623_MTD_KA_sellout.xlsx",
}

In [None]:
# Column names of the sell-out CSV, in file order.
sellout_columns = [
    'YearMonth',
    'Data_Source',
    'Bill_Date',
    'Bill_Type',
    'Distributor_Code_DMS',
    'Distributor_Code',
    'Client_Code',
    'Client_Name',
    'EA_Platform',
    'Ship_To_Code',
    'Inventory_location',
    'Customer_Code',
    'Product_Code',
    'Sellout_Qty',
    'Sellout_SP_Value',
    'Sellout_Channel_Value',
    'ETL_DateTime',
    'Data_CreationTime',
]

# Subset of those columns that the report needs.
# NOTE(review): no visible cell applies this subset to the loaded data —
# confirm whether it is still wanted.
select_columns = [
    'YearMonth',
    'Distributor_Code',
    'Customer_Code',
    'Product_Code',
    'Sellout_SP_Value',
]

# Empty placeholder frame that carries only the needed columns.
sellout_df = pd.DataFrame(columns=select_columns)

In [None]:
# Build "<project>/<data folder>/<sell-out file>".
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    config["sellout_input_files"],
])

# The CSV has no header row: read every field as text, turn missing values
# into the string '0', then attach the schema column names.
input_df = pd.read_csv(input_file_path, dtype=str, header=None).fillna('0')
input_df.columns = sellout_columns

In [None]:
#sellout_df = input_df.loc[(input_df['YearMonth'] == '202005')].copy()

In [None]:
# Keep rows of month 202006 billed on or before 20200623.
# NOTE(review): Bill_Date is compared as text, which presumably relies on a
# zero-padded YYYYMMDD format — confirm against the source file.
in_report_month = input_df['YearMonth'] == '202006'
billed_by_cutoff = input_df['Bill_Date'] <= '20200623'
sellout_df = input_df.loc[in_report_month & billed_by_cutoff].copy()

In [None]:
def remove_starting_zero(customer_code):
    """Strip a single leading "0" from a customer code.

    Only the first character is removed, so "00123" becomes "0123" and
    "0" becomes "".

    Values that are not strings (for example the NaN pandas uses for an
    empty cell, or None) are returned unchanged instead of raising
    AttributeError, so the function is safe to apply to a column that
    contains missing entries.
    """
    if isinstance(customer_code, str) and customer_code.startswith("0"):
        return customer_code[1:]
    return customer_code

In [None]:
# The sell-out value becomes numeric; the key columns are kept as text.
sellout_df["Sellout_SP_Value"] = sellout_df["Sellout_SP_Value"].astype("float")
for key_column in ["Product_Code", "Customer_Code", "Distributor_Code", "YearMonth"]:
    sellout_df[key_column] = sellout_df[key_column].astype("str")

# Strip one leading "0" from every customer code.
sellout_df["Customer_Code"] = sellout_df["Customer_Code"].apply(remove_starting_zero)

# Read product codes with their brand-stage and brand information

In [None]:

# Columns taken from the product dimension file.
product_brand_columns = ["Product_Code", "Product_Short_NameEN"]

input_file_path = config["project_path"] + "/" + config["data_folder_path"] + "/" + config["product_file"]

product_brand_input_df = pd.read_csv(input_file_path, header=0, dtype=str)[product_brand_columns]

# One row per distinct (product code, short name) pair with both values
# present. The short name serves as the brand stage and its first two
# characters as the brand. assign() builds the Brand column on a new frame,
# so nothing is written into the result of dropna().
product_brand_output_df = (
    product_brand_input_df
    .drop_duplicates()
    .rename(columns={"Product_Short_NameEN": "Brand_Stage"})
    .dropna()
    .assign(Brand=lambda products: products["Brand_Stage"].str.slice(0, 2))
)

# Add special product codes for brands and all brands
special_product_codes = pd.DataFrame(
    [
        {"Product_Code": "10332781", "Brand_Stage": "NC", "Brand": "NC"},
        {"Product_Code": "10332872", "Brand_Stage": "AC", "Brand": "AC"},
        {"Product_Code": "10332873", "Brand_Stage": "AP", "Brand": "AP"},
        {"Product_Code": "10332778", "Brand_Stage": "ELN", "Brand": "ELN"},
    ],
    columns=["Product_Code", "Brand_Stage", "Brand"],
)

# DataFrame.append was removed in pandas 2.0. One pd.concat adds all four
# rows at once; ignore_index=True renumbers the result 0..n-1, exactly as the
# chained append(..., ignore_index=True) calls did.
product_brand_output_df = pd.concat(
    [product_brand_output_df, special_product_codes],
    ignore_index=True,
)

In [None]:
# Attach brand stage and brand to every sell-out row, then keep only the
# three reported brands. Rows without a product match have no Brand and are
# filtered out as well.
reported_brands = ["AC", "NC", "AP"]
sellout_with_brand = sellout_df.merge(product_brand_output_df, how="left", on="Product_Code")
sellout_with_brand = sellout_with_brand[sellout_with_brand["Brand"].isin(reported_brands)]

In [None]:
# Total sell-out value per month, distributor, customer, brand and brand stage.
brand_group_keys = ['YearMonth', 'Distributor_Code', 'Customer_Code', 'Brand', 'Brand_Stage']
sellout_with_brand_agg = (
    sellout_with_brand
    .groupby(by=brand_group_keys)['Sellout_SP_Value']
    .sum()
    .reset_index()
)

# Read Customer information

In [None]:
# Columns to pull from each customer workbook.
select_columns = [
    'YearMonth',
    'Customer_Code',
    'Grade_Code',
    'region_name',
    'subregion_name',
    'province_name',
]

# Start from an empty frame with that column layout.
customer_input_df = pd.DataFrame(columns=select_columns)

In [None]:
# Load every configured customer workbook (all cells as text, only the
# selected columns) and stack them in a single step.
# DataFrame.append was removed in pandas 2.0. Rebuilding the frame from the
# files also means re-running this cell does not accumulate duplicate rows.
# The per-file frames are collected in a list instead of reusing the name
# input_df, which still holds the raw sell-out data.
customer_frames = []
for customer_input_file in config["customer_input_file"]:
    input_file_path = config["project_path"] + "/" \
                        + config["data_folder_path"] + "/" \
                        + customer_input_file
    customer_frames.append(pd.read_excel(input_file_path, dtype=str)[select_columns])

if customer_frames:
    customer_input_df = pd.concat(customer_frames)
else:
    # pd.concat rejects an empty list; with no files configured, keep an
    # empty frame that has the expected columns.
    customer_input_df = pd.DataFrame(columns=select_columns)

In [None]:
# Rename the source columns to the report's naming convention.
# rename() returns a new frame, so customer_input_df keeps its original
# column names. A plain `customer_df = customer_input_df` would alias the
# same object and rename both, which breaks a re-run of the load cell.
# Mapping by name also does not depend on the column order.
customer_df = customer_input_df.rename(columns={
    'Grade_Code': 'Grade',
    'region_name': 'Region_Name',
    'subregion_name': 'Subregion_Name',
    'province_name': 'Province_Name',
})

In [None]:
# Left join keeps every aggregated sell-out row; the customer attributes stay
# empty where no (YearMonth, Customer_Code) match exists.
customer_join_keys = ['YearMonth', 'Customer_Code']
sellout_with_cust = sellout_with_brand_agg.merge(customer_df, how='left', on=customer_join_keys)

In [None]:
# Grade/channel workbook, read entirely as text.
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    config["grade_channel_file"],
])

grade_df = pd.read_excel(input_file_path, dtype=str)

In [None]:
# Join the grade workbook on Grade; rows left without a Channel after the
# join are labelled 'Unassigned'.
sellout_with_grade = (
    sellout_with_cust
    .merge(grade_df, how='left', on=['Grade'])
    .fillna({'Channel': 'Unassigned'})
)

In [None]:
# Display the rows that ended up without a channel.
sellout_with_grade.loc[sellout_with_grade['Channel'].eq('Unassigned')]

In [None]:
# Customer-code/account-name workbook, read entirely as text.
input_file_path = "/".join([
    config["project_path"],
    config["data_folder_path"],
    config["customer_acct_file"],
])

acct_df = pd.read_excel(input_file_path, dtype=str)
# Strip one leading "0" so the codes line up with the sell-out customer codes.
acct_df['Customer_Code'] = acct_df['Customer_Code'].apply(remove_starting_zero)

In [None]:
# Join the account workbook on Customer_Code; rows left without an
# Account_Name after the join are labelled 'Unassigned'.
sellout_with_acct = (
    sellout_with_grade
    .merge(acct_df, how='left', on=['Customer_Code'])
    .fillna({'Account_Name': 'Unassigned'})
)

In [None]:
# Display the rows that ended up without an account name.
sellout_with_acct.loc[sellout_with_acct['Account_Name'].eq('Unassigned')]

# GT-DC Logic

In [None]:
def change_region_name(row):
    """Return "GT-DC" for a row in the GT-DC channel, else the row's Region_Name.

    `row` is any mapping-like object indexable by "Channel" and "Region_Name".
    """
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Region_Name"]

# Row-by-row pass: rows in the GT-DC channel get "GT-DC" as their region name.
sellout_with_acct["Region_Name"] = sellout_with_acct.apply(
    change_region_name, axis="columns"
)


def change_subregion_name(row):
    """Return "GT-DC" for a row in the GT-DC channel, else the row's Subregion_Name.

    `row` is any mapping-like object indexable by "Channel" and "Subregion_Name".
    """
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Subregion_Name"]

# Row-by-row pass: rows in the GT-DC channel get "GT-DC" as their subregion name.
sellout_with_acct["Subregion_Name"] = sellout_with_acct.apply(
    change_subregion_name, axis="columns"
)


def change_province_Name(row):
    """Return "GT-DC" for a row in the GT-DC channel, else the row's Province_Name.

    `row` is any mapping-like object indexable by "Channel" and "Province_Name".
    """
    return "GT-DC" if row["Channel"] == "GT-DC" else row["Province_Name"]

# Row-by-row pass: rows in the GT-DC channel get "GT-DC" as their province name.
sellout_with_acct["Province_Name"] = sellout_with_acct.apply(
    change_province_Name, axis="columns"
)

# National and regional aggregation

In [None]:
# Column layout of the MTD output.
mtd_columns = [
    'Brand',
    'Geo_Name',
    'Region_Name',
    'Channel',
    'Grade',
    'Account_Name',
    'Sellout_SP_Value',
]

# Empty frame with that layout.
mtd_df = pd.DataFrame(columns=mtd_columns)

In [None]:
def _aggregate_sellout(frame, by_brand, by_region, by_account):
    """Sum Sellout_SP_Value at one aggregation level and label the result.

    Parameters
    ----------
    frame : DataFrame
        Sell-out rows with Brand, Region_Name, Channel, Grade, Account_Name
        and Sellout_SP_Value columns.
    by_brand : bool
        True: one row per Brand. False: all brands together, labelled 'TTL'.
    by_region : bool
        True: one row per Region_Name. False: all regions together, with
        Region_Name set to '全国'.
    by_account : bool
        True: one row per Account_Name. False: grade-level rows whose
        Account_Name repeats the Grade.

    Returns
    -------
    DataFrame
        A new frame holding the group keys, the summed value and the label
        columns. Geo_Name always mirrors Region_Name.
    """
    keys = []
    if by_brand:
        keys.append('Brand')
    if by_region:
        keys.append('Region_Name')
    keys += ['Channel', 'Grade']
    if by_account:
        keys.append('Account_Name')

    piece = frame.groupby(by=keys)['Sellout_SP_Value'].sum().reset_index()
    if not by_brand:
        piece['Brand'] = 'TTL'
    if not by_region:
        piece['Region_Name'] = '全国'
    piece['Geo_Name'] = piece['Region_Name']
    if not by_account:
        piece['Account_Name'] = piece['Grade']
    return piece


# Eight aggregation levels, in this order: national then regional; within
# each, account level then grade level; within each, per brand then 'TTL'.
# NOTE(review): groupby leaves out rows whose key columns are missing, so
# rows without a Grade (and, at regional level, without a Region_Name) do not
# contribute to any total — confirm this is intended.
mtd_pieces = [
    _aggregate_sellout(sellout_with_acct, by_brand, by_region, by_account)
    for by_region in (False, True)
    for by_account in (True, False)
    for by_brand in (True, False)
]

# DataFrame.append was removed in pandas 2.0. Building mtd_df from scratch
# with a single concat also keeps this cell re-runnable, since appending to
# the existing mtd_df doubled the rows on every re-run. Selecting mtd_columns
# fixes the output column order.
mtd_df = pd.concat(mtd_pieces)[mtd_columns]

In [None]:
# Build "<project>/<temp folder>/<MTD output file>".
output_file_path = "/".join([
    config["project_path"],
    config["temp_folder_path"],
    config["mtd_out_file"],
])

# Write the report with a header row and without the index column.
mtd_df.to_excel(output_file_path, index=False, header=True)