In [1]:
import math
import warnings
import pandas as pd

In [2]:
def get_deliver_weekday(order_day, lead_time):
    """Return the ISO weekday (1 = Monday ... 7 = Sunday) on which an order arrives.

    Args:
        order_day: ISO weekday (1-7) on which the order is placed.
        lead_time: Number of days between placing the order and delivery.

    Returns:
        The delivery weekday as an int in the range 1-7.
    """
    # Wrap on a 0-based weekday so that a delivery landing on a Sunday
    # (order_day + lead_time == 14, 21, ...) yields 7 rather than 0, matching
    # the value already returned when the total is exactly 7.
    return (order_day + lead_time - 1) % 7 + 1


def get_week_shift(order_day, lead_time):
    """Return how many weeks after the order week the delivery takes place.

    Args:
        order_day: ISO weekday (1-7) on which the order is placed.
        lead_time: Number of days between placing the order and delivery.

    Returns:
        0 when the delivery falls in the same Monday-Sunday week as the order,
        1 when it falls in the following week, and so on. Together with
        get_deliver_weekday this satisfies
        ``7 * week_shift + delivery_weekday == order_day + lead_time``.
    """
    # Days 1-7 are week 0, 8-14 are week 1, ...  Subtracting 1 before the
    # integer division keeps a Sunday delivery (14, 21, ...) in its own week
    # instead of counting one week too many.  max() keeps the result at 0 for
    # any total that never leaves the order week.
    return max((order_day + lead_time - 1) // 7, 0)


def get_conholding(ds_supplier_code):
    """Translate a DS supplier code into its con_holding value.

    Returns "002" for "06WC" or "1GTB", "693" for "ZB09", "700" for "0031" or
    "N389", and an empty string for every other code.
    """
    holding_groups = (
        ("002", ("06WC", "1GTB")),
        ("693", ("ZB09",)),
        ("700", ("0031", "N389")),
    )
    for holding, supplier_codes in holding_groups:
        if ds_supplier_code in supplier_codes:
            return holding
    # Unknown supplier: no con_holding value.
    return ""


def get_qty_per_unit(row):
    """Return the quantity contained in one order unit for a store item row.

    When ``row['order_by']`` is 'S' the unit is a single pack, so the result is
    ``qty_per_pack``; for any other value it is ``qty_per_pack * pack_per_box``.
    Both quantities are converted with ``int()`` before use.
    """
    order_by_is_s = row['order_by'] == 'S'
    units = int(row['qty_per_pack'])
    if not order_by_is_s:
        units = units * int(row['pack_per_box'])
    return units

def get_store_to_dc_day(row):
    """Return the row's lead time reduced by a fixed number of days.

    Three days are deducted when ``row['risk_item_unilever']`` is 'Y', two days
    otherwise. ``row["lead_time"]`` is converted with ``int()`` first.
    """
    days_deducted = 3 if row['risk_item_unilever'] == 'Y' else 2
    return int(row["lead_time"]) - days_deducted

# Read Excel

In [3]:
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation notices — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the 'Detail' sheet with the first row as header and every cell read as
# text, then replace missing cells with empty strings.
excel_input = pd.read_excel('East Parameter 20190820.xlsx', 'Detail', header=0, dtype=str).fillna("")

# Get store item list

In [4]:
# Keep only the parameter columns needed downstream, drop exact duplicate rows
# and renumber the index from zero.
store_items = (
    excel_input.loc[:, [
        "Store",
        "Dept",
        "Item code",
        "sub code",
        "CN Name",
        "EN Name",
        "Store Status",
        "Main Supplier",
        "DS Supplier",
        "Repl type",
        "Order day",
        "LT",
        "Min",
        "Order by",
        "Qty/Pack",
        "Pack/Box",
        "DC Status",
        "Rotation",
        "Seasonal",
        "Risk Item (Unilever)",
    ]]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [5]:
# Rename the Excel headers to snake_case names; the order must match the
# column order selected when store_items was built.
store_items.columns = [
    "store_code",          # Store
    "dept_code",           # Dept
    "item_code",           # Item code
    "sub_code",            # sub code
    "cn_name",             # CN Name
    "en_name",             # EN Name
    "store_status",        # Store Status
    "dc_supplier_code",    # Main Supplier
    "ds_supplier_code",    # DS Supplier
    "repl_type",           # Repl type
    "order_day",           # Order day
    "lead_time",           # LT
    "min_stock",           # Min
    "order_by",            # Order by
    "qty_per_pack",        # Qty/Pack
    "pack_per_box",        # Pack/Box
    "dc_status",           # DC Status
    "rotation",            # Rotation
    "seasonal",            # Seasonal
    "risk_item_unilever",  # Risk Item (Unilever)
]

In [6]:
# Keep only the first three characters of each store code.
store_items["store_code"] = store_items["store_code"].str.slice(0, 3)

In [7]:
# Derive qty_per_unit row by row with get_qty_per_unit.
store_items['qty_per_unit'] = store_items.apply(get_qty_per_unit, axis="columns")

In [8]:
# Look up the con_holding value of every row from its DS supplier code.
store_items["con_holding"] = store_items.apply(
    lambda item: get_conholding(item["ds_supplier_code"]),
    axis="columns",
)

# Build the order-to-delivery mapping for on-stock items

In [9]:
# Ordering pattern per store/department: order days, lead time and rotation.
order_frequency = store_items.loc[
    :, ["store_code", "dept_code", "order_day", "lead_time", "rotation"]
]

In [10]:
# Drop rotation "X" rows (handled separately in the cross-docking section),
# then remove duplicate patterns and renumber the index.
order_frequency = (
    order_frequency.loc[order_frequency["rotation"] != "X"]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [11]:
# Expand every ordering pattern into one row per order weekday.
# `order_day` is a string of ISO weekday digits (e.g. "246" = Tue/Thu/Sat), so
# a weekday is an order day exactly when its digit occurs in that string.
order_frequency_mapping = []

for index, freq in order_frequency.iterrows():
    # Walk the week in order so rows come out Mon..Sun for each pattern.
    for iso_weekday, weekday_short in enumerate(
            ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"), start=1):
        weekday_digit = str(iso_weekday)
        if weekday_digit in freq.order_day:
            order_frequency_mapping.append([
                freq.store_code, freq.dept_code,
                freq.rotation, freq.lead_time, freq.order_day,
                weekday_digit, weekday_short,
            ])

In [12]:
# Turn the list of per-weekday rows into a DataFrame; no column names are
# passed here, they are assigned in the following cell.
order_days_mapping = pd.DataFrame(order_frequency_mapping)

In [13]:
# Name the columns in the same order the row lists were assembled.
order_days_mapping.columns = [
    "store_code",
    "dept_code",
    "rotation",
    "lead_time",
    "order_days",
    "order_iso_weekday",
    "order_weekday_short",
]

In [14]:
# Weekday on which each order is delivered (weekday and lead time are stored
# as text, hence the int() conversions).
order_days_mapping["delivery_iso_weekday"] = order_days_mapping.apply(
    lambda order: get_deliver_weekday(
        int(order["order_iso_weekday"]), int(order["lead_time"])),
    axis="columns",
)

# Week shift of the delivery relative to the order, from get_week_shift.
order_days_mapping["week_shift"] = order_days_mapping.apply(
    lambda order: get_week_shift(
        int(order["order_iso_weekday"]), int(order["lead_time"])),
    axis="columns",
)

# Build the order mapping for cross-docking items

In [15]:
# Item-level ordering pattern: unlike the on-stock mapping this keeps the item
# and sub code plus the Unilever risk flag.
order_x_frequency = store_items.loc[
    :,
    [
        "store_code", "dept_code", "item_code", "sub_code",
        "order_day", "lead_time", "rotation", "risk_item_unilever",
    ],
]

In [16]:
# Keep only rotation "X" rows, remove duplicates and renumber the index.
order_x_frequency = (
    order_x_frequency.loc[order_x_frequency["rotation"] == "X"]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [17]:
# Lead time minus the fixed deduction applied by get_store_to_dc_day, per row.
order_x_frequency["dc_to_store_time"] = order_x_frequency.apply(
    get_store_to_dc_day, axis="columns")

In [18]:
# Expand every cross-docking item pattern into one row per order weekday.
# `order_day` is a string of ISO weekday digits (e.g. "14" = Mon/Thu), so a
# weekday is an order day exactly when its digit occurs in that string.
order_x_frequency_mapping = []

for index, freq in order_x_frequency.iterrows():
    # Walk the week in order so rows come out Mon..Sun for each item.
    for iso_weekday, weekday_short in enumerate(
            ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"), start=1):
        weekday_digit = str(iso_weekday)
        if weekday_digit in freq.order_day:
            order_x_frequency_mapping.append([
                freq.store_code, freq.dept_code, freq.item_code, freq.sub_code,
                freq.lead_time, freq.dc_to_store_time, freq.order_day,
                weekday_digit, weekday_short,
            ])

In [19]:
# Turn the list of per-weekday rows into a DataFrame; no column names are
# passed here, they are assigned in the following cell.
xdock_order_mapping = pd.DataFrame(order_x_frequency_mapping)

In [20]:
# Name the columns in the same order the row lists were assembled.
xdock_order_mapping.columns = [
    "store_code",
    "dept_code",
    "item_code",
    "sub_code",
    "lead_time",
    "dc_to_store_time",
    "order_days",
    "order_iso_weekday",
    "order_weekday_short",
]

In [21]:
# Preview the first rows of the store item table before it is written to Hive.
store_items.head()

Unnamed: 0,store_code,dept_code,item_code,sub_code,cn_name,en_name,store_status,dc_supplier_code,ds_supplier_code,repl_type,...,min_stock,order_by,qty_per_pack,pack_per_box,dc_status,rotation,seasonal,risk_item_unilever,qty_per_unit,con_holding
0,101,12,300859,1,Olay美肌清爽沐浴露亮爽珍珠200毫升,Lipton Peach Mango Black S10 24X10X1.8g,Active,KXB1,ZB09,3,...,2,B,24,1,Active,X,No,N,24,693
1,101,12,300863,1,Olay美肌清爽沐浴露亮爽珍珠360ml,,Active,KXB1,ZB09,1,...,2,B,12,1,Stop,X,No,N,12,693
2,101,12,300907,1,Olay美肌清爽沐浴露亮爽珍珠900毫升,,Active,KXB1,ZB09,3,...,2,B,6,1,Active,X,No,N,6,693
3,101,12,300923,1,Olay美肌清爽沐浴露舒缓清爽200毫升,,Stop,KXB1,ZB09,3,...,2,B,24,1,Stop,X,No,N,24,693
4,101,12,300983,1,Olay美肌清爽沐浴露舒缓清爽360毫升,,Stop,KXB1,ZB09,1,...,2,B,12,1,Stop,X,No,N,12,693


In [22]:
# Preview the on-stock mapping, including the derived delivery weekday and week shift.
order_days_mapping.head()

Unnamed: 0,store_code,dept_code,rotation,lead_time,order_days,order_iso_weekday,order_weekday_short,delivery_iso_weekday,week_shift
0,101,12,A,1,246,2,Tue,3,0
1,101,12,A,1,246,4,Thu,5,0
2,101,12,A,1,246,6,Sat,7,0
3,101,12,B,2,25,2,Tue,4,0
4,101,12,B,2,25,5,Fri,7,0


In [23]:
# Preview the cross-docking mapping (one row per item and order weekday).
xdock_order_mapping.head()

Unnamed: 0,store_code,dept_code,item_code,sub_code,lead_time,dc_to_store_time,order_days,order_iso_weekday,order_weekday_short
0,101,12,300859,1,3,1,14,1,Mon
1,101,12,300859,1,3,1,14,4,Thu
2,101,12,300863,1,3,1,14,1,Mon
3,101,12,300863,1,3,1,14,4,Thu
4,101,12,300907,1,3,1,14,1,Mon


In [24]:
from load_spark import load_spark
from pyspark.sql import HiveContext

# Obtain a Spark context from the project's load_spark helper.
# NOTE(review): the string is presumably the Spark application name — confirm in load_spark.
sc = load_spark("Load store parameter")

# Hive-backed SQL context used below to create DataFrames and save tables.
# NOTE(review): HiveContext is deprecated since Spark 2.0 in favour of
# SparkSession.builder.enableHiveSupport() — check the cluster's Spark version before changing.
sqlc = HiveContext(sc)

In [25]:
# Hand the pandas frame over to Spark.
store_items_df = sqlc.createDataFrame(store_items)

# Cast the quantity, stock and lead-time columns to Spark integers, one
# column at a time and in the same order as before.
for numeric_column in ("qty_per_unit", "pack_per_box", "qty_per_pack", "min_stock", "lead_time"):
    store_items_df = store_items_df.withColumn(
        numeric_column, store_items_df[numeric_column].cast("Int"))

# Replace the existing table with the freshly loaded parameters.
store_items_df.write.mode("overwrite").saveAsTable("vartefact.forecast_store_item_details")

In [26]:
# Convert the on-stock mapping to a Spark DataFrame and overwrite its table.
# NOTE(review): unlike the store item table, no columns are cast here, so
# lead_time and order_iso_weekday keep the types Spark infers from the pandas
# frame (they were read as text) — confirm this is what consumers expect.
order_days_mapping_df = sqlc.createDataFrame(order_days_mapping)
order_days_mapping_df.write.mode("overwrite").saveAsTable("vartefact.forecast_onstock_order_delivery_mapping")

In [27]:
# Convert the cross-docking mapping to a Spark DataFrame and overwrite its table.
# NOTE(review): no columns are cast before writing, so lead_time keeps the
# type Spark infers from the pandas frame (it was read as text) — confirm.
xdock_order_mapping_df = sqlc.createDataFrame(xdock_order_mapping)
xdock_order_mapping_df.write.mode("overwrite").saveAsTable("vartefact.forecast_xdock_order_mapping")

In [28]:
# Shut down the Spark context now that all three tables have been written.
sc.stop()