In [86]:
import json
import numpy as np
import pandas as pd
import lxml

# Import SAP data from 15 December 2021

In [87]:
# Load the SAP extract and parse the BaseDate column (yyyy-mm-dd text) into datetimes.
sap_data = pd.read_csv('data/SAP_15Dec21.csv', low_memory=False).assign(
    BaseDate=lambda frame: pd.to_datetime(frame['BaseDate'], format='%Y-%m-%d')
)

In [88]:
# Give the material description a name without spaces; later merges join on it.
sap_data = sap_data.rename(columns={'Material Description': 'Material_Description'})

In [89]:
# Print column names, non-null counts and dtypes of the SAP extract.
sap_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918658 entries, 0 to 918657
Data columns (total 20 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   Unnamed: 0            918658 non-null  int64         
 1   ActivityType          918658 non-null  object        
 2   Order Number          918658 non-null  object        
 3   Shipment N            918658 non-null  object        
 4   Delivery N            918658 non-null  object        
 5   DocumentNo            918658 non-null  object        
 6   Plant_Type            918658 non-null  object        
 7   Plant_WAREHOUSE_NAME  918658 non-null  object        
 8   Customer N            918658 non-null  object        
 9   Customer Name         918658 non-null  object        
 10  Billing Do            918658 non-null  object        
 11  Bill I                914493 non-null  object        
 12  Description           918658 non-null  object        
 13 

In [90]:
# Number of rows in the SAP extract.
sap_data.shape[0]

918658

In [91]:
# Number of distinct values in 'Customer N' (a missing value would count as one entry).
sap_data['Customer N'].nunique(dropna=False)

25440

In [92]:
# Despatch lines with BaseHour 12. temp1 is not referenced again in the visible part of the notebook.
mask = sap_data['BaseHour'].eq(12) & sap_data['ActivityType'].eq('3DESPATCH')
temp1 = sap_data.loc[mask]

# Import warehouse lookup file

In [93]:
# Read the warehouse lookup sheet and discard the two columns that are not used afterwards.
warehouses = pd.read_excel(
    './data/warehouses/IPT3_Warehouses-Warehouses Validated.xlsx',
    sheet_name='IPT3 - Site_WH_Location',
).drop(columns=['Plant Type', 'Finance Cost Centre'])

# Import D365 Products

In [94]:
# Read the D365 released-products export (first sheet of the workbook, as no sheet_name is given).
items = pd.read_excel('./data/products/IPT Released Products Export-Released products V2..xlsx')

In [95]:
# Keep only products whose lifecycle status is 'Active', then discard the status column.
# Note the trailing space in the column name 'Lifecycle status '.
is_active = items['Lifecycle status '] == 'Active'
items = items.loc[is_active].drop(columns=['Lifecycle status '])

In [96]:
# Distinct (material number, description) pairs from the SAP data, without incomplete rows.
sap_items = (
    sap_data.loc[:, ['Material No', 'Material_Description']]
    .drop_duplicates()
    .dropna()
)

In [97]:
# Keep only D365 items whose description also occurs in the SAP data (inner join on the
# description text); the result is sorted by the join key.
items = items.merge(
    sap_items,
    how="inner",
    on='Material_Description',
    sort=True,
    suffixes=("_D365", "_SAP"),
)

In [98]:
# Display the matched item list (D365 item number, description, SAP material number).
items

Unnamed: 0,D365_ItemNo,Material_Description,Material No
0,CB0200017,1250ML CFB UNSORTED,98181
1,CN0200043,2000ML CRATE(06)RIM,98201
2,CV0200043,2000ML CRATE(06)RIM,98201
3,CB0200019,300ML CFB UNSORTED (NEW),98183
4,CN0200048,500ML CRATE - RIM LOADED,98206
...,...,...,...
376,7540,VALPRE STILL 01X12 750 NRG,C7540
377,7512,VALPRE STILL 01X24 330 PET,C7512
378,7587,VALPRE STILL 01X24 350 NRG,C7587
379,7950,VALPRE STILL 04X06 500 PET,C7950


# Customers
- Use the Bloem customers list that Andre provided.
- Correlate with CE customers that are Outlets. Use CE_IPT_SoftDrinkOutlets_20200104.xlsx for this.
- Include only direct customers.  Get this from the customer master.
- Inner join this with the Bloem customers to get a list of Bloem customers (that we know will work), that also meet the criteria above.
- Do a random match for each valid SAP_15Dec21 customer to a Bloem customer.  There will be more than one SAP customer that will be mapped to a Bloem customer.
- We do not use Mode of Delivery from the customer master.
- 'Source Channel' field to be defaulted to 'Voice in'
- 'Order Category' field to be defaulted to 'Sales Local'

### Import the D365 customer master

In [99]:
# Read the D365 customer export. The commented-out line below is an earlier source file.
#customers_ipt2=pd.read_csv('data/customer_master.csv', low_memory=False)
customers = pd.read_csv('./data/customers/IPT Customer Export-Customers V3 07082023.csv')

In [100]:
# Read the CustomersV3 extract; only two of its columns are kept in the next cell.
customersV3 = pd.read_csv('./data/customers/2023-08-10_CustomersV3.csv', low_memory=False)

In [101]:
# Keep the account number (join key) and the organisation name only.
customersV3 = customersV3.loc[:, ['CUSTOMERACCOUNT', 'ORGANIZATIONNAME']].copy()

In [102]:
# Attach ORGANIZATIONNAME to the customer export; customers without a match in
# customersV3 are dropped by the inner join. Columns present in both frames get _x/_y suffixes.
customers = customers.merge(
    customersV3,
    how="inner",
    on='CUSTOMERACCOUNT',
    sort=True,
)

In [103]:
# customers_short: the three customer columns needed downstream, as an independent copy.
customers_short = customers.loc[:, ['ADDRESSZIPCODE', 'CUSTOMERACCOUNT', 'ORGANIZATIONNAME']].copy()

In [104]:
# Print dtypes and non-null counts before the zip code is converted.
customers_short.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144878 entries, 0 to 144877
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   ADDRESSZIPCODE    144878 non-null  int64 
 1   CUSTOMERACCOUNT   144878 non-null  int64 
 2   ORGANIZATIONNAME  144878 non-null  object
dtypes: int64(2), object(1)
memory usage: 4.4+ MB


### Fix the address zip code for later use

In [105]:
# Missing zip codes become 0; values are then forced to integers and finally to text.
customers_short['ADDRESSZIPCODE'] = (
    customers_short['ADDRESSZIPCODE']
    .fillna(0)
    .astype(int)
    .astype(str)
)

In [106]:
# Persist the reduced customer list (without the index) next to the source customer files.
customers_short.to_csv("./data/customers/customers_short.csv", index=False)

### Now import CE customers (provided by Gary), and join that to get the Order Placement Rule

In [107]:
# Read the CE outlet customers. The commented-out line below is an earlier source file.
#outlet_customers = pd.read_excel('data/PilotAccounts.xlsx')
outlet_customers = pd.read_csv('./data/customers/SoftDrinks.csv')

In [108]:
# These two columns are not carried forward from the outlet list.
outlet_customers = outlet_customers.drop(columns=['Legal Entity', 'Account Name'])

In [109]:
# Check dtypes again: ADDRESSZIPCODE should now be an object (string) column.
customers_short.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 144878 entries, 0 to 144877
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype 
---  ------            --------------   ----- 
 0   ADDRESSZIPCODE    144878 non-null  object
 1   CUSTOMERACCOUNT   144878 non-null  int64 
 2   ORGANIZATIONNAME  144878 non-null  object
dtypes: int64(1), object(2)
memory usage: 4.4+ MB


In [110]:
# Align column names with the outlet list: 'Account Number' is the join key used in the next merge.
customers_short = customers_short.rename(
    columns={
        'ORGANIZATIONNAME': 'D365_Account_Name',
        'CUSTOMERACCOUNT': 'Account Number',
    }
)

In [111]:
# Discard outlet customers that have no order placement rule.
outlet_customers = outlet_customers.dropna(subset=['CIC Order Placement Rule'])

In [112]:
# List the distinct order placement rules that remain.
outlet_customers['CIC Order Placement Rule'].unique()

array(['B2B', 'Voice', 'Portal', 'Digital', 'Voice; Digital',
       'Voice; Digital; Portal'], dtype=object)

In [113]:
# Pilot customers: outlet customers that also exist in the D365 customer list.
pilot_customers = outlet_customers.merge(
    customers_short,
    how="inner",
    on='Account Number',
    sort=True,
)

In [114]:
# The zip code is not needed on the pilot customer list.
pilot_customers = pilot_customers.drop(columns=['ADDRESSZIPCODE'])

### Change made on 9 Jan 2023

Change the “Source Channel” based on the following rules 
- Set to B2B if the “Bill I” column in Tommy's data is B2B 
- Set to SFA if the “Bill I” column in Tommy's data is HHT 
- Set to Voice in for all other rows 

The B2B order lines can only be linked to a B2B customer and the SFA and Voice In order lines should be linked to non B2B customers. 
- Gary has provided a new list of pilot accounts, with the Placement Rule column (B2B or blank).  This list also excludes customers with very low credit limits, which have caused submission holds on some of the FnO Prep Data.
- These changes only need to be made for the 1ORDERCREATION order lines/files 
- This will mean we can split the order creation between the 3 different channels/test cases 

In [115]:
# Default channel for every line; order-creation lines are refined in the next cell.
sap_data['Source Channel'] = 'Voice in'

In [116]:
# Only 1ORDERCREATION lines get a channel other than the default:
#   'Bill I' == 'B2B' -> 'B2B',  'Bill I' == 'HHT' -> 'SFA'.
# .loc assignment is used instead of Series.mask(..., inplace=True) on a selected column:
# that form is chained assignment and leaves sap_data unchanged under pandas Copy-on-Write.
is_order_creation = sap_data['ActivityType'] == '1ORDERCREATION'
sap_data.loc[is_order_creation & (sap_data['Bill I'] == 'B2B'), 'Source Channel'] = 'B2B'
sap_data.loc[is_order_creation & (sap_data['Bill I'] == 'HHT'), 'Source Channel'] = 'SFA'

In [117]:
# Every line gets the same order category.
sap_data['Order Category'] = 'Sales Local'

### This section creates a list of customers, padding the SAP customers with a repeating list of allowable D365 customers
- For B2B customers, make sure they are aligned with those SAP customers for 1ORDERCREATION activity type
- First map the D365 B2B customers (CIC Order Placement Rule == B2B) to the unique list of SAP customers where Activity Type == 1ORDERCREATION
- Then extract the list of unique SAP customers for all lines where Activity Type != 1ORDERCREATION, remove those that have already been mapped, and map the rest to D365 customers that are non-B2B

In [118]:
# Distinct SAP customer names that have at least one line with 'Source Channel' == 'B2B'.
b2b_lines = sap_data.loc[sap_data['Source Channel'] == 'B2B']
unique_SAP_customers_np = b2b_lines['Customer Name'].unique()
unique_SAP_customers1 = pd.DataFrame(unique_SAP_customers_np)

In [119]:
# The single unnamed column (label 0) holds the SAP customer name.
unique_SAP_customers1 = unique_SAP_customers1.rename(columns={0: 'SAP_Cust_Name'})

In [120]:
# Distinct D365 account numbers of pilot customers whose placement rule is exactly 'B2B'.
b2b_pilots = pilot_customers.loc[pilot_customers['CIC Order Placement Rule'] == 'B2B']
allowable_pilot_customers_np = b2b_pilots['Account Number'].unique()
allowable_pilot_customers = pd.DataFrame(allowable_pilot_customers_np)

In [121]:
# Map every SAP B2B customer to a D365 B2B account.
# Each allowable account is repeated `repeats` times in a row (np.repeat yields
# a, a, ..., b, b, ...), which always produces more entries than there are SAP customers;
# the surplus at the end is cut off so there is exactly one account per SAP customer.
n_sap_customers = len(unique_SAP_customers_np)
if len(allowable_pilot_customers_np) == 0:
    raise ValueError('No allowable B2B pilot customers to map the SAP B2B customers to')

repeats = n_sap_customers // len(allowable_pilot_customers_np) + 1
new_array = allowable_pilot_customers_np.repeat(repeats)

# Slice from the front to the required length. The earlier new_array[:-trim_rows] form
# returns an empty array whenever trim_rows is 0.
trimmed_array = new_array[:n_sap_customers]

unique_SAP_customers1['D365_Cust_No'] = trimmed_array.tolist()

In [126]:
# Same procedure for everything that is not B2B: start from the distinct SAP customer
# names that have at least one line with 'Source Channel' != 'B2B'.
non_b2b_lines = sap_data.loc[sap_data['Source Channel'] != 'B2B']
unique_SAP_customers_np = non_b2b_lines['Customer Name'].unique()

unique_SAP_customers2 = pd.DataFrame(unique_SAP_customers_np).rename(
    columns={0: 'SAP_Cust_Name'}
)

In [127]:
# Left-join the B2B mapping onto the non-B2B list: customers that already have a
# D365 account get a value in D365_Cust_No, all others get NaN.
unique_SAP_customers2 = unique_SAP_customers2.merge(
    unique_SAP_customers1,
    how="left",
    on='SAP_Cust_Name',
    sort=True,
)

In [128]:
# Keep only the SAP customers that were not mapped in the B2B pass.
mask = unique_SAP_customers2['D365_Cust_No'].isna()
unique_SAP_customers2 = unique_SAP_customers2.loc[mask]

In [129]:
# Distinct D365 account numbers of pilot customers whose placement rule is anything but 'B2B'.
allowable_pilot_customers_np = pilot_customers.loc[
    pilot_customers['CIC Order Placement Rule'] != 'B2B', 'Account Number'
].unique()
allowable_pilot_customers2 = pd.DataFrame(allowable_pilot_customers_np)

n_unmapped_customers = len(unique_SAP_customers2)
if len(allowable_pilot_customers_np) == 0:
    raise ValueError('No allowable non-B2B pilot customers to map the remaining SAP customers to')

# Repeat each account `repeats` times in a row, then cut to one account per SAP customer.
repeats = n_unmapped_customers // len(allowable_pilot_customers_np) + 1
new_array = allowable_pilot_customers_np.repeat(repeats)
# Slice from the front; new_array[:-trim_rows] would be empty if trim_rows were 0.
trimmed_array = new_array[:n_unmapped_customers]

# assign() builds a new frame, so the mapping is not written into a filtered slice of an
# earlier frame (which triggers SettingWithCopyWarning).
unique_SAP_customers2 = unique_SAP_customers2.assign(D365_Cust_No=trimmed_array.tolist())

In [130]:
# Concatenate unique_SAP_customers1 and unique_SAP_customers2 into one lookup
# (SAP_Cust_Name -> D365_Cust_No) with a fresh 0..n-1 index.
unique_SAP_customers = pd.concat([unique_SAP_customers1, unique_SAP_customers2], ignore_index=True)

In [131]:
# Number of distinct SAP customer names, for comparison with the size of the lookup.
sap_data['Customer Name'].nunique(dropna=False)

23687

In [132]:
# Pull the remaining pilot_customers columns onto the lookup via the D365 account number.
unique_SAP_customers = unique_SAP_customers.merge(
    pilot_customers,
    how="inner",
    left_on='D365_Cust_No',
    right_on='Account Number',
    sort=True,
)

In [133]:
# 'Account Number' duplicates D365_Cust_No after the merge.
unique_SAP_customers = unique_SAP_customers.drop(columns=['Account Number'])

In [134]:
# Placement rules that actually occur among the mapped D365 customers.
unique_SAP_customers['CIC Order Placement Rule'].unique()

array(['Voice', 'B2B', 'Digital', 'Portal'], dtype=object)

In [135]:
# Print the structure of the item lookup before joining it to the order lines.
items.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 381 entries, 0 to 380
Data columns (total 3 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   D365_ItemNo           381 non-null    object
 1   Material_Description  381 non-null    object
 2   Material No           381 non-null    object
dtypes: object(3)
memory usage: 11.9+ KB


# Join SAP_15Dec data with D365 items

In [136]:
# Order-line view of the SAP data: only the columns needed for the D365 order files.
order_columns = [
    'ActivityType', 'Order Number', 'Plant_WAREHOUSE_NAME', 'Customer N', 'Customer Name',
    'Material No', 'Material_Description', 'BaseDate', 'BaseHour', 'Cases',
    'Bill I', 'Source Channel', 'Order Category',
]
sap_data_orders = sap_data.loc[:, order_columns]

In [137]:
# Keep every activity type except 'AP' and rename the SAP material number.
# The former 'Plant (WAREHOUSE NAME)' -> 'SAP_WH' entry was removed: that column does not
# exist in this frame (the column is 'Plant_WAREHOUSE_NAME'), so the entry was a silent no-op,
# and the warehouse merge further down joins on 'Plant_WAREHOUSE_NAME'.
# rename() without inplace returns a new frame instead of mutating a filtered slice.
sap_data_orders = sap_data_orders[sap_data_orders['ActivityType'] != 'AP'].rename(
    columns={'Material No': 'SAP_MatlNo'}
)

In [138]:
# Normalise "Cases": a quantity of 0 becomes a fixed 10, and negative quantities are negated
# (made positive). The zero replacement happens first, as before.
# The column is rebuilt with assign() rather than Series.mask(..., inplace=True) on a selected
# column, which is chained assignment and has no effect on the frame under Copy-on-Write.
cases = sap_data_orders['Cases']
sap_data_orders = sap_data_orders.assign(Cases=cases.mask(cases == 0, 10).abs())

In [139]:
# Attach the D365 item number to every order line by matching on the description text;
# lines whose description has no D365 item are dropped by the inner join.
df3 = sap_data_orders.merge(
    items,
    how="inner",
    on='Material_Description',
    sort=True,
)

In [140]:
# Print the structure and row count after the item join.
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 903662 entries, 0 to 903661
Data columns (total 15 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   ActivityType          903662 non-null  object        
 1   Order Number          903662 non-null  object        
 2   Plant_WAREHOUSE_NAME  903662 non-null  object        
 3   Customer N            903662 non-null  object        
 4   Customer Name         903662 non-null  object        
 5   SAP_MatlNo            903662 non-null  object        
 6   Material_Description  903662 non-null  object        
 7   BaseDate              903662 non-null  datetime64[ns]
 8   BaseHour              903662 non-null  int64         
 9   Cases                 903662 non-null  int64         
 10  Bill I                899552 non-null  object        
 11  Source Channel        903662 non-null  object        
 12  Order Category        903662 non-null  object        
 13 

In [141]:
# Only the SAP material number is removed; Material_Description is kept on the order lines.
df3 = df3.drop(columns=['SAP_MatlNo'])

In [142]:
# Activity types present after the item join.
df3['ActivityType'].unique()

array(['2PLAN', '3DESPATCH', '4SETTLE', '1ORDERCREATION'], dtype=object)

In [143]:
# Start with an empty Mode of Delivery; it is filled per activity type in the next cell.
df3['MOD'] = ''

In [144]:
# Default the Mode of Delivery according to ActivityType.
# A single mapping replaces five Series.mask(..., inplace=True) calls on df3['MOD']; that
# chained in-place form does not write back to df3 under pandas Copy-on-Write.
mod_by_activity = {
    '1ORDERCREATION': '01',
    '2PLAN': '02',
    '3DESPATCH': '03',
    '4SETTLE': '04',
    '5TRADERETURNS': '05',
}
# Activity types that are not in the mapping keep whatever MOD value they already have.
df3['MOD'] = df3['ActivityType'].map(mod_by_activity).fillna(df3['MOD'])

In [145]:
# Print the structure of the warehouse lookup before joining it.
warehouses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   SAP_WH_NAME          88 non-null     object
 1   D365_WH_NAME         88 non-null     object
 2   D365_WH_NO           88 non-null     object
 3   site_id              88 non-null     object
 4   WAREHOUSELOCATIONID  88 non-null     object
 5   Cost_Centre          88 non-null     object
 6   Financial_Dimension  88 non-null     object
 7   D365_Del_Loc         88 non-null     object
dtypes: object(8)
memory usage: 5.6+ KB


### Join SAP data with Warehouse

In [146]:
# Attach the D365 warehouse attributes via the SAP warehouse name; lines whose plant is
# not in the lookup are dropped by the inner join.
df4 = df3.merge(
    warehouses,
    how="inner",
    left_on='Plant_WAREHOUSE_NAME',
    right_on='SAP_WH_NAME',
    sort=True,
)

In [147]:
# Warehouse name/number columns are no longer needed once the join is done.
df4 = df4.drop(
    columns=['SAP_WH_NAME', 'Plant_WAREHOUSE_NAME', 'D365_WH_NO', 'D365_WH_NAME']
)

### Merge SAP data with Customer lookup table
Note that the replacement of SAP customers with D365 customers follows no business logic (the mapping is arbitrary), as only Bloem customers are available for the performance test.  

In [148]:
# Replace each SAP customer by its mapped D365 customer, joining on the customer name.
df5 = df4.merge(
    unique_SAP_customers,
    how="inner",
    left_on='Customer Name',
    right_on='SAP_Cust_Name',
    sort=True,
)

In [149]:
# The SAP customer identifiers are not part of the D365 order files.
df5 = df5.drop(columns=['Customer N', 'Customer Name', 'SAP_Cust_Name'])

In [150]:
# Order numbers are converted to integers via float.
df5['Order Number'] = df5['Order Number'].astype(float).astype(int)

In [151]:
# One line per (activity type, order, item); the first occurrence wins.
df5 = df5.drop_duplicates(
    subset=['ActivityType', 'Order Number', 'D365_ItemNo'],
    keep='first',
)

In [152]:
# Finance attributes are re-joined from `warehouses` later for the stock journal.
df5 = df5.drop(columns=['Cost_Centre', 'Financial_Dimension'])

In [153]:
# Distinct 'Bill I' values left in the final order lines (includes NaN for missing values).
df5['Bill I'].unique()

array(['ZRCR', 'ZFBO', 'ZFBR', 'HHT', nan, 'ZFRO', 'ZCOR', 'CRM', 'ZTRD',
       'ZTRR', 'ZUBR', 'ZCOS', 'ZFBF', 'ZFRE', 'CRMW', 'ZUBS', 'ZRCS',
       'ZLC', 'B2B'], dtype=object)

# Generate output files

In [154]:
# Spot check: show all order lines for D365 item "7947".
df5.query('D365_ItemNo == "7947"')

Unnamed: 0,ActivityType,Order Number,Material_Description,BaseDate,BaseHour,Cases,Bill I,Source Channel,Order Category,D365_ItemNo,Material No,MOD,site_id,WAREHOUSELOCATIONID,D365_Del_Loc,D365_Cust_No,CIC Order Placement Rule,D365_Account_Name
208,3DESPATCH,72425368,SPRITE 01X24 440 PET,2021-12-15,1,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
209,4SETTLE,72425368,SPRITE 01X24 440 PET,2021-12-15,15,1,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
1362,1ORDERCREATION,72440046,SPRITE 01X24 440 PET,2021-12-15,10,10,HHT,SFA,Sales Local,7947,C7947,01,ZA027,GEN01,ZA1-ZA533B,10004126,Voice,KFC ENGEN TUGELA NORTH-KSA1304
1438,4SETTLE,72424112,SPRITE 01X24 440 PET,2021-12-15,19,5,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA014,GEN01,ZA1-ZA539B,10004128,Voice,SASOL DELIGHT KINGS
2060,4SETTLE,72428913,SPRITE 01X24 440 PET,2021-12-15,17,2,ZFBR,Voice in,Sales Local,7947,C7947,04,ZA022,GEN01,ZA1-ZA558B,10004152,Voice,DEBONAIRS PIZZA BELA BELA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902618,1ORDERCREATION,72437647,SPRITE 01X24 440 PET,2021-12-15,9,10,HHT,SFA,Sales Local,7947,C7947,01,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902619,2PLAN,72437647,SPRITE 01X24 440 PET,2021-12-15,16,1,ZFBO,Voice in,Sales Local,7947,C7947,02,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902620,3DESPATCH,72437647,SPRITE 01X24 440 PET,2021-12-15,22,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
903229,3DESPATCH,72425807,SPRITE 01X24 440 PET,2021-12-15,8,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA010,GEN01,ZA1-ZA528B,10123350,Voice,KFC RANDPARK RIDGE-KSA496


In [69]:
# Generate CSV for the entire SAP data set (all activity types and hours, index omitted)
df5.to_csv('output/15Dec_D365_orders.csv', index=False)

In [70]:
# Now drop lines with negative order quantities, except where ActivityType = 5TRADERETURNS
#mask = ((df5['Cases'] > 0) | (df5['ActivityType'] == '5TRADERETURNS'))
#df5 = df5[mask]

In [156]:
# Empty summary table; one row per (activity type, hour) is appended by the export cell below.
rec_count = pd.DataFrame(columns=['Activity type','Hour','Sales orders', 'Order lines'])

In [157]:
# Generate one CSV per activity type for BaseHour 12 and BaseHour 19
# (i.e. the hours 12h00-13h00 and 19h00-20h00).
def export_activity_hour(orders, activity_type, hour, keep_placement_rule=False):
    """Write the order lines of one activity type and base hour to CSV and return them.

    Parameters
    ----------
    orders : pandas.DataFrame
        Order lines with at least the columns 'ActivityType', 'BaseHour' and
        'CIC Order Placement Rule'.
    activity_type : str
        Value of 'ActivityType' to select; also used in the file name.
    hour : int
        Value of 'BaseHour' to select; also used in the file name.
    keep_placement_rule : bool, default False
        When False, the 'CIC Order Placement Rule' column is dropped before writing.

    Returns
    -------
    pandas.DataFrame
        The selected lines, as written to
        output/15Dec_D365_orders_<activity_type>_<hour>h.csv.
    """
    selected = orders[
        (orders['ActivityType'] == activity_type) & (orders['BaseHour'] == hour)
    ].copy()
    if not keep_placement_rule:
        selected = selected.drop(columns=['CIC Order Placement Rule'])
    selected.to_csv(f'output/15Dec_D365_orders_{activity_type}_{hour}h.csv', index=False)
    return selected


peak_hours = (12, 19)

# 1ORDERCREATION: these files keep the placement rule column, and the frames are reused
# below for the per-channel summaries.
peak_order_hour = export_activity_hour(df5, '1ORDERCREATION', 12, keep_placement_rule=True)
peak_settlement_hour = export_activity_hour(df5, '1ORDERCREATION', 19, keep_placement_rule=True)

# 2PLAN, 3DESPATCH, 4SETTLE: export and record order / order-line counts.
# The rows are collected in a list and turned into a DataFrame once, instead of growing
# rec_count with repeated pd.concat calls (which also duplicated rows when the cell was re-run).
staging_counts = []
for activity_type in ('2PLAN', '3DESPATCH', '4SETTLE'):
    for hour in peak_hours:
        lines = export_activity_hour(df5, activity_type, hour)
        staging_counts.append({
            'Activity type': activity_type,
            'Hour': str(hour),
            'Sales orders': lines['Order Number'].nunique(dropna=False),
            'Order lines': len(lines),
        })

# 5TRADERETURNS: exported only, no counts recorded.
for hour in peak_hours:
    export_activity_hour(df5, '5TRADERETURNS', hour)

rec_count = pd.DataFrame(
    staging_counts,
    columns=['Activity type', 'Hour', 'Sales orders', 'Order lines'],
)



In [158]:
# Per source channel at BaseHour 12: distinct sales orders and number of order lines.
x = (
    peak_order_hour
    .groupby(['Source Channel'])
    .agg({'Order Number': 'nunique', 'D365_ItemNo': 'count'})
    .reset_index()
    .rename(columns={'D365_ItemNo': 'Order lines', 'Order Number': 'Sales orders'})
)

In [159]:
# Everything outside BaseHour 12 and 19 goes into one CSV; this set can be used for
# preparation testing.
mask = ~df5['BaseHour'].isin([12, 19])
df5_1 = df5.loc[mask]
df5_1.to_csv('output/15Dec_D365_orders_excluding_12h_19h.csv', index=False)

In [160]:
# Per source channel at BaseHour 19: distinct sales orders and number of order lines.
y = (
    peak_settlement_hour
    .groupby(['Source Channel'])
    .agg({'Order Number': 'nunique', 'D365_ItemNo': 'count'})
    .reset_index()
    .rename(columns={'D365_ItemNo': 'Order lines', 'Order Number': 'Sales orders'})
)

In [161]:
# Renumber the summary rows 0..n-1 and discard the previous index.
rec_count = rec_count.reset_index(drop=True)

In [162]:
# Print the three summary tables, each under its own heading.
report_sections = (
    ('Sales order volumes for peak order hour (12h00 to 13h00):', x),
    ('\n\nSales order volumes for peak settlement hour (19h00 to 20h00):', y),
    ('\n\nSales order volumes data staging in F&O:', rec_count),
)
for heading, table in report_sections:
    print(heading)
    print(table)

Sales order volumes for peak order hour (12h00 to 13h00):
  Source Channel  Sales orders  Order lines
0            B2B           505        19270
1            SFA          1265        19845
2       Voice in           215         3386


Sales order volumes for peak settlement hour (19h00 to 20h00):
  Source Channel  Sales orders  Order lines
0            B2B             2           42
1            SFA            72          929
2       Voice in             9          186


Sales order volumes data staging in F&O:
  Activity type Hour Sales orders Order lines
0         2PLAN   12          408        3236
1         2PLAN   19          564        5007
2     3DESPATCH   12          426        4895
3     3DESPATCH   19          348        4333
4       4SETTLE   12          321        3621
5       4SETTLE   19         3312       35521


In [163]:
# Spot check repeated after the exports: order lines for D365 item "7947".
df5.query('D365_ItemNo == "7947"')

Unnamed: 0,ActivityType,Order Number,Material_Description,BaseDate,BaseHour,Cases,Bill I,Source Channel,Order Category,D365_ItemNo,Material No,MOD,site_id,WAREHOUSELOCATIONID,D365_Del_Loc,D365_Cust_No,CIC Order Placement Rule,D365_Account_Name
208,3DESPATCH,72425368,SPRITE 01X24 440 PET,2021-12-15,1,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
209,4SETTLE,72425368,SPRITE 01X24 440 PET,2021-12-15,15,1,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
1362,1ORDERCREATION,72440046,SPRITE 01X24 440 PET,2021-12-15,10,10,HHT,SFA,Sales Local,7947,C7947,01,ZA027,GEN01,ZA1-ZA533B,10004126,Voice,KFC ENGEN TUGELA NORTH-KSA1304
1438,4SETTLE,72424112,SPRITE 01X24 440 PET,2021-12-15,19,5,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA014,GEN01,ZA1-ZA539B,10004128,Voice,SASOL DELIGHT KINGS
2060,4SETTLE,72428913,SPRITE 01X24 440 PET,2021-12-15,17,2,ZFBR,Voice in,Sales Local,7947,C7947,04,ZA022,GEN01,ZA1-ZA558B,10004152,Voice,DEBONAIRS PIZZA BELA BELA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902618,1ORDERCREATION,72437647,SPRITE 01X24 440 PET,2021-12-15,9,10,HHT,SFA,Sales Local,7947,C7947,01,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902619,2PLAN,72437647,SPRITE 01X24 440 PET,2021-12-15,16,1,ZFBO,Voice in,Sales Local,7947,C7947,02,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902620,3DESPATCH,72437647,SPRITE 01X24 440 PET,2021-12-15,22,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
903229,3DESPATCH,72425807,SPRITE 01X24 440 PET,2021-12-15,8,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA010,GEN01,ZA1-ZA528B,10123350,Voice,KFC RANDPARK RIDGE-KSA496


In [164]:
# From the non-peak lines (df5_1), keep the first line of every D365 customer so that each
# customer master record can be verified with a single order line.
df5_2 = df5_1.drop_duplicates(subset='D365_Cust_No', keep='first').copy()

In [165]:
# Write the one-line-per-customer set (index omitted).
df5_2.to_csv('output/15Dec_D365_single_line_per_customer_excluding_12h_19h.csv', index=False)

In [166]:
# Spot check repeated once more: order lines for D365 item "7947".
df5.query('D365_ItemNo == "7947"')

Unnamed: 0,ActivityType,Order Number,Material_Description,BaseDate,BaseHour,Cases,Bill I,Source Channel,Order Category,D365_ItemNo,Material No,MOD,site_id,WAREHOUSELOCATIONID,D365_Del_Loc,D365_Cust_No,CIC Order Placement Rule,D365_Account_Name
208,3DESPATCH,72425368,SPRITE 01X24 440 PET,2021-12-15,1,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
209,4SETTLE,72425368,SPRITE 01X24 440 PET,2021-12-15,15,1,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA036,GEN01,ZA1-ZA036B,10004104,Voice,DEBONAIRS PIZZA NQUTHU PLAZA
1362,1ORDERCREATION,72440046,SPRITE 01X24 440 PET,2021-12-15,10,10,HHT,SFA,Sales Local,7947,C7947,01,ZA027,GEN01,ZA1-ZA533B,10004126,Voice,KFC ENGEN TUGELA NORTH-KSA1304
1438,4SETTLE,72424112,SPRITE 01X24 440 PET,2021-12-15,19,5,ZFBO,Voice in,Sales Local,7947,C7947,04,ZA014,GEN01,ZA1-ZA539B,10004128,Voice,SASOL DELIGHT KINGS
2060,4SETTLE,72428913,SPRITE 01X24 440 PET,2021-12-15,17,2,ZFBR,Voice in,Sales Local,7947,C7947,04,ZA022,GEN01,ZA1-ZA558B,10004152,Voice,DEBONAIRS PIZZA BELA BELA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902618,1ORDERCREATION,72437647,SPRITE 01X24 440 PET,2021-12-15,9,10,HHT,SFA,Sales Local,7947,C7947,01,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902619,2PLAN,72437647,SPRITE 01X24 440 PET,2021-12-15,16,1,ZFBO,Voice in,Sales Local,7947,C7947,02,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
902620,3DESPATCH,72437647,SPRITE 01X24 440 PET,2021-12-15,22,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA022,GEN01,ZA1-ZA552B,10123338,Voice,PRIMI PIATTI LIFESTYLE
903229,3DESPATCH,72425807,SPRITE 01X24 440 PET,2021-12-15,8,1,ZFBO,Voice in,Sales Local,7947,C7947,03,ZA010,GEN01,ZA1-ZA528B,10123350,Voice,KFC RANDPARK RIDGE-KSA496


# Create file for stock journals

### Create a journal line item per item per warehouse, with a replenishment_qty that is 100x the sum of order quantities (Cases) per line

In [167]:
# Total ordered cases per delivery location and item. Built from df5 before any columns
# that are not needed for order creation are dropped.
# The 'Cases' column is selected explicitly: the earlier .sum('Cases') passed the string as
# the positional numeric_only argument, so it only worked because a non-empty string is truthy.
stock_journal = (
    df5.groupby(['D365_Del_Loc', 'D365_ItemNo'], as_index=False)[['Cases']].sum()
)

In [168]:
# Replenish 100x the total ordered quantity per location and item.
stock_journal['replenishment_qty'] = stock_journal['Cases'].mul(100)

### Merge with warehouse dataframe to get Financial Dimensions per warehouse

In [169]:
# Every journal line gets the same inventory status.
stock_journal['INVENTORYSTATUSID'] = 'Available'

In [170]:
# Attach the warehouse attributes (including the financial dimension) per delivery location.
stock_journal1 = stock_journal.merge(
    warehouses,
    how="inner",
    on='D365_Del_Loc',
    sort=True,
)

In [171]:
# Rename the financial dimension column to DEFAULTLEDGERDIMENSIONDISPLAYVALUE for the journal file.
stock_journal1 = stock_journal1.rename(
    columns={'Financial_Dimension': 'DEFAULTLEDGERDIMENSIONDISPLAYVALUE'}
)

In [172]:
# Remove the columns that are not written to the journal file.
stock_journal1 = stock_journal1.drop(
    columns=['Cases', 'D365_WH_NAME', 'SAP_WH_NAME', 'Cost_Centre']
)

In [173]:
# Order the journal lines by warehouse number (ascending).
stock_journal1 = stock_journal1.sort_values('D365_WH_NO')

In [174]:
# Add a sequential LINENUMBER (1..n) in the current row order.
# The length comes from len(): last_valid_index() returns the index *label* of the last row,
# which after sort_values() is not guaranteed to be n-1 and would then give a range of the
# wrong length.
line_number = range(1, len(stock_journal1) + 1)
stock_journal1['LINENUMBER'] = line_number

### Generate a journal number per warehouse

In [175]:
# One row per distinct warehouse number, in order of first appearance in stock_journal1.
journal_number = pd.DataFrame(stock_journal1['D365_WH_NO'].unique())

In [176]:
# The single unnamed column (label 0) holds the warehouse number.
journal_number = journal_number.rename(columns={0: 'D365_WH_NO'})

In [177]:
# Journal numbers are handed out in ascending warehouse-number order.
journal_number = journal_number.sort_values('D365_WH_NO')

In [178]:
# Ask for the numeric part of the first free journal number; int() raises ValueError on
# anything that is not a whole number. This cell blocks an unattended "Run All".
#first_valid_jnumber = '76517'
first_valid_jnumber = int(input('Enter first valid journal number (numbers only)'))
# Journal numbers noted on earlier runs:
#ZA10700076636 - 24 Jan 2023
#ZA10700084774 - 14 Mar

In [179]:
# Assign one consecutive journal number per warehouse row, starting at
# first_valid_jnumber. The count is taken from len() instead of
# last_valid_index(), which returns an index label rather than a row count
# and is only equal to len - 1 when the index is an ordered 0..n-1 range.
index = range(first_valid_jnumber, first_valid_jnumber + len(journal_number))
journal_number['JOURNALNUMBER'] = index

In [180]:
# Build the final journal id: the 'ZA1070' prefix followed by the number as text.
# The original expression `'ZA1070' +  + series` applied a unary plus to a
# string Series, which raises TypeError.
# NOTE(review): the number is zero-padded to 7 digits so that e.g. 76636
# becomes 'ZA10700076636', matching the example journal numbers recorded in
# this notebook -- confirm the expected width.
journal_number['JOURNALNUMBER'] = journal_number['JOURNALNUMBER'].astype(str)
journal_number['JOURNALNUMBER'] = 'ZA1070' + journal_number['JOURNALNUMBER'].str.zfill(7)

In [181]:
# Attach each warehouse's journal number to its journal lines via an inner
# join on the warehouse number; remaining merge options use pandas defaults.
stock_journal1 = stock_journal1.merge(
    journal_number,
    how="inner",
    on='D365_WH_NO',
    sort=True,
)

In [182]:
# Constant columns shared by every journal line; the transaction date is
# the date on which this cell is run.
stock_journal1 = stock_journal1.assign(
    ITEMBATCHNUMBER='1',
    JOURNALNAMEID='ADJ_WHS',
    TRANSACTIONDATE=pd.to_datetime('today').date(),
)

In [183]:
# Write the finished stock journal to CSV without the DataFrame index.
stock_journal1.to_csv(path_or_buf='output/stock_journal.csv', index=False)

In [184]:
# Distinct (item number, description) pairs present in the order data.
item_columns = ['D365_ItemNo', 'Material_Description']
unique_items = df5[item_columns].drop_duplicates()


In [185]:
# Show row count, column dtypes and non-null counts for the unique item list.
unique_items.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 378 entries, 0 to 782792
Data columns (total 2 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   D365_ItemNo           378 non-null    object
 1   Material_Description  378 non-null    object
dtypes: object(2)
memory usage: 8.9+ KB


In [186]:
# Save the unique item list as an Excel workbook without the index column.
unique_items.to_excel(excel_writer='data/unique_items.xlsx', index=False)

In [187]:
# Show row count, column dtypes and non-null counts for the order data.
df5.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 873140 entries, 0 to 903661
Data columns (total 18 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   ActivityType              873140 non-null  object        
 1   Order Number              873140 non-null  int64         
 2   Material_Description      873140 non-null  object        
 3   BaseDate                  873140 non-null  datetime64[ns]
 4   BaseHour                  873140 non-null  int64         
 5   Cases                     873140 non-null  int64         
 6   Bill I                    869104 non-null  object        
 7   Source Channel            873140 non-null  object        
 8   Order Category            873140 non-null  object        
 9   D365_ItemNo               873140 non-null  object        
 10  Material No               873140 non-null  object        
 11  MOD                       873140 non-null  object        
 12  si

In [188]:
# Show row count, column dtypes and non-null counts for the warehouse lookup.
warehouses.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   SAP_WH_NAME          88 non-null     object
 1   D365_WH_NAME         88 non-null     object
 2   D365_WH_NO           88 non-null     object
 3   site_id              88 non-null     object
 4   WAREHOUSELOCATIONID  88 non-null     object
 5   Cost_Centre          88 non-null     object
 6   Financial_Dimension  88 non-null     object
 7   D365_Del_Loc         88 non-null     object
dtypes: object(8)
memory usage: 5.6+ KB


In [189]:
# Cartesian product: pair every unique item with every warehouse row by
# merging on a temporary constant column, which is dropped again afterwards.
items_keyed = unique_items.assign(key=1)
warehouses_keyed = warehouses.assign(key=1)
price_update = items_keyed.merge(warehouses_keyed, on='key').drop(columns='key')

In [190]:
# Show row count, column dtypes and non-null counts for the item x warehouse product.
price_update.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 33264 entries, 0 to 33263
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   D365_ItemNo           33264 non-null  object
 1   Material_Description  33264 non-null  object
 2   SAP_WH_NAME           33264 non-null  object
 3   D365_WH_NAME          33264 non-null  object
 4   D365_WH_NO            33264 non-null  object
 5   site_id               33264 non-null  object
 6   WAREHOUSELOCATIONID   33264 non-null  object
 7   Cost_Centre           33264 non-null  object
 8   Financial_Dimension   33264 non-null  object
 9   D365_Del_Loc          33264 non-null  object
dtypes: object(10)
memory usage: 2.8+ MB


In [191]:
# Discard the description and the warehouse-level columns listed below.
price_update = price_update.drop(
    columns=[
        'Material_Description',
        'SAP_WH_NAME',
        'D365_WH_NAME',
        'D365_WH_NO',
        'WAREHOUSELOCATIONID',
        'Cost_Centre',
        'Financial_Dimension',
        'D365_Del_Loc',
    ]
)

In [192]:
# Rename the item and site columns for the price export.
price_update = price_update.rename(
    columns={'D365_ItemNo': 'ItemNumber', 'site_id': 'PriceSiteId'}
)

In [193]:
# Remove repeated rows, keeping the first occurrence of each (pandas default).
price_update = price_update.drop_duplicates(keep='first')

In [194]:
# Apply the same placeholder standard-cost values to every row.
price_update = price_update.assign(
    Price=42285,
    PriceQuantity=1000,
    PriceType='Cost',
    CostingVersion='Std',
)

In [195]:
# Save the price update as an Excel workbook without the index column.
price_update.to_excel(excel_writer='output/price_update.xlsx', index=False)