In [2]:
import json
import numpy as np
import pandas as pd
import lxml

# Import SAP data from 15 December 2021

In [3]:
sap_data=pd.read_csv('data/SAP_15Dec21.csv', low_memory=False)
sap_data['BaseDate'] = pd.to_datetime(sap_data['BaseDate'], format='%Y-%m-%d')

In [4]:
len(sap_data)

918658

In [5]:
mask = ((sap_data['BaseHour'] == 12) & (sap_data['ActivityType'] == '3DESPATCH'))
temp1 = sap_data[mask]

# Warehouse lookup table

### Start with a manually prepared table mapping D365 warehouses to the 88 SAP warehouses in the 15 Dec 21 data set

In [6]:
SAP_to_D365_warehouse=pd.read_csv('data/raw_data/warehouses/SAP-to-D365 warehouse mapping.csv')

In [7]:
SAP_to_D365_warehouse.drop(columns={'D365_Del_Loc', 'WAREHOUSELOCATIONID', 'site_id'}, inplace=True, axis=1)

### Merge with IPT_Site_Warehouse_locations to get SITE, WAREHOUSELOCATIONID, Cost Centre Financial Dimension and Financial Dimension for Stock Journal

In [8]:
IPT_Site_Warehouse_locations=pd.read_csv('data/raw_data/warehouses/IPT_Site_Warehouse_locations_20221226.csv')

In [9]:
warehouses = pd.merge(
    SAP_to_D365_warehouse,
    IPT_Site_Warehouse_locations,
    how="inner",
    on=None,
    left_on='D365_WH_NO',
    right_on='WAREHOUSEID',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [10]:
warehouses.drop(columns={'WAREHOUSEID'}, inplace=True, axis=1)

In [11]:
warehouses.rename(columns={'Cost Centre Financial Dimension': 'Cost_Centre', 'Financial Dimension for Stock Journal':'Financial_Dimension'}, inplace=True)

### Merge with CE warehouses to get the CE Delivery Location

In [12]:
CE_IPT_Active_Warehouses=pd.read_excel('data/raw_data/warehouses/CE_IPT_Active Warehouses _DeliveryLocations_20221223.xlsx',sheet_name='CE Active Warehouses')


In [13]:
warehouses = pd.merge(
    warehouses,
    CE_IPT_Active_Warehouses,
    how="inner",
    on=None,
    left_on='D365_WH_NO',
    right_on='Warehouse Id',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [14]:
warehouses.drop(columns={'Warehouse Id','Description'}, inplace=True, axis=1)

In [15]:
warehouses.rename(columns={'Delivery Location Description': 'D365_Del_Loc','SITE': 'site_id'}, inplace=True)

Change notes: 8 Jan
The following fields were in the warehouses dataframe before making these changes
    SAP_WH_NAME,D365_WH_NO,D365_WH_NAME,D365_Del_Loc,site_id,WAREHOUSELOCATIONID

The following fields were added (to be used in the journal creation):
    Cost_Centre, Financial_Dimension

In [16]:
warehouses.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 88 entries, 0 to 87
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   SAP_WH_NAME          88 non-null     object
 1   D365_WH_NAME         88 non-null     object
 2   D365_WH_NO           88 non-null     object
 3   site_id              88 non-null     object
 4   WAREHOUSELOCATIONID  88 non-null     object
 5   Cost_Centre          88 non-null     object
 6   Financial_Dimension  88 non-null     object
 7   D365_Del_Loc         88 non-null     object
dtypes: object(8)
memory usage: 6.2+ KB


# Items lookup table

In [17]:
items=pd.read_csv('data/matched_items.csv')
items.rename(columns={'ITEMNUMBER': 'D365_ItemNo'}, inplace=True)

# Customers
- Use the Bloem customers list that Andre provided.
- Correlate with CE customers that are Outlets. Use CE_IPT_SoftDrinkOutlets_20200104.xlsx for this.
- Include only direct customers.  Get this from the customer master.
- Inner join this with the Bloem customers to get a list of Bloem customers (that we know will work), that also meet the criteria above.
- Do a random match for each valid SAP_15Dec21 customer to a Bloem customer.  There will be more than one SAP customer that will be mapped to a Bloem customer.
- We do not use Mode of Delivery from the customer master.
-'Source Channel' field to be defaulted to 'Voice In'
-'Order Category' field to be defaulted to 'Sales Local'

### Import the D365 customer master

In [18]:
customers=pd.read_csv('data/customer_master.csv', low_memory=False)
# customers_short is just a copy of customers, without most of the columns
customers_short = customers[['ADDRESSZIPCODE','CUSTOMERACCOUNT','ORGANIZATIONNAME', 'NAMEALIAS', 'CCBCUSTOMERTYPE']].copy()

### Fix the address zip code for later use

In [19]:
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].fillna(0)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(int)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(str)

### Import the list of D365 Pilot customers (Bloem)

In [20]:
pilot_customers=pd.read_csv('data/Pilot Customer Accounts.csv')
pilot_customers['Account Number']=pilot_customers['Account Number'].astype(str)
pilot_customers.drop(columns={'Customer Category', 'Inv-Postal Code', 'Inv-Province', 'Inv-Suburb', 'Inv-City', 'Inv-Address 1', 'Inv-Address 2', 'BillTo LocationId'}, inplace=True, axis=1)


In [21]:
pilot_customers['Account Number'] = pilot_customers['Account Number'].astype(int)

In [22]:
pilot_customers.drop(columns={'Account Name'}, inplace=True, axis=1)

### Now import CE customers (provided by Gary on 7 Jan), and join that with pilot_customers to arrive at the list pf usable customers.

In [23]:
outlet_customers = pd.read_excel('data/PilotAccounts.xlsx')

In [24]:
outlet_customers = outlet_customers.drop_duplicates()

In [25]:
outlet_customers.rename(columns={'Account Name': 'D365_Account_Name'}, inplace=True)

In [26]:
pilot_customers = pd.merge(
    outlet_customers,
    pilot_customers,
    how="inner",
    on=None,
    left_on='Account Number',
    right_on='Account Number',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

### Change made on 9 Jan 2023

Change the “Source Channel” based on the following rules 
- Set to B2B if the “Bill I” column in Tommys data is B2B 
- Set to SFA if the “Bill I” column in Tommys data is HHT 
- Set to Voice in for all other rows 

The B2B order lines can only be linked to a B2B customer and the SFA and Voice In order lines should be linked to non B2B customers. 
- Gary has provided a new list of pilot accounts, with the Place Rule Columns (B2B or Blank).  This list also excludes customers with very low credit limits, which has cause submission holds on some on the FnO Prep Data.
- These changes only need to be made for the 1ORDERCREATION order lines/files 
- This will mean we can split the order creation between the 3 different channels/test cases 

In [27]:
sap_data['Source Channel'] = 'Voice in'

In [28]:
mask = ((sap_data['Bill I'] == 'B2B') & (sap_data['ActivityType'] == '1ORDERCREATION'))
sap_data['Source Channel'].mask(mask,'B2B',inplace=True)
mask = ((sap_data['Bill I'] == 'HHT') & (sap_data['ActivityType'] == '1ORDERCREATION'))
sap_data['Source Channel'].mask(mask,'SFA',inplace=True)

In [29]:
sap_data['Order Category'] = 'Sales Local'

### This section creates a list of customers, padding the SAP customers with a repeating list of allowable D365 customers
- For B2B customers, make sure they are aligned with those SAP customers for 1ORDERCREATION activity type
- First map the D365 B2B customers (CIC Order Placement Rule == B2B) to the unique list of SAP customers where Activity Type == 1ORDERCREATION
- Then extract the list of unique SAP customers for all lines where Activity Type != 1ORDERCREATION, remove those that have already been mapped, and map the rest to D365 customers that are non-B2B

In [30]:
# Extract a list of unique customers from the SAP data, where 'Source Channel' == 'B2B'
mask = (sap_data['Source Channel'] == 'B2B') 
try1 = sap_data[mask]
unique_SAP_customers_np = try1['Customer Name'].unique()
unique_SAP_customers1 = pd.DataFrame(unique_SAP_customers_np)

In [31]:
unique_SAP_customers1.rename(columns={0: 'SAP_Cust_Name'}, inplace=True)

In [32]:
# Extract  list of allowable pilot customers, where CIC Order Placement Rule = B2B
mask = (pilot_customers['CIC Order Placement Rule'] == 'B2B') 
try1 = pilot_customers[mask]
allowable_pilot_customers_np = try1['Account Number'].unique()
allowable_pilot_customers = pd.DataFrame(allowable_pilot_customers_np)

In [33]:
repeats = int(len(unique_SAP_customers_np)/len(allowable_pilot_customers_np) + 1)

In [34]:
new_array = allowable_pilot_customers_np.repeat(repeats)

In [35]:
trim_rows = len(new_array) - len(unique_SAP_customers_np)

In [36]:
#a[:-n, :]
trimmed_array = new_array[:-trim_rows :]

In [37]:
unique_SAP_customers1['D365_Cust_No'] = trimmed_array.tolist()

In [38]:
# Repeat the process, but now with the rest of the activity types, mapping to the remainder of the D365 customers

# Extract a list of unique customers from the SAP data, where 'Source Channel' != 'B2B'
mask = (sap_data['Source Channel'] != 'B2B')
try1 = sap_data[mask]
unique_SAP_customers_np = try1['Customer Name'].unique()
unique_SAP_customers2 = pd.DataFrame(unique_SAP_customers_np)

unique_SAP_customers2.rename(columns={0: 'SAP_Cust_Name'}, inplace=True)

In [39]:
# Now exclude those SAP customers that have already been mapped to a D365 customer.  To do that, join the two dataframes
unique_SAP_customers2 = pd.merge(
    unique_SAP_customers2,
    unique_SAP_customers1,
    how="left",
    on=None,
    left_on='SAP_Cust_Name',
    right_on='SAP_Cust_Name',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [40]:
mask = (unique_SAP_customers2['D365_Cust_No'].isna())
unique_SAP_customers2 = unique_SAP_customers2[mask]

In [41]:
# Extract  list of allowable pilot customers, where CIC Order Placement Rule != B2B
mask = (pilot_customers['CIC Order Placement Rule'] != 'B2B') 
try1 = pilot_customers[mask]
allowable_pilot_customers_np = try1['Account Number'].unique()
allowable_pilot_customers2 = pd.DataFrame(allowable_pilot_customers_np)

repeats = int(len(unique_SAP_customers2)/len(allowable_pilot_customers_np) + 1)
new_array = allowable_pilot_customers_np.repeat(repeats)
trim_rows = len(new_array) - len(unique_SAP_customers2)
trimmed_array = new_array[:-trim_rows :]
unique_SAP_customers2['D365_Cust_No'] = trimmed_array.tolist()

In [42]:
# Concatenate unique_SAP_customers1 and unique_SAP_customers2
unique_SAP_customers = pd.concat([unique_SAP_customers1, unique_SAP_customers2], ignore_index=True)

In [43]:
len(sap_data['Customer Name'].unique())

23687

In [44]:
# Merge to pull in the rest of the columns in "pilot_customers"
unique_SAP_customers = pd.merge(
    unique_SAP_customers,
    pilot_customers,
    how="inner",
    on=None,
    left_on='D365_Cust_No',
    right_on='Account Number',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [45]:
unique_SAP_customers.drop(columns={'Account Number'}, inplace=True, axis=1)

In [46]:
unique_SAP_customers['CIC Order Placement Rule'].unique()

array([nan, 'B2B'], dtype=object)

### Join SAP_15Dec data with D365 items

In [47]:
# Make a new dataframe for the SAP data
sap_data_orders = sap_data[['ActivityType', 'Order Number', 'Plant_WAREHOUSE_NAME', 'Customer N', 'Customer Name', 'Material No', 'Material Description', 'BaseDate', 'BaseHour', 'Cases','Bill I','Source Channel','Order Category']]

In [48]:
# Only filter out type 'AP'
mask = (sap_data_orders['ActivityType'] != 'AP')
sap_data_orders = sap_data_orders[mask]
sap_data_orders.rename(columns={'Plant (WAREHOUSE NAME)': 'SAP_WH', 'Material No': 'SAP_MatlNo'}, inplace=True)

In [49]:
# New code for updating "Cases".  Where Cases==0, assign a fixed quantity of 10.  Where Cases<0, negate the quantity.
mask = (sap_data_orders['Cases'] == 0) 
sap_data_orders['Cases'].mask(mask,10,inplace=True)

mask = (sap_data_orders['Cases'] < 0) 
sap_data_orders['Cases'].mask(mask,-sap_data_orders['Cases'],inplace=True)

In [50]:
df3 = pd.merge(
    sap_data_orders,
    items,
    how="inner",
    on=None,
    left_on='Material Description',
    right_on='Material_Description',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [51]:
df3.drop(columns={'Material Description', 'SAP_MatlNo'}, inplace=True, axis=1)

In [52]:
df3['ActivityType'].unique()

array(['2PLAN', '3DESPATCH', '4SETTLE', '1ORDERCREATION'], dtype=object)

In [53]:
df3['MOD'] = ''

In [54]:
# Now default Mode of Delivery accourding to ActivityType
mask = (df3['ActivityType'] == '1ORDERCREATION') 
df3['MOD'].mask(mask,'01',inplace=True)
mask = (df3['ActivityType'] == '2PLAN') 
df3['MOD'].mask(mask,'02',inplace=True)
mask = (df3['ActivityType'] == '3DESPATCH') 
df3['MOD'].mask(mask,'03',inplace=True)
mask = (df3['ActivityType'] == '4SETTLE') 
df3['MOD'].mask(mask,'04',inplace=True)
mask = (df3['ActivityType'] == '5TRADERETURNS') 
df3['MOD'].mask(mask,'05',inplace=True)

### Join SAP data with Warehouse

In [55]:
df4 = pd.merge(
    df3,
    warehouses,
    how="inner",
    on=None,
    left_on='Plant_WAREHOUSE_NAME',
    right_on='SAP_WH_NAME',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [56]:
df4.drop(columns={'SAP_WH_NAME', 'Plant_WAREHOUSE_NAME', 'SAP_WH_NAME', 'D365_WH_NO', 'D365_WH_NAME'}, inplace=True, axis=1)

### Merge SAP data with Customer lookup table
Note that there is no logic in replacing SAP customers with D365 customers, as only Bloem customers are available for the performance test.  

In [57]:
df5 = pd.merge(
    df4,
    unique_SAP_customers,
    how="inner",
    on=None,
    left_on='Customer Name',
    right_on='SAP_Cust_Name',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [58]:
df5.drop(columns={'Customer N','Customer Name','SAP_Cust_Name'}, inplace=True, axis=1)

In [59]:
df5['Order Number'] = df5['Order Number'].astype(float)
df5['Order Number'] = df5['Order Number'].astype(int)

In [60]:
df5.drop_duplicates(subset=['ActivityType', 'Order Number', 'D365_ItemNo'],keep='first',inplace=True)

In [61]:
df5.drop(columns={'Cost_Centre', 'Financial_Dimension'}, inplace=True, axis=1)

In [62]:
df5['Bill I'].unique()

array(['ZRCR', 'ZFBO', 'ZFBR', 'HHT', nan, 'ZFRO', 'ZPLS', 'ZCOR', 'CRM',
       'ZTRD', 'ZTRR', 'ZUBR', 'ZPLR', 'ZCOS', 'ZFBF', 'ZFRE', 'CRMW',
       'ZUBS', 'ZRCS', 'ZLC', 'B2B', 'ZMTC'], dtype=object)

### Generate output files

In [63]:
# Generate CSV for the entire SAP data set
df5.to_csv('output/15Dec_D365_orders.csv', index=False)

In [64]:
# Now drop lines with negative order quantities, except where ActivityType = 5TRADERETURNS
#mask = ((df5['Cases'] > 0) | (df5['ActivityType'] == '5TRADERETURNS'))
#df5 = df5[mask]

In [65]:
rec_count = pd.DataFrame(columns=['Activity type','Hour','Sales orders', 'Order lines'])

In [66]:
# Generate CSVs per activity type, and for the 13th and 20th hours
# 1ORDERCREATION
mask = ((df5['ActivityType'] == '1ORDERCREATION') &  (df5['BaseHour'] == 12))
peak_order_hour = df5[mask]
peak_order_hour.to_csv('output/15Dec_D365_orders_1ORDERCREATION_12h.csv',index=False)

mask = ((df5['ActivityType'] == '1ORDERCREATION') &  (df5['BaseHour'] == 19))
peak_settlement_hour = df5[mask]
peak_settlement_hour.to_csv('output/15Dec_D365_orders_1ORDERCREATION_19h.csv',index=False)

# 2PLAN
mask = ((df5['ActivityType'] == '2PLAN') &  (df5['BaseHour'] == 12))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_2PLAN_12h.csv',index=False)
new_row = {'Activity type': '2PLAN', 'Hour': '12', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


mask = ((df5['ActivityType'] == '2PLAN') &  (df5['BaseHour'] == 19))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_2PLAN_19h.csv',index=False)
new_row = {'Activity type': '2PLAN', 'Hour': '19', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


# 3DESPATCH
mask = ((df5['ActivityType'] == '3DESPATCH') &  (df5['BaseHour'] == 12))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_3DESPATCH_12h.csv',index=False)
new_row = {'Activity type': '3DESPATCH', 'Hour': '12', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


mask = ((df5['ActivityType'] == '3DESPATCH') &  (df5['BaseHour'] == 19))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_3DESPATCH_19h.csv',index=False)
new_row = {'Activity type': '3DESPATCH', 'Hour': '19', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


# 4SETTLE
mask = ((df5['ActivityType'] == '4SETTLE') &  (df5['BaseHour'] == 12))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_4SETTLE_12h.csv',index=False)
new_row = {'Activity type': '4SETTLE', 'Hour': '12', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


mask = ((df5['ActivityType'] == '4SETTLE') &  (df5['BaseHour'] == 19))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_4SETTLE_19h.csv',index=False)
new_row = {'Activity type': '4SETTLE', 'Hour': '19', 'Sales orders': len(df5_1['Order Number'].unique()), 'Order lines': len(df5_1)}
rec_count = pd.concat([rec_count, pd.DataFrame([new_row])])


# 5TRADERETURNS
mask = ((df5['ActivityType'] == '5TRADERETURNS') &  (df5['BaseHour'] == 12))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_5TRADERETURNS_12h.csv',index=False)



mask = ((df5['ActivityType'] == '5TRADERETURNS') &  (df5['BaseHour'] == 19))
df5_1 = df5[mask].copy()
df5_1.drop(columns={'CIC Order Placement Rule'}, inplace=True, axis=1)
df5_1.to_csv('output/15Dec_D365_orders_5TRADERETURNS_19h.csv',index=False)



In [67]:
x = peak_order_hour.groupby(['Source Channel']).agg({'Order Number': 'nunique','D365_ItemNo': 'count'}).reset_index()
x.rename(columns={'D365_ItemNo': 'Order lines', 'Order Number': 'Sales orders'}, inplace=True)

In [68]:
# Generate a CSV for the rest of the hours, that is, excluding the 13th and 20th hours.  This set can be used to do preparation testing.
mask = ((df5['BaseHour'] != 12) & (df5['BaseHour'] != 19))
df5_1 = df5[mask]
df5_1.to_csv('output/15Dec_D365_orders_excluding_12h_19h.csv', index=False)

In [69]:
y = peak_settlement_hour.groupby(['Source Channel']).agg({'Order Number': 'nunique','D365_ItemNo': 'count'}).reset_index()
y.rename(columns={'D365_ItemNo': 'Order lines', 'Order Number': 'Sales orders'}, inplace=True)

In [70]:
rec_count = rec_count.reset_index(drop=True)

In [71]:
print('Sales order volumes for peak order hour (12h00 to 13h00):')
print(x)
print('\n\nSales order volumes for peak settlement hour (19h00 to 20h00):')
print(y)
print('\n\nSales order volumes data staging in F&O:')
print(rec_count)

Sales order volumes for peak order hour (12h00 to 13h00):
  Source Channel  Sales orders  Order lines
0            B2B           505        19601
1            SFA          1268        20008
2       Voice in           215         3423


Sales order volumes for peak settlement hour (19h00 to 20h00):
  Source Channel  Sales orders  Order lines
0            B2B             2           42
1            SFA            72          926
2       Voice in             9          190


Sales order volumes data staging in F&O:
  Activity type Hour Sales orders Order lines
0         2PLAN   12          462        3507
1         2PLAN   19          566        5080
2     3DESPATCH   12          447        5116
3     3DESPATCH   19          396        4585
4       4SETTLE   12          355        3781
5       4SETTLE   19         3508       36687


In [72]:
#From above, generate a file that contains just one record per customer, so that we can use this to verify that each customer master record works
df5_1.drop_duplicates(subset=['D365_Cust_No'],keep='first',inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df5_1.drop_duplicates(subset=['D365_Cust_No'],keep='first',inplace=True)


In [73]:
df5_1.to_csv('output/15Dec_D365_single_line_per_customer_excluding_12h_19h.csv', index=False)

# Create file for stock journals

### Create a journal line item per item per warehouse, with a replenishment_qty that is 100x the sum of order quantities (Cases) per line

In [74]:
# Create this dataframe before dropping columns not needed for order creation
stock_journal = df5[['D365_ItemNo', 'D365_Del_Loc','Cases']]
stock_journal = stock_journal.groupby(['D365_Del_Loc', 'D365_ItemNo'],as_index=False).sum('Cases')

In [75]:
stock_journal['replenishment_qty'] = stock_journal['Cases']*100

### Merge with warehouse dataframe to get Financial Dimensions per warehouse

In [76]:
stock_journal['INVENTORYSTATUSID'] = 'Available'

In [77]:
stock_journal1 = pd.merge(
    stock_journal,
    warehouses,
    how="inner",
    on=None,
    left_on='D365_Del_Loc',
    right_on='D365_Del_Loc',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [78]:
stock_journal1.rename(columns={'Financial_Dimension': 'DEFAULTLEDGERDIMENSIONDISPLAYVALUE'}, inplace=True)

In [79]:
stock_journal1.drop(columns={'Cases', 'D365_WH_NAME', 'SAP_WH_NAME','Cost_Centre'}, inplace=True, axis=1)

In [80]:
stock_journal1 = stock_journal1.sort_values(['D365_WH_NO'],ascending=True)

In [81]:
# Add a sequential index for LineNumber
line_number = range(1,stock_journal1.last_valid_index()+2,1)
stock_journal1['LINENUMBER']=line_number

### Generate a journal number per warehouse

In [82]:
journal_number = pd.DataFrame(stock_journal1['D365_WH_NO'].unique())

In [83]:
journal_number.rename(columns={0: 'D365_WH_NO'}, inplace=True)

In [84]:
journal_number = journal_number.sort_values(['D365_WH_NO'],ascending=True)

In [85]:
first_valid_jnumber = input('Enter first valid journal number (numbers only)')
#first_valid_jnumber = '76517'
first_valid_jnumber = int(first_valid_jnumber)
#ZA10700076636 - 24 Jan 2023

In [86]:
index = range(first_valid_jnumber,first_valid_jnumber+journal_number.last_valid_index()+1,1)
journal_number['JOURNALNUMBER']=index

In [87]:
journal_number['JOURNALNUMBER'] = journal_number['JOURNALNUMBER'].astype(str)
journal_number['JOURNALNUMBER'] = 'ZA107000' +  + journal_number['JOURNALNUMBER']

In [88]:
stock_journal1 = pd.merge(
   stock_journal1,
   journal_number,
   how="inner",
   on=None,
   left_on='D365_WH_NO',
   right_on='D365_WH_NO',
   left_index=False,
   right_index=False,
   sort=True,
   suffixes=("_x", "_y"),
   copy=True,
   indicator=False,
   validate=None,
)

In [89]:
stock_journal1['ITEMBATCHNUMBER']='1'
stock_journal1['JOURNALNAMEID']='ADJ_WHS'
stock_journal1['TRANSACTIONDATE']= pd.to_datetime('today').date()

In [90]:
stock_journal1.to_csv('output/stock_journal.csv',index=False)