In [187]:
import json
import numpy as np
import pandas as pd
import lxml
import re
import os
from datetime import datetime

### Clean up temporary folders

In [188]:
# Set filenames
consolidated_roadnet_out_file = 'data/roadnet/xml_consolidated/consolidated_roadnet_out.xml'
roadnet_inbound_file = 'data/roadnet/inbound/rdnet_in.csv'

In [191]:
try:
    os.remove(consolidated_roadnet_out_file)
except:
    print(consolidated_roadnet_out_file + ' not in folder')

try:
    os.remove(roadnet_inbound_file)
except:
    print(roadnet_inbound_file + ' not in folder')    

data/roadnet/xml_consolidated/consolidated_roadnet_out.xml not in folder
data/roadnet/inbound/rdnet_in.csv not in folder


In [194]:
# Clean up the xml_prep folder
path = "data/roadnet/xml_prep/"
dir_list = os.listdir(path)

for i in range(0,len(dir_list)):
    xml_file = path+dir_list[i]
    os.remove(xml_file)

# Clean up the xml_consolidated folder
path = "data/roadnet/xml_consolidated/"
dir_list = os.listdir(path)

for i in range(0,len(dir_list)):
    xml_file = path+dir_list[i]
    os.remove(xml_file)    

### Convert D365 XML outbound files from a single line (with no newlines) to multiple lines by inserting a newline after evert '>' character
- Store the resulting files in the /xml_prep/ folder

In [195]:
# Get the list of all files and directories
path = "data/roadnet/xml_outbound/"
dir_list = os.listdir(path)

print("Files and directories in '", path, "' :")

# prints all files
print(dir_list)

Files and directories in ' data/roadnet/xml_outbound/ ' :
['INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (10).xml', 'INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (12).xml', 'INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (11).xml']


In [196]:
def xml_to_multiple_lines(fname, fnum):

    with open(fname, 'r') as fr:
        # reading line by line
        lines = fr.readlines()
        last_line = len(lines)
        print(last_line)

    for line in lines:
        replaced_line = re.sub(">", ">\u000A", line)

    outfile = 'data/roadnet/xml_prep/f'+str(fnum)+'.xml'

    with open(outfile, 'w') as fw:
        fw.write(replaced_line)    

    return

In [197]:
# Convert xml from D365 by adding newlines

for i in range(0,len(dir_list)):
    xml_file = path+dir_list[i]
    print(xml_file)
    xml_to_multiple_lines(xml_file,i)

number_of_xml = len(dir_list)

data/roadnet/xml_outbound/INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (10).xml
1
data/roadnet/xml_outbound/INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (12).xml
1
data/roadnet/xml_outbound/INT_LEW002_SendDataToRoadnet_EXP-Roadnet session composite entity. (11).xml
1


### Consolidate the processed XML outbound files into a single, consolidated xml file
- Remove all lines that are not transaction line items

In [198]:
# Get the list of all files and directories
path = "data/roadnet/xml_prep/"
prep_dir_list = os.listdir(path)

In [199]:
def import_roadnet_files2(fname, fnum, outfile):

    try:    
        with open(fname, 'r') as fr:
            # reading line by line
            lines = fr.readlines()

            last_line = len(lines)

            # opening in writing mode
            with open(outfile, 'a') as fw:
                for line in lines:      
                    substr1 = 'CCBROADNETWORKBENCHSESSIONTABLEENTITY'       
                    x1 = re.search(substr1, line)
                    substr2 = 'Document>'       
                    x2 = re.search(substr2, line)
                    substr3 = 'xml version='       
                    x3 = re.search(substr3, line)
                    #print(x)
                    if x1 == None and x2 == None and x3 == None:
                        fw.write(line)
        print(fname+" lines deleted")

    except:
        print("Error importing "+fname)

In [201]:
for i in range(0,len(prep_dir_list)):
    xml_file = path+prep_dir_list[i]
    #print(i)
    #print(xml_file)
    import_roadnet_files2(xml_file,i, consolidated_roadnet_out_file)

data/roadnet/xml_prep/f2.xml lines deleted
data/roadnet/xml_prep/f1.xml lines deleted
data/roadnet/xml_prep/f0.xml lines deleted


### Split up the consolidated xml file into 10,000 lines or fewer, else they cannot be imported into a Pandas dataframe
- Store these files in the same folder as the consolidated xml, and delete the consolidated xml after the split
- Add the lines to turn this into a valid XML format

In [166]:
fname = consolidated_roadnet_out_file
outfile = fname

try:    
    with open(fname, 'r') as fr:
        # reading line by line
        lines = fr.readlines()

        last_line = len(lines)

        line_counter = 1

        # opening in writing mode
        last_x = 0
        for i in range(0,len(lines)):
            line = lines[i]
            if i == last_line:
                print(line)
            x = int(i/10000)
            with open(outfile[:-4]+str(x)+'.xml', 'a') as fw:        
                if i == 0:
                    fw.write('<?xml version="1.0" encoding="utf-8"?>\n')
                    fw.write('<Document>\n')
                if x > last_x:
                    last_x = x
                    fw.write('<?xml version="1.0" encoding="utf-8"?>\n')
                    fw.write('<Document>\n')
                fw.write(line)
                if int((i+1)/10000) > x:
                    fw.write('</Document>')
                if i == len(lines) - 1:
                    fw.write('</Document>')
    os.remove(consolidated_roadnet_out_file)
                
except:
    print("Error importing "+fname)

### Import the transformed XML files into a Pandas dataframe

In [167]:
# Get the list of all files and directories
path = "data/roadnet/xml_consolidated/"
dir_list = os.listdir(path)

print("Files and directories in '", path, "' :")

# prints all files
print(dir_list)

Files and directories in ' data/roadnet/xml_consolidated/ ' :
['consolidated_roadnet_out0.xml']


In [168]:
# Import the first file into a dataframe
rdnet_out = pd.read_xml(path+dir_list[0])

In [169]:
# Import the rest of the files, and append to the dataframe
for i in range(1,len(dir_list)):
    xml_file = path+dir_list[i]
    print(str(i)+xml_file)
    temp = pd.read_xml(path+dir_list[i])
    rdnet_out = pd.concat([rdnet_out, temp], ignore_index=True)

### Create the Roadnet inbound file by copying selected columns as-is from the outbound data

In [171]:
rdnet_in = rdnet_out[['QUANTITY','LOCATIONID','INVENTTRANSID','ITEMID','ORDERID','WAREHOUSEID','PRODUCTNAME','ROADNETROUTE','ORDERACCOUNT','ORDERACCOUNTNAME','WEIGHT']]

In [172]:
rdnet_in.rename(columns={'QUANTITY':'CASEQTY','LOCATIONID':'DESTINATIONLOCATIONID','ORDERID':'ORDERNUMBER','WAREHOUSEID':'ORIGINLOCATIONID','ORDERACCOUNT':'STOPLOCATIONID','ORDERACCOUNTNAME':'STOPLOCATIONNAME'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in.rename(columns={'QUANTITY':'CASEQTY','LOCATIONID':'DESTINATIONLOCATIONID','ORDERID':'ORDERNUMBER','WAREHOUSEID':'ORIGINLOCATIONID','ORDERACCOUNT':'STOPLOCATIONID','ORDERACCOUNTNAME':'STOPLOCATIONNAME'}, inplace=True)


### Create the rest of the fields as per the Roadnet inbound file spec

In [173]:
rdnet_in['DYNAMICSRETRIEVALSESSIONID'] = 'ZA1-000000661'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['DYNAMICSRETRIEVALSESSIONID'] = 'ZA1-000000661'


In [174]:
today = str(datetime.now())
today = today.replace(':','h')
today = today.replace('-','')
today = today.replace(' ','-')
today = today[0:14] + '-'
print("Today date is: ", today)

Today date is:  20230118-15h43-


In [175]:
rdnet_in['ROADNETROUTEINTERNALROUTEID'] = today + rdnet_in['STOPLOCATIONID'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['ROADNETROUTEINTERNALROUTEID'] = today + rdnet_in['STOPLOCATIONID'].astype(str)


In [176]:
no_of_customers = len(rdnet_in['STOPLOCATIONID'].unique())

In [177]:
rdnet_in['APPTID'] = ''
rdnet_in['DESCRIPTION'] = 'BLOEM_PLAN'
rdnet_in['ERROR'] = ''
rdnet_in['FIRSTDRIVER'] = '825196'
rdnet_in['FIRSTTRAILER'] = 'ST29PTAIL'
rdnet_in['LASTSTOPISDESTINATION'] = 'No'
rdnet_in['LOADID'] = ''
rdnet_in['LOADTEMPLATEID'] = ''
rdnet_in['ORDERTYPE'] = 'rotOrder'
rdnet_in['ORIGINDESTINATION'] = 'Yes'
rdnet_in['PALLETQTY'] = '0'
rdnet_in['REFERENCECATEGORY'] = 'Sales'
rdnet_in['REFERENCEDOCUMENT'] = 'SalesOrder'
rdnet_in['ROADNETINTERNALSESSIONID'] = '35411'
rdnet_in['ROADNETREGIONID'] = 'ZA1'
rdnet_in['ROUTECODE'] = ''
rdnet_in['SECONDDRIVER'] = ''
rdnet_in['SECONDTRAILER'] = ''
rdnet_in['SEQUENCEDISTANCE'] = '.000000'
rdnet_in['SEQUENCENUMBER'] = '1'
rdnet_in['SEQUENCETRAVELTIME'] = '0'
rdnet_in['SHIPPINGCARRIER'] = '0'
rdnet_in['STATUS'] = 'Error'
rdnet_in['STOPTYPE'] = 'stpStop'
rdnet_in['TOTALDISTANCE'] = '.000000'
rdnet_in['TOTALROUTEDISTANCE'] = '.000000'
rdnet_in['TRUCKANDTRAILERASSIGNED'] = 'No'
rdnet_in['UNITID'] = ''
rdnet_in['VEHICLEID'] = 'TT4X2TAIL'
rdnet_in['STOPSERVICETIME'] = '720'
rdnet_in['TOTALSERVICETIME'] = '720'
rdnet_in['TOTALTRAVELTIME'] = '0'
rdnet_in['LINEREFID'] = rdnet_in['INVENTTRANSID']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['APPTID'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['DESCRIPTION'] = 'BLOEM_PLAN'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['ERROR'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value inst

In [179]:
rdnet_in['ROUTECOMPLETETIME'] = pd.to_datetime('today')
rdnet_in['ROUTECOMPLETETIME'] = rdnet_in['ROUTECOMPLETETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=13) + pd.Timedelta(minutes=19)

rdnet_in['ROUTESTARTTIME'] = pd.to_datetime('today')
rdnet_in['ROUTESTARTTIME'] = rdnet_in['ROUTESTARTTIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=4) + pd.Timedelta(minutes=0)

rdnet_in['SCHEDULEDARRIVALDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDARRIVALDATETIME'] = rdnet_in['SCHEDULEDARRIVALDATETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=12) + pd.Timedelta(minutes=59)

rdnet_in['SCHEDULEDDELIVERYDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDDELIVERYDATETIME'] = rdnet_in['SCHEDULEDDELIVERYDATETIME'].dt.normalize() + pd.Timedelta(days=1)

rdnet_in['SCHEDULEDSHIPDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDSHIPDATETIME'] = rdnet_in['SCHEDULEDSHIPDATETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=4) + pd.Timedelta(minutes=10)

rdnet_in['STOPARRIVALTIME'] = pd.to_datetime('today')
rdnet_in['STOPARRIVALTIME'] = rdnet_in['STOPARRIVALTIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=8) + pd.Timedelta(minutes=28)

### Get customer master in order to get the postal code

In [180]:
customers=pd.read_csv('data/customer_master.csv')
customers_short = customers[['ADDRESSZIPCODE','CUSTOMERACCOUNT','ORGANIZATIONNAME']].copy()
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].fillna(0)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(int)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(str)

  customers=pd.read_csv('data/customer_master.csv')


In [181]:
rdnet_in = pd.merge(
    rdnet_in,
    customers_short,
    how="inner",
    on=None,
    left_on='STOPLOCATIONID',
    right_on='CUSTOMERACCOUNT',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [182]:
rdnet_in.rename(columns={'ADDRESSZIPCODE':'STOPPOSTALCODE'}, inplace=True)
rdnet_in.drop(columns={'CUSTOMERACCOUNT', 'ORGANIZATIONNAME'}, inplace=True, axis=1)

### Generate the CSV file

In [183]:
rdnet_in.to_csv(roadnet_inbound_file,index=False)