In [690]:
import json
import numpy as np
import pandas as pd
import lxml
import re
import os
from datetime import datetime

# Import XML outbound files from Roadnet

In [691]:
# Set filenames
consolidated_roadnet_out_file = 'data/roadnet/xml_consolidated/consolidated_roadnet_out.xml'
roadnet_inbound_file = 'data/roadnet/inbound/rdnet_in.csv'

In [692]:
try:
    os.remove(consolidated_roadnet_out_file)
except:
    print(consolidated_roadnet_out_file + ' not in folder')

try:
    os.remove(roadnet_inbound_file)
except:
    print(roadnet_inbound_file + ' not in folder')    

In [693]:
# Clean up the xml_prep folder
path = "data/roadnet/xml_prep/"
dir_list = os.listdir(path)

for i in range(0,len(dir_list)):
    xml_file = path+dir_list[i]
    os.remove(xml_file)

In [694]:
# Get the list of all files and directories
path = "data/roadnet/xml_outbound/"
dir_list = os.listdir(path)

print("Files and directories in '", path, "' :")

# prints all files
print(dir_list)

Files and directories in ' data/roadnet/xml_outbound/ ' :
['Roadnet session composite entity (1).xml', 'Roadnet session composite entity (2).xml', 'Roadnet session composite entity.xml']


In [695]:
# Convert D365 XML output from a single line (with no newlines) to multiple lines by inserting a newline after evert '>' character
def xml_to_multiple_lines(fname, fnum):

    with open(fname, 'r') as fr:
        # reading line by line
        lines = fr.readlines()
        last_line = len(lines)
        print(last_line)

    for line in lines:
        replaced_line = re.sub(">", ">\u000A", line)

    outfile = 'data/roadnet/xml_prep/f'+str(fnum)+'.xml'

    with open(outfile, 'w') as fw:
        fw.write(replaced_line)    

    return

In [696]:
# Convert xml from D365 by adding newlines

for i in range(0,len(dir_list)):
    xml_file = path+dir_list[i]
    print(xml_file)
    xml_to_multiple_lines(xml_file,i)

number_of_xml = len(dir_list)

data/roadnet/xml_outbound/Roadnet session composite entity (1).xml
1
data/roadnet/xml_outbound/Roadnet session composite entity (2).xml
1
data/roadnet/xml_outbound/Roadnet session composite entity.xml
1


In [697]:
# Get the list of all files and directories
path = "data/roadnet/xml_prep/"
prep_dir_list = os.listdir(path)

In [698]:
def import_roadnet_files(fname, fnum, outfile):

    if fnum == 0:
        print('Import first file')
        try:    
            with open(fname, 'r') as fr:
                # reading line by line
                lines = fr.readlines()

                last_line = len(lines)

                # pointer for position
                ptr = 1

                # opening in writing mode
                with open(outfile, 'a') as fw:
                    for line in lines:             
                        if ptr != 3 and ptr != last_line-1 and ptr != last_line and len(line) > 1:
                            fw.write(line)
                            #print(line)
                        ptr += 1
            print(fname+" lines deleted")

        except:
            print("Error importing "+fname)


    # For all files except the first and last
    # Last file: delete lines 1,2,3 and the last and 2nd last line in the file
    if len(prep_dir_list) > 2 and fnum < len(prep_dir_list)-1 and fnum > 0:
        try:    
            with open(fname, 'r') as fr:
                # reading line by line
                lines = fr.readlines()

                last_line = len(lines)

                # pointer for position
                ptr = 1

                # opening in writing mode
                with open(outfile, 'a') as fw:
                    for line in lines:             
                        if ptr != 3 and ptr != 1 and ptr != 2 and ptr != last_line-1 and ptr != last_line and len(line) > 1:
                            fw.write(line)
                            #print(line)
                        ptr += 1
            print(fname+" lines deleted")

        except:
            print("Error importing "+fname)        


    # Last file: delete lines 1,2,3 and the 2nd last line in the file
    # if ptr != 3 and ptr != 1 and ptr != 2 and ptr != last_line-1 and len(line) > 1:
    if fnum == len(prep_dir_list)-1 and fnum > 0:
        print('Import last file')
        try:    
            with open(fname, 'r') as fr:
                # reading line by line
                lines = fr.readlines()

                last_line = len(lines)

                # pointer for position
                ptr = 1

                # opening in writing mode
                with open(outfile, 'a') as fw:
                    for line in lines:             
                        if ptr != 3 and ptr != 1 and ptr != 2 and ptr != last_line-1:
                            fw.write(line)
                            #print(line)
                        ptr += 1
            print(fname+" lines deleted")

        except:
            print("Error importing "+fname)



In [699]:
# Consolidate the xml files

for i in range(0,len(prep_dir_list)):
    xml_file = path+prep_dir_list[i]
    #print(i)
    print(xml_file)
    import_roadnet_files(xml_file,i, consolidated_roadnet_out_file)

data/roadnet/xml_prep/f2.xml
Import first file
data/roadnet/xml_prep/f2.xml lines deleted
data/roadnet/xml_prep/f1.xml
data/roadnet/xml_prep/f1.xml lines deleted
data/roadnet/xml_prep/f0.xml
Import last file
data/roadnet/xml_prep/f0.xml lines deleted


In [700]:
rdnet_out = pd.read_xml(consolidated_roadnet_out_file)

In [701]:
rdnet_out

Unnamed: 0,INVENTTRANSID,BLOCKEDSTATUS,CITY,CREDITRELEASEDATE,CREDITRELEASETIME,DAILYURGENCYINCREASE,DEPENDENCY,DESCRIPTION,DYNAMICSINTERNALSESSIONID,ERROR,...,REFTABLEID,ROADNETROUTE,SALESMANAGER,SHIPDATE,URGENCY,VOLUME,WAREHOUSEDESC,WAREHOUSEID,WEIGHT,WEIGHTUNIT
0,ZA1-007165390,0,Bloemfontein,1900-01-01T00:00:00+00:00,0,0,0,BISMILLAH WHOLESALER,ZA1-000001379,,...,1328,ZA531B01,,2023-01-10T00:00:00+00:00,0,332605000.0,LDP UPINGTON WH,ZA534B,180.348,CS
1,ZA1-007165387,0,Bloemfontein,1900-01-01T00:00:00+00:00,0,0,0,MANHUBE TUMELO PROJECTS,ZA1-000001377,,...,1328,ZA531B01,,2023-01-10T00:00:00+00:00,0,142545000.0,LDP MAFIKENG WH,ZA537B,77.292,CS
2,ZA1-007148741,0,Bloemfontein,1900-01-01T00:00:00+00:00,0,0,0,2 FAT COWS,ZA1-000001378,,...,1328,ZA531B01,,2023-01-10T00:00:00+00:00,0,150000000.0,LDP BLOEMFONTEIN WH,ZA531B,80.25,CS


In [702]:
#rdnet_out.info()

In [703]:
rdnet_in = rdnet_out[['QUANTITY','LOCATIONID','INVENTTRANSID','ITEMID','ORDERID','WAREHOUSEID','PRODUCTNAME','ROADNETROUTE','ORDERACCOUNT','ORDERACCOUNTNAME','WEIGHT']]

In [704]:
rdnet_in.rename(columns={'QUANTITY':'CASEQTY','LOCATIONID':'DESTINATIONLOCATIONID','ORDERID':'ORDERNUMBER','WAREHOUSEID':'ORIGINLOCATIONID','ORDERACCOUNT':'STOPLOCATIONID','ORDERACCOUNTNAME':'STOPLOCATIONNAME'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in.rename(columns={'QUANTITY':'CASEQTY','LOCATIONID':'DESTINATIONLOCATIONID','ORDERID':'ORDERNUMBER','WAREHOUSEID':'ORIGINLOCATIONID','ORDERACCOUNT':'STOPLOCATIONID','ORDERACCOUNTNAME':'STOPLOCATIONNAME'}, inplace=True)


In [705]:
rdnet_in['DYNAMICSRETRIEVALSESSIONID'] = 'ZA1-000000661'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['DYNAMICSRETRIEVALSESSIONID'] = 'ZA1-000000661'


In [706]:
today = str(datetime.now())
today = today.replace(':','h')
today = today.replace('-','')
today = today.replace(' ','-')
today = today[0:14] + '-'
print("Today date is: ", today)

Today date is:  20230110-17h47-


In [707]:
rdnet_in['ROADNETROUTEINTERNALROUTEID'] = today + rdnet_in['STOPLOCATIONID'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['ROADNETROUTEINTERNALROUTEID'] = today + rdnet_in['STOPLOCATIONID'].astype(str)


In [708]:
no_of_customers = len(rdnet_in['STOPLOCATIONID'].unique())

In [709]:
'''
route_id_df = pd.DataFrame(rdnet_in['STOPLOCATIONID'].unique())
index = range(1,no_of_customers+1,1)
route_id_df['ROADNETROUTEINTERNALROUTEID']=index
route_id_df['ROADNETROUTEINTERNALROUTEID']=route_id_df['ROADNETROUTEINTERNALROUTEID'].astype(str)
'''

"\nroute_id_df = pd.DataFrame(rdnet_in['STOPLOCATIONID'].unique())\nindex = range(1,no_of_customers+1,1)\nroute_id_df['ROADNETROUTEINTERNALROUTEID']=index\nroute_id_df['ROADNETROUTEINTERNALROUTEID']=route_id_df['ROADNETROUTEINTERNALROUTEID'].astype(str)\n"

In [710]:
'''
route_id_df['ROADNETROUTEINTERNALROUTEID'] = route_id_df['ROADNETROUTEINTERNALROUTEID'].str.zfill(4)
route_id_df['ROADNETROUTEINTERNALROUTEID'] = today + route_id_df['ROADNETROUTEINTERNALROUTEID']
'''

"\nroute_id_df['ROADNETROUTEINTERNALROUTEID'] = route_id_df['ROADNETROUTEINTERNALROUTEID'].str.zfill(4)\nroute_id_df['ROADNETROUTEINTERNALROUTEID'] = today + route_id_df['ROADNETROUTEINTERNALROUTEID']\n"

In [711]:
rdnet_in['APPTID'] = ''
rdnet_in['DESCRIPTION'] = 'BLOEM_PLAN'
rdnet_in['ERROR'] = ''
rdnet_in['FIRSTDRIVER'] = '825196'
rdnet_in['FIRSTTRAILER'] = 'ST29PTAIL'
rdnet_in['LASTSTOPISDESTINATION'] = 'No'
rdnet_in['LOADID'] = ''
rdnet_in['LOADTEMPLATEID'] = ''
rdnet_in['ORDERTYPE'] = 'rotOrder'
rdnet_in['ORIGINDESTINATION'] = 'Yes'
rdnet_in['PALLETQTY'] = '0'
rdnet_in['REFERENCECATEGORY'] = 'Sales'
rdnet_in['REFERENCEDOCUMENT'] = 'SalesOrder'
rdnet_in['ROADNETINTERNALSESSIONID'] = '35411'
rdnet_in['ROADNETREGIONID'] = 'ZA1'
rdnet_in['ROUTECODE'] = ''
rdnet_in['SECONDDRIVER'] = ''
rdnet_in['SECONDTRAILER'] = ''
rdnet_in['SEQUENCEDISTANCE'] = '.000000'
rdnet_in['SEQUENCENUMBER'] = '1'
rdnet_in['SEQUENCETRAVELTIME'] = '0'
rdnet_in['SHIPPINGCARRIER'] = '0'
rdnet_in['STATUS'] = 'Error'
rdnet_in['STOPTYPE'] = 'stpStop'
rdnet_in['TOTALDISTANCE'] = '.000000'
rdnet_in['TOTALROUTEDISTANCE'] = '.000000'
rdnet_in['TRUCKANDTRAILERASSIGNED'] = 'No'
rdnet_in['UNITID'] = ''
rdnet_in['VEHICLEID'] = 'TT4X2TAIL'
rdnet_in['STOPSERVICETIME'] = '720'
rdnet_in['TOTALSERVICETIME'] = '720'
rdnet_in['TOTALTRAVELTIME'] = '0'
rdnet_in['LINEREFID'] = rdnet_in['INVENTTRANSID']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['APPTID'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['DESCRIPTION'] = 'BLOEM_PLAN'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rdnet_in['ERROR'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value inst

In [712]:
#rdnet_in.info()

In [713]:
rdnet_in['ROUTECOMPLETETIME'] = pd.to_datetime('today')
rdnet_in['ROUTECOMPLETETIME'] = rdnet_in['ROUTECOMPLETETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=13) + pd.Timedelta(minutes=19)

rdnet_in['ROUTESTARTTIME'] = pd.to_datetime('today')
rdnet_in['ROUTESTARTTIME'] = rdnet_in['ROUTESTARTTIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=4) + pd.Timedelta(minutes=0)

rdnet_in['SCHEDULEDARRIVALDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDARRIVALDATETIME'] = rdnet_in['SCHEDULEDARRIVALDATETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=12) + pd.Timedelta(minutes=59)

rdnet_in['SCHEDULEDDELIVERYDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDDELIVERYDATETIME'] = rdnet_in['SCHEDULEDDELIVERYDATETIME'].dt.normalize() + pd.Timedelta(days=1)

rdnet_in['SCHEDULEDSHIPDATETIME'] = pd.to_datetime('today')
rdnet_in['SCHEDULEDSHIPDATETIME'] = rdnet_in['SCHEDULEDSHIPDATETIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=4) + pd.Timedelta(minutes=10)

rdnet_in['STOPARRIVALTIME'] = pd.to_datetime('today')
rdnet_in['STOPARRIVALTIME'] = rdnet_in['STOPARRIVALTIME'].dt.normalize() + pd.Timedelta(days=1) + pd.Timedelta(hours=8) + pd.Timedelta(minutes=28)

### Get customer master in order to get the postal code

In [714]:
customers=pd.read_csv('data/customer_master.csv')
customers_short = customers[['ADDRESSZIPCODE','CUSTOMERACCOUNT','ORGANIZATIONNAME']].copy()
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].fillna(0)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(int)
customers_short['ADDRESSZIPCODE'] = customers_short['ADDRESSZIPCODE'].astype(str)

  customers=pd.read_csv('data/customer_master.csv')


In [715]:
rdnet_in = pd.merge(
    rdnet_in,
    customers_short,
    how="inner",
    on=None,
    left_on='STOPLOCATIONID',
    right_on='CUSTOMERACCOUNT',
    left_index=False,
    right_index=False,
    sort=True,
    suffixes=("_x", "_y"),
    copy=True,
    indicator=False,
    validate=None,
)

In [716]:
rdnet_in.rename(columns={'ADDRESSZIPCODE':'STOPPOSTALCODE'}, inplace=True)
rdnet_in.drop(columns={'CUSTOMERACCOUNT', 'ORGANIZATIONNAME'}, inplace=True, axis=1)

In [717]:
rdnet_in.to_csv(roadnet_inbound_file,index=False)