In [1]:
import json
import os
import pandas as pd
from tqdm import tqdm
requests_dir = '../data/input/requests'
response_dir = '../data/input/responses'


import sys
sys.path.append('../utils')
import helpers as h

## Combining all routes in a single df.
Every route request is read, parsed, and combined into a single dataframe.

In [2]:
def read_responses(path):
    """
        Read every file found under `path` (recursively) into a dict.

        ARGUMENTS:
            path = str = directory that is walked with os.walk

        RETURNS:
            dict mapping the file name up to its first '.' (whitespace-stripped)
            to the list of whitespace-stripped lines of that file.

        RAISES:
            Exception when two files end up with the same key.
    """
    stops_by_name = {}
    for folder, _subdirs, filenames in tqdm(list(os.walk(path))):
        for filename in filenames:
            key = filename.split('.')[0].strip()
            # keys must be unique over the whole tree; hard error if they are not
            if key in stops_by_name:
                raise Exception("well butter my butt and call me a biscuit. This shouldn't happen. ")
            with open(os.path.join(folder, filename), 'r') as stream:
                stops_by_name[key] = [line.strip() for line in stream]
    return stops_by_name


order_of_stops = read_responses(response_dir)

100%|██████████| 3726/3726 [00:19<00:00, 187.39it/s]


In [3]:
order_of_stops

{'0521_300-20220617-055733-2-0': ['394', '395'],
 '0521_300-20220617-085002-2-0': ['384', '385'],
 '0521_300-20220617-092416-2-0': ['388', '390'],
 '0521_300-20220617-124013-2-0': ['72', '75'],
 '0521_300-20220617-124222-1-0': ['72'],
 '0521_300-20220617-125536-1-0': ['78'],
 '0521_300-20220617-135801-1-0': ['81'],
 '0521_301-20220530-064538-154-154': ['64869',
  '64947',
  '64971',
  '64878',
  '64926',
  '64933',
  '64856',
  '64891',
  '64866',
  '64872',
  '64870',
  '64957',
  '64900',
  '64871',
  '64985',
  '65039',
  '64976',
  '64864',
  '64927',
  '64973',
  '64974',
  '64964',
  '64854',
  '64978',
  '64998',
  '64984',
  '64959',
  '65023',
  '64910',
  '64862',
  '64875',
  '65009',
  '64860',
  '65019',
  '64924',
  '64865',
  '65032',
  '64887',
  '64901',
  '64863',
  '64908',
  '64932',
  '64939',
  '64859',
  '64956',
  '65034',
  '64979',
  '64890',
  '64857',
  '64995',
  '64917',
  '64982',
  '64877',
  '64965',
  '64873',
  '65000',
  '64954',
  '65011',
  '65006'

In [4]:

def json_request_to_df(path, ordered_list): 
    """
        Parse one request JSON file into a dataframe with one row per task.

        Variation on Tim's function 'read_request': we do not let a function decide
        which index file (last integer in the filename) to read. Instead we do this
        in the df itself, applying filters where needed. That gives us more control in
        case a json response for a hardcoded idx in read_request is invalid.

        CHOICES MADE: stop order follows `ordered_list`. Tasks whose id does not
        occur in `ordered_list` are placed after the listed ones, keeping their
        order from the JSON file.
        //TODO ==> Should be written in docfile.

        ARGUMENTS: 
            path = str = Fully qualified path to a json file
            ordered_list = list of str = task ids in the order the stops are visited.
                           Do NOT convert to int: there is a task with id == 'E1'.

        RETURNS: 
            pandas dataframe with the columns file_uuid, configurationName, id, lat,
            long, start_time, end_time, server_task_number (1-based position of the
            task in the JSON file) and stop_order (1-based position after sorting).
            A request without tasks gives an empty dataframe with these columns.
    """
    columns = ['file_uuid', 'configurationName', 'id', 'lat', 'long',
               'start_time', 'end_time', 'server_task_number']
    with open(path, 'r') as f:
        request = json.load(f)
    #meta: 
    file_uuid = request['id']
    configurationName = request['configurationName']
    # TODO: this can be done more efficiently with a dedicated method (pd.explode, json-to-dataframe helpers)
    # Every task is retained, including the one with id 'E1'.
    rows = [
        {
            'file_uuid' : file_uuid,
            'configurationName' : configurationName,
            'id' : task['id'].strip(),                          ###DO NOT cast to int (E1 is also a valid id)
            'lat' : task['address']['latitude'],
            'long' : task['address']['longitude'],
            'start_time' : task['timeWindow']['from'],
            'end_time' : task['timeWindow']['till'],
            'server_task_number': task_number
        }
        for task_number, task in enumerate(request['tasks'], start=1)
    ]
    # Explicit columns keep the schema intact when the request has no tasks.
    internal_df = pd.DataFrame(rows, columns=columns)
    if rows:
        #now sort the internal_df according to the order in ordered_list
        rank_of_id = {task_id: rank for rank, task_id in enumerate(ordered_list)}
        # mergesort is stable, so rows with the same sort key keep their JSON order;
        # ids missing from ordered_list map to NaN and are sorted last.
        internal_df = internal_df.sort_values(by='id', key=lambda ids: ids.map(rank_of_id), kind='mergesort')
    #STOP ORDER IS HOW YOU SHOULD CONNECT ROUTE POINTS!!!! 
    internal_df["stop_order"] = range(1, len(internal_df) + 1)
    return internal_df

#reading requests: 
internal_id = 1     # running number per request file, so we can quickly tell each other which exact file to look at for debugging/app-feature design.
all_content = []    # one dataframe per request file; the final dataframe is built from this list once all data is read
for route_dir in tqdm(os.listdir(requests_dir)):
    if route_dir.endswith(".txt"):
        continue
    # directory names are split on '-': first part is the route id, second the date string
    dir_parts = route_dir.split('-')
    route_id = dir_parts[0]
    route_date = h.routedatestring_to_date(dir_parts[1])
    for file in os.listdir(os.path.join(requests_dir, route_dir)):
        if not file.endswith('.json'):
            continue
        # os.path.splitext drops exactly the extension. str.rstrip('.json') instead
        # strips any trailing '.', 'j', 's', 'o' or 'n' characters and could eat
        # the end of the last id block.
        name_parts = os.path.splitext(file)[0].split('-')
        idblock_1 = name_parts[2]
        idblock_2 = name_parts[3]
        idblock_3 = name_parts[4]
        fq_path = os.path.join(requests_dir, route_dir, file)
        # same key derivation as read_responses, so request and response files pair up by name
        fn = file.split('.')[0].strip()
        ordered_stops = order_of_stops[fn]   # KeyError here means there is no response file for this request
        content_of_file = json_request_to_df(fq_path, ordered_stops)
        #enabling us to identify rows belonging to a specific file on the drive. 
        content_of_file['dir'] = route_dir
        content_of_file['route_id'] = route_id
        content_of_file['route_date'] = route_date
        content_of_file['idblock_1'] = idblock_1
        content_of_file['idblock_2'] = idblock_2
        content_of_file['idblock_3'] = idblock_3
        content_of_file['internal_id'] = internal_id
        internal_id += 1
        all_content.append(content_of_file)




100%|██████████| 3726/3726 [01:50<00:00, 33.74it/s]


In [5]:
df_requests = pd.concat(all_content)

In [6]:
df_requests

Unnamed: 0,file_uuid,configurationName,id,lat,long,start_time,end_time,server_task_number,stop_order,dir,route_id,route_date,idblock_1,idblock_2,idblock_3,internal_id
1,41931cd2-8975-4a64-9197-d16abe871bb7,CreateSequence,394,0.565826,0.221868,2022-06-17T07:30:00,2022-06-17T18:00:00,2,1,0521_300-20220617,0521_300,2022-06-17,055733,2,0,1
0,41931cd2-8975-4a64-9197-d16abe871bb7,CreateSequence,395,0.565826,0.221868,2022-06-17T08:00:00,2022-06-17T17:00:00,1,2,0521_300-20220617,0521_300,2022-06-17,055733,2,0,1
1,5737d4ef-23e9-4832-8bb5-24340a176e31,CreateSequence,384,0.565826,0.221868,2022-06-17T07:30:00,2022-06-17T23:59:00,2,1,0521_300-20220617,0521_300,2022-06-17,085002,2,0,2
0,5737d4ef-23e9-4832-8bb5-24340a176e31,CreateSequence,385,0.565826,0.221868,2022-06-17T08:00:00,2022-06-17T17:00:00,1,2,0521_300-20220617,0521_300,2022-06-17,085002,2,0,2
1,34f409b3-9f92-4720-9926-48ca1cbf6f90,CreateSequence,388,0.572716,0.233306,2022-06-17T07:30:00,2022-06-17T23:59:00,2,1,0521_300-20220617,0521_300,2022-06-17,092416,2,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,8a8e6d6b-045c-44bd-9085-cf2d5ec7a6d1,EstimateTime,63699,0.480072,0.301707,2022-06-22T07:30:00,2022-06-22T23:59:00,135,135,0521_O69-20220622,0521_O69,2022-06-22,083833,139,139,21706
135,8a8e6d6b-045c-44bd-9085-cf2d5ec7a6d1,EstimateTime,63698,0.479680,0.301925,2022-06-22T07:30:00,2022-06-22T23:59:00,136,136,0521_O69-20220622,0521_O69,2022-06-22,083833,139,139,21706
136,8a8e6d6b-045c-44bd-9085-cf2d5ec7a6d1,EstimateTime,63697,0.479792,0.300703,2022-06-22T07:30:00,2022-06-22T23:59:00,137,137,0521_O69-20220622,0521_O69,2022-06-22,083833,139,139,21706
137,8a8e6d6b-045c-44bd-9085-cf2d5ec7a6d1,EstimateTime,63696,0.481527,0.300490,2022-06-22T07:30:00,2022-06-22T23:59:00,138,138,0521_O69-20220622,0521_O69,2022-06-22,083833,139,139,21706


In [7]:
os.makedirs('../data/intermediate', exist_ok=True)
df_requests.to_csv('../data/intermediate/requests.csv', index=False)


## Reading excel data
UUIDs can be matched to recover timestamp information, which is required for the EDA phase and as a way to deduplicate points. 

In [8]:
excel_df = pd.read_excel('../data/input/ModifiedQueryRows.xlsx')

In [9]:
excel_df.rename(columns={' Time': 'Time'}, inplace=True)

In [10]:
excel_df.to_csv('../data/intermediate/serverlog.csv')

## Connecting serverlog to JSON files: 
We need to connect the two logs to reliably determine: 
- the order of requests
- initial route suggestion vs route driven

In [11]:
#In the explanation of the data we were basically told to ignore all requests in the PM range of the day. Do that here: 
server_df = excel_df[~excel_df['Time'].str.contains('PM')]
#with the UUID of the request we can now align the request time for a route_id on a day. This allows us to
#recover the final driven route and have the scatterplot deduplicated 
server_df

Unnamed: 0,Date,Time,OptimizationRequestId,RouteId,TriggerType,ConfigurationName,NumberOfTasks,NumberOfTasksInInputPlan
20,5/30/2022,10:39:12.287 AM,094aeb3f-4e9f-48e4-a14c-97adce541cf0,0512_E46,TimeCalculation,EstimateTime,38,38
21,5/30/2022,10:39:44.836 AM,2509a3a1-5e40-4357-bb87-9d73426cf40c,0512_512,TimeCalculation,EstimateTime,58,58
22,5/30/2022,10:40:20.568 AM,5a07fa18-c340-4d61-963e-56f1812a6d60,0515_I06,FullOptimization,CreateSequence,14,0
23,5/30/2022,10:40:20.850 AM,ae3dd9c1-c7be-4536-9b59-688a5a9837dd,0521_698,TimeCalculation,EstimateTime,154,154
24,5/30/2022,10:40:29.748 AM,c35395ec-b54c-4dc9-a1f1-80a730c079f7,0521_860,TimeCalculation,EstimateTime,135,135
...,...,...,...,...,...,...,...,...
134299,6/9/2022,9:56:42.831 AM,1b106804-f0e8-4671-acc5-712a019fc422,0511_163,TimeCalculation,EstimateTime,63,63
134300,6/9/2022,9:57:05.806 AM,e623ae2e-854a-49a5-a861-ced0bed1fce3,0521_375,TimeCalculation,EstimateTime,72,72
134301,6/9/2022,9:57:47.814 AM,099898c7-cd77-4b93-bc2a-df5e74a29201,0511_157,FullOptimization,CreateSequence,110,0
134302,6/9/2022,9:58:47.785 AM,276c2223-c067-46b0-8a8d-17f8d7b6abf4,0521_887,TimeCalculation,EstimateTime,157,157


In [12]:
#remove rows where RouteId == shiftId
server_df = server_df.loc[server_df['RouteId'] != 'shiftId']

In [13]:
server_df

Unnamed: 0,Date,Time,OptimizationRequestId,RouteId,TriggerType,ConfigurationName,NumberOfTasks,NumberOfTasksInInputPlan
20,5/30/2022,10:39:12.287 AM,094aeb3f-4e9f-48e4-a14c-97adce541cf0,0512_E46,TimeCalculation,EstimateTime,38,38
21,5/30/2022,10:39:44.836 AM,2509a3a1-5e40-4357-bb87-9d73426cf40c,0512_512,TimeCalculation,EstimateTime,58,58
22,5/30/2022,10:40:20.568 AM,5a07fa18-c340-4d61-963e-56f1812a6d60,0515_I06,FullOptimization,CreateSequence,14,0
23,5/30/2022,10:40:20.850 AM,ae3dd9c1-c7be-4536-9b59-688a5a9837dd,0521_698,TimeCalculation,EstimateTime,154,154
24,5/30/2022,10:40:29.748 AM,c35395ec-b54c-4dc9-a1f1-80a730c079f7,0521_860,TimeCalculation,EstimateTime,135,135
...,...,...,...,...,...,...,...,...
134299,6/9/2022,9:56:42.831 AM,1b106804-f0e8-4671-acc5-712a019fc422,0511_163,TimeCalculation,EstimateTime,63,63
134300,6/9/2022,9:57:05.806 AM,e623ae2e-854a-49a5-a861-ced0bed1fce3,0521_375,TimeCalculation,EstimateTime,72,72
134301,6/9/2022,9:57:47.814 AM,099898c7-cd77-4b93-bc2a-df5e74a29201,0511_157,FullOptimization,CreateSequence,110,0
134302,6/9/2022,9:58:47.785 AM,276c2223-c067-46b0-8a8d-17f8d7b6abf4,0521_887,TimeCalculation,EstimateTime,157,157


In [14]:
#Date AND Time Conversion
# server_df is a filtered slice of excel_df. Assigning through .loc[:, col] on such a
# slice triggers SettingWithCopyWarning and, on recent pandas, writes into the existing
# object column so the dtype stays object. assign() builds a new frame whose Time and
# Date columns are real datetime64 columns; column order is unchanged.
server_df = server_df.assign(
    Time=pd.to_datetime(server_df['Time'], format=' %I:%M:%S.%f %p'),  #weird leading space in the raw string - hack
    Date=pd.to_datetime(server_df['Date'], format='%m/%d/%Y'),
)

In [20]:
# Per (Date, RouteId): sort the requests by Time, number them 1..n and flag the n-th one.
new_df = []
for _, group in tqdm(server_df.groupby(['Date', 'RouteId'])):
    ordered = group.sort_values(by='Time')
    n_requests = len(group)
    positions = range(1, n_requests + 1)
    ordered['request_day_route_order'] = positions
    # only the chronologically last request of the group is True
    ordered['request_day_route_is_last'] = [position == n_requests for position in positions]
    new_df.append(ordered)

100%|██████████| 19794/19794 [00:06<00:00, 2955.49it/s]


In [21]:

server_df = pd.concat(new_df)

In [23]:
server_df

Unnamed: 0,Date,Time,OptimizationRequestId,RouteId,TriggerType,ConfigurationName,NumberOfTasks,NumberOfTasksInInputPlan,request_day_route_order,request_day_route_is_last
1787,2022-05-30 00:00:00,1900-01-01 06:45:28.382000,ad7f8dd9-e28f-4e45-99ae-fb8d44a8e18a,0511_100,TimeCalculation,EstimateTime,95,95,1,False
1889,2022-05-30 00:00:00,1900-01-01 06:48:11.554000,18aeb04a-ba63-447a-9173-96e99bc53625,0511_100,TimeCalculation,EstimateTime,95,95,2,True
4583,2022-05-30 00:00:00,1900-01-01 07:40:34.573000,97f69458-3e01-49aa-a3e7-7056cbe62dbc,0511_102,FullOptimization,CreateSequence,138,0,1,False
4631,2022-05-30 00:00:00,1900-01-01 07:42:31.132000,9e0528d7-beaa-4768-b49e-22412cd61bb2,0511_102,FullOptimization,CreateSequence,51,0,2,False
4952,2022-05-30 00:00:00,1900-01-01 07:57:51.799000,ab5c343e-7e5a-4acc-b837-803e513c45a0,0511_102,TimeCalculation,EstimateTime,51,51,3,False
...,...,...,...,...,...,...,...,...,...,...
95073,2022-06-22 00:00:00,1900-01-01 06:07:10.009000,5ec9ae72-fd7e-44e0-b3c7-c6f27ce85d7b,0529_P81,FullOptimization,CreateSequence,139,0,1,False
95270,2022-06-22 00:00:00,1900-01-01 06:11:37.756000,36d2ba1e-8391-4e76-a766-517de0227104,0529_P81,TimeCalculation,EstimateTime,139,139,2,False
98340,2022-06-22 00:00:00,1900-01-01 07:10:28.409000,0173674b-8f48-43fe-9845-dbd1cc6b631f,0529_P81,AdditionalOrders,AddToSequence,140,134,3,False
99130,2022-06-22 00:00:00,1900-01-01 07:19:01.900000,d8d64bd6-3049-4955-a556-ea69e1b3edef,0529_P81,TimeCalculation,EstimateTime,140,140,4,False


In [24]:
server_df.isna().sum()#good

Date                         0
Time                         0
OptimizationRequestId        0
RouteId                      0
TriggerType                  0
ConfigurationName            0
NumberOfTasks                0
NumberOfTasksInInputPlan     0
request_day_route_order      0
request_day_route_is_last    0
dtype: int64

## Debugging

In [25]:
inspect_routedayid = '0521_621-20220613' #Bugreport id use as testcase
inspect_route_id = '0521_621'
inspect_day = '2022-06-13'

In [26]:

df_requests.query('dir==@inspect_routedayid')

Unnamed: 0,file_uuid,configurationName,id,lat,long,start_time,end_time,server_task_number,stop_order,dir,route_id,route_date,idblock_1,idblock_2,idblock_3,internal_id
133,8818dfc7-8557-41df-bf48-ad666877f41d,CreateSequence,153995,0.659712,0.239289,2022-06-13T07:30:00,2022-06-13T23:59:00,134,1,0521_621-20220613,0521_621,2022-06-13,050642,141,0,7051
107,8818dfc7-8557-41df-bf48-ad666877f41d,CreateSequence,154005,0.661009,0.240192,2022-06-13T07:30:00,2022-06-13T23:59:00,108,2,0521_621-20220613,0521_621,2022-06-13,050642,141,0,7051
108,8818dfc7-8557-41df-bf48-ad666877f41d,CreateSequence,153999,0.661192,0.240513,2022-06-13T07:30:00,2022-06-13T23:59:00,109,3,0521_621-20220613,0521_621,2022-06-13,050642,141,0,7051
65,8818dfc7-8557-41df-bf48-ad666877f41d,CreateSequence,154066,0.662071,0.238694,2022-06-13T07:30:00,2022-06-13T23:59:00,66,4,0521_621-20220613,0521_621,2022-06-13,050642,141,0,7051
116,8818dfc7-8557-41df-bf48-ad666877f41d,CreateSequence,154007,0.661853,0.239005,2022-06-13T07:30:00,2022-06-13T23:59:00,117,5,0521_621-20220613,0521_621,2022-06-13,050642,141,0,7051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,01a79726-9ab3-487d-901d-f61ad073e50f,CreateSequence,13,0.660629,0.241856,2022-06-13T07:30:00,2022-06-13T23:59:00,14,123,0521_621-20220613,0521_621,2022-06-13,191222,127,0,7064
11,01a79726-9ab3-487d-901d-f61ad073e50f,CreateSequence,11,0.660232,0.241899,2022-06-13T07:30:00,2022-06-13T23:59:00,12,124,0521_621-20220613,0521_621,2022-06-13,191222,127,0,7064
14,01a79726-9ab3-487d-901d-f61ad073e50f,CreateSequence,14,0.660511,0.241601,2022-06-13T07:30:00,2022-06-13T23:59:00,15,125,0521_621-20220613,0521_621,2022-06-13,191222,127,0,7064
15,01a79726-9ab3-487d-901d-f61ad073e50f,CreateSequence,15,0.660375,0.241284,2022-06-13T07:30:00,2022-06-13T23:59:00,16,126,0521_621-20220613,0521_621,2022-06-13,191222,127,0,7064


In [27]:
uuids_inspect_day = df_requests.query('dir==@inspect_routedayid')['file_uuid'].unique()
print(len(uuids_inspect_day), uuids_inspect_day)
#okay, so we have 14 UUID files for a given day in the requests log.

14 ['8818dfc7-8557-41df-bf48-ad666877f41d'
 'fc978408-4335-4359-8081-d32627905ef7'
 '52bca49b-62a8-4744-8f03-a1a370b85ad9'
 '19576071-7ff5-42c7-9feb-ebd3db2e77a4'
 '122c6d56-3d30-4713-b153-0d5b28c7dfab'
 '3df76292-dd3c-47e9-adec-785e19e567a5'
 '7de17674-3b05-4ed4-8f76-d85028c1c5bf'
 '9169f19e-55a1-4cc9-8ea5-a67ca421b620'
 '7662282e-1f45-4a20-bf2d-1b696590057e'
 'd93d3e74-a774-4902-b997-a2f05b81b098'
 '150f5c59-ff5c-4745-bf4b-3f30a1641624'
 'a7f5af76-6d44-4e79-a49a-df214da0ae3d'
 '10663f1f-6407-45a3-9e77-c186aa23b993'
 '01a79726-9ab3-487d-901d-f61ad073e50f']


In [None]:
inspect_daydt = pd.to_datetime(inspect_day)
server_df.query('RouteId==@inspect_route_id & Date==@inspect_daydt')
#Bug confirmed ==> request_day_route_is_last should only have ONE true 
#PATCH confirmed

Unnamed: 0,Date,Time,OptimizationRequestId,RouteId,TriggerType,ConfigurationName,NumberOfTasks,NumberOfTasksInInputPlan,request_day_route_order,request_day_route_is_last
31102,2022-06-13 00:00:00,1900-01-01 05:06:42.789000,8818dfc7-8557-41df-bf48-ad666877f41d,0521_621,FullOptimization,CreateSequence,141,0,1,False
31129,2022-06-13 00:00:00,1900-01-01 05:11:06.226000,fc978408-4335-4359-8081-d32627905ef7,0521_621,FullOptimization,CreateSequence,112,0,2,False
31142,2022-06-13 00:00:00,1900-01-01 05:13:06.553000,52bca49b-62a8-4744-8f03-a1a370b85ad9,0521_621,FullOptimization,CreateSequence,112,0,3,False
31147,2022-06-13 00:00:00,1900-01-01 05:13:39.131000,19576071-7ff5-42c7-9feb-ebd3db2e77a4,0521_621,FullOptimization,CreateSequence,112,0,4,False
31159,2022-06-13 00:00:00,1900-01-01 05:15:03.409000,122c6d56-3d30-4713-b153-0d5b28c7dfab,0521_621,FullOptimization,CreateSequence,111,0,5,False
31877,2022-06-13 00:00:00,1900-01-01 05:51:54.954000,3df76292-dd3c-47e9-adec-785e19e567a5,0521_621,FullOptimization,CreateSequence,129,0,6,False
32192,2022-06-13 00:00:00,1900-01-01 06:02:03.514000,7de17674-3b05-4ed4-8f76-d85028c1c5bf,0521_621,FullOptimization,CreateSequence,128,0,7,False
32646,2022-06-13 00:00:00,1900-01-01 06:13:31.304000,9169f19e-55a1-4cc9-8ea5-a67ca421b620,0521_621,TimeCalculation,EstimateTime,128,128,8,False
32709,2022-06-13 00:00:00,1900-01-01 06:14:59.437000,7662282e-1f45-4a20-bf2d-1b696590057e,0521_621,TimeCalculation,EstimateTime,126,126,9,False
32798,2022-06-13 00:00:00,1900-01-01 06:16:44.201000,d93d3e74-a774-4902-b997-a2f05b81b098,0521_621,TimeCalculation,EstimateTime,126,126,10,False


In [29]:
#attach the server-log columns (incl. request_day_route_is_last) to every request row, matching file_uuid to OptimizationRequestId.
#how='left' keeps request rows that have no match in server_df; their server-log columns become NaN.
merged_df = pd.merge(df_requests, server_df, left_on='file_uuid', right_on='OptimizationRequestId', how='left')
#smaller df holding only the rows whose request is flagged as the last one (request_day_route_is_last == True)
request_df_driven = merged_df.query('request_day_route_is_last==True')

In [30]:
merged_df.isna().sum()#not good: but we know what this is; these are the PM requests. now we can drop them too from the initial file!

file_uuid                         0
configurationName                 0
id                                0
lat                               0
long                              0
start_time                        0
end_time                          0
server_task_number                0
stop_order                        0
dir                               0
route_id                          0
route_date                        0
idblock_1                         0
idblock_2                         0
idblock_3                         0
internal_id                       0
Date                         455839
Time                         455839
OptimizationRequestId        455839
RouteId                      455839
TriggerType                  455839
ConfigurationName            455839
NumberOfTasks                455839
NumberOfTasksInInputPlan     455839
request_day_route_order      455839
request_day_route_is_last    455839
dtype: int64

In [31]:
merged_df = merged_df.dropna()

In [None]:
## confirm if the patch for debugging works: 
merged_df.query('RouteId==@inspect_route_id & Date==@inspect_daydt')['request_day_route_order'].nunique()
#YES! 13 is the number to be expected, patch confirmed to be working. 


13

In [36]:
print("SHAPE: ", merged_df.shape)
print("NANS: \n", merged_df.isna().sum())
#good!

SHAPE:  (2120931, 26)
NANS: 
 file_uuid                    0
configurationName            0
id                           0
lat                          0
long                         0
start_time                   0
end_time                     0
server_task_number           0
stop_order                   0
dir                          0
route_id                     0
route_date                   0
idblock_1                    0
idblock_2                    0
idblock_3                    0
internal_id                  0
Date                         0
Time                         0
OptimizationRequestId        0
RouteId                      0
TriggerType                  0
ConfigurationName            0
NumberOfTasks                0
NumberOfTasksInInputPlan     0
request_day_route_order      0
request_day_route_is_last    0
dtype: int64


In [37]:
merged_df.to_csv("../data/intermediate/clean_data.csv", index=False)