In [1]:
import os
import pandas as pd
import numpy as np
import csv
import datetime as dt
import math

In [2]:
os.chdir("..")

In [3]:
from baseline import disk, optimizer, config, postprocessing

In [None]:
data_type = "NYC" #of "Chattanooga"
sample_per = 1
time_limit = 4


In [4]:
# read results

dirpath = os.path.join(os.getcwd(), "data_{}".format(data_type), "baseline_output")
routes_in = disk.read_vehicle_runs(dirpath)
routes_in.head(2)

Unnamed: 0,early_time,late_time,order,vehicle_id,request_id,location,location_type,node_id,date,perc,num_vehicles,time_limit
0,0,0,0,0,-1,0,depot_start,2527,2021-02-18,100,5,4
1,18900,19313,1,0,4108,10,pickup,226,2021-02-18,100,5,4


In [5]:
map_file_path = os.path.join(os.getcwd(), "data", "data_{}".format(data_type), "map", "times.csv")
time_matrix = disk.read_time_matrix(map_file_path)

if data_type == "Chattanooga":
    request_file_path = os.path.join(os.getcwd(), "data", "data_{}".format(data_type), "requests", "requests.csv")
elif data_type == "NYC":
    request_file_path = os.path.join(os.getcwd(), "data", "data_{}".format(data_type), "requests", "requests_{}.csv".format(sample_per))
else:
    raise ValueError("The dataset is not available")
    
requests_in = disk.read_requests(request_file_path, time_matrix=time_matrix)
requests_in['perc'] = sample_per


In [6]:
def get_window(location_type, source_window_start, source_window_end, target_window_start, target_window_end):
    if (location_type == 'pickup') or (location_type == 'depot_start'):
        return [source_window_start, source_window_end]
    else:
        return [target_window_start, target_window_end]
    
    
def get_arrival_depart(last_node_id, next_node_id, cur_time, next_start_window, time_matrix):
    earliest_arrive_time = cur_time + time_matrix[last_node_id][next_node_id]
    if earliest_arrive_time < next_start_window:
        arrive_time = next_start_window
    else:
        arrive_time = earliest_arrive_time
    depart_time = arrive_time + config.DWELL_TIME
    cur_time = depart_time
    return arrive_time, depart_time, cur_time


def add_arrival_depart_to_df(temp):
    arrive_times = [np.nan]
    depart_times = [np.nan]
    cur_time = 0
    for i in range(1, len(temp)-1):
        last_row = temp.iloc[i-1]
        next_row = temp.iloc[i]
        arrive_time, depart_time, cur_time = get_arrival_depart(last_row['node_id'], next_row['node_id'], cur_time, next_row['start_window'], time_matrix)
        arrive_times.append(arrive_time)
        depart_times.append(depart_time)
    arrive_times.append(np.nan)
    depart_times.append(np.nan)
    temp['arrive_time'] = arrive_times
    temp['depart_time'] = depart_times
    return temp
    
    
def postprocess_route(routes_in, requests_in, time_matrix, date, perc, num_vehicles, time_limit):
    # filter routes
    routes = routes_in[(routes_in['date']==date) & (routes_in['perc']==perc) & (routes_in['time_limit']==time_limit) & (routes_in['num_vehicles']==num_vehicles)]
    routes = routes.sort_values(by=['order'])
    routes = routes[['order', 'vehicle_id', 'request_id', 'location_type', 'node_id']]
    
    # filter requests
    requests = requests_in[(requests_in['date']==date) & (requests_in['perc']==perc)]
    requests = requests[['request_id', 'source', 'target', 'source_window_start', 'source_window_end', 'target_window_start', 'target_window_end']]
    
    df = routes.merge(requests, left_on='request_id', right_on='request_id', how='left')
    df['window'] = df.apply(lambda row: get_window(row['location_type'], row['source_window_start'], row['source_window_end'], row['target_window_start'], row['target_window_end']), axis=1)
    df['start_window'] = df['window'].apply(lambda x: x[0])
    df['end_window'] = df['window'].apply(lambda x: x[1])
    result = []
    for vehicle_id in sorted(df['vehicle_id'].unique()):
        temp = df[df['vehicle_id']==vehicle_id].sort_values(by=['order'])
        temp = add_arrival_depart_to_df(temp)
        result.append(temp)
    result = pd.concat(result, ignore_index=True)
    result = result[['order', 'vehicle_id', 'request_id', 'location_type', 'node_id', 'start_window', 'end_window', 'arrive_time', 'depart_time']]
    result['validate'] = result.apply(lambda row: validate_loc(row['start_window'], row['end_window'], row['arrive_time'], row['depart_time']), axis=1)
    result['time_diff'] = result.apply(lambda row: row['arrive_time'] - row['start_window'], axis=1)
    result['perc'] = perc
    result['date'] = date
    result['num_vehicles'] = num_vehicles
    result['time_limit'] = time_limit
    if len(result) != len(result[result['validate']]):
        print(f"There is an invalid location for date={date}, perc={perc}, num_vehicles={num_vehicles}, and time_limit={time_limit}")
    return result


def get_metrics(routes_in, requests_in, time_matrix, perc, time_limit):
    vehicles = sorted(routes_in[(routes_in['perc']==perc) & (routes_in['time_limit']==time_limit)]['num_vehicles'].unique())
    dates = sorted(routes_in['date'].unique())
    proc_r = []
    result = {}
    for num_vehicles in vehicles:
        sr = []
        awt = []
        adt = []
        for date in dates:
            df = postprocess_route(routes_in, requests_in, time_matrix, date, perc, num_vehicles, time_limit)

            proc_r.append(df)
            sr.append(extract_service_rate(df, requests_in, perc, date, num_vehicles, time_limit))
            awt.append(extract_awt(df))
            adt.append(extract_adt(df))
        result[f"veh{num_vehicles}ortools_Service Rate"] = sr
        result[f"veh{num_vehicles}ortools_Avg Waiting"] = awt
        result[f"veh{num_vehicles}ortools_Avg Delay"] = adt
    return pd.DataFrame(result), pd.concat(proc_r, ignore_index=True)


def extract_awt(df):
    temp = df[(df['location_type']=='pickup') & (df['time_diff']>0)]
    if temp is None:
        return float("nan")
    return float("nan") if len(temp) == 0 else math.ceil(temp['time_diff'].mean() / len(temp))


def extract_adt(df):
    temp = df[df['location_type']=='dropoff']
    if temp is None:
        return float("nan")
    return float("nan") if len(temp) == 0 else math.ceil(temp['time_diff'].mean() / len(temp))


def extract_adt(df):
    result = []
    for k, v in df.iterrows():
        if v['location_type'] == 'dropoff':
            result.append(v['arrive_time'] - v['start_window'])
    if result is None:
        return float("nan")
    return float("nan") if len(result) == 0 else math.ceil(sum(result) / len(result))

            
def extract_service_rate(routes_in, requests_in, perc, date, num_vehicles, time_limit):
    num_requests_served = len(routes_in[(routes_in['date']==date) & (routes_in['perc']==perc) & (routes_in['time_limit']==time_limit) & (routes_in['num_vehicles']==num_vehicles) & (routes_in['request_id']!=-1)]['request_id'].unique())
    total_num_requests = len(requests_in[(requests_in['date']==date) & (requests_in['perc']==perc)]['request_id'].unique())
    result = (num_requests_served / total_num_requests) * 100
    return math.floor(result)


def validate_loc(start_window, end_window, arrive_time, depart_time):
    if arrive_time < start_window:
        return False
    elif arrive_time > end_window:
        return False
    else:
        return True

In [9]:

r, proc_r = get_metrics(routes_in, requests_in, time_matrix, perc, time_limit)
result = proc_r.drop(columns=['validate', 'time_diff', 'perc'])
print(len(result), len(routes_in[routes_in['perc']==perc]))
file_path = os.path.join(os.getcwd(), "data", "processed_routes_output", f"routes_{perc}.csv")
result.to_csv(file_path, index=False)

dirpath = os.path.join(os.getcwd(), "data", "processed_baseline_output")
file_path = os.path.join(dirpath, "Service Rate.csv")
cols = list(filter(lambda x: "Service Rate" in x, list(r.columns)))
df = r[cols]
for col in cols:
    new_col_name = col.split("_")[0]
    df[new_col_name] = df[col]
df = df.drop(columns=cols)
df.to_csv(file_path, index=False)

file_path = os.path.join(dirpath, "Avg Waiting.csv")
cols = list(filter(lambda x: "Avg Waiting" in x, list(r.columns)))
df = r[cols]
for col in cols:
    new_col_name = col.split("_")[0]
    df[new_col_name] = df[col]
df = df.drop(columns=cols)
df.to_csv(file_path, index=False)

file_path = os.path.join(dirpath, "Avg Delay.csv")
cols = list(filter(lambda x: "Avg Delay" in x, list(r.columns)))
df = r[cols]
for col in cols:
    new_col_name = col.split("_")[0]
    df[new_col_name] = df[col]
df = df.drop(columns=cols)
df.to_csv(file_path, index=False)

file_path = os.path.join(dirpath, "Compute Time.csv")
data = {"veh3ortools": [time_limit/10 for x in range(len(r))], "veh4ortools": [time_limit/10 for x in range(len(r))], "veh5ortools": [time_limit/10 for x in range(len(r))], "veh6ortools": [time_limit/10 for x in range(len(r))]}
df = pd.DataFrame(data)
df.to_csv(file_path, index=False)

r, proc_r = get_metrics(routes_in, requests_in, time_matrix, perc, time_limit)
r

38686 135516


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[new_col_name] = df[col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[new_col_name] = df[col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[new_col_name] = df[col]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead



Unnamed: 0,veh3ortools_Service Rate,veh3ortools_Avg Waiting,veh3ortools_Avg Delay,veh4ortools_Service Rate,veh4ortools_Avg Waiting,veh4ortools_Avg Delay,veh5ortools_Service Rate,veh5ortools_Avg Waiting,veh5ortools_Avg Delay,veh6ortools_Service Rate,veh6ortools_Avg Waiting,veh6ortools_Avg Delay,veh7ortools_Service Rate,veh7ortools_Avg Waiting,veh7ortools_Avg Delay
0,80,26,962,93,27,1009,98,21,951,100,25,983,100,22,936
1,51,15,1006,63,14,1095,72,10,1071,81,11,1054,89,8,1078
2,56,16,1057,68,12,1026,78,11,1005,88,10,1069,94,10,1000
3,55,12,1072,66,12,1091,78,8,1031,85,8,1047,91,8,1043
4,64,13,1065,76,11,1059,86,10,1005,91,11,1031,96,8,1033
5,53,14,1023,66,10,1029,77,9,1044,84,9,1080,93,8,1031
6,49,14,1018,62,9,1052,70,9,1050,77,9,1032,83,10,1046
7,55,12,1041,67,10,1055,76,9,961,84,10,955,89,8,962
8,65,15,886,80,15,1009,91,15,1084,95,13,972,100,13,1050
9,52,13,1001,64,11,1008,71,10,996,82,9,1109,91,9,1081
