In [1]:
import numpy as np 
np.random.seed(100)
import matplotlib.pyplot as plt 
%matplotlib inline
import seaborn as sns
import pandas as pd 

import JargoInstanceHelper as jih 


# Creating instances with higher capacities

In [5]:
instance_path="Manhattan/mny-1-5000.instance"
customers, vehicles = jih.loadInstance(instance_path,debug=True)

DEBUG: loading instance from Manhattan/mny-1-5000.instance
DEBUG: loaded instance data with 10033 entities
DEBUG: instance contains 5033 customers
DEBUG: instance contains 5000 vehicles


In [7]:
# Set the 'quantity' column (the vehicle capacity this section is about) to 6 for
# every vehicle, then write customers and vehicles out as a new instance file.
# Note: this modifies the shared `vehicles` frame in place.
vehicles['quantity']=6
jih.saveInstance('Manhattan/mny-6-5000.instance',customers,vehicles)

In [9]:
# Set the 'quantity' column to 10 for every vehicle and save the result as a
# separate instance file (note the file is numbered 7 while the capacity is 10).
vehicles['quantity']=10
jih.saveInstance('Manhattan/mny-7-5000.instance',customers,vehicles)

# Creating instances with carpooling vehicles

We will create a couple of instances with type 3 vehicles by taking one of the test instances from Manhattan and modifying the destination of the vehicles (which by default is 0 for all of them).

In [6]:
instance_path="Manhattan/mny-1-5000.instance"
customers, vehicles = jih.loadInstance(instance_path,debug=True)

DEBUG: loading instance from Manhattan/mny-1-5000.instance
DEBUG: loaded instance data with 10033 entities
DEBUG: instance contains 5033 customers
DEBUG: instance contains 5000 vehicles


In [7]:
# Randomly pick half of the vehicles; the other half keep the destination they were loaded with.
to_modify=vehicles.sample(frac=0.5)
# We know that there are 12,320 vertices in Manhattan.
# NOTE(review): np.random.randint excludes the upper bound, so the values drawn
# here are 1..12319 -- confirm this matches the valid vertex id range.
to_modify['destination']=np.random.randint(1, 12320, to_modify.shape[0])
# Copy the new destinations back into the full vehicle table (rows are aligned on the index).
vehicles.update(to_modify)
# "update" automatically converts all numeric values to doubles, so cast everything back to int
vehicles=vehicles.astype(int)
# Sanity check: report how many vehicles ended up with destination == origin.
# NOTE(review): such vehicles are only counted, not re-drawn; the recorded run
# reports one of them -- confirm that is acceptable for the saved instance.
print("there are %d vehicles with the same origin and destination"%len(vehicles[vehicles['origin']==vehicles['destination']]))
# Summary statistics of the modified vehicle table.
vehicles.describe()

there are 1 vehicles with the same origin and destination


Unnamed: 0,id,origin,destination,quantity,early
count,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2500.5,5951.4542,3074.97,3.0,0.0
std,1443.520003,3540.830722,3976.650628,0.0,0.0
min,1.0,2.0,0.0,3.0,0.0
25%,1250.75,2914.0,0.0,3.0,0.0
50%,2500.5,5717.0,0.5,3.0,0.0
75%,3750.25,9073.25,6060.25,3.0,0.0
max,5000.0,12309.0,12313.0,3.0,0.0


In [8]:
# Set the 'late' column to 2000 (an arbitrary choice) for every vehicle and
# every customer. Both frames are modified in place.
vehicles['late']=2000
customers['late']=2000
# Display the updated vehicle table.
vehicles

Unnamed: 0,id,origin,destination,quantity,early,late
0,1,811,5939,3,0,2000
1,2,7144,0,3,0,2000
2,3,10068,0,3,0,2000
3,4,9694,1201,3,0,2000
4,5,5999,0,3,0,2000
...,...,...,...,...,...,...
4995,4996,5934,0,3,0,2000
4996,4997,5308,6512,3,0,2000
4997,4998,4971,11464,3,0,2000
4998,4999,840,0,3,0,2000


In [9]:
jih.saveInstance('Manhattan/mny-8-5000.instance',customers,vehicles)

# Creating a shorter instance to run quick tests

In [3]:
instance_path="Manhattan/mny-1-5000.instance"
customers, vehicles = jih.loadInstance(instance_path,debug=True)

DEBUG: loading instance from Manhattan/mny-1-5000.instance
DEBUG: loaded instance data with 10033 entities
DEBUG: instance contains 5033 customers
DEBUG: instance contains 5000 vehicles


In [4]:
customers.describe()

Unnamed: 0,id,origin,destination,quantity,early
count,5033.0,5033.0,5033.0,5033.0,5033.0
mean,7517.0,5874.238228,5970.859527,1.0,928.105702
std,1453.046283,3451.784306,3515.563056,0.0,516.79884
min,5001.0,3.0,1.0,1.0,1.0
25%,6259.0,2912.0,3009.0,1.0,480.0
50%,7517.0,5713.0,5762.0,1.0,943.0
75%,8775.0,8835.0,9097.0,1.0,1374.0
max,10033.0,12309.0,12310.0,1.0,1800.0


In [6]:
vehicles.describe()

Unnamed: 0,id,origin,destination,quantity,early
count,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2500.5,5951.4542,0.0,3.0,0.0
std,1443.520003,3540.830722,0.0,0.0,0.0
min,1.0,2.0,0.0,3.0,0.0
25%,1250.75,2914.0,0.0,3.0,0.0
50%,2500.5,5717.0,0.0,3.0,0.0
75%,3750.25,9073.25,0.0,3.0,0.0
max,5000.0,12309.0,0.0,3.0,0.0


In [7]:
#creating a shorter simulation by removing all customers who arrive on the network after the first 5 minutes
len(customers[customers['early']<=(5*60)])

761

In [8]:
# Keep only the customers whose 'early' value falls within the first 5 minutes
# (5 * 60) and write them, with the unchanged vehicles, to a shorter instance.
customers = customers.loc[customers['early'] <= (5 * 60)]
jih.saveInstance('Manhattan/mny-9-5000.instance', customers, vehicles)

Unnamed: 0,id,origin,destination,quantity,early
5000,5001,5346,10858,1,1
5001,5002,8191,10836,1,1
5002,5003,7698,12154,1,2
5003,5004,6262,3572,1,3
5004,5005,1186,9750,1,3
...,...,...,...,...,...
5756,5757,1131,9292,1,298
5757,5758,4214,4328,1,299
5758,5759,3654,4790,1,299
5759,5760,6974,3010,1,300


# Creating Jargo-compatible instances to reproduce Simonetto 2019 results

In [2]:
df = pd.read_csv('./Simonetto/filtered_nodeid/may5_hour1.csv')
df

Unnamed: 0,rid,pickup_datetime,passenger_count,pickup_node,dropoff_node
0,28715,2013-05-05 01:00:00,1,12096,8197
1,28716,2013-05-05 01:00:00,1,2426,3086
2,28717,2013-05-05 01:00:00,2,1961,3400
3,28718,2013-05-05 01:00:00,1,3771,3072
4,28719,2013-05-05 01:00:00,6,4511,1727
...,...,...,...,...,...
20969,53883,2013-05-05 01:59:58,1,5654,8308
20970,53884,2013-05-05 01:59:59,1,7774,9264
20971,53885,2013-05-05 01:59:59,2,1747,1185
20972,53887,2013-05-05 01:59:59,3,2815,2831


In [2]:
# a function which takes a list of one or more .csv paths and returns the requests as a dataframe in the Jargo instance format
def csv_to_instance(in_path_list):
    """Build a Jargo-format request table from one or more request CSV files.

    Every CSV must provide the columns ``rid``, ``pickup_datetime``,
    ``passenger_count``, ``pickup_node`` and ``dropoff_node``. The files are
    concatenated, sorted by ``rid``, and each request's arrival time is
    expressed as whole seconds elapsed since the earliest pickup found in the
    combined input.

    Parameters
    ----------
    in_path_list : list of str
        Paths of the CSV files to read.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ``id``, ``origin``, ``destination``,
        ``quantity`` and ``early`` (in that order). The row index is the one
        produced by concatenating the inputs, so labels may repeat when
        several files are given.

    Raises
    ------
    ValueError
        If ``in_path_list`` is empty (raised by ``pandas.concat``).
    """
    frames = [pd.read_csv(in_path) for in_path in in_path_list]
    df = pd.concat(frames).sort_values('rid')

    df['pickup_datetime'] = pd.to_datetime(df['pickup_datetime'])
    start_time = df['pickup_datetime'].min()
    print("instance requests start at",start_time)

    # Whole seconds since the first request. Floor-dividing by a one-second
    # Timedelta counts the full duration; ``.dt.seconds`` would only return the
    # seconds *component* and silently drop whole days for inputs spanning
    # more than 24 hours.
    elapsed = df['pickup_datetime'] - start_time
    df['arrival_time'] = elapsed // pd.Timedelta(seconds=1)

    # Drop and reorder columns to get a dataframe in the Jargo instance format.
    # The explicit copy lets callers assign to columns of the result without
    # triggering pandas' SettingWithCopyWarning.
    instance = df[['rid','pickup_node','dropoff_node','passenger_count','arrival_time']].copy()
    instance.columns = ['id','origin','destination','quantity','early']
    print("total number of requests:",len(instance))
    return instance

csv_to_instance(['./Simonetto/filtered_nodeid/may5_hour1.csv',
            './Simonetto/filtered_nodeid/may5_hour2.csv',
            './Simonetto/filtered_nodeid/may5_hour3.csv'])

instance requests start at 2013-05-05 01:00:00
total number of requests: 50290


Unnamed: 0,id,origin,destination,quantity,early
0,28715,12096,8197,1,0
1,28716,2426,3086,1,0
2,28717,1961,3400,2,0
3,28718,3771,3072,1,0
4,28719,4511,1727,6,0
...,...,...,...,...,...
12241,89684,1852,5635,1,10798
12242,89685,11091,10741,1,10799
12243,89686,10198,1781,1,10799
12244,89687,3284,184,1,10799


In [3]:
#we will use the randomly distributed vehicle locations of the other Manhattan instances
_, vehicles = jih.loadInstance("Manhattan/mny-1-5000.instance",debug=True)
vehicles

DEBUG: loading instance from Manhattan/mny-1-5000.instance
DEBUG: loaded instance data with 10033 entities
DEBUG: instance contains 5033 customers
DEBUG: instance contains 5000 vehicles


Unnamed: 0,id,origin,destination,quantity,early
0,1,811,0,3,0
1,2,7144,0,3,0
2,3,10068,0,3,0
3,4,9694,0,3,0
4,5,5999,0,3,0
...,...,...,...,...,...
4995,4996,5934,0,3,0
4996,4997,5308,0,3,0
4997,4998,4971,0,3,0
4998,4999,840,0,3,0


In [4]:
# The test instance is built from the first three hourly request files (hours 0, 1 and 2).
test_filenames = [
    "./Simonetto/filtered_nodeid/may5_hour%d.csv" % hour
    for hour in range(3)
]
customers = csv_to_instance(test_filenames)

instance requests start at 2013-05-05 00:00:00
total number of requests: 62928


In [5]:
# Overwrite the 'quantity' column so that every request counts as 1,
# regardless of the value read from the CSV files.
customers['quantity']=1
# Display the resulting customer table.
customers

Unnamed: 0,id,origin,destination,quantity,early
0,0,11664,3778,1,0
1,1,10677,643,1,0
2,2,9905,5238,1,0
3,4,11870,2019,1,0
4,5,5673,7263,1,0
...,...,...,...,...,...
17065,74598,2423,7901,1,10797
17066,74599,5062,3866,1,10798
17067,74600,10983,9698,1,10799
17068,74601,10352,6035,1,10799


In [6]:
jih.saveInstance('./Simonetto/sim-test.instance',customers,vehicles)

In [7]:
customers[customers['early']<10]

Unnamed: 0,id,origin,destination,quantity,early
0,0,11664,3778,1,0
1,1,10677,643,1,0
2,2,9905,5238,1,0
3,4,11870,2019,1,0
4,5,5673,7263,1,0
...,...,...,...,...,...
252,281,6365,1799,1,8
253,282,29,5496,1,8
254,284,3201,4031,1,9
255,286,8931,6605,1,9


In [14]:
# Vehicle start locations are taken from an existing Manhattan instance, where
# they are randomly distributed; the same vehicles are reused for every output.
print("Getting vehicle locations")
_, temp_vehicles = jih.loadInstance("Manhattan/mny-1-5000.instance", debug=True)

# Four consecutive six-hour windows covering the whole day.
hour_configs = [range(first_hour, first_hour + 6) for first_hour in (0, 6, 12, 18)]

# One sample instance per (day of May, six-hour window) combination.
for day in range(5, 12):
    for config in hour_configs:
        instance_filename = "./Simonetto/sim-%dmay-hr%d_hr%d-sample.instance" % (
            day,
            config[0],
            config[-1],
        )
        print("\n\n\n~~~~~~~~~~~~ creating %s ~~~~~~~~~~~~" % instance_filename)

        test_filenames = [
            "./Simonetto/filtered_nodeid/may%d_hour%d.csv" % (day, hour)
            for hour in config
        ]
        print("including %d files" % len(test_filenames))

        # Every request counts as quantity 1; only requests whose 'early'
        # value is at most 600 are kept for the sample.
        temp_customers = csv_to_instance(test_filenames)
        temp_customers['quantity'] = 1
        temp_customers = temp_customers.loc[temp_customers['early'] <= 600]
        print("number of chosen requests:", len(temp_customers))

        jih.saveInstance(instance_filename, temp_customers, temp_vehicles)


Getting vehicle locations
DEBUG: loading instance from Manhattan/mny-1-5000.instance
DEBUG: loaded instance data with 10033 entities
DEBUG: instance contains 5033 customers
DEBUG: instance contains 5000 vehicles



~~~~~~~~~~~~ creating ./Simonetto/sim-5may-hr0_hr5-sample.instance ~~~~~~~~~~~~
including 6 files
instance requests start at 2013-05-05 00:00:00
total number of requests: 85158
number of chosen requests: 4627



~~~~~~~~~~~~ creating ./Simonetto/sim-5may-hr6_hr11-sample.instance ~~~~~~~~~~~~
including 6 files
instance requests start at 2013-05-05 06:00:00
total number of requests: 61047
number of chosen requests: 420



~~~~~~~~~~~~ creating ./Simonetto/sim-5may-hr12_hr17-sample.instance ~~~~~~~~~~~~
including 6 files
instance requests start at 2013-05-05 12:00:00
total number of requests: 119830
number of chosen requests: 3482



~~~~~~~~~~~~ creating ./Simonetto/sim-5may-hr18_hr23-sample.instance ~~~~~~~~~~~~
including 6 files
instance requests start at 2013-05-05 18:00:00