### This notebook builds a cross-tab of time periods left vs. trip departure offset, used to create the trip scheduling probability table

In [1]:
import pandas as pd
import os
import numpy as np
import itertools

pd.options.display.max_columns = 100

In [2]:
### Output directory
# Both paths are absolute, machine-specific Windows locations; edit them before
# running this notebook anywhere else.
# output_dir: folder the CSV tables produced below are written to.
# data_dir: root folder holding one sub-folder of survey data per region.
output_dir = r"C:\Users\david.hensle\OneDrive - Resource Systems Group, Inc\Documents\projects\CMAP\trip_scheduling\probability_creation\output-5region"
data_dir = r"C:\Users\david.hensle\OneDrive - Resource Systems Group, Inc\Documents\projects\CMAP\trip_scheduling\probability_creation\survey_data"

In [3]:
### CMAP Data
cmap_spa_path = os.path.join(data_dir, "CMAP")

### Columns to read
tour_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TOURPURP',
    'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'TOUR_DUR_BIN',
    'IS_SUBTOUR', 'NUM_SUBTOURS',
    'CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3',
    'OUTBOUND_STOPS', 'INBOUND_STOPS',
]
trip_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TRIP_ID',
    'ORIG_PURP', 'DEST_PURP',
    'ORIG_DEP_BIN', 'IS_INBOUND', 'DEST_IS_TOUR_ORIG',
]

### Read files
tours_cmap = pd.read_csv(
    os.path.join(cmap_spa_path, r"SPA_Processed\tours.csv"),
    usecols=tour_cols,
)
trips_cmap = pd.read_csv(
    os.path.join(cmap_spa_path, r"SPA_Processed\trips.csv"),
    usecols=trip_cols,
)
# Person expansion factors; the weight column is renamed to the name shared by all regions.
expf_cmap = pd.read_csv(
    os.path.join(cmap_spa_path, r"SPA_Inputs\PER_SPA_INPUT.csv"),
    usecols=['PERNO', 'SAMPN', 'PEREXPFAC'],
)
expf_cmap = expf_cmap.rename(columns={'PEREXPFAC': 'final_weight'})

# Attach the person weight to every tour of that person (household id + person number).
tours_cmap = tours_cmap.merge(
    expf_cmap,
    how='left',
    left_on=['HH_ID', 'PER_ID'],
    right_on=['SAMPN', 'PERNO'],
)

# Tag the source region; this region is given a constant day_no of 0.
tours_cmap = tours_cmap.assign(region='cmap', day_no=0)
trips_cmap = trips_cmap.assign(region='cmap', day_no=0)

In [4]:
### MWCOG data
mwcog_spa_path = os.path.join(data_dir, "MWCOG")

### Columns to read
tour_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TOURPURP',
    'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'TOUR_DUR_BIN',
    'IS_SUBTOUR', 'NUM_SUBTOURS',
    'CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3',
    'OUTBOUND_STOPS', 'INBOUND_STOPS',
]
trip_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TRIP_ID',
    'ORIG_PURP', 'DEST_PURP',
    'ORIG_DEP_BIN', 'IS_INBOUND', 'DEST_IS_TOUR_ORIG',
]

### Read files
tours_mwcog = pd.read_csv(
    os.path.join(mwcog_spa_path, r"SPA_Processed\trip_purp_with_distance\tours.csv"),
    usecols=tour_cols,
)
trips_mwcog = pd.read_csv(
    os.path.join(mwcog_spa_path, r"SPA_Processed\trip_purp_with_distance\trips.csv"),
    usecols=trip_cols,
)
# Person expansion factors; the weight column is renamed to the name shared by all regions.
expf_mwcog = pd.read_csv(
    os.path.join(mwcog_spa_path, r"SPA_Inputs\PER_SPA_INPUT.csv"),
    usecols=['PERNO', 'SAMPN', 'PEREXPFAC'],
)
expf_mwcog = expf_mwcog.rename(columns={'PEREXPFAC': 'final_weight'})

# Attach the person weight to every tour of that person (household id + person number).
tours_mwcog = tours_mwcog.merge(
    expf_mwcog,
    how='left',
    left_on=['HH_ID', 'PER_ID'],
    right_on=['SAMPN', 'PERNO'],
)

# Tag the source region; this region is given a constant day_no of 0.
tours_mwcog = tours_mwcog.assign(region='mwcog', day_no=0)
trips_mwcog = trips_mwcog.assign(region='mwcog', day_no=0)

In [5]:
### SEMCOG data
semcog_spa_path = os.path.join(data_dir, "SEMCOG")

### Columns to read
tour_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TOURPURP',
    'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'TOUR_DUR_BIN',
    'IS_SUBTOUR', 'NUM_SUBTOURS',
    'CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3',
    'OUTBOUND_STOPS', 'INBOUND_STOPS',
]
trip_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TRIP_ID',
    'ORIG_PURP', 'DEST_PURP',
    'ORIG_DEP_BIN', 'IS_INBOUND', 'DEST_IS_TOUR_ORIG',
]

### Read files
tours_semcog = pd.read_csv(
    os.path.join(semcog_spa_path, r"SPA_Processed\trip_purp_with_distance\tours.csv"),
    usecols=tour_cols,
)
trips_semcog = pd.read_csv(
    os.path.join(semcog_spa_path, r"SPA_Processed\trip_purp_with_distance\trips.csv"),
    usecols=trip_cols,
)
# This region stores the person weight as PER_WEIGHT; rename it to the shared name.
expf_semcog = pd.read_csv(
    os.path.join(semcog_spa_path, r"SPA_Inputs\PER_SPA_INPUT.csv"),
    usecols=['PERNO', 'SAMPN', 'PER_WEIGHT'],
)
expf_semcog = expf_semcog.rename(columns={'PER_WEIGHT': 'final_weight'})

# Attach the person weight to every tour of that person (household id + person number).
tours_semcog = tours_semcog.merge(
    expf_semcog,
    how='left',
    left_on=['HH_ID', 'PER_ID'],
    right_on=['SAMPN', 'PERNO'],
)

# Tag the source region; this region is given a constant day_no of 0.
tours_semcog = tours_semcog.assign(region='semcog', day_no=0)
trips_semcog = trips_semcog.assign(region='semcog', day_no=0)

In [6]:
### SANDAG data
sandag_spa_path = os.path.join(data_dir, "SANDAG")

### Columns to read
tour_cols = ['HH_ID', 'PER_ID', 'TOUR_ID', 'TOURPURP', 'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'TOUR_DUR_BIN', 'IS_SUBTOUR',
            'NUM_SUBTOURS', 'CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3', 'OUTBOUND_STOPS', 'INBOUND_STOPS']
# DEST_PLACENO is read in addition to the usual trip columns because it is the
# key used below to look up each trip's weight in the place files.
trip_cols = ['HH_ID', 'PER_ID', 'TOUR_ID', 'TRIP_ID', 'ORIG_PURP', 'DEST_PURP',
             'ORIG_DEP_BIN', 'IS_INBOUND', 'DEST_IS_TOUR_ORIG', 'DEST_PLACENO']

### Read and combine tour/trip files
# One folder / place file per survey day. day_no records the source day and is
# part of every merge key below.
tours_day = {}
trips_day = {}
place_day = {}
for i in [1, 2, 3, 4]:
    trips_day[i] = pd.read_csv(os.path.join(sandag_spa_path, "SPA_Processed\\day"+str(i)+"\\trips.csv"), usecols = trip_cols)
    trips_day[i]['day_no'] = i
    tours_day[i] = pd.read_csv(os.path.join(sandag_spa_path, "SPA_Processed\\day"+str(i)+"\\tours.csv"), usecols = tour_cols)
    tours_day[i]['day_no'] = i
    place_day[i] = pd.read_csv(os.path.join(sandag_spa_path, "SPA_Inputs\\place_"+str(i)+".csv"))
    place_day[i]['day_no'] = i

tours_sandag = pd.concat([tours_day[1], tours_day[2], tours_day[3], tours_day[4]], ignore_index=True)
trips_sandag = pd.concat([trips_day[1], trips_day[2], trips_day[3], trips_day[4]], ignore_index=True)
place_sandag = pd.concat([place_day[1], place_day[2], place_day[3], place_day[4]], ignore_index=True)

### Merge trip weights from place files
# NOTE(review): a left merge repeats trip rows if the place files hold more than
# one record per (SAMPN, PERNO, day_no, PLANO) - confirm the key is unique.
trips_sandag = pd.merge(
    trips_sandag,
    place_sandag[['SAMPN', 'PERNO', 'day_no', 'PLANO', 'TRIP_WEIGHT']],
    how='left',
    left_on=['HH_ID', 'PER_ID', 'day_no', 'DEST_PLACENO'],
    right_on=['SAMPN', 'PERNO', 'day_no', 'PLANO']
)

### Get weight of tour by averaging individual trip weights
sandag_tour_keys = ['HH_ID', 'PER_ID', 'TOUR_ID', 'day_no']
trips_sandag['tour_weights'] = trips_sandag.groupby(sandag_tour_keys)['TRIP_WEIGHT'].transform('mean')
# The transformed value is constant within a tour, so first() yields exactly one
# row per tour. Unlike unique().explode(), it keeps the column numeric instead of
# turning it (and, after concatenation, final_weight) into object dtype.
tour_weights = trips_sandag.groupby(sandag_tour_keys)['tour_weights'].first().reset_index()

### Merge tour weights to tours file
tours_sandag = pd.merge(
    tours_sandag,
    tour_weights,
    how='left',
    on=sandag_tour_keys).rename({'tour_weights': 'final_weight'}, axis=1)

tours_sandag['region'] = 'sandag'
trips_sandag['region'] = 'sandag'

In [7]:
### SANDAG 2016 data
sandag_spa_path = os.path.join(data_dir, "SANDAG_2016")

### Columns to read
tour_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TOURPURP',
    'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'TOUR_DUR_BIN',
    'IS_SUBTOUR', 'NUM_SUBTOURS',
    'CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3',
    'OUTBOUND_STOPS', 'INBOUND_STOPS',
]
trip_cols = [
    'HH_ID', 'PER_ID', 'TOUR_ID', 'TRIP_ID',
    'ORIG_PURP', 'DEST_PURP',
    'ORIG_DEP_BIN', 'IS_INBOUND', 'DEST_IS_TOUR_ORIG', 'DEST_PLACENO',
]

### Read persons files for expansion factors
expf_san2016 = pd.read_csv(
    os.path.join(sandag_spa_path, r"data\person_geocoded.csv"),
    usecols=['hhid', 'pernum', 'hh_final_weight_456x'],
)
expf_san2016 = expf_san2016.rename(columns={'hh_final_weight_456x': 'final_weight'})

### Read and combine tour/trip files
# One output folder per survey day (1..7); day_no records which day a row came from.
tours_day = {}
trips_day = {}
place_day = {}  # reset here but not filled or read in this cell
survey_days = range(1, 8)
for day in survey_days:
    day_trips = pd.read_csv(os.path.join(sandag_spa_path, rf"output\day{day}\trips.csv"), usecols=trip_cols)
    day_trips['day_no'] = day
    trips_day[day] = day_trips

    day_tours = pd.read_csv(os.path.join(sandag_spa_path, rf"output\day{day}\tours.csv"), usecols=tour_cols)
    day_tours['day_no'] = day
    tours_day[day] = day_tours

tours_san2016 = pd.concat([tours_day[day] for day in survey_days], ignore_index=True)
trips_san2016 = pd.concat([trips_day[day] for day in survey_days], ignore_index=True)

### Add person weights to tours
tours_san2016 = tours_san2016.merge(
    expf_san2016,
    how='left',
    left_on=['HH_ID', 'PER_ID'],
    right_on=['hhid', 'pernum'],
)

tours_san2016 = tours_san2016.assign(region='sandag_2016')
trips_san2016 = trips_san2016.assign(region='sandag_2016')

In [8]:
### Merge multiple region data
# Stack the per-region frames row-wise and renumber the index from 0.
regional_tours = [tours_cmap, tours_mwcog, tours_semcog, tours_sandag, tours_san2016]
regional_trips = [trips_cmap, trips_mwcog, trips_semcog, trips_sandag, trips_san2016]
tours = pd.concat(regional_tours, ignore_index=True)
trips = pd.concat(regional_trips, ignore_index=True)

In [9]:
# Tour purpose code -> purpose label (codes 1..9). Defined for reference; not used in this cell.
TOURPURP = dict(enumerate(
    ['work', 'univ', 'school', 'escort', 'shopping',
     'othmaint', 'eatout', 'social', 'othdiscr'],
    start=1,
))

# Tour purpose code -> coarse group: codes 1-3 are 'mand', codes 4-13 are 'non_mand'.
NEWTOURPURP = {
    purpose_code: ('mand' if purpose_code <= 3 else 'non_mand')
    for purpose_code in range(1, 14)
}


def _purpose_group(purpose_code):
    """Return the coarse purpose group for one TOURPURP code (KeyError if unmapped)."""
    return NEWTOURPURP[purpose_code]


tours['tour_purpose_grouped'] = tours['TOURPURP'].apply(_purpose_group)
# tours.loc[tours['IS_SUBTOUR'] == 1, 'NEWTOURPURP'] = 'at_work'

In [10]:
# The largest of the three child tour ids is taken as the last subtour of the parent tour.
# NOTE(review): assumes child tour ids increase with time of day - confirm.
child_tour_id_cols = ['CHILD_TOUR_ID_1', 'CHILD_TOUR_ID_2', 'CHILD_TOUR_ID_3']
tours['CHILD_TOUR_ID_LAST'] = tours[child_tour_id_cols].max(axis=1)

# Independent copy of the rows flagged as subtours, used for the look-ups that follow.
is_subtour = tours['IS_SUBTOUR'] == 1
subtours = tours.loc[is_subtour].copy()

In [11]:
# Attach the anchor depart/arrive bins of the first and of the last subtour to
# each tour. Columns coming from the subtour side get the suffix _SUB_FIRST or
# _SUB_LAST; tours without a matching child keep NaN (left join).
subtour_anchors = subtours[
    ['HH_ID', 'PER_ID', 'TOUR_ID', 'ANCHOR_DEPART_BIN', 'ANCHOR_ARRIVE_BIN', 'region', 'day_no']
]
child_lookups = (
    ('CHILD_TOUR_ID_1', '_SUB_FIRST'),
    ('CHILD_TOUR_ID_LAST', '_SUB_LAST'),
)
for child_id_col, subtour_suffix in child_lookups:
    tours = tours.merge(
        subtour_anchors,
        how='left',
        left_on=['HH_ID', 'PER_ID', child_id_col, 'region', 'day_no'],
        right_on=['HH_ID', 'PER_ID', 'TOUR_ID', 'region', 'day_no'],
        suffixes=('', subtour_suffix),
    )

In [12]:
# Spot-check: display every tour row whose household id is 1681.
tours[tours['HH_ID'] == 1681]

Unnamed: 0,HH_ID,PER_ID,TOUR_ID,TOURPURP,ANCHOR_DEPART_BIN,ANCHOR_ARRIVE_BIN,TOUR_DUR_BIN,IS_SUBTOUR,NUM_SUBTOURS,CHILD_TOUR_ID_1,CHILD_TOUR_ID_2,CHILD_TOUR_ID_3,OUTBOUND_STOPS,INBOUND_STOPS,PERNO,SAMPN,final_weight,region,day_no,hhid,pernum,tour_purpose_grouped,CHILD_TOUR_ID_LAST,TOUR_ID_SUB_FIRST,ANCHOR_DEPART_BIN_SUB_FIRST,ANCHOR_ARRIVE_BIN_SUB_FIRST,TOUR_ID_SUB_LAST,ANCHOR_DEPART_BIN_SUB_LAST,ANCHOR_ARRIVE_BIN_SUB_LAST
48267,1681,1,1,1,11.0,41.0,31.0,0,0.0,,,,0,0,1.0,1681.0,98.0,mwcog,0,,,mand,,,,,,,
95586,1681,1,1,1,8.0,36.0,29.0,0,1.0,2.0,,,1,0,1.0,1681.0,166.896413,semcog,0,,,mand,2.0,2.0,34.0,35.0,2.0,34.0,35.0
95587,1681,1,2,1,34.0,35.0,2.0,1,0.0,,,,0,0,1.0,1681.0,166.896413,semcog,0,,,mand,,,,,,,


In [13]:
# Spot-check: display every trip row whose household id is 15673.
trips[trips['HH_ID'] == 15673]

Unnamed: 0,HH_ID,PER_ID,TOUR_ID,TRIP_ID,ORIG_PURP,DEST_PURP,ORIG_DEP_BIN,IS_INBOUND,DEST_IS_TOUR_ORIG,region,day_no,DEST_PLACENO,SAMPN,PERNO,PLANO,TRIP_WEIGHT,tour_weights
222409,15673,1,1,1,0,1,14,0,0,mwcog,0,,,,,,
222410,15673,1,1,2,1,0,30,1,1,mwcog,0,,,,,,
222411,15673,1,2,1,1,7,19,0,0,mwcog,0,,,,,,
222412,15673,1,2,2,7,1,20,1,1,mwcog,0,,,,,,
222413,15673,2,1,1,0,1,8,0,0,mwcog,0,,,,,,
222414,15673,2,1,2,1,0,36,1,1,mwcog,0,,,,,,
222415,15673,2,2,1,1,1,12,0,1,mwcog,0,,,,,,
222416,15673,2,3,1,1,1,14,0,1,mwcog,0,,,,,,
222417,15673,2,4,1,1,7,18,0,0,mwcog,0,,,,,,
222418,15673,2,4,2,7,1,20,1,1,mwcog,0,,,,,,


In [14]:
# Bring the tour attributes (including the subtour anchor bins and final_weight)
# onto each trip. Left join, so trips without a matching tour are kept with NaN.
trip_to_tour_keys = ['HH_ID', 'PER_ID', 'TOUR_ID', 'region', 'day_no']
trips = trips.merge(tours, how='left', on=trip_to_tour_keys)

In [15]:
# Display the merged trips table.
trips

Unnamed: 0,HH_ID,PER_ID,TOUR_ID,TRIP_ID,ORIG_PURP,DEST_PURP,ORIG_DEP_BIN,IS_INBOUND,DEST_IS_TOUR_ORIG,region,day_no,DEST_PLACENO,SAMPN_x,PERNO_x,PLANO,TRIP_WEIGHT,tour_weights,TOURPURP,ANCHOR_DEPART_BIN,ANCHOR_ARRIVE_BIN,TOUR_DUR_BIN,IS_SUBTOUR,NUM_SUBTOURS,CHILD_TOUR_ID_1,CHILD_TOUR_ID_2,CHILD_TOUR_ID_3,OUTBOUND_STOPS,INBOUND_STOPS,PERNO_y,SAMPN_y,final_weight,hhid,pernum,tour_purpose_grouped,CHILD_TOUR_ID_LAST,TOUR_ID_SUB_FIRST,ANCHOR_DEPART_BIN_SUB_FIRST,ANCHOR_ARRIVE_BIN_SUB_FIRST,TOUR_ID_SUB_LAST,ANCHOR_DEPART_BIN_SUB_LAST,ANCHOR_ARRIVE_BIN_SUB_LAST
0,20000083,1,1,1,0,1,9,0,0,cmap,0,,,,,,,1,9.0,34.0,26.0,0,1.0,2.0,,,0,3,1.0,20000083.0,155.2391,,,mand,2.0,2,18.0,20.0,2,18.0,20.0
1,20000083,1,1,2,1,6,27,1,0,cmap,0,,,,,,,1,9.0,34.0,26.0,0,1.0,2.0,,,0,3,1.0,20000083.0,155.2391,,,mand,2.0,2,18.0,20.0,2,18.0,20.0
2,20000083,1,1,3,6,5,32,1,0,cmap,0,,,,,,,1,9.0,34.0,26.0,0,1.0,2.0,,,0,3,1.0,20000083.0,155.2391,,,mand,2.0,2,18.0,20.0,2,18.0,20.0
3,20000083,1,1,4,5,6,33,1,0,cmap,0,,,,,,,1,9.0,34.0,26.0,0,1.0,2.0,,,0,3,1.0,20000083.0,155.2391,,,mand,2.0,2,18.0,20.0,2,18.0,20.0
4,20000083,1,1,5,6,0,33,1,1,cmap,0,,,,,,,1,9.0,34.0,26.0,0,1.0,2.0,,,0,3,1.0,20000083.0,155.2391,,,mand,2.0,2,18.0,20.0,2,18.0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
521808,171205369,1,1,3,5,5,21,1,0,sandag_2016,7,4,,,,,,6,17.0,23.0,7.0,0,,,,,0,4,,,65.0568,171205369.0,1.0,non_mand,,,,,,,
521809,171205369,1,1,4,5,6,22,1,0,sandag_2016,7,5,,,,,,6,17.0,23.0,7.0,0,,,,,0,4,,,65.0568,171205369.0,1.0,non_mand,,,,,,,
521810,171205369,1,1,5,6,9,23,1,0,sandag_2016,7,6,,,,,,6,17.0,23.0,7.0,0,,,,,0,4,,,65.0568,171205369.0,1.0,non_mand,,,,,,,
521811,171205369,1,1,6,9,0,23,1,1,sandag_2016,7,7,,,,,,6,17.0,23.0,7.0,0,,,,,0,4,,,65.0568,171205369.0,1.0,non_mand,,,,,,,


In [16]:
# Report how many trips fall into each tour-duration category, then keep only
# trips whose tour duration is non-negative. Rows with a negative or missing
# TOUR_DUR_BIN both fail the >= 0 test and are dropped.
tour_duration = trips['TOUR_DUR_BIN']
has_valid_duration = tour_duration >= 0

duration_report = (
    ('%d trips in total', len(trips)),
    ('%d trips in overnight tours removed', int((tour_duration < 0).sum())),
    ('%d trips with missing tour start or end time removed', int(tour_duration.isna().sum())),
    ('%d trips with valid tour duration', int(has_valid_duration.sum())),
)
for message, trip_count in duration_report:
    print(message % trip_count)

trips = trips.loc[has_valid_duration].copy()
#print('%d trips kept' % len(trips))
#print('%d trips with valid tour start or end time' % len(trips[trips['TOUR_DUR_BIN'].notna()]))
#trips = trips[trips['TOUR_DUR_BIN'].notna()]
#print('%d trips kept' % len(trips))

521813 trips in total
1701 trips in overnight tours removed
51255 trips with missing tour start or end time removed
468857 trips with valid tour duration


In [17]:
# outbound is the logical negation of IS_INBOUND.
trips['outbound'] = ~trips['IS_INBOUND'].astype(bool)

# A half-tour is one direction (outbound or inbound) of one tour.
half_tour_keys = ['HH_ID', 'PER_ID', 'TOUR_ID', 'outbound', 'region', 'day_no']
half_tours = trips.groupby(half_tour_keys)

# 0-based position of the trip within its half-tour, in current row order.
position_in_half_tour = half_tours.cumcount()
# Number of trips in the half-tour.
trips_in_half_tour = half_tours['TRIP_ID'].transform('count')

trips['half_tour_stop_num'] = position_in_half_tour
trips['half_tour_trip_count'] = trips_in_half_tour

# Trips still to come on this half-tour after the current one, capped at 3.
trips_after_this_one = trips['half_tour_trip_count'] - trips['half_tour_stop_num'] - 1
trips['half_tour_stops_remaining'] = trips_after_this_one.clip(upper=3)

In [18]:
tour_keys = ['HH_ID', 'PER_ID', 'TOUR_ID', 'region', 'day_no']
is_outbound = trips['outbound']

# Departure bin of the previous row of the same tour (NaN for the tour's first row).
# NOTE(review): shift(1) relies on trips being in travel order within each tour -
# confirm the ordering of the input files.
previous_departure = trips.groupby(by=tour_keys)['ORIG_DEP_BIN'].shift(1)

# Inbound trips cannot start before the last subtour has arrived back, so for
# them the reference time is the later of the two (NaNs are ignored by max).
not_before_last_subtour = pd.concat(
    [previous_departure, trips['ANCHOR_ARRIVE_BIN_SUB_LAST']], axis=1
).max(axis=1)
trips['PREV_ORIG_DEP_BIN'] = previous_departure.where(is_outbound, not_before_last_subtour)

# Outbound trips must finish before the first subtour departs (or before the
# tour ends if there is none); inbound trips are bounded by the tour end.
before_first_subtour = trips[['ANCHOR_ARRIVE_BIN', 'ANCHOR_DEPART_BIN_SUB_FIRST']].min(axis=1)
trips['END_BIN'] = before_first_subtour.where(is_outbound, trips['ANCHOR_ARRIVE_BIN'])

# Periods available between the reference time and the end bin.
trips['periods_left'] = trips['END_BIN'] - trips['PREV_ORIG_DEP_BIN']

# Periods between the reference time and this trip's own departure.
trips['depart_offset'] = trips['ORIG_DEP_BIN'] - trips['PREV_ORIG_DEP_BIN']


In [19]:
# Spot-check: display the trips of household 15673 with the newly derived columns.
trips[trips['HH_ID'] == 15673]

Unnamed: 0,HH_ID,PER_ID,TOUR_ID,TRIP_ID,ORIG_PURP,DEST_PURP,ORIG_DEP_BIN,IS_INBOUND,DEST_IS_TOUR_ORIG,region,day_no,DEST_PLACENO,SAMPN_x,PERNO_x,PLANO,TRIP_WEIGHT,tour_weights,TOURPURP,ANCHOR_DEPART_BIN,ANCHOR_ARRIVE_BIN,TOUR_DUR_BIN,IS_SUBTOUR,NUM_SUBTOURS,CHILD_TOUR_ID_1,CHILD_TOUR_ID_2,CHILD_TOUR_ID_3,OUTBOUND_STOPS,INBOUND_STOPS,PERNO_y,SAMPN_y,final_weight,hhid,pernum,tour_purpose_grouped,CHILD_TOUR_ID_LAST,TOUR_ID_SUB_FIRST,ANCHOR_DEPART_BIN_SUB_FIRST,ANCHOR_ARRIVE_BIN_SUB_FIRST,TOUR_ID_SUB_LAST,ANCHOR_DEPART_BIN_SUB_LAST,ANCHOR_ARRIVE_BIN_SUB_LAST,outbound,half_tour_stop_num,half_tour_trip_count,half_tour_stops_remaining,PREV_ORIG_DEP_BIN,END_BIN,periods_left,depart_offset
222409,15673,1,1,1,0,1,14,0,0,mwcog,0,,,,,,,1,14.0,31.0,18.0,0,1.0,2.0,,,0,0,1.0,15673.0,120.0,,,mand,2.0,2.0,19.0,21.0,2.0,19.0,21.0,True,0,1,0,,19.0,,
222410,15673,1,1,2,1,0,30,1,1,mwcog,0,,,,,,,1,14.0,31.0,18.0,0,1.0,2.0,,,0,0,1.0,15673.0,120.0,,,mand,2.0,2.0,19.0,21.0,2.0,19.0,21.0,False,0,1,0,21.0,31.0,10.0,9.0
222411,15673,1,2,1,1,7,19,0,0,mwcog,0,,,,,,,7,19.0,21.0,3.0,1,,,,,0,0,1.0,15673.0,120.0,,,non_mand,,,,,,,,True,0,1,0,,21.0,,
222412,15673,1,2,2,7,1,20,1,1,mwcog,0,,,,,,,7,19.0,21.0,3.0,1,,,,,0,0,1.0,15673.0,120.0,,,non_mand,,,,,,,,False,0,1,0,19.0,21.0,2.0,1.0
222413,15673,2,1,1,0,1,8,0,0,mwcog,0,,,,,,,1,8.0,37.0,30.0,0,4.0,2.0,3.0,4.0,0,0,2.0,15673.0,120.0,,,mand,4.0,2.0,12.0,12.0,4.0,18.0,21.0,True,0,1,0,,12.0,,
222414,15673,2,1,2,1,0,36,1,1,mwcog,0,,,,,,,1,8.0,37.0,30.0,0,4.0,2.0,3.0,4.0,0,0,2.0,15673.0,120.0,,,mand,4.0,2.0,12.0,12.0,4.0,18.0,21.0,False,0,1,0,21.0,37.0,16.0,15.0
222415,15673,2,2,1,1,1,12,0,1,mwcog,0,,,,,,,1,12.0,12.0,1.0,1,0.0,,,,0,0,2.0,15673.0,120.0,,,mand,,,,,,,,True,0,1,0,,12.0,,
222416,15673,2,3,1,1,1,14,0,1,mwcog,0,,,,,,,1,14.0,15.0,2.0,1,0.0,,,,0,0,2.0,15673.0,120.0,,,mand,,,,,,,,True,0,1,0,,15.0,,
222417,15673,2,4,1,1,7,18,0,0,mwcog,0,,,,,,,7,18.0,21.0,4.0,1,,,,,0,0,2.0,15673.0,120.0,,,non_mand,,,,,,,,True,0,1,0,,21.0,,
222418,15673,2,4,2,7,1,20,1,1,mwcog,0,,,,,,,7,18.0,21.0,4.0,1,,,,,0,0,2.0,15673.0,120.0,,,non_mand,,,,,,,,False,0,1,0,18.0,21.0,3.0,2.0


In [20]:
# Keep only trips with non-negative periods_left and depart_offset, and with
# depart_offset not exceeding periods_left. Violations are attributed to an
# incorrect departure time or incorrect trip id numbering for intrazonal trips.
# Rows where either value is NaN fail every comparison and are dropped as well.
print('%d trips in total' % len(trips))

is_valid_trip = (
    (trips['periods_left'] >= 0)
    & (trips['depart_offset'] >= 0)
    & (trips['depart_offset'] <= trips['periods_left'])
)

# trimmed_trips is copied before the flag is added, so it has no is_valid column.
trimmed_trips = trips.loc[is_valid_trip].copy()
trips['is_valid'] = np.where(is_valid_trip, 1, 0)

print('%d trips valid and kept' % len(trimmed_trips))

468857 trips in total
293910 trips valid and kept


In [21]:
# Number of trip rows per source region (all rows of trips, not only the valid ones).
trips['region'].value_counts()

sandag_2016    125182
mwcog          111690
semcog         100550
cmap            95932
sandag          35503
Name: region, dtype: int64

In [22]:
# Pretty much every trip that is removed is the very first outbound trip which should not be included
# Count of outbound trips at position 0 of their half-tour, for comparison with
# the number of trips dropped by the validity filter.
len(trips[trips.outbound & (trips.half_tour_stop_num == 0)])

174692

In [23]:
# the "alternatives" to trip scheduling are the number of periods between the last trip and the current trip
# Same formula as the depart_offset already computed on trips, re-applied to the filtered copy.
trimmed_trips['depart_offset'] = trimmed_trips['ORIG_DEP_BIN'] - trimmed_trips['PREV_ORIG_DEP_BIN']

In [24]:
# Unweighted trip counts by periods_left (rows) and depart_offset (columns),
# with 'All' row/column totals.
pd.crosstab(trimmed_trips['periods_left'], trimmed_trips['depart_offset'], margins=True)

depart_offset,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0,36.0,37.0,38.0,39.0,42.0,46.0,All
periods_left,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
0.0,9579,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9579
1.0,15222,27003,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42225
2.0,3822,20652,13257,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,37731
3.0,2495,6917,11517,8428,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29357
4.0,1791,4323,3969,7677,4636,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22396
5.0,1392,3112,2356,2661,5001,3012,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17534
6.0,1129,2393,1595,1569,1691,3340,1971,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13688
7.0,925,1835,1262,1050,976,1156,2486,1523,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11213
8.0,762,1576,905,735,681,646,914,2050,1184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9453
9.0,679,1319,713,648,521,423,465,829,1668,1038,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8303


In [25]:
def create_trip_probs_table(
    trips,
    segmentation=['periods_left_min', 'periods_left_max', 'outbound'],
    outbound_max_bin=25,  # trip durations longer than this are binned into one larger bin
    inbound_max_bin=25,
    weight='final_weight',
    sort_values=['outbound', 'periods_left_min', 'periods_left_max'],
    ascending=[False, True, True],
    create_full_index=True,
    last_period=47,
):
    """Build count and probability tables of depart_offset by periods left.

    Parameters
    ----------
    trips : pandas.DataFrame
        One row per trip. Must contain the boolean column ``outbound``, the
        columns ``periods_left`` and ``depart_offset`` (no missing
        ``depart_offset`` values, since it is cast to int), the ``weight``
        column and any extra column named in ``segmentation``. The frame is
        copied; the caller's object is not modified.
    segmentation : list of str
        Row index of the tables. ``periods_left_min`` and ``periods_left_max``
        are derived inside this function. The default list is never mutated.
    outbound_max_bin, inbound_max_bin : int
        ``periods_left`` and ``depart_offset`` are clipped to this value for
        outbound / inbound trips; every row with ``periods_left`` at or above
        it falls into one open-ended top bin.
    weight : str
        Column summed to obtain the probabilities.
    sort_values, ascending : list
        Passed to ``DataFrame.sort_values`` to order the table rows.
    create_full_index : bool
        If True, reindex both tables onto every combination of
        ``periods_left_min`` (0..max bin), direction and extra segment values,
        and forward-fill missing rows of the probability table.
    last_period : int
        Value stored in ``periods_left_max`` for the open-ended top bin.

    Returns
    -------
    (unweighted, probs) : tuple of pandas.DataFrame
        ``unweighted`` holds trip counts, ``probs`` holds the weight shares of
        each ``depart_offset`` within a row. Both have an ``'All'`` column.
        Without the full index they also contain the ``'All'`` margin row.
    """
    # Work on a private copy so the helper columns added below do not leak
    # into the caller's DataFrame.
    trips = trips.copy()
    is_outbound = trips['outbound']
    periods_left = trips['periods_left']

    trips['periods_left_min'] = np.where(
        is_outbound,
        periods_left.clip(upper=outbound_max_bin),
        periods_left.clip(upper=inbound_max_bin),
    )
    in_top_bin = (
        ((periods_left >= outbound_max_bin) & is_outbound)
        | ((periods_left >= inbound_max_bin) & ~is_outbound)
    )
    trips['periods_left_max'] = np.where(in_top_bin, last_period, periods_left)
    trips['depart_offset'] = np.where(
        is_outbound,
        trips['depart_offset'].clip(upper=outbound_max_bin),
        trips['depart_offset'].clip(upper=inbound_max_bin),
    ).astype(int)
    trips['dummy_weight'] = 1  # summing this column counts trips

    def _summed_pivot(value_column):
        # Sum value_column per (segmentation row, depart_offset column) with
        # 'All' margins; empty cells become 0.
        table = trips.pivot_table(
            values=value_column,
            index=segmentation,
            columns='depart_offset',
            aggfunc='sum',
            margins=True,
        )
        return table.sort_values(by=sort_values, ascending=ascending).fillna(0)

    unweighted = _summed_pivot('dummy_weight').round(0).astype(int)
    probs = _summed_pivot(weight)

    # Row normalize in one step. Dividing column by column would silently
    # depend on 'All' being the last column visited.
    probs = probs.div(probs['All'], axis=0)

    def create_complete_index(outbound):
        # Every periods_left_min from 0 to the direction's max bin, crossed
        # with the observed values of any extra segmentation column.
        max_bin = outbound_max_bin if outbound else inbound_max_bin
        index_dict = {
            'periods_left_min': range(0, max_bin + 1),
            'outbound': [outbound],
        }
        for segment in segmentation:
            if segment not in ['periods_left_min', 'periods_left_max', 'outbound']:
                index_dict[segment] = trips[segment].unique()

        index = pd.DataFrame(
            list(itertools.product(*index_dict.values())),
            columns=list(index_dict),
        )
        index['periods_left_max'] = index['periods_left_min']
        index.loc[index['periods_left_min'] >= max_bin, 'periods_left_max'] = last_period

        return index

    if create_full_index:
        outbound_index = create_complete_index(outbound=True)
        inbound_index = create_complete_index(outbound=False)
        full_index = pd.concat([outbound_index, inbound_index]).sort_values(
            by=sort_values, ascending=ascending)
        full_index = full_index.set_index(segmentation).index
        print(full_index)

        # Reindexing drops the 'All' margin row and adds all-NaN rows for
        # combinations with no observations.
        unweighted = unweighted.reindex(full_index)
        probs = probs.reindex(full_index)
        # only want to ffill probs and not unweighted
        # NOTE: the fill copies whichever row precedes in sort order, which can
        # be a row of another direction or segment; leading rows stay NaN.
        probs = probs.ffill()

    return unweighted, probs



In [26]:
# Base table: segmented only by periods left and direction, with separate top
# bins for outbound (25) and inbound (34) trips. A copy is passed in so
# trimmed_trips itself is not touched by the call.
base_segmentation = ['periods_left_min', 'periods_left_max', 'outbound']
base_sort_order = ['outbound', 'periods_left_min', 'periods_left_max']

no_seg_unweighted, no_seg_probs = create_trip_probs_table(
    trimmed_trips.copy(),
    segmentation=base_segmentation,
    outbound_max_bin=25,
    inbound_max_bin=34,
    sort_values=base_sort_order,
    ascending=[False, True, True],
    create_full_index=True,
)

# Write counts and probabilities next to each other in the output folder.
base_counts_path = os.path.join(output_dir, 'trip_scheduling_counts_base.csv')
base_probs_path = os.path.join(output_dir, 'trip_scheduling_probs_base.csv')
no_seg_unweighted.to_csv(base_counts_path)
no_seg_probs.to_csv(base_probs_path)

for base_table in (no_seg_unweighted, no_seg_probs):
    display(base_table)

MultiIndex([( 0,  0,  True),
            ( 1,  1,  True),
            ( 2,  2,  True),
            ( 3,  3,  True),
            ( 4,  4,  True),
            ( 5,  5,  True),
            ( 6,  6,  True),
            ( 7,  7,  True),
            ( 8,  8,  True),
            ( 9,  9,  True),
            (10, 10,  True),
            (11, 11,  True),
            (12, 12,  True),
            (13, 13,  True),
            (14, 14,  True),
            (15, 15,  True),
            (16, 16,  True),
            (17, 17,  True),
            (18, 18,  True),
            (19, 19,  True),
            (20, 20,  True),
            (21, 21,  True),
            (22, 22,  True),
            (23, 23,  True),
            (24, 24,  True),
            (25, 47,  True),
            ( 0,  0, False),
            ( 1,  1, False),
            ( 2,  2, False),
            ( 3,  3, False),
            ( 4,  4, False),
            ( 5,  5, False),
            ( 6,  6, False),
            ( 7,  7, False),
            ( 

Unnamed: 0_level_0,Unnamed: 1_level_0,depart_offset,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,All
periods_left_min,periods_left_max,outbound,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
0,0,True,106,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,106
1,1,True,1136,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1345
2,2,True,1566,1241,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2813
3,3,True,1441,2030,115,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3589
4,4,True,1163,2086,423,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,30,False,0,0,1,1,0,0,0,2,1,2,2,2,6,2,2,5,13,10,20,14,28,21,15,12,6,4,13,16,35,29,12,0,0,0,0,274
31,31,False,0,0,0,3,0,1,3,0,1,3,0,0,0,1,6,2,11,10,16,13,16,14,12,7,7,5,6,6,7,6,20,7,0,0,0,183
32,32,False,0,2,0,0,0,0,0,1,0,2,0,0,1,0,2,1,2,5,3,6,11,9,8,10,4,2,9,3,3,5,11,10,9,0,0,119
33,33,False,0,0,0,0,0,0,0,1,1,1,1,0,1,2,1,2,4,6,6,4,7,3,7,6,2,1,1,1,1,2,2,2,11,4,0,80


Unnamed: 0_level_0,Unnamed: 1_level_0,depart_offset,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,All
periods_left_min,periods_left_max,outbound,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
0,0,True,1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
1,1,True,0.809874,0.190126,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
2,2,True,0.561713,0.436380,0.001907,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
3,3,True,0.421881,0.555299,0.022345,0.000474,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
4,4,True,0.326245,0.556733,0.109626,0.007396,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30,30,False,0.000000,0.000000,0.025865,0.000000,0.000000,0.000000,0.000000,0.002632,0.008723,0.030878,0.001653,0.010783,0.010249,0.008556,0.006432,0.010774,0.064184,0.037003,0.052543,0.032305,0.124474,0.093637,0.057149,0.023142,0.015247,0.007597,0.063147,0.047595,0.073739,0.143698,0.047995,0.000000,0.000000,0.000000,0.000000,1.0
31,31,False,0.000000,0.000000,0.000000,0.003909,0.000000,0.002077,0.014890,0.000000,0.002455,0.042896,0.000000,0.000000,0.000000,0.005429,0.016881,0.005109,0.041187,0.030664,0.138404,0.067899,0.069515,0.108581,0.091172,0.023279,0.054911,0.026791,0.020225,0.037776,0.026737,0.011687,0.103559,0.053967,0.000000,0.000000,0.000000,1.0
32,32,False,0.000000,0.003895,0.000000,0.000000,0.000000,0.000000,0.000000,0.005828,0.000000,0.012787,0.000000,0.000000,0.003303,0.000000,0.017632,0.002993,0.026336,0.022457,0.012116,0.025145,0.157061,0.092908,0.067354,0.059099,0.032182,0.009581,0.126899,0.012292,0.011742,0.087207,0.088533,0.051482,0.071169,0.000000,0.000000,1.0
33,33,False,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.001437,0.017577,0.009660,0.018658,0.000000,0.007079,0.022319,0.084627,0.017279,0.038851,0.040418,0.106574,0.041603,0.137063,0.022217,0.123436,0.034682,0.015356,0.012041,0.006010,0.009910,0.016664,0.024162,0.040128,0.062584,0.064258,0.025408,0.000000,1.0


In [27]:
# stop_remaining_unweighted, stops_remaining_probs = create_trip_probs_table(
#     trimmed_trips,
#     segmentation=['periods_left_min', 'periods_left_max', 'outbound', 'half_tour_stops_remaining'], 
#     max_bin=25, 
#     sort_values=['outbound', 'periods_left_min', 'periods_left_max', 'half_tour_stops_remaining'], 
#     ascending=[False, True, True, False])

# stop_remaining_unweighted.to_csv(os.path.join(output_dir, 'stops_remaining_counts.csv'))
# stops_remaining_probs.to_csv(os.path.join(output_dir, 'stops_remaining_probs.csv'))

# display(stop_remaining_unweighted)
# display(stops_remaining_probs)

In [28]:
# purpose_unweighted, purpose_probs = create_trip_probs_table(
#     trimmed_trips,
#     segmentation=['periods_left_min', 'periods_left_max', 'outbound', 'tour_purpose_grouped'], 
#     outbound_max_bin=25,
#     inbound_max_bin=28,
#     sort_values=['outbound', 'periods_left_min', 'periods_left_max', 'tour_purpose_grouped'], 
#     ascending=[False, True, True, False],
#     create_full_index=True
# )

# purpose_unweighted.to_csv(os.path.join(output_dir, 'purpose_counts.csv'))
# purpose_probs.to_csv(os.path.join(output_dir, 'purpose_probs.csv'))

# display(purpose_unweighted)
# display(purpose_probs)

In [29]:
# purpose_stops_unweighted, purpose_stops_probs = create_trip_probs_table(
#     trimmed_trips,
#     segmentation=['periods_left_min', 'periods_left_max', 'outbound', 'tour_purpose_grouped', 'half_tour_stops_remaining'], 
#     outbound_max_bin=25,
#     inbound_max_bin=28,
#     sort_values=['outbound', 'periods_left_min', 'periods_left_max', 'tour_purpose_grouped', 'half_tour_stops_remaining'], 
#     ascending=[False, True, True, False, True],
#     create_full_index=True
# )

# purpose_stops_unweighted.to_csv(os.path.join(output_dir, 'purpose_stops_remaining_counts.csv'))
# purpose_stops_probs.to_csv(os.path.join(output_dir, 'purpose_stops_remaining_probs.csv'))
# # display(purpose_stops_unweighted)
# # display(purpose_stops_probs)

In [30]:
# Cap the stops-remaining count at 1 (every value above 1 becomes 1) and store it
# as a new column on trimmed_trips, then build the count / probability tables
# segmented on that capped column together with periods left, direction and
# grouped tour purpose.
_stops_grouped = 'half_tour_stops_remaining_grouped'
trimmed_trips[_stops_grouped] = trimmed_trips['half_tour_stops_remaining'].clip(upper=1)

# Shared pieces of the segmentation / sort column lists; only the position of
# 'outbound' differs between the two.
_period_cols = ['periods_left_min', 'periods_left_max']
_detail_cols = ['tour_purpose_grouped', _stops_grouped]

purpose_stops_grouped_unweighted, purpose_stops_grouped_probs = create_trip_probs_table(
    trimmed_trips.copy(),  # hand the helper its own copy of the frame
    segmentation=_period_cols + ['outbound'] + _detail_cols,
    outbound_max_bin=25,
    inbound_max_bin=34,
    sort_values=['outbound'] + _period_cols + _detail_cols,
    # one flag per sort_values entry, in the same order
    ascending=[False, True, True, False, True],
    create_full_index=True,
)

# Write both tables into the output directory.
for _table, _csv_name in (
    (purpose_stops_grouped_unweighted, 'trip_scheduling_counts_purpose_stops.csv'),
    (purpose_stops_grouped_probs, 'trip_scheduling_probs_purpose_stops.csv'),
):
    _table.to_csv(os.path.join(output_dir, _csv_name))

# display(purpose_stops_grouped_unweighted)
display(purpose_stops_grouped_probs)

MultiIndex([( 0,  0,  True, 'non_mand', 0),
            ( 0,  0,  True, 'non_mand', 1),
            ( 0,  0,  True,     'mand', 0),
            ( 0,  0,  True,     'mand', 1),
            ( 1,  1,  True, 'non_mand', 0),
            ( 1,  1,  True, 'non_mand', 1),
            ( 1,  1,  True,     'mand', 0),
            ( 1,  1,  True,     'mand', 1),
            ( 2,  2,  True, 'non_mand', 0),
            ( 2,  2,  True, 'non_mand', 1),
            ...
            (32, 32, False,     'mand', 0),
            (32, 32, False,     'mand', 1),
            (33, 33, False, 'non_mand', 0),
            (33, 33, False, 'non_mand', 1),
            (33, 33, False,     'mand', 0),
            (33, 33, False,     'mand', 1),
            (34, 47, False, 'non_mand', 0),
            (34, 47, False, 'non_mand', 1),
            (34, 47, False,     'mand', 0),
            (34, 47, False,     'mand', 1)],
           names=['periods_left_min', 'periods_left_max', 'outbound', 'tour_purpose_grouped', 'half_tou

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,depart_offset,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,All
periods_left_min,periods_left_max,outbound,tour_purpose_grouped,half_tour_stops_remaining_grouped,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
0,0,True,non_mand,0,1.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
0,0,True,non_mand,1,1.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
0,0,True,mand,0,1.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
0,0,True,mand,1,1.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
1,1,True,non_mand,0,0.818980,0.181020,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,33,False,mand,1,0.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.023248,0.000000,0.024678,0.000000,0.009363,0.029519,0.111930,0.022853,0.051386,0.044738,0.140958,0.055025,0.173841,0.029385,0.163259,0.034258,0.013563,0.015926,0.007949,0.000000,0.022040,0.005005,0.013479,0.000000,0.007597,0.000000,0.000000,1.0
34,47,False,non_mand,0,0.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.018384,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.557709,0.000000,0.423907,1.0
34,47,False,non_mand,1,0.016658,0.021838,0.0,0.02167,0.000000,0.101739,0.024987,0.0,0.142265,0.004760,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.026389,0.000000,0.000000,0.059292,0.000000,0.000000,0.000000,0.000000,0.000000,0.008987,0.000000,0.000000,0.222525,0.033524,0.000000,0.010930,0.017517,0.286919,0.000000,1.0
34,47,False,mand,0,0.000000,0.000000,0.0,0.00000,0.000000,0.000000,0.000000,0.0,0.000000,0.077898,0.030793,0.000000,0.000000,0.012379,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.074276,0.000000,0.000000,0.000000,0.000000,0.100235,0.265250,0.139702,0.299468,1.0
