In [1]:
import os
import pandas as pd
pd.options.display.max_columns = 100
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import openmatrix as omx

In [2]:
project_dir = r"F:\Projects\Clients\mtc\updated_networks\version_12_v2_fixed_tollseg"
trn_dir = os.path.join(project_dir, "trn")
demand_dir = os.path.join(project_dir, "ctramp_output")

# _all_periods = ['EA', 'AM']
_all_periods = ['EA', 'AM', 'MD', 'PM', 'EV']

all_boardings = None
for period in _all_periods:
    boardings = pd.read_csv(os.path.join(trn_dir, "boardings_by_line_{}.csv".format(period)), sep="\t").drop(columns='c ')
    boardings.rename(columns={'total_boardings': 'total_boardings_{}'.format(period)}, inplace=True)
    
    if all_boardings is None:
        all_boardings = boardings
        continue
    
    all_boardings = pd.merge(all_boardings, boardings, how='outer', on=['line_name', 'description', 'mode', 'line_mode'])
    

boarding_cols = [f'total_boardings_{per}' for per in _all_periods] 
all_boardings['total_boardings'] = all_boardings[boarding_cols].sum(axis=1)

all_boardings
    

Unnamed: 0,line_name,description,total_boardings_EA,mode,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings
0,10_198_EA_d0_s491,Hollis,5.00000,b,12.0,,,,,5.00000
1,10_201_EA_d0_s490,Shellmound/Powell,29.78253,b,12.0,,,,,29.78253
2,12_481_EA_d0_s1416,1,0.00000,b,49.0,,,,,0.00000
3,12_481_EA_d1_s1425,1,10.00000,b,49.0,,,,,10.00000
4,12_482_EA_d1_s1444,2,5.00000,b,49.0,,,,,5.00000
...,...,...,...,...,...,...,...,...,...,...
3954,7_290_EV_d0_s726,SLAC Shuttle,,b,13.0,,,,29.16667,29.16667
3955,7_291_EV_d0_s707,Line N - Evenings ONLY,,b,13.0,,,,97.57709,97.57709
3956,99_259_EV_d0_s684,East Bayshore,,b,16.0,,,,0.00000,0.00000
3957,99_261_EV_d0_s678,East Whisman PM,,b,16.0,,,,10.00000,10.00000


In [3]:
any(all_boardings['line_name'].duplicated())

False

In [4]:
print("total number of boardings:", all_boardings['total_boardings'].sum())

total number of boardings: 1695685.592981


In [5]:
all_boardings.groupby('mode').sum().round(0)

Unnamed: 0_level_0,total_boardings_EA,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings
mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
b,11950.0,110241.0,232188.0,218707.0,269143.0,133180.0,865168.0
f,330.0,5993.0,4005.0,1950.0,7049.0,3585.0,16919.0
h,15144.0,6720.0,161314.0,71816.0,121860.0,96490.0,466624.0
l,4259.0,10148.0,74456.0,59201.0,85441.0,32574.0,255931.0
r,2222.0,4569.0,14468.0,5353.0,10488.0,2148.0,34679.0
x,2344.0,36416.0,19275.0,9169.0,18333.0,7242.0,56364.0


In [6]:
all_boardings['total_boardings'].sum()

1695685.592981

In [7]:
all_boardings[all_boardings['mode'] == 'f'].groupby('line_mode')['total_boardings'].sum()

line_mode
101.0    16879.365941
103.0       40.000000
Name: total_boardings, dtype: float64

In [8]:
all_boardings[all_boardings['mode'] == 'f'].groupby('description').sum()

Unnamed: 0_level_0,total_boardings_EA,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Alameda - Oakland - San Francisco Ferry,0.0,808.0,393.28,1352.909,4425.0,2445.0,8616.189
Harbor Bay - San Francisco Ferry Building,0.0,606.0,50.0,0.0,510.0,210.0,770.0
Larkspur - San Francisco Ferry,115.0,909.0,1975.0,260.0,740.0,871.2788,3961.2788
San Francisco - Angel Island Ferry,0.0,412.0,0.0,0.0,0.0,0.0,0.0
San Francisco - Sausalito Ferry,0.0,618.0,0.0,0.0,0.0,0.0,0.0
San Francisco - Tiburon Ferry,0.0,721.0,0.0,15.0,5.0,20.0,40.0
Sausalito - San Francisco Ferry,0.0,808.0,10.0,15.0,40.0,23.721141,88.721141
South SF - Alameda - Oakland,0.0,303.0,106.72,0.0,95.0,15.0,216.72
South San Francisco - San Francisco Ferry Building,0.0,202.0,70.0,0.0,120.0,0.0,190.0
Vallejo - San Francisco,215.0,606.0,1400.0,307.5,1113.957,0.0,3036.457


In [9]:
all_boardings[all_boardings['mode'] == 'f']['total_boardings'].sum()

16919.365941

In [10]:
all_boardings[all_boardings['line_mode'] == 91]

Unnamed: 0,line_name,description,total_boardings_EA,mode,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings
6,12_490_EA_d1_s1426,78,42.5,x,91.0,,,,,42.5
7,12_491_EA_d0_s1415,80,96.36992,x,91.0,,,,,96.36992
8,12_491_EA_d1_s1437,80,10.0,x,91.0,,,,,10.0
444,12_490_AM_d0_s1443,78,,x,91.0,55.43608,,,,55.43608
445,12_490_AM_d1_s1426,78,,x,91.0,210.5186,,,,210.5186
446,12_491_AM_d0_s1421,80,,x,91.0,374.8084,,,,374.8084
447,12_491_AM_d1_s1435,80,,x,91.0,99.56392,,,,99.56392
1478,12_490_MD_d0_s1457,78,,x,91.0,,41.55018,,,41.55018
1479,12_490_MD_d1_s1426,78,,x,91.0,,81.71387,,,81.71387
1480,12_491_MD_d0_s1421,80,,x,91.0,,132.0962,,,132.0962


In [11]:
modes_in_spreadsheet = [84, 30, 133, 131, 120, 130, 14, 86, 42, 12, 92, 52, 52, 101, 87, 70, 17, 71, 21, 20, 110, 94, 60, 68, 80, 24, 66, 81, 28, 111, 100, 103, 63, 13, 44, 38, 56, 91, 49, 90, 46]
all_boardings[~all_boardings['line_mode'].isin(modes_in_spreadsheet)]

Unnamed: 0,line_name,description,total_boardings_EA,mode,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings
377,4_530_EA_d1_s1582,Express Bart / Brentwood,15.81797,x,95.0,,,,,15.81797
1323,4_530_AM_d0_s1581,Express Bart / Brentwood,,x,95.0,11.75675,,,,11.75675
1324,4_530_AM_d1_s1582,Express Bart / Brentwood,,x,95.0,15.56075,,,,15.56075
1459,99_259_AM_d0_s677,East Bayshore,,b,16.0,110.0,,,,110.0
1460,99_260_AM_d0_s680,East Whisman AM,,b,16.0,60.0,,,,60.0
1461,99_262_AM_d0_s673,West Bayshore,,b,16.0,65.0,,,,65.0
2188,4_530_MD_d1_s1582,Express Bart / Brentwood,,x,95.0,,10.23105,,,10.23105
2290,99_259_MD_d0_s683,East Bayshore,,b,16.0,,0.0,,,0.0
2291,99_260_MD_d0_s679,East Whisman AM,,b,16.0,,0.0,,,0.0
2292,99_262_MD_d0_s682,West Bayshore,,b,16.0,,0.0,,,0.0


In [12]:
all_boardings[~all_boardings['line_mode'].isin(modes_in_spreadsheet)]['total_boardings'].sum()

600.899321

## Making table to copy directly into existing existing spreadsheet

In [13]:
all_boardings['total_boardings'].sum()

1695685.592981

In [14]:
all_boardings[all_boardings['line_name'].isna() | all_boardings['line_mode'].isna()]

Unnamed: 0,line_name,description,total_boardings_EA,mode,line_mode,total_boardings_AM,total_boardings_MD,total_boardings_PM,total_boardings_EV,total_boardings


In [15]:
model_boardings = all_boardings.groupby(['line_name', 'line_mode'])['total_boardings'].sum().to_frame()
model_boardings.reset_index(inplace=True)

In [16]:
model_boardings['total_boardings'].sum()

1695685.592981

In [17]:
model_boardings['line_mode'] = model_boardings['line_mode'].astype(int)
model_boardings['total_boardings'] = model_boardings['total_boardings'].astype(int)
model_boardings

Unnamed: 0,line_name,line_mode,total_boardings
0,10_198_AM_d0_s493,12,85
1,10_198_EA_d0_s491,12,5
2,10_198_EV_d0_s493,12,155
3,10_198_MD_d0_s493,12,460
4,10_199_AM_d0_s492,12,216
...,...,...,...
3954,99_261_PM_d0_s678,16,85
3955,99_262_AM_d0_s673,16,65
3956,99_262_EV_d0_s676,16,0
3957,99_262_MD_d0_s682,16,0


In [18]:
spreadsheet_cols = ['Name', 'Mode', 'Op', 'Stp', 'Cr', 'Distance', 'Time', 'Pass', 'PassDist', 'PassHr']
for col in spreadsheet_cols:
    model_boardings[col] = pd.NA
model_boardings['Name'] = model_boardings['line_name']
model_boardings['Mode'] = model_boardings['line_mode']
model_boardings['Pass'] = model_boardings['total_boardings']
model_boardings[spreadsheet_cols]

Unnamed: 0,Name,Mode,Op,Stp,Cr,Distance,Time,Pass,PassDist,PassHr
0,10_198_AM_d0_s493,12,,,,,,85,,
1,10_198_EA_d0_s491,12,,,,,,5,,
2,10_198_EV_d0_s493,12,,,,,,155,,
3,10_198_MD_d0_s493,12,,,,,,460,,
4,10_199_AM_d0_s492,12,,,,,,216,,
...,...,...,...,...,...,...,...,...,...,...
3954,99_261_PM_d0_s678,16,,,,,,85,,
3955,99_262_AM_d0_s673,16,,,,,,65,,
3956,99_262_EV_d0_s676,16,,,,,,0,,
3957,99_262_MD_d0_s682,16,,,,,,0,,


In [19]:
model_boardings[spreadsheet_cols].to_csv(os.path.join(trn_dir, 'boardings_spreadsheet_input.csv'), index=False)

## Demand Assigned by Set and Access Mode

In [20]:
def read_matrix_from_omx_file(file, core, row_map="Rows", col_map="Columns"):
    if type(file) == str:
        file = omx.open_file(file)
    matrix_df = pd.DataFrame(file[core][:])
    if row_map is not None:
        inv_row_map = file.mapping(row_map)
        row_map = {taz1: taz2 for taz2, taz1 in inv_row_map.items()}
        inv_col_map = file.mapping(col_map)
        col_map = {taz1: taz2 for taz2, taz1 in inv_col_map.items()}
        
    else:
        row_map = {i: i+1 for i in range(len(matrix_df))}
        col_map = {i: i+1 for i in range(len(matrix_df))}
    matrix_df.rename(index=row_map, columns=col_map, inplace=True)
    return matrix_df

In [21]:
def get_total_demand(demand_dir, period, trn_set, access_mode):
    df = None
    omx_filename_template = "transit_{period}_{access_mode}_TRN_{trn_set}_{period}.omx"
    core_name_template = "{access_mode}_TRN_{trn_set}_{period}"
    _all_access_modes = ['WLK', 'PNR', 'KNRTNC', 'KNRPRV']
    
    omx_filename = omx_filename_template.format(period=period, access_mode=access_mode, trn_set=trn_set)
    core_name = core_name_template.format(period=period, access_mode=access_mode, trn_set=trn_set)
    filename = os.path.join(demand_dir, omx_filename)
    sub_df = read_matrix_from_omx_file(filename, core=core_name, row_map=None, col_map=None)
    total_counts = sub_df.sum().sum()
    print(period, trn_set, access_mode, total_counts)
    return total_counts


In [22]:
_all_access_modes = ['WLK', 'PNR', 'KNRTNC', 'KNRPRV']
_all_sets = ['set1', 'set2', 'set3']

demand_df = pd.DataFrame(columns=['period', 'set', 'access_mode', 'demand'])

for period in _all_periods:
    for trn_set in _all_sets:
        for access_mode in _all_access_modes:
            demand_df.loc[len(demand_df)] = [
                period, trn_set, access_mode, get_total_demand(demand_dir, period, trn_set, access_mode)]

EA set1 WLK 7465.0
EA set1 PNR 125.0
EA set1 KNRTNC 5.0
EA set1 KNRPRV 310.0
EA set2 WLK 5840.0
EA set2 PNR 5700.0
EA set2 KNRTNC 100.0
EA set2 KNRPRV 5975.0
EA set3 WLK 1590.0
EA set3 PNR 245.0
EA set3 KNRTNC 5.0
EA set3 KNRPRV 520.0
AM set1 WLK 161170.0
AM set1 PNR 1850.0
AM set1 KNRTNC 740.0
AM set1 KNRPRV 3895.0
AM set2 WLK 100495.0
AM set2 PNR 51880.0
AM set2 KNRTNC 3185.0
AM set2 KNRPRV 51570.0
AM set3 WLK 23495.0
AM set3 PNR 1935.0
AM set3 KNRTNC 280.0
AM set3 KNRPRV 4205.0
MD set1 WLK 168195.0
MD set1 PNR 2055.0
MD set1 KNRTNC 1505.0
MD set1 KNRPRV 4205.0
MD set2 WLK 75945.0
MD set2 PNR 15425.0
MD set2 KNRTNC 5785.0
MD set2 KNRPRV 21905.0
MD set3 WLK 13190.0
MD set3 PNR 860.0
MD set3 KNRTNC 355.0
MD set3 KNRPRV 1850.0
PM set1 WLK 192295.0
PM set1 PNR 3495.0
PM set1 KNRTNC 1115.0
PM set1 KNRPRV 6025.0
PM set2 WLK 105270.0
PM set2 PNR 35425.0
PM set2 KNRTNC 4750.0
PM set2 KNRPRV 42865.0
PM set3 WLK 22455.0
PM set3 PNR 1825.0
PM set3 KNRTNC 455.0
PM set3 KNRPRV 4700.0
EV set1 WLK 

In [23]:
demand_df.demand.sum()

1397035.0

In [24]:
pd.crosstab(demand_df['set'], demand_df['access_mode'], values=demand_df['demand'], margins=True, aggfunc='sum').astype(int)

access_mode,KNRPRV,KNRTNC,PNR,WLK,All
set,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
set1,17985,3820,9700,630745,662250
set2,148850,16105,133200,347670,645825
set3,12885,1315,5450,69310,88960
All,179720,21240,148350,1047725,1397035


## Looking at output trip table

In [None]:
trips = pd.read_csv(os.path.join(demand_dir, 'indivTripData_5.csv'))

In [None]:
ind_tours = pd.read_csv(os.path.join(demand_dir, 'indivTourData_5.csv'))

In [None]:
hhs = pd.read_csv(os.path.join(demand_dir, 'householdData_5.csv'))

In [None]:
tour_mode_dict = {
    1: "SOV",
    3: "SR2",
    4: "SR2",
    6: "SR3",
    7: "SR3",
    9: "Walk",
    10: "Bike",
    11: "WLKTRN",
    12: "PNR",
    13: "KNRPRV",
    14: "KNRTNC",
    15: "Taxi",
    16: "TNC",
    17: "SCHBUS"
}

ind_tours['tour_mode_named'] = ind_tours['tour_mode'].map(tour_mode_dict)

In [None]:
# mode_order = list(tour_mode_dict.values()) + ['All']
mode_order = ['SOV', 'SR2', 'SR3', 'Walk', 'Bike', 'WLKTRN', 'PNR', 'KNRPRV', 'KNRTNC', 'Taxi', 'TNC', 'SCHBUS', 'All']

In [None]:
trips['trip_mode_named'] = trips['trip_mode'].map(tour_mode_dict)
trips['tour_mode_named'] = trips['tour_mode'].map(tour_mode_dict)

In [None]:
trips.head()

In [None]:
trips.stop_period.value_counts()

In [None]:
hhs.head()

In [None]:
trn_trips = trips[trips.trip_mode_named.isin(['WLKTRN','KNRPRV','KNRTNC','PNR'])]
sample_rate = hhs.sampleRate.mean()
(pd.crosstab(trn_trips.set, trn_trips.trip_mode_named, margins=True) / sample_rate).astype(int)

In [None]:
total_ct = pd.crosstab(trips.trip_mode_named, trips.tour_mode_named, margins=True)
cols = [col for col in mode_order if (col in total_ct.columns)]
rows = [row for row in mode_order if (row in total_ct.index.values)]
total_ct = total_ct.loc[rows, cols]
total_ct.index = rows
total_ct

In [None]:
work_tour_trips = trips[trips['tour_purpose'] == 'Work']
work_ct = pd.crosstab(work_tour_trips.trip_mode_named, work_tour_trips.tour_mode_named, margins=True)
cols = [col for col in mode_order if (col in work_ct.columns)]
rows = [row for row in mode_order if (row in work_ct.index.values)]
work_ct = work_ct.loc[rows, cols]
work_ct.index = rows
work_ct

In [None]:
cols = [col for col in mode_order if (col in work_ct.columns)]
rows = [row for row in mode_order if (row in work_ct.index.values)]
work_ct = work_ct.loc[rows, cols]
work_ct.index = rows
work_ct

In [None]:
rows

In [None]:
work_ct.index

In [None]:
work_tour_trips = trips[trips['tour_purpose'] == 'Work']
pd.crosstab(work_tour_trips.set, work_tour_trips.trip_mode_named, margins=True)

In [None]:
pd.crosstab(ind_tours.out_set, ind_tours.tour_mode, margins=True)

In [None]:
work_tours = ind_tours[ind_tours['tour_purpose'] == 'Work']
pd.crosstab(work_tours.out_set, work_tours.tour_mode_named, margins=True)