In [1]:
#Imports
import numpy as np
import pandas as pd
import difflib
import matplotlib.pyplot as plt
import time

In [2]:
def  processEvents(dataFilepath_sim, nrows):
    PTs = []
    PEVs = []
    print('read', dataFilepath_sim)
    for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):
        if sum((chunk['type'] == 'PathTraversal')) > 0:
            chunk['vehicle'] = chunk['vehicle'].astype(str)
            PT = chunk.loc[(chunk['type'] == 'PathTraversal') & (chunk['length'] > 0)].dropna(how='all', axis=1)
            PT['departureTime'] = PT['departureTime'].astype(int)
            PT['arrivalTime'] = PT['arrivalTime'].astype(int)

            PTs.append(PT[['driver', 'vehicle', 'mode', 'length', 'startX', 'startY', 'endX', 'endY', 'vehicleType',
                           'arrivalTime', 'departureTime', 'primaryFuel', 'primaryFuelType', 'secondaryFuel',
                           'secondaryFuelType', 'numPassengers', 'riders','time']])
            print(chunk.type.value_counts())
            PEV = chunk.loc[(chunk.type == "PersonEntersVehicle") &
                            ~(chunk['person'].apply(str).str.contains('Agent').fillna(False)) &
                            ~(chunk['vehicle'].str.contains('body').fillna(False)), :].dropna(how='all', axis=1)
            if ~PEV.empty:
                PEV['time'] = PEV['time'].astype(int)
                PEVs.append(PEV)

    Events_file_sim_PT = pd.concat(PTs)[['vehicle','time','endY','endX','startY','startX','mode']]
    Events_file_sim_PE = pd.concat(PEVs)[['vehicle','time',]]
    print(Events_file_sim_PE)

    return Events_file_sim_PT, Events_file_sim_PE



In [3]:
#Filter transit trips
def filter_transit(Events_file_sim_PT,Events_file_sim_PE):
    print(Events_file_sim_PT['mode'].value_counts())
    Events_file_sim_PT = Events_file_sim_PT[(Events_file_sim_PT['mode'] == 'bus')|
                                            (Events_file_sim_PT['mode'] == 'tram')|
                                            (Events_file_sim_PT['mode'] == 'subway')|
                                            (Events_file_sim_PT['mode'] == 'cable_car')|
                                            (Events_file_sim_PT['mode'] == 'ferry')|
                                            (Events_file_sim_PT['mode'] == 'rail')]
    print(Events_file_sim_PT['mode'].value_counts())
    Events_file_sim_PE = Events_file_sim_PE[Events_file_sim_PE['vehicle'].isin(Events_file_sim_PT['vehicle'])]
    
    return Events_file_sim_PE

In [4]:
def guess_agency(Events_file_sim_PE):

    agencies = []
    for vehicleID in Events_file_sim_PE['vehicle']:
        agency = vehicleID.split(':')[0]
        if agency == 'petalumatransit-petaluma-ca-us':
            agencies.append('PE')
        elif agency == 'westcat-ca-us':
            agencies.append('WC')
        elif agency == 'caltrain-ca-us':
            agencies.append('CA')
        elif agency == 'riovista-ca-us':
            agencies.append('RV')
        elif agency == 'unioncity-ca-us':
            agencies.append('UC')
        else:
            if len(agency) == 2:
                agencies.append(agency)
            elif agency == 'Caltrain':
                agencies.append('CA')
            else:
                print('Warning, this agency is not recognized:', agency)
    Events_file_sim_PE['agency'] = agencies

    print(np.unique(agencies))
    
    return Events_file_sim_PE

In [5]:
def guess_route(Events_file_sim_PE, GTFS_filepaths):

    GTFS_trip_files = {}

    for GTFS_filepath, GTFS in zip(GTFS_filepaths,GTFSs):
        GTFS_trip_files[GTFS] = pd.read_csv(GTFS_filepath+'trips.txt')

    route_ids = []
    total_routes = len(Events_file_sim_PE['vehicle'])
    i = 0
    time_start = time.time()
    for vehicle, agency in zip(Events_file_sim_PE['vehicle'],Events_file_sim_PE['agency']):
        i+=1
        if i%10000 ==0:
            print(i,'/',total_routes,'. Time = ', time.time()-time_start, '. Estimated remaining time:', (time.time()-time_start)/i*total_routes-(time.time()-time_start))
        if agency == 'SM':
            route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][
                GTFS_trip_files[agency]['trip_id'].astype(str)==
                     str(vehicle).split(':')[1]+'|'
                     +str(vehicle).split(':')[2]+':'
                     +str(vehicle).split(':')[3]+'|'
                     +str(vehicle).split(':')[4]+':'
                     +str(vehicle).split(':')[5]+':'
                     +str(vehicle).split(':')[6]])[0]))
        else:
#             print(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0])
            try:
                route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0]))
            except:
                print('Warning, trip non found for vehicle', vehicle)
                route_ids.append('tripID not found')
    #     elif agency == 'GG':
    #         route_ids.append('GG:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
    #     elif agency == 'SF':
    #         route_ids.append('SF:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
    Events_file_sim_PE['route_id'] = route_ids
    
    return Events_file_sim_PE


In [6]:
dataFilepath_sim = [
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
                    's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
                    's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
                    's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
            ]

                    
years = ['2018','2019','2020','2021']

names = []
for year in years:
    names.append('Baseline'+year)
    names.append('Future'+year)

output_filepath = '/Users/cpoliziani/Downloads/Transit Rich/Results/'

GTFS_filepath = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local/'
GTFS_filepath2 = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local-TR/'

RA_output = 'agency_ridershipNew3.csv'
RR_output = 'route_ridershipNew3.csv'

GTFSs = ['3D','AC','AM','AY','BA','CA','CC','CE','CM','CT','DE','EM','GG','HF','MA','PE','RV', 'SB',
        'SC','SF','SM','SO','SR','ST','TD','UC','VC','VN','VT','WC','WH']

GTFS_3D = GTFS_filepath+'3D/'
GTFS_AC = GTFS_filepath+'AC/'
GTFS_AM = GTFS_filepath+'AM/'
GTFS_AY = GTFS_filepath+'AY/'
GTFS_BA = GTFS_filepath+'BA/'
GTFS_Caltrain = GTFS_filepath+'Caltrain/'
GTFS_CC = GTFS_filepath+'CC/'
GTFS_CE = GTFS_filepath+'CE/'
GTFS_CM = GTFS_filepath+'CM/'
GTFS_CT = GTFS_filepath+'CT/'
GTFS_DE = GTFS_filepath+'DE/'
GTFS_EM = GTFS_filepath+'EM/'
GTFS_GG = GTFS_filepath+'GG/'
GTFS_HF = GTFS_filepath+'HF/'
GTFS_MA = GTFS_filepath+'MA/'
GTFS_PE = GTFS_filepath+'PE/'
GTFS_RV = GTFS_filepath+'RV/'
GTFS_SB = GTFS_filepath+'SB/'
GTFS_SC = GTFS_filepath+'SC/'
GTFS_SF = GTFS_filepath+'SF/'
GTFS_SM = GTFS_filepath+'SM/'
GTFS_SO = GTFS_filepath+'SO/'
GTFS_SR = GTFS_filepath+'SR/'
GTFS_ST = GTFS_filepath+'ST/'
GTFS_TD = GTFS_filepath+'TD/'
GTFS_UC = GTFS_filepath+'UC/'
GTFS_VC = GTFS_filepath+'VC/'
GTFS_VN = GTFS_filepath+'VN/'
GTFS_VT = GTFS_filepath+'VT/'
GTFS_WC = GTFS_filepath+'WC/'
GTFS_WH = GTFS_filepath+'WH/'


GTFS_3D2 = GTFS_filepath2+'3D/'
GTFS_AC2 = GTFS_filepath2+'AC/'
GTFS_AM2 = GTFS_filepath2+'AM/'
GTFS_AY2 = GTFS_filepath2+'AY/'
GTFS_BA2 = GTFS_filepath2+'BA/'
GTFS_Caltrain2 = GTFS_filepath2+'Caltrain/'
GTFS_CC2 = GTFS_filepath2+'CC/'
GTFS_CE2 = GTFS_filepath2+'CE/'
GTFS_CM2 = GTFS_filepath2+'CM/'
GTFS_CT2 = GTFS_filepath2+'CT/'
GTFS_DE2 = GTFS_filepath2+'DE/'
GTFS_EM2 = GTFS_filepath2+'EM/'
GTFS_GG2 = GTFS_filepath2+'GG/'
GTFS_HF2 = GTFS_filepath2+'HF/'
GTFS_MA2 = GTFS_filepath2+'MA/'
GTFS_PE2 = GTFS_filepath2+'PE/'
GTFS_RV2 = GTFS_filepath2+'RV/'
GTFS_SB2 = GTFS_filepath2+'SB/'
GTFS_SC2 = GTFS_filepath2+'SC/'
GTFS_SF2 = GTFS_filepath2+'SF/'
GTFS_SM2 = GTFS_filepath2+'SM/'
GTFS_SO2 = GTFS_filepath2+'SO/'
GTFS_SR2 = GTFS_filepath2+'SR/'
GTFS_ST2 = GTFS_filepath2+'ST/'
GTFS_TD2 = GTFS_filepath2+'TD/'
GTFS_UC2 = GTFS_filepath2+'UC/'
GTFS_VC2 = GTFS_filepath2+'VC/'
GTFS_VN2 = GTFS_filepath2+'VN/'
GTFS_VT2 = GTFS_filepath2+'VT/'
GTFS_WC2 = GTFS_filepath2+'WC/'
GTFS_WH2 = GTFS_filepath2+'WH/'

GTFS_baseline = [GTFS_3D,
                    GTFS_AC,
                    GTFS_AM,
                    GTFS_AY,
                    GTFS_BA,
                    GTFS_Caltrain,
                    GTFS_CC,
                    GTFS_CE,
                    GTFS_CM,
                    GTFS_CT,
                    GTFS_DE,
                    GTFS_EM,
                    GTFS_GG,
                    GTFS_HF,
                    GTFS_MA,
                    GTFS_PE,
                    GTFS_RV,
                    GTFS_SB,
                    GTFS_SC,
                    GTFS_SF,
                    GTFS_SM,
                    GTFS_SO,
                    GTFS_SR,
                    GTFS_ST,
                    GTFS_TD,
                    GTFS_UC,
                    GTFS_VC,
                    GTFS_VN,
                    GTFS_VT,
                    GTFS_WC,
                    GTFS_WH]

GTFS_TR = [GTFS_3D2,
                    GTFS_AC2,
                    GTFS_AM2,
                    GTFS_AY2,
                    GTFS_BA2,
                    GTFS_Caltrain2,
                    GTFS_CC2,
                    GTFS_CE2,
                    GTFS_CM2,
                    GTFS_CT2,
                    GTFS_DE2,
                    GTFS_EM2,
                    GTFS_GG2,
                    GTFS_HF2,
                    GTFS_MA2,
                    GTFS_PE2,
                    GTFS_RV2,
                    GTFS_SB2,
                    GTFS_SC2,
                    GTFS_SF2,
                    GTFS_SM2,
                    GTFS_SO2,
                    GTFS_SR2,
                    GTFS_ST2,
                    GTFS_TD2,
                    GTFS_UC2,
                    GTFS_VC2,
                    GTFS_VN2,
                    GTFS_VT2,
                    GTFS_WC2,
                    GTFS_WH2]
GTFS_filepaths = [GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR]



In [None]:
nrows = 900000

#Ridership route
RR = pd.DataFrame()
#Ridership agency
RA = pd.DataFrame()

for fp, name, GTFS_filepath in zip(dataFilepath_sim,names, GTFS_filepaths):
    print('evaluate ridership')
    #import pathtraversal and person enter vehicles
    PT, PE = processEvents(fp, nrows)
    #filter PE transit trips from PT
    PE = filter_transit(PT, PE)
    #Guess transit agency for each PE
    PE = guess_agency(PE)
    #Guess transit route for each PE
    PE = guess_route(PE, GTFS_filepath)
    #Route Ridership
    rr = PE['route_id'].value_counts()
    sum_agency = 0
    sum_agency_bsva = 0
    sum_route = 0
    for route, count in zip(rr.keys(), rr):
        RR.at[route, name] = count
        sum_route += count
    ra = PE['agency'].value_counts()
    for agency, count in zip(ra.keys(), ra):
        RA.at[agency, name] = count
        sum_agency += count
        if agency in ['BA', 'SF', 'VT', 'AC']:
            RA.at[agency, name+' BA-SF-VT-AC'] = count
            sum_agency_bsva += count
    for route, count in zip(rr.keys(), rr):
        RR.at[route, name+' shares'] = count/sum_route
    ra = PE['agency'].value_counts()
    for agency, count in zip(ra.keys(), ra):
        RA.at[agency, name+' shares'] = count/sum_agency
        if agency in ['BA', 'SF', 'VT', 'AC']:
            RA.at[agency, name+' shares BA-SF-VT-AC'] = count/sum_agency_bsva
    RA.to_csv(output_filepath+RA_output)
    RR.to_csv(output_filepath+RR_output)

for year in years:
    diff = []
    diff_abs = []
    for baseline, future in zip(RA['Baseline'+year],RA['Future'+year]):
        diff.append((future-baseline)/baseline)
        diff_abs.append((future-baseline))
    RA['Diff %'+year] = diff
    RA['Diff'+year] = diff_abs

tot_NTD = 1756364558 + 15283299+5703705+49795740+110802986+7386518+49247910+27027693 + 8437926+50222832 + 2818648
    
RA.at['BA', 'clipper BA-SF-VT-AC 2016 share target'] = 350485/773719
RA.at['BA', 'MTC BA-SF-VT-AC 2016 share target'] = 458900/1564500
RA.at['BA', 'NTD BA-SF-VT-AC 2019 share target'] = (1756364558 + 15283299)/tot_NTD

RA.at['SF', 'clipper BA-SF-VT-AC 2016 share target'] = 293991/773719
RA.at['SF', 'MTC BA-SF-VT-AC 2016 share target'] = 777000/1564500
RA.at['SF', 'NTD BA-SF-VT-AC 2019 share target'] = (5703705+49795740+110802986+7386518+49247910)/tot_NTD

RA.at['VT', 'clipper BA-SF-VT-AC 2016 share target'] = 43950/773719
RA.at['VT', 'MTC BA-SF-VT-AC 2016 share target'] = 146700/1564500
RA.at['VT', 'NTD BA-SF-VT-AC 2019 share target'] = (27027693 + 8437926)/tot_NTD

RA.at['AC', 'clipper BA-SF-VT-AC 2016 share target'] = 85293/773719
RA.at['AC', 'MTC BA-SF-VT-AC 2016 share target'] = 181900/1564500
RA.at['AC', 'NTD BA-SF-VT-AC 2019 share target'] = (50222832 + 2818648)/tot_NTD

RA.at['BA', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 350485
RA.at['BA', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 458900
RA.at['BA', 'NTD BA-SF-VT-AC 2019 ridership target'] = (1756364558 + 15283299)

RA.at['SF', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 293991
RA.at['SF', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 777000
RA.at['SF', 'NTD BA-SF-VT-AC 2019 ridership target'] = (5703705+49795740+110802986+7386518+49247910)

RA.at['VT', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 43950
RA.at['VT', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 146700
RA.at['VT', 'NTD BA-SF-VT-AC 2019 ridership target'] =  (27027693 + 8437926)

RA.at['AC', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 85293
RA.at['AC', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 181900
RA.at['AC', 'NTD BA-SF-VT-AC 2019 ridership target'] = (50222832 + 2818648)

RA.to_csv(output_filepath+RA_output)
RR.to_csv(output_filepath+RR_output)



In [8]:
RA

Unnamed: 0,Baseline2019,Baseline2019 BA-SF-VT-AC,Baseline2019 shares,Baseline2019 shares BA-SF-VT-AC,Future2019,Future2019 BA-SF-VT-AC,Future2019 shares,Future2019 shares BA-SF-VT-AC,Diff %2019,Diff2019,clipper BA-SF-VT-AC 2016 share target,MTC BA-SF-VT-AC 2016 share target,NTD BA-SF-VT-AC 2019 share target,clipper BA-SF-VT-AC 2020 Jan av ridership target,MTC BA-SF-VT-AC 2016 av ridership target,NTD BA-SF-VT-AC 2019 ridership target
SF,94950.0,94950.0,0.32286,0.399707,94800.0,94800.0,0.324145,0.401981,-0.00158,-150.0,0.379971,0.496644,0.107022,293991.0,777000.0,222936900.0
AC,56400.0,56400.0,0.191778,0.237425,55981.0,55981.0,0.191413,0.237377,-0.007429,-419.0,0.110238,0.116267,0.025463,85293.0,181900.0,53041480.0
VT,53132.0,53132.0,0.180666,0.223668,52190.0,52190.0,0.178451,0.221302,-0.017729,-942.0,0.056804,0.093768,0.017025,43950.0,146700.0,35465620.0
BA,33067.0,33067.0,0.112438,0.139201,32861.0,32861.0,0.11236,0.139341,-0.00623,-206.0,0.452987,0.293321,0.85049,350485.0,458900.0,1771648000.0
ST,16332.0,,0.055534,,16233.0,,0.055505,,-0.006062,-99.0,,,,,,
GG,7456.0,,0.025353,,6969.0,,0.023829,,-0.065317,-487.0,,,,,,
CC,5533.0,,0.018814,,5870.0,,0.020071,,0.060907,337.0,,,,,,
3D,3073.0,,0.010449,,3000.0,,0.010258,,-0.023755,-73.0,,,,,,
WH,2958.0,,0.010058,,3132.0,,0.010709,,0.058824,174.0,,,,,,
SR,2710.0,,0.009215,,2531.0,,0.008654,,-0.066052,-179.0,,,,,,


In [None]:
RR[RR.index.str.contains('AC:1-142')]

In [9]:
RR[RR.index.str.contains('AC:1T-142')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
AC:1T-142,,,2763.0,0.009447


Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:1000,,,1604.0,0.005484


In [12]:
RR[RR.index.str.contains('CA:')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
CA:12868,1190.0,0.004046,1329.0,0.004544
CA:12869,734.0,0.002496,870.0,0.002975
CA:12867,366.0,0.001245,421.0,0.00144


Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
AC:1-142,2171.0,0.007382,,


In [None]:
RR[RR.index.str.contains('SF:1000')]

In [13]:
#The Central Subway Project will improve public transportation in San Francisco by extending the Muni Metro T Third Line through SoMa

In [14]:
#J
RR[RR.index.str.contains('SF:12475')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:12475,2382.0,0.0081,2330.0,0.007967


In [15]:
#K/T
RR[RR.index.str.contains('SF:12476')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:12476,4182.0,0.01422,4283.0,0.014645


In [16]:
#L
RR[RR.index.str.contains('SF:12477')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:12477,3154.0,0.010725,3052.0,0.010436


In [17]:
#M
RR[RR.index.str.contains('SF:12478')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:12478,3180.0,0.010813,3137.0,0.010726


In [18]:
#N
RR[RR.index.str.contains('SF:12479')]

Unnamed: 0,Baseline2019,Baseline2019 shares,Future2019,Future2019 shares
SF:12479,4445.0,0.015114,4258.0,0.014559


In [26]:
for year in years:
    
    print('total SF light rail baseline 2018',
         (list(RR[RR.index.str.contains('SF:12475')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12476')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12477')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12478')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12479')]['Baseline'+year])[0])
         )
    print('total SF light rail future 2018',
         (list(RR[RR.index.str.contains('SF:12475')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12476')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12477')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12478')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12479')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:1000')]['Future'+year])[0])
         )


total SF light rail baseline 17343.0
total SF light rail future 18664.0


In [19]:
#########Analysis of what agents used in the baseline
index_baseline
index_future
fp_PtoPTss_baseline = '/Users/cpoliziani/Downloads/PtoPTss0 (1).csv.gz'
fp_PtoPTss_future = '/Users/cpoliziani/Downloads/PtoPTss1 (1).csv.gz'

print('read PtoPTss baseline')
PtoPTss_baseline = pd.read_csv(fp_PtoPTss_baseline)
print('read PtoPTss TR')
PtoPTss_TR = pd.read_csv(fp_PtoPTss_future)
print('read plans baseline')
plans_baseline = pd.read_csv(dataFilepath_sim[index_baseline][:-13]+'plans.csv.gz')
# print('read plans TR')
plans_TR = pd.read_csv(dataFilepath_sim[index_future][:-13]+'plans.csv.gz')

line_1T_trips_TR = pd.read_csv(GTFS_AC2+'trips.txt')
line_1T_trips_TR = line_1T_trips_TR[line_1T_trips_TR['route_id'].str.contains('1T-142')]['trip_id']

line_CS_trips_TR = pd.read_csv(GTFS_SF2+'trips.txt')
line_CS_trips_TR = line_CS_trips_TR[line_CS_trips_TR['route_id']==1000]['trip_id']

line_CA_trips_TR = pd.read_csv(GTFS_Caltrain2+'trips.txt')

PtoPTss_AC_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('AC', na=False)]
PtoPTss_SF_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('SF', na=False)]
PtoPTss_CA_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('Ca', na=False)]

persons_baseline = pd.DataFrame(['person','plan_index','mode_baseline','vehicle_baseline','agency_baseline',
                                 'x_activity_TR','y_activity_TR','vehicle_now'])
i = 0
PtoPTss=pd.conc(PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR)
for vehicle, person, plan_index in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'] ):
    if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
        i+=1
        persons_baseline.loc[i,'person'] = person
        persons_baseline.loc[i,'plan_index'] = plan_index
        persons_baseline.loc[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
        persons_baseline.loc[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
        persons_baseline.loc[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
        persons_baseline.loc[i,'vehicle_now'] = '1T'
        try:
            persons_baseline.loc[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
            persons_baseline.loc[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
            persons_baseline.loc[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
            persons_baseline.loc[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
            persons_baseline.loc[i,'speed_length'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
        except:
            persons_baseline.loc[i,'vehicle_baseline'] = None
            persons_baseline.loc[i,'agency_baseline'] = None
            print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')

read PtoPTss baseline


  PtoPTss_baseline = pd.read_csv('/Users/cpoliziani/Downloads/PtoPTss0 (1).csv.gz')


read PtoPTss TR


  PtoPTss_TR = pd.read_csv('/Users/cpoliziani/Downloads/PtoPTss1 (1).csv.gz')


read plans baseline
read plans TR


In [27]:
PtoPTss_baseline

Unnamed: 0.1,Unnamed: 0,vehicleID,pathTraversalID,personID,planIndex,mode,length,duration,primaryFuelType,vehicle2
0,0,0,3788556,1951656,1,car_hov3,7814.880,404,Diesel,0
1,1,0,3994598,1951656,1,car,3798.358,237,Diesel,0
2,2,0,24000022,1951656,7,car_hov3,5043.540,301,Diesel,0
3,3,0,24011121,1951656,7,car_hov3,524.433,30,Diesel,0
4,4,1,2091068,1951655,1,car,2508.629,144,Gasoline,1
...,...,...,...,...,...,...,...,...,...,...
7914299,7914299,westcat-ca-us:160436A2376B3650,34047278,2282643,3,bus,37439.564,2400,Diesel,we
7914300,7914300,westcat-ca-us:160436A2376B3650,34147987,4095524,3,bus,2847.288,300,Diesel,we
7914301,7914301,westcat-ca-us:160436A2376B3650,34147987,3720284,3,bus,2847.288,300,Diesel,we
7914302,7914302,westcat-ca-us:160436A2376B3650,34147987,455385,9,bus,2847.288,300,Diesel,we


In [32]:

persons_baseline = pd.DataFrame(['person','plan_index','mode_baseline','vehicle_baseline','agency_baseline',
                                 'x_activity_TR','y_activity_TR','vehicle_now'])
i = 0
PtoPTss=pd.concat([PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR])
for vehicle, person, plan_index in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'] ):
    if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
        i+=1
        try:

            persons_baseline.loc[i,'person'] = person
            persons_baseline.loc[i,'plan_index'] = plan_index
            persons_baseline.loc[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
            persons_baseline.loc[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
            persons_baseline.loc[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
            persons_baseline.loc[i,'vehicle_now'] = '1T'
            persons_baseline.loc[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
            persons_baseline.loc[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
            persons_baseline.loc[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
            persons_baseline.loc[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
            persons_baseline.loc[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
            persons_baseline.loc[i,'speed_length'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
        except:
            persons_baseline.loc[i,'person'] = person
            persons_baseline.loc[i,'plan_index'] = plan_index

            print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')

PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641806
PtoPTss_baseline not found person and plan index 5517044 5 AC:5641811
PtoPTss_baseline not

PtoPTss_baseline not found person and plan index 4047976 9 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 2376757 3 AC:5641878
PtoPTss_baseline not found person and plan index 5912554 5 AC:5641879
PtoPTss_baseline not found person and plan index 4501103 11 AC:5641879
PtoPTss_baseline not found person and plan index 846635 9 AC:5641879
PtoPTss_baseline not found person and plan index 4501103 11 AC:5641879
PtoPTss_baseline not found person and plan index 7261217 5 AC:5641879
PtoPTss_baseline not found person and plan index 846635 9 AC:5641879
PtoPTss_baseline not found person and plan index 7261217 5 AC:5641879
PtoPTss_baseline not

PtoPTss_baseline not found person and plan index 1596275 15 AC:5641894
PtoPTss_baseline not found person and plan index 1596275 15 AC:5641894
PtoPTss_baseline not found person and plan index 3100129 7 AC:5641895
PtoPTss_baseline not found person and plan index 3100129 7 AC:5641895
PtoPTss_baseline not found person and plan index 3100129 7 AC:5641895
PtoPTss_baseline not found person and plan index 3100129 7 AC:5641895
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 6563115 7 AC:5641898
PtoPTss_baseline not found person and plan index 3114581 7 AC:5641899
PtoPTss_baseline n

PtoPTss_baseline not found person and plan index 4424416 9 AC:5641923
PtoPTss_baseline not found person and plan index 4424416 9 AC:5641923
PtoPTss_baseline not found person and plan index 429135 13 AC:5641923
PtoPTss_baseline not found person and plan index 429135 13 AC:5641923
PtoPTss_baseline not found person and plan index 429135 13 AC:5641923
PtoPTss_baseline not found person and plan index 429135 13 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_baseline not found person and plan index 6764684 17 AC:5641923
PtoPTss_base

PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6125086 9 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6125086 9 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_baseline not found person and plan index 6125086 9 AC:5641943
PtoPTss_baseline not found person and plan index 6972241 11 AC:5641943
PtoPTss_b

PtoPTss_baseline not found person and plan index 917727 7 AC:5641951
PtoPTss_baseline not found person and plan index 917727 7 AC:5641951
PtoPTss_baseline not found person and plan index 917727 7 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 229919 13 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 229919 13 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 229919 13 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not found person and plan index 2411584 3 AC:5641951
PtoPTss_baseline not fo

PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 1396528 11 AC:5641974
PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 1396528 11 AC:5641974
PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 1396528 11 AC:5641974
PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 1396528 11 AC:5641974
PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 4844260 17 AC:5641974
PtoPTss_baseline not found person and plan index 7269528 9 AC:5641975
PtoPTss_baseline not found person and plan index 299333 1 AC:5641987
PtoPTss_baseline not found person and plan index 299333 1 AC:5641987
PtoPTss_baseline not found person and plan index 299333 1 AC:5641987
PtoPTss_basel

PtoPTss_baseline not found person and plan index 6974740 5 AC:5642046
PtoPTss_baseline not found person and plan index 7100762 5 AC:5642046
PtoPTss_baseline not found person and plan index 7264334 17 AC:5642049
PtoPTss_baseline not found person and plan index 7264334 17 AC:5642049
PtoPTss_baseline not found person and plan index 7264334 17 AC:5642049
PtoPTss_baseline not found person and plan index 7264334 17 AC:5642049
PtoPTss_baseline not found person and plan index 7264334 17 AC:5642049
PtoPTss_baseline not found person and plan index 3340136 5 AC:5642051
PtoPTss_baseline not found person and plan index 3340136 5 AC:5642051
PtoPTss_baseline not found person and plan index 4354361 7 AC:5642051
PtoPTss_baseline not found person and plan index 3340136 5 AC:5642051
PtoPTss_baseline not found person and plan index 4354361 7 AC:5642051
PtoPTss_baseline not found person and plan index 3340136 5 AC:5642051
PtoPTss_baseline not found person and plan index 3340136 5 AC:5642051
PtoPTss_baselin

PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseline not found person and plan index 5971896 7 AC:5642066
PtoPTss_baseline not found person and plan index 6764684 21 AC:5642066
PtoPTss_baseli

PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 4644099 11 AC:5642070
PtoPTss_baseline not found person and plan index 6634658 11 AC:5642071
PtoPTss_baseline not found person and plan index 6634658 11 AC:5642071
PtoPTss_baseline not found person and plan index 6634658 11 AC:5642071
PtoPTss_baseline not found person and plan index 6634658 11 AC:5642071
PtoPTs

PtoPTss_baseline not found person and plan index 6525345 7 AC:5642139
PtoPTss_baseline not found person and plan index 6525345 7 AC:5642139
PtoPTss_baseline not found person and plan index 6525345 7 AC:5642139
PtoPTss_baseline not found person and plan index 6525345 7 AC:5642139
PtoPTss_baseline not found person and plan index 6525345 7 AC:5642139
PtoPTss_baseline not found person and plan index 2839412 7 AC:5642140
PtoPTss_baseline not found person and plan index 2839412 7 AC:5642140
PtoPTss_baseline not found person and plan index 2839412 7 AC:5642140
PtoPTss_baseline not found person and plan index 2839412 7 AC:5642140
PtoPTss_baseline not found person and plan index 2839412 7 AC:5642140
PtoPTss_baseline not found person and plan index 4904085 5 AC:5642141
PtoPTss_baseline not found person and plan index 4904085 5 AC:5642141
PtoPTss_baseline not found person and plan index 6128925 7 AC:5642144
PtoPTss_baseline not found person and plan index 6128925 7 AC:5642144
PtoPTss_baseline not

PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 2362957 5 AC:5642163
PtoPTss_baseline not found person and plan index 3212578 5 AC:5642164
PtoPTss_baseline not found person and plan index 3212578 5 AC:5642164
PtoPTss_baseline not found person and plan index 3212578 5 AC:5642164
PtoPTss_baseline not found person and plan index 4442599 7 AC:5642164
PtoPTss_baseline not found person and plan index 3212578 5 AC:5642164
PtoPTss_baseline not found person and plan index 4442599 7 AC:5642164
PtoPTss_baseline not

ValueError: invalid literal for int() with base 10: '139-clone-1'