In [1]:
#Imports
import numpy as np
import pandas as pd
import geopandas as gpd
import difflib
import matplotlib.pyplot as plt
import time

def  processEvents(dataFilepath_sim, nrows):
    PTs = []
    PEVs = []
    print('read', dataFilepath_sim)
    for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):
        if sum((chunk['type'] == 'PathTraversal')) > 0:
            chunk['vehicle'] = chunk['vehicle'].astype(str)
            PT = chunk.loc[(chunk['type'] == 'PathTraversal') & (chunk['length'] > 0)].dropna(how='all', axis=1)
            PT['departureTime'] = PT['departureTime'].astype(int)
            PT['arrivalTime'] = PT['arrivalTime'].astype(int)

            PTs.append(PT[['driver', 'vehicle', 'mode', 'length', 'startX', 'startY', 'endX', 'endY', 'vehicleType',
                           'arrivalTime', 'departureTime', 'primaryFuel', 'primaryFuelType', 'secondaryFuel',
                           'secondaryFuelType', 'numPassengers', 'riders','time']])
            print(chunk.type.value_counts())
            PEV = chunk.loc[(chunk.type == "PersonEntersVehicle") &
                            ~(chunk['person'].apply(str).str.contains('Agent').fillna(False)) &
                            ~(chunk['vehicle'].str.contains('body').fillna(False)), :].dropna(how='all', axis=1)
            if ~PEV.empty:
                PEV['time'] = PEV['time'].astype(int)
                PEVs.append(PEV)

    Events_file_sim_PT = pd.concat(PTs)[['vehicle','time','endY','endX','startY','startX','mode']]
    Events_file_sim_PE = pd.concat(PEVs)[['vehicle','time',]]
    print(Events_file_sim_PE)

    return Events_file_sim_PT, Events_file_sim_PE



#Filter transit trips
def filter_transit(Events_file_sim_PT,Events_file_sim_PE):
    print(Events_file_sim_PT['mode'].value_counts())
    Events_file_sim_PT = Events_file_sim_PT[(Events_file_sim_PT['mode'] == 'bus')|
                                            (Events_file_sim_PT['mode'] == 'tram')|
                                            (Events_file_sim_PT['mode'] == 'subway')|
                                            (Events_file_sim_PT['mode'] == 'cable_car')|
                                            (Events_file_sim_PT['mode'] == 'ferry')|
                                            (Events_file_sim_PT['mode'] == 'rail')]
    print(Events_file_sim_PT['mode'].value_counts())
    Events_file_sim_PE = Events_file_sim_PE[Events_file_sim_PE['vehicle'].isin(Events_file_sim_PT['vehicle'])]
    
    return Events_file_sim_PE

def guess_agency(Events_file_sim_PE):

    agencies = []
    for vehicleID in Events_file_sim_PE['vehicle']:
        agency = vehicleID.split(':')[0]
        if agency == 'petalumatransit-petaluma-ca-us':
            agencies.append('PE')
        elif agency == 'westcat-ca-us':
            agencies.append('WC')
        elif agency == 'caltrain-ca-us':
            agencies.append('CA')
        elif agency == 'riovista-ca-us':
            agencies.append('RV')
        elif agency == 'unioncity-ca-us':
            agencies.append('UC')
        else:
            if len(agency) == 2:
                agencies.append(agency)
            elif agency == 'Caltrain':
                agencies.append('CA')
            else:
                print('Warning, this agency is not recognized:', agency)
    Events_file_sim_PE['agency'] = agencies

    print(np.unique(agencies))
    
    return Events_file_sim_PE

def guess_route(Events_file_sim_PE, GTFS_filepaths):

    GTFS_trip_files = {}

    for GTFS_filepath, GTFS in zip(GTFS_filepaths,GTFSs):
        GTFS_trip_files[GTFS] = pd.read_csv(GTFS_filepath+'trips.txt')

    route_ids = []
    total_routes = len(Events_file_sim_PE['vehicle'])
    i = 0
    time_start = time.time()
    for vehicle, agency in zip(Events_file_sim_PE['vehicle'],Events_file_sim_PE['agency']):
        i+=1
        if i%10000 ==0:
            print(i,'/',total_routes,'. Time = ', time.time()-time_start, '. Estimated remaining time:', (time.time()-time_start)/i*total_routes-(time.time()-time_start))
        if agency == 'SM':
            route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][
                GTFS_trip_files[agency]['trip_id'].astype(str)==
                     str(vehicle).split(':')[1]+'|'
                     +str(vehicle).split(':')[2]+':'
                     +str(vehicle).split(':')[3]+'|'
                     +str(vehicle).split(':')[4]+':'
                     +str(vehicle).split(':')[5]+':'
                     +str(vehicle).split(':')[6]])[0]))
        else:
#             print(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0])
            try:
                route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0]))
            except:
                print('Warning, trip non found for vehicle', vehicle)
                route_ids.append('tripID not found')
    #     elif agency == 'GG':
    #         route_ids.append('GG:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
    #     elif agency == 'SF':
    #         route_ids.append('SF:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
    Events_file_sim_PE['route_id'] = route_ids
    
    return Events_file_sim_PE











#####################################################################################
# dataFilepath_sim = [
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#             ]

                    
# years = ['2018','2019','2020','2021']
#####################################################################################
dataFilepath_sim = [
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/sfbay-baseline-20220822/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
                's3://beam-outputs/pilates-outputs/test_49_3/beam/year-2018-iteration-3/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
            ]

                    
years = ['2018']

names = []
for year in years:
    names.append('Baseline'+year)
    names.append('Future'+year)

output_filepath = '/Users/cpoliziani/Downloads/Transit Rich/Results/'

GTFS_filepath = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local/'
GTFS_filepath2 = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local-TR-49/'

RA_output = 'agency_ridershipNew3.csv'
RR_output = 'route_ridershipNew3.csv'

GTFSs = ['3D','AC','AM','AY','BA','CA','CC','CE','CM','CT','DE','EM','GG','HF','MA','PE','RV', 'SB',
        'SC','SF','SM','SO','SR','ST','TD','UC','VC','VN','VT','WC','WH']

GTFS_3D = GTFS_filepath+'3D/'
GTFS_AC = GTFS_filepath+'AC/'
GTFS_AM = GTFS_filepath+'AM/'
GTFS_AY = GTFS_filepath+'AY/'
GTFS_BA = GTFS_filepath+'BA/'
GTFS_Caltrain = GTFS_filepath+'Caltrain/'
GTFS_CC = GTFS_filepath+'CC/'
GTFS_CE = GTFS_filepath+'CE/'
GTFS_CM = GTFS_filepath+'CM/'
GTFS_CT = GTFS_filepath+'CT/'
GTFS_DE = GTFS_filepath+'DE/'
GTFS_EM = GTFS_filepath+'EM/'
GTFS_GG = GTFS_filepath+'GG/'
GTFS_HF = GTFS_filepath+'HF/'
GTFS_MA = GTFS_filepath+'MA/'
GTFS_PE = GTFS_filepath+'PE/'
GTFS_RV = GTFS_filepath+'RV/'
GTFS_SB = GTFS_filepath+'SB/'
GTFS_SC = GTFS_filepath+'SC/'
GTFS_SF = GTFS_filepath+'SF/'
GTFS_SM = GTFS_filepath+'SM/'
GTFS_SO = GTFS_filepath+'SO/'
GTFS_SR = GTFS_filepath+'SR/'
GTFS_ST = GTFS_filepath+'ST/'
GTFS_TD = GTFS_filepath+'TD/'
GTFS_UC = GTFS_filepath+'UC/'
GTFS_VC = GTFS_filepath+'VC/'
GTFS_VN = GTFS_filepath+'VN/'
GTFS_VT = GTFS_filepath+'VT/'
GTFS_WC = GTFS_filepath+'WC/'
GTFS_WH = GTFS_filepath+'WH/'


GTFS_3D2 = GTFS_filepath2+'3D/'
GTFS_AC2 = GTFS_filepath2+'AC/'
GTFS_AM2 = GTFS_filepath2+'AM/'
GTFS_AY2 = GTFS_filepath2+'AY/'
GTFS_BA2 = GTFS_filepath2+'BA/'
GTFS_Caltrain2 = GTFS_filepath2+'Caltrain/'
GTFS_CC2 = GTFS_filepath2+'CC/'
GTFS_CE2 = GTFS_filepath2+'CE/'
GTFS_CM2 = GTFS_filepath2+'CM/'
GTFS_CT2 = GTFS_filepath2+'CT/'
GTFS_DE2 = GTFS_filepath2+'DE/'
GTFS_EM2 = GTFS_filepath2+'EM/'
GTFS_GG2 = GTFS_filepath2+'GG/'
GTFS_HF2 = GTFS_filepath2+'HF/'
GTFS_MA2 = GTFS_filepath2+'MA/'
GTFS_PE2 = GTFS_filepath2+'PE/'
GTFS_RV2 = GTFS_filepath2+'RV/'
GTFS_SB2 = GTFS_filepath2+'SB/'
GTFS_SC2 = GTFS_filepath2+'SC/'
GTFS_SF2 = GTFS_filepath2+'SF/'
GTFS_SM2 = GTFS_filepath2+'SM/'
GTFS_SO2 = GTFS_filepath2+'SO/'
GTFS_SR2 = GTFS_filepath2+'SR/'
GTFS_ST2 = GTFS_filepath2+'ST/'
GTFS_TD2 = GTFS_filepath2+'TD/'
GTFS_UC2 = GTFS_filepath2+'UC/'
GTFS_VC2 = GTFS_filepath2+'VC/'
GTFS_VN2 = GTFS_filepath2+'VN/'
GTFS_VT2 = GTFS_filepath2+'VT/'
GTFS_WC2 = GTFS_filepath2+'WC/'
GTFS_WH2 = GTFS_filepath2+'WH/'

GTFS_baseline = [GTFS_3D,
                    GTFS_AC,
                    GTFS_AM,
                    GTFS_AY,
                    GTFS_BA,
                    GTFS_Caltrain,
                    GTFS_CC,
                    GTFS_CE,
                    GTFS_CM,
                    GTFS_CT,
                    GTFS_DE,
                    GTFS_EM,
                    GTFS_GG,
                    GTFS_HF,
                    GTFS_MA,
                    GTFS_PE,
                    GTFS_RV,
                    GTFS_SB,
                    GTFS_SC,
                    GTFS_SF,
                    GTFS_SM,
                    GTFS_SO,
                    GTFS_SR,
                    GTFS_ST,
                    GTFS_TD,
                    GTFS_UC,
                    GTFS_VC,
                    GTFS_VN,
                    GTFS_VT,
                    GTFS_WC,
                    GTFS_WH]

GTFS_TR = [GTFS_3D2,
                    GTFS_AC2,
                    GTFS_AM2,
                    GTFS_AY2,
                    GTFS_BA2,
                    GTFS_Caltrain2,
                    GTFS_CC2,
                    GTFS_CE2,
                    GTFS_CM2,
                    GTFS_CT2,
                    GTFS_DE2,
                    GTFS_EM2,
                    GTFS_GG2,
                    GTFS_HF2,
                    GTFS_MA2,
                    GTFS_PE2,
                    GTFS_RV2,
                    GTFS_SB2,
                    GTFS_SC2,
                    GTFS_SF2,
                    GTFS_SM2,
                    GTFS_SO2,
                    GTFS_SR2,
                    GTFS_ST2,
                    GTFS_TD2,
                    GTFS_UC2,
                    GTFS_VC2,
                    GTFS_VN2,
                    GTFS_VT2,
                    GTFS_WC2,
                    GTFS_WH2]
GTFS_filepaths = [GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR]



nrows = None



In [2]:


#Ridership route
RR = pd.DataFrame()
#Ridership agency
RA = pd.DataFrame()

for fp, name, GTFS_filepath in zip(dataFilepath_sim,names, GTFS_filepaths):
    print('evaluate ridership')
    #import pathtraversal and person enter vehicles
    PT, PE = processEvents(fp, nrows)
    #filter PE transit trips from PT
    PE = filter_transit(PT, PE)
    #Guess transit agency for each PE
    PE = guess_agency(PE)
    #Guess transit route for each PE
    PE = guess_route(PE, GTFS_filepath)
    #Route Ridership
    rr = PE['route_id'].value_counts()
    sum_agency = 0
    sum_agency_bsva = 0
    sum_route = 0
    for route, count in zip(rr.keys(), rr):
        RR.at[route, name] = count
        sum_route += count
    ra = PE['agency'].value_counts()
    for agency, count in zip(ra.keys(), ra):
        RA.at[agency, name] = count
        sum_agency += count
        if agency in ['BA', 'SF', 'VT', 'AC']:
            RA.at[agency, name+' BA-SF-VT-AC'] = count
            sum_agency_bsva += count
    for route, count in zip(rr.keys(), rr):
        RR.at[route, name+' shares'] = count/sum_route
    ra = PE['agency'].value_counts()
    for agency, count in zip(ra.keys(), ra):
        RA.at[agency, name+' shares'] = count/sum_agency
        if agency in ['BA', 'SF', 'VT', 'AC']:
            RA.at[agency, name+' shares BA-SF-VT-AC'] = count/sum_agency_bsva
    RA.to_csv(output_filepath+RA_output)
    RR.to_csv(output_filepath+RR_output)

for year in years:
    diff = []
    diff_abs = []
    for baseline, future in zip(RA['Baseline'+year],RA['Future'+year]):
        diff.append((future-baseline)/baseline)
        diff_abs.append((future-baseline))
    RA['Diff %'+year] = diff
    RA['Diff'+year] = diff_abs

tot_NTD = 1756364558 + 15283299+5703705+49795740+110802986+7386518+49247910+27027693 + 8437926+50222832 + 2818648
    
RA.at['BA', 'clipper BA-SF-VT-AC 2016 share target'] = 350485/773719
RA.at['BA', 'MTC BA-SF-VT-AC 2016 share target'] = 458900/1564500
RA.at['BA', 'NTD BA-SF-VT-AC 2019 share target'] = (1756364558 + 15283299)/tot_NTD

RA.at['SF', 'clipper BA-SF-VT-AC 2016 share target'] = 293991/773719
RA.at['SF', 'MTC BA-SF-VT-AC 2016 share target'] = 777000/1564500
RA.at['SF', 'NTD BA-SF-VT-AC 2019 share target'] = (5703705+49795740+110802986+7386518+49247910)/tot_NTD

RA.at['VT', 'clipper BA-SF-VT-AC 2016 share target'] = 43950/773719
RA.at['VT', 'MTC BA-SF-VT-AC 2016 share target'] = 146700/1564500
RA.at['VT', 'NTD BA-SF-VT-AC 2019 share target'] = (27027693 + 8437926)/tot_NTD

RA.at['AC', 'clipper BA-SF-VT-AC 2016 share target'] = 85293/773719
RA.at['AC', 'MTC BA-SF-VT-AC 2016 share target'] = 181900/1564500
RA.at['AC', 'NTD BA-SF-VT-AC 2019 share target'] = (50222832 + 2818648)/tot_NTD

RA.at['BA', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 350485
RA.at['BA', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 458900
RA.at['BA', 'NTD BA-SF-VT-AC 2019 ridership target'] = (1756364558 + 15283299)

RA.at['SF', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 293991
RA.at['SF', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 777000
RA.at['SF', 'NTD BA-SF-VT-AC 2019 ridership target'] = (5703705+49795740+110802986+7386518+49247910)

RA.at['VT', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 43950
RA.at['VT', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 146700
RA.at['VT', 'NTD BA-SF-VT-AC 2019 ridership target'] =  (27027693 + 8437926)

RA.at['AC', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 85293
RA.at['AC', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 181900
RA.at['AC', 'NTD BA-SF-VT-AC 2019 ridership target'] = (50222832 + 2818648)

RA.to_csv(output_filepath+RA_output)
RR.to_csv(output_filepath+RR_output)




evaluate ridership
read s3://beam-outputs/pilates-outputs/sfbay-baseline-20220822/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           355896
PersonEntersVehicle     254363
departure               183651
PersonLeavesVehicle     134821
ModeChoice              101597
actend                  101533
LeavingParkingEvent      79407
actstart                 75088
arrival                  75088
PersonCost               66631
ParkingEvent             60125
TeleportationEvent        9433
ReserveRideHail           2291
Replanning                  64
ChargingPlugInEvent          4
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal          375409
PersonEntersVehicle    188683
PersonLeavesVehicle    153238
ModeChoice             119235
actend                 118915
departure              118915
arrival                 92021
actstart                92017
LeavingParkingEvent     82265
PersonCost              73897
ParkingEvent            64421
TeleportationEvent      17778
ReserveRideHail          2959
Replanning                247
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           371352
PersonEntersVehicle     177833
PersonLeavesVehicle     170840
ModeChoice              109892
departure               108884
actend                  108884
actstart                103726
arrival                 103722
PersonCost               78520
LeavingParkingEvent      76580
ParkingEvent             66546
TeleportationEvent       19400
ReserveRideHail           2866
Replanning                 942
ChargingPlugInEvent          7
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           360538
PersonEntersVehicle     179133
PersonLeavesVehicle     167783
ModeChoice              113911
actend                  112886
departure               112886
arrival                 102941
actstart                102941
LeavingParkingEvent      78607
PersonCost               77035
ParkingEvent             65108
TeleportationEvent       21051
ReserveRideHail           4033
Replanning                1129
RefuelSessionEvent           7
ChargingPlugOutEvent         7
ChargingPlugInEvent          4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           370499
PersonLeavesVehicle     179405
PersonEntersVehicle     169805
arrival                 110870
actstart                110870
ModeChoice              104727
actend                  103216
departure               103205
PersonCost               80093
LeavingParkingEvent      73771
ParkingEvent             66627
TeleportationEvent       21761
ReserveRideHail           3810
Replanning                1330
ChargingPlugInEvent          5
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           400705
PersonLeavesVehicle     191442
PersonEntersVehicle     164402
arrival                 111469
actstart                111467
ModeChoice               92121
departure                91585
actend                   91574
PersonCost               82246
LeavingParkingEvent      74131
ParkingEvent             68161
TeleportationEvent       15499
ReserveRideHail           4438
Replanning                 748
RefuelSessionEvent           5
ChargingPlugOutEvent         5
ChargingPlugInEvent          2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           395153
PersonEntersVehicle     175031
PersonLeavesVehicle     174877
arrival                 101337
actstart                101310
ModeChoice              101114
actend                  100854
departure               100854
PersonCost               80562
LeavingParkingEvent      80165
ParkingEvent             68312
TeleportationEvent       14519
ReserveRideHail           5625
Replanning                 277
ChargingPlugInEvent          4
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           389098
PersonEntersVehicle     175192
PersonLeavesVehicle     172723
ModeChoice              104194
actend                  103080
departure               103080
actstart                100938
arrival                 100912
PersonCost               80675
LeavingParkingEvent      80344
ParkingEvent             69079
TeleportationEvent       14597
ReserveRideHail           5827
Replanning                 252
RefuelSessionEvent           3
ChargingPlugOutEvent         3
ChargingPlugInEvent          3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           387465
PersonEntersVehicle     174840
PersonLeavesVehicle     172339
actend                  103768
departure               103768
ModeChoice              103134
arrival                 102465
actstart                102465
LeavingParkingEvent      80719
PersonCost               79245
ParkingEvent             68683
TeleportationEvent       15103
ReserveRideHail           5798
Replanning                 203
RefuelSessionEvent           2
ChargingPlugOutEvent         2
ChargingPlugInEvent          1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           383346
PersonEntersVehicle     175816
PersonLeavesVehicle     171648
ModeChoice              105965
departure               105744
actend                  105744
actstart                101544
arrival                 101542
LeavingParkingEvent      79828
PersonCost               79434
ParkingEvent             68312
TeleportationEvent       14724
ReserveRideHail           6055
Replanning                 288
ChargingPlugInEvent          6
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           380421
PersonEntersVehicle     175058
PersonLeavesVehicle     172008
ModeChoice              105279
departure               104945
actend                  104945
arrival                 102146
actstart                102146
PersonCost               80763
LeavingParkingEvent      80351
ParkingEvent             70431
TeleportationEvent       15855
ReserveRideHail           5412
Replanning                 228
RefuelSessionEvent           5
ChargingPlugOutEvent         5
ChargingPlugInEvent          2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           380431
PersonLeavesVehicle     173197
PersonEntersVehicle     172615
ModeChoice              104905
departure               104800
actend                  104800
arrival                 104537
actstart                104523
PersonCost               79804
LeavingParkingEvent      78316
ParkingEvent             69081
TeleportationEvent       17119
ReserveRideHail           5702
Replanning                 166
ChargingPlugInEvent          2
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           379263
PersonEntersVehicle     171411
PersonLeavesVehicle     169973
ModeChoice              107439
actend                  106726
departure               106726
actstart                105753
arrival                 105749
PersonCost               78108
LeavingParkingEvent      76377
ParkingEvent             66778
TeleportationEvent       19677
ReserveRideHail           5796
Replanning                 219
RefuelSessionEvent           2
ChargingPlugOutEvent         2
ChargingPlugInEvent          1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           368416
PersonEntersVehicle     172739
PersonLeavesVehicle     166430
actend                  112218
departure               112217
ModeChoice              112102
actstart                106387
arrival                 106376
PersonCost               76621
LeavingParkingEvent      73809
ParkingEvent             64239
TeleportationEvent       21972
ReserveRideHail           6241
Replanning                 229
ChargingPlugInEvent          2
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           374403
PersonEntersVehicle     175148
PersonLeavesVehicle     168178
ModeChoice              109598
departure               109501
actend                  109500
arrival                 105505
actstart                105505
PersonCost               76373
LeavingParkingEvent      75085
ParkingEvent             64240
TeleportationEvent       20861
ReserveRideHail           5839
Replanning                 253
RefuelSessionEvent           4
ChargingPlugOutEvent         4
ChargingPlugInEvent          3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           382615
PersonLeavesVehicle     173897
PersonEntersVehicle     173412
arrival                 105206
actstart                105203
ModeChoice              104764
actend                  104437
departure               104437
PersonCost               79592
LeavingParkingEvent      75778
ParkingEvent             67448
TeleportationEvent       17742
ReserveRideHail           5288
Replanning                 179
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           372562
PersonEntersVehicle     185190
PersonLeavesVehicle     166337
ModeChoice              111378
actend                  111237
departure               111237
actstart                 98754
arrival                  98752
LeavingParkingEvent      80376
PersonCost               77182
ParkingEvent             65939
TeleportationEvent       15713
ReserveRideHail           5074
Replanning                 256
ChargingPlugInEvent          7
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           382599
PersonLeavesVehicle     177967
PersonEntersVehicle     175365
arrival                 103876
actstart                103850
ModeChoice              102082
departure               101811
actend                  101811
PersonCost               82619
LeavingParkingEvent      76967
ParkingEvent             70219
TeleportationEvent       15671
ReserveRideHail           4836
Replanning                 312
RefuelSessionEvent           6
ChargingPlugOutEvent         6
ChargingPlugInEvent          3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           373399
PersonEntersVehicle     182085
PersonLeavesVehicle     174131
ModeChoice              108073
actend                  107169
departure               107168
actstart                101334
arrival                 101330
PersonCost               79763
LeavingParkingEvent      78284
ParkingEvent             67674
TeleportationEvent       14008
ReserveRideHail           5041
Replanning                 528
ChargingPlugInEvent          7
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           382406
PersonLeavesVehicle     180533
PersonEntersVehicle     174571
actstart                105111
arrival                 105088
ModeChoice              101257
departure               100247
actend                  100246
PersonCost               82211
LeavingParkingEvent      76907
ParkingEvent             70026
TeleportationEvent       15053
ReserveRideHail           5571
Replanning                 757
RefuelSessionEvent           6
ChargingPlugOutEvent         6
ChargingPlugInEvent          4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           401938
PersonLeavesVehicle     195988
PersonEntersVehicle     161799
arrival                 112532
actstart                112531
ModeChoice               89273
departure                88925
actend                   88925
PersonCost               85517
ParkingEvent             72622
LeavingParkingEvent      71219
TeleportationEvent       12890
ReserveRideHail           5241
Replanning                 582
RefuelSessionEvent           7
ChargingPlugOutEvent         7
ChargingPlugInEvent          4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           390804
PersonLeavesVehicle     183231
PersonEntersVehicle     170248
arrival                 107157
actstart                107146
ModeChoice               98326
actend                   98186
departure                98186
PersonCost               81959
LeavingParkingEvent      74601
ParkingEvent             69642
TeleportationEvent       14087
ReserveRideHail           5891
Replanning                 532
ChargingPlugInEvent          2
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           380665
PersonLeavesVehicle     180471
PersonEntersVehicle     171643
actstart                107837
arrival                 107829
ModeChoice              101663
actend                  100808
departure               100808
PersonCost               81755
LeavingParkingEvent      74437
ParkingEvent             69326
TeleportationEvent       15723
ReserveRideHail           6294
Replanning                 736
RefuelSessionEvent           2
ChargingPlugOutEvent         2
ChargingPlugInEvent          1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal          372269
PersonLeavesVehicle    169341
PersonEntersVehicle    126578
actstart                99821
arrival                 99817
PersonCost              71172
ModeChoice              67136
actend                  66849
departure               66849
ParkingEvent            62993
LeavingParkingEvent     60659
TeleportationEvent       9329
ReserveRideHail          3699
Replanning                397
Name: type, dtype: int64
         vehicle    time
172383    229730   18000
172388    398535   18003
172391     30873   18003
172402    663474   18000
172414    172651   18003
...          ...     ...
35776802  202477  211586
35776822  202477  212315
35776838  202477  212631
35776862   92528  214507
35776882   92528  215007

[2061889 rows x 2 columns]
car          2770060
walk         1957021
bus          1337343
car_hov2      405068
car_hov3      286232
tram           54924
bike           34471
cable_car      17836
subway         14358
rail            1702
ferry            

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           356650
PersonEntersVehicle     254021
departure               183362
PersonLeavesVehicle     135477
ModeChoice              100853
actend                  100805
LeavingParkingEvent      79232
arrival                  75379
actstart                 75377
PersonCost               66837
ParkingEvent             60344
TeleportationEvent        9339
ReserveRideHail           2273
Replanning                  45
ChargingPlugInEvent          2
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           377351
PersonEntersVehicle     186896
PersonLeavesVehicle     155501
ModeChoice              117312
departure               117083
actend                  117082
arrival                  93263
actstart                 93242
LeavingParkingEvent      81344
PersonCost               74859
ParkingEvent             65074
TeleportationEvent       17824
ReserveRideHail           2930
Replanning                 234
ChargingPlugInEvent          3
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           368583
PersonEntersVehicle     180281
PersonLeavesVehicle     167778
ModeChoice              112751
actend                  111504
departure               111504
actstart                102031
arrival                 102008
PersonCost               77338
LeavingParkingEvent      77167
ParkingEvent             65490
TeleportationEvent       19518
ReserveRideHail           3034
Replanning                1000
ChargingPlugInEvent          5
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           365333
PersonEntersVehicle     175469
PersonLeavesVehicle     171976
ModeChoice              109714
actend                  108851
departure               108848
arrival                 105780
actstart                105764
PersonCost               78742
LeavingParkingEvent      76729
ParkingEvent             66255
TeleportationEvent       21545
ReserveRideHail           3883
Replanning                1095
RefuelSessionEvent           6
ChargingPlugOutEvent         6
ChargingPlugInEvent          4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           365042
PersonLeavesVehicle     174589
PersonEntersVehicle     173890
ModeChoice              110233
departure               107759
actend                  107756
actstart                107541
arrival                 107525
PersonCost               78132
LeavingParkingEvent      75823
ParkingEvent             65343
TeleportationEvent       21370
ReserveRideHail           3902
Replanning                1089
RefuelSessionEvent           2
ChargingPlugOutEvent         2
ChargingPlugInEvent          2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           401809
PersonLeavesVehicle     192189
PersonEntersVehicle     164477
arrival                 111845
actstart                111841
departure                91021
actend                   91021
ModeChoice               90330
PersonCost               82604
LeavingParkingEvent      74029
ParkingEvent             68094
TeleportationEvent       15448
ReserveRideHail           4593
Replanning                 682
RefuelSessionEvent           6
ChargingPlugOutEvent         6
ChargingPlugInEvent          5
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal          394551
PersonEntersVehicle    175253
PersonLeavesVehicle    174604
actstart               101671
arrival                101668
ModeChoice             101661
actend                 101199
departure              101199
LeavingParkingEvent     80232
PersonCost              79838
ParkingEvent            68039
TeleportationEvent      14431
ReserveRideHail          5382
Replanning                272
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           390541
PersonEntersVehicle     174585
PersonLeavesVehicle     173241
ModeChoice              102718
actend                  102713
departure               102713
actstart                101184
arrival                 101184
PersonCost               80730
LeavingParkingEvent      80209
ParkingEvent             69381
TeleportationEvent       14730
ReserveRideHail           5849
Replanning                 209
ChargingPlugInEvent          7
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           387561
PersonEntersVehicle     173802
PersonLeavesVehicle     173279
ModeChoice              103223
departure               103041
actend                  103041
arrival                 102890
actstart                102861
LeavingParkingEvent      80019
PersonCost               79954
ParkingEvent             69040
TeleportationEvent       15159
ReserveRideHail           5914
Replanning                 197
RefuelSessionEvent           7
ChargingPlugOutEvent         7
ChargingPlugInEvent          5
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           383331
PersonEntersVehicle     176434
PersonLeavesVehicle     170638
ModeChoice              106587
actend                  106335
departure               106334
actstart                101048
arrival                 101018
LeavingParkingEvent      80242
PersonCost               78907
ParkingEvent             68010
TeleportationEvent       14922
ReserveRideHail           5996
Replanning                 184
ChargingPlugInEvent          6
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           382731
PersonEntersVehicle     174730
PersonLeavesVehicle     171537
ModeChoice              104587
departure               104321
actend                  104320
arrival                 102094
actstart                102094
PersonCost               80773
LeavingParkingEvent      80642
ParkingEvent             70355
TeleportationEvent       16079
ReserveRideHail           5486
Replanning                 236
RefuelSessionEvent           6
ChargingPlugOutEvent         6
ChargingPlugInEvent          3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           377001
PersonLeavesVehicle     173331
PersonEntersVehicle     172833
ModeChoice              105951
actend                  105806
departure               105801
actstart                104666
arrival                 104666
PersonCost               79947
LeavingParkingEvent      77989
ParkingEvent             69164
TeleportationEvent       16976
ReserveRideHail           5668
Replanning                 197
ChargingPlugInEvent          2
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           379959
PersonEntersVehicle     172612
PersonLeavesVehicle     169367
ModeChoice              107487
departure               107035
actend                  107030
arrival                 104875
actstart                104869
PersonCost               78084
LeavingParkingEvent      76464
ParkingEvent             66648
TeleportationEvent       19313
ReserveRideHail           5999
Replanning                 251
RefuelSessionEvent           3
ChargingPlugOutEvent         3
ChargingPlugInEvent          1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal          369026
PersonEntersVehicle    172447
PersonLeavesVehicle    166499
ModeChoice             112186
actend                 112185
departure              112185
actstart               106134
arrival                106132
PersonCost              76522
LeavingParkingEvent     74094
ParkingEvent            64302
TeleportationEvent      22037
ReserveRideHail          6005
Replanning                246
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           375654
PersonEntersVehicle     174504
PersonLeavesVehicle     169033
ModeChoice              108506
departure               108255
actend                  108255
arrival                 105931
actstart                105910
PersonCost               76807
LeavingParkingEvent      75521
ParkingEvent             64558
TeleportationEvent       20927
ReserveRideHail           5878
Replanning                 253
ChargingPlugInEvent          4
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           381286
PersonEntersVehicle     173728
PersonLeavesVehicle     173348
ModeChoice              105579
actend                  105288
departure               105287
actstart                105267
arrival                 105242
PersonCost               78942
LeavingParkingEvent      75801
ParkingEvent             66893
TeleportationEvent       17774
ReserveRideHail           5304
Replanning                 257
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           374843
PersonEntersVehicle     185016
PersonLeavesVehicle     166243
ModeChoice              110783
departure               110402
actend                  110401
arrival                  98805
actstart                 98804
LeavingParkingEvent      80351
PersonCost               77138
ParkingEvent             65854
TeleportationEvent       15949
ReserveRideHail           5159
Replanning                 248
ChargingPlugInEvent          2
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           380337
PersonLeavesVehicle     177009
PersonEntersVehicle     176720
ModeChoice              103552
actend                  103330
departure               103330
arrival                 102566
actstart                102559
PersonCost               82864
LeavingParkingEvent      76645
ParkingEvent             70269
TeleportationEvent       15445
ReserveRideHail           4942
Replanning                 419
ChargingPlugInEvent          5
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           375171
PersonEntersVehicle     179969
PersonLeavesVehicle     176101
ModeChoice              106332
actend                  105655
departure               105654
actstart                103293
arrival                 103285
PersonCost               79551
LeavingParkingEvent      77706
ParkingEvent             67635
TeleportationEvent       14079
ReserveRideHail           5132
Replanning                 423
ChargingPlugInEvent          8
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           382661
PersonLeavesVehicle     180720
PersonEntersVehicle     174670
arrival                 104432
actstart                104431
ModeChoice              100695
departure                99963
actend                   99962
PersonCost               83447
LeavingParkingEvent      76556
ParkingEvent             71154
TeleportationEvent       15160
ReserveRideHail           5419
Replanning                 705
RefuelSessionEvent           9
ChargingPlugOutEvent         9
ChargingPlugInEvent          7
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           400006
PersonLeavesVehicle     195781
PersonEntersVehicle     162216
arrival                 112640
actstart                112638
ModeChoice               90345
actend                   89981
departure                89981
PersonCost               84367
LeavingParkingEvent      72010
ParkingEvent             71818
TeleportationEvent       12580
ReserveRideHail           5035
Replanning                 591
RefuelSessionEvent           5
ChargingPlugOutEvent         5
ChargingPlugInEvent          1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           393038
PersonLeavesVehicle     182217
PersonEntersVehicle     170639
arrival                 106483
actstart                106478
ModeChoice               98066
departure                97561
actend                   97561
PersonCost               82341
LeavingParkingEvent      75016
ParkingEvent             69745
TeleportationEvent       14126
ReserveRideHail           6134
Replanning                 581
RefuelSessionEvent           5
ChargingPlugOutEvent         5
ChargingPlugInEvent          4
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal           380366
PersonLeavesVehicle     181078
PersonEntersVehicle     171288
arrival                 108263
actstart                108260
ModeChoice              101534
actend                  100745
departure               100744
PersonCost               81432
LeavingParkingEvent      74421
ParkingEvent             69225
TeleportationEvent       15795
ReserveRideHail           6200
Replanning                 646
ChargingPlugInEvent          1
RefuelSessionEvent           1
ChargingPlugOutEvent         1
Name: type, dtype: int64


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):


PathTraversal          375085
PersonLeavesVehicle    170049
PersonEntersVehicle    127710
actstart               100308
arrival                100297
PersonCost              71523
ModeChoice              67903
departure               67492
actend                  67491
ParkingEvent            63317
LeavingParkingEvent     61207
TeleportationEvent       9341
ReserveRideHail          3781
Replanning                511
Name: type, dtype: int64
         vehicle    time
173810    620727   18007
174050    740481   18021
174057    229418   18050
174068     29974   18064
174079    133521   18003
...          ...     ...
35785469   39198  182176
35785836  528903  205746
35785913  528903  209876
35785932  202477  211277
35786008   92528  215806

[2062981 rows x 2 columns]
car          2770165
walk         1958263
bus          1341993
car_hov2      404751
car_hov3      286217
tram           54924
bike           34530
cable_car      17836
subway         15118
rail            1947
ferry            

In [3]:


RA


Unnamed: 0,Baseline2018,Baseline2018 BA-SF-VT-AC,Baseline2018 shares,Baseline2018 shares BA-SF-VT-AC,Future2018,Future2018 BA-SF-VT-AC,Future2018 shares,Future2018 shares BA-SF-VT-AC,Diff %2018,Diff2018,clipper BA-SF-VT-AC 2016 share target,MTC BA-SF-VT-AC 2016 share target,NTD BA-SF-VT-AC 2019 share target,clipper BA-SF-VT-AC 2020 Jan av ridership target,MTC BA-SF-VT-AC 2016 av ridership target,NTD BA-SF-VT-AC 2019 ridership target
SF,96467.0,96467.0,0.325643,0.404498,97766.0,97766.0,0.328833,0.408297,0.013466,1299.0,0.379971,0.496644,0.107022,293991.0,777000.0,222936900.0
AC,56183.0,56183.0,0.189657,0.235582,56226.0,56226.0,0.189114,0.234815,0.000765,43.0,0.110238,0.116267,0.025463,85293.0,181900.0,53041480.0
VT,52724.0,52724.0,0.17798,0.221078,52508.0,52508.0,0.176609,0.219288,-0.004097,-216.0,0.056804,0.093768,0.017025,43950.0,146700.0,35465620.0
BA,33112.0,33112.0,0.111776,0.138843,32948.0,32948.0,0.11082,0.1376,-0.004953,-164.0,0.452987,0.293321,0.85049,350485.0,458900.0,1771648000.0
ST,16540.0,,0.055834,,16209.0,,0.054518,,-0.020012,-331.0,,,,,,
GG,7370.0,,0.024879,,7319.0,,0.024617,,-0.00692,-51.0,,,,,,
CC,5693.0,,0.019218,,5995.0,,0.020164,,0.053048,302.0,,,,,,
WH,3007.0,,0.010151,,3050.0,,0.010259,,0.0143,43.0,,,,,,
3D,2918.0,,0.00985,,3033.0,,0.010201,,0.039411,115.0,,,,,,
SR,2871.0,,0.009692,,2842.0,,0.009559,,-0.010101,-29.0,,,,,,


In [16]:
RR[RR.index=='SF:18608']

Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:18608,,,4216.0,0.01418


In [5]:

RR[RR.index.str.contains('AC:1-142')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
AC:1-142,2182.0,0.007366,,


In [6]:

RR[RR.index.str.contains('AC:1T-142')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
AC:1T-142,,,2636.0,0.008866


In [7]:



RR[RR.index.str.contains('CA:')]



Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
CA:12868,1157.0,0.003906,1293.0,0.004349
CA:12869,679.0,0.002292,872.0,0.002933
CA:12867,388.0,0.00131,411.0,0.001382


In [8]:


RR[RR.index.str.contains('SF:1000')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:1000,,,1626.0,0.005469


In [9]:

#The Central Subway Project will improve public transportation in San Francisco by extending the Muni Metro T Third Line through SoMa

#J
RR[RR.index.str.contains('SF:12475')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:12475,2368.0,0.007994,2347.0,0.007894


In [10]:

#K/T
RR[RR.index.str.contains('SF:12476')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:12476,4202.0,0.014185,4210.0,0.01416


In [11]:

#L
RR[RR.index.str.contains('SF:12477')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:12477,3148.0,0.010627,3160.0,0.010629


In [12]:

#M
RR[RR.index.str.contains('SF:12478')]


Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:12478,3147.0,0.010623,3204.0,0.010777


In [13]:

#N
RR[RR.index.str.contains('SF:12479')]



Unnamed: 0,Baseline2018,Baseline2018 shares,Future2018,Future2018 shares
SF:12479,4310.0,0.014549,4414.0,0.014846


In [14]:
for year in years:
    
    print('total SF light rail baseline'+year,
         (list(RR[RR.index.str.contains('SF:12475')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12476')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12477')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12478')]['Baseline'+year])[0]+
          list(RR[RR.index.str.contains('SF:12479')]['Baseline'+year])[0])
         )
    print('total SF light rail future'+year,
         (list(RR[RR.index.str.contains('SF:12475')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12476')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12477')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12478')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:12479')]['Future'+year])[0]+
          list(RR[RR.index.str.contains('SF:1000')]['Future'+year])[0])
         )

total SF light rail baseline2018 17175.0
total SF light rail future2018 18961.0


In [15]:
#Imports
import numpy as np
import pandas as pd
import difflib
import matplotlib.pyplot as plt
import time

#########Analysis of what agents used in the baseline
index_baseline =6 
index_future =7
fp_PtoPTss_baseline = '/Users/cpoliziani/Downloads/personToVehicles.csv.gz'
fp_PtoPTss_future = '/Users/cpoliziani/Downloads/personToVehicles (1).csv.gz'

# print('read PtoPTss baseline',fp_PtoPTss_baseline)
PtoPTss_baseline = pd.read_csv(fp_PtoPTss_baseline)
# print('read PtoPTss TR', fp_PtoPTss_future)
PtoPTss_TR = pd.read_csv(fp_PtoPTss_future)
# print('read plans baseline',dataFilepath_sim[index_baseline][:-13]+'plans.csv.gz')
plans_baseline = pd.read_csv(dataFilepath_sim[index_baseline][:-13]+'plans.csv.gz')
# print('read plans TR', dataFilepath_sim[index_future][:-13]+'plans.csv.gz')
plans_TR = pd.read_csv(dataFilepath_sim[index_future][:-13]+'plans.csv.gz')
# print('read GTFS trips fpr AC, SF and CA')
line_1T_trips_TR = pd.read_csv(GTFS_AC2+'trips.txt')
line_1T_trips_TR = line_1T_trips_TR[line_1T_trips_TR['route_id'].str.contains('1T-142')]
line_CS_trips_TR = pd.read_csv(GTFS_SF2+'trips.txt')
line_CS_trips_TR = line_CS_trips_TR[line_CS_trips_TR['route_id']==1000]
line_CA_trips_TR = pd.read_csv(GTFS_Caltrain2+'trips.txt')
# print('read mode choice')
mode_choice_baseline = pd.read_csv(dataFilepath_sim[index_baseline])
mode_choice_TR = pd.read_csv(dataFilepath_sim[index_future])
mode_choice_baseline = mode_choice_baseline[mode_choice_baseline['type']=='ModeChoice']
mode_choice_TR = mode_choice_TR[mode_choice_TR['type']=='ModeChoice']

  PtoPTss_baseline = pd.read_csv(fp_PtoPTss_baseline)
  PtoPTss_TR = pd.read_csv(fp_PtoPTss_future)


IndexError: list index out of range

In [None]:
PtoPTss_AC_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('AC', na=False)]
PtoPTss_SF_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('SF', na=False)]
PtoPTss_CA_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('Ca', na=False)]

#Filter SF trips - CS only
is_CS = []
vehicles = list(line_CS_trips_TR['trip_id'])
for vehicle in PtoPTss_SF_TR['vehicleID']:
    if int(vehicle[3:]) in vehicles:
        is_CS.append(True)
    else:
        is_CS.append(False)
print('Filter CS trips [tot, remained]',len(is_CS),sum(is_CS))
PtoPTss_SF_TR['is_CS'] = is_CS
PtoPTss_SF_TR = PtoPTss_SF_TR[PtoPTss_SF_TR['is_CS']==True]


is_AC = []
vehicles = list(line_1T_trips_TR['trip_id'])
for vehicle in PtoPTss_AC_TR['vehicleID']:
    if int(vehicle[3:]) in vehicles:
        is_AC.append(True)
    else:
        is_AC.append(False)
print('Filter 1T trips [tot, remained]',len(is_AC),sum(is_AC))
PtoPTss_AC_TR['is_AC'] = is_AC
PtoPTss_AC_TR = PtoPTss_AC_TR[PtoPTss_AC_TR['is_AC']==True]

In [None]:
persons_baseline = pd.DataFrame()
i = 0
trips_TR = pd.concat([line_CA_trips_TR['trip_id'],line_CS_trips_TR['trip_id'],line_1T_trips_TR['trip_id']])
##############################
# PtoPTss_TR2=pd.concat([PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR])
# PtoPTss_TR2_dict = PtoPTss.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                     sum(x.duration)]).to_dict()
# print(len(PtoPTss_CA_TR))
PtoPTss_TR_CA_dict = PtoPTss_CA_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                    sum(x.duration)]).to_dict()
# print(len(PtoPTss_TR_CA_dict.keys()))
# print(len(PtoPTss_SF_TR))
PtoPTss_TR_SF_dict = PtoPTss_SF_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                    sum(x.duration)]).to_dict()
# print(len(PtoPTss_TR_SF_dict.keys()))
# print(len(PtoPTss_AC_TR))
PtoPTss_TR_AC_dict = PtoPTss_AC_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                    sum(x.duration)]).to_dict()
# print(len(PtoPTss_TR_AC_dict.keys()))
# print(len(PtoPTss_baseline))
PtoPTss_baseline_dict = PtoPTss_baseline.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                    sum(x.duration)]).to_dict()
# print(len(PtoPTss_baseline_dict.keys()))
# print(len(PtoPTss_TR))
PtoPTss_TR_dict = PtoPTss_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                sum(x.duration)]).to_dict()
print(len(PtoPTss_TR_dict.keys()))


#Add logsum
person_trips_baseline = pd.read_csv('/Users/cpoliziani/Downloads/tourTripsMerged_Baseline.csv')
person_trips_baseline['trip_num']=person_trips_baseline['trip_num']*2-1
person_trips_TR = pd.read_csv('/Users/cpoliziani/Downloads/tourTripsMerged_TR.csv')
person_trips_TR['trip_num']=person_trips_TR['trip_num']*2-1

In [None]:
def addGeometryIdToDataFrame(df, gdf, xcol, ycol, idColumn="geometry", df_geom='epsg:32610'): 
    gdf.set_crs(epsg = "3310", inplace = True)
    gdf_data = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[xcol], df[ycol]))
    gdf_data.set_crs(epsg = "32610", inplace = True)
    joined = gpd.sjoin(gdf_data.to_crs('epsg:26910'), gdf.to_crs('epsg:26910'))
    gdf_data = gdf_data.merge(joined['ZCTA'], left_index=True, right_index=True, how="left")
    gdf_data.rename(columns={'ZCTA': idColumn}, inplace=True)
    df = pd.DataFrame(gdf_data.drop(columns='geometry'))
#     df.drop(columns=[xcol, ycol], inplace=True)
    return df.loc[~df.index.duplicated(keep='first'), :]

In [None]:
persons_baseline = pd.DataFrame(columns = ['Person',
                                           'Plan Index',
                                           'Planned Depart Time Baseline',
                                           'Planned Depart Time TR',
                                           'Activity From Baseline',
                                           'Activity To Baseline',
                                           'Activity From TR',
                                           'Activity To TR',
                                           'Project Tried',
                                           'Vehicles Used Baseline',
                                           'Vehicle Types Used Baseline',                                          
                                           'Bus agencies Used Baseline',
                                           'First Bus agency Used Baseline',
                                           'Vehicles Used TR',
                                           'Vehicle Types Used TR',                                          
                                           'Bus agencies Used TR',
                                           'First Bus agency Used TR',
                                           'Switch From',
                                           'Trip Length Baseline',                       
                                           'Trip Duration Baseline',
                                           'Trip Length TR',
                                           'Trip Duration TR',
                                           'Diff Length',
                                           'Diff Duration',
                                           'Planned Mode Baseline',                                         
                                           'Planned Mode TR',                                          
                                           'Chosen Mode Baseline',                                          
                                           'Chosen Mode TR',
                                           'X Activity From TR',
                                           'Y Activity From TR',
                                           'X Activity To TR',
                                           'Y Activity To TR',
                                           'X Activity From Baseline',
                                           'Y Activity From Baseline',
                                           'X Activity To Baseline',
                                           'Y Activity To Baseline',
                                           'Log Sum Baseline',
                                           'Log Sum TR',
                                           'Diff Log Sum',
#                                            'ZIP Departure TR',                                          
#                                            'ZIP Arrival TR',                                          
#                                            'ZIP Departure Baseline',                                          
#                                            'ZIP Arrival Baseline',                                          
                                          ])

PtoPTss_baseline_bus =  PtoPTss_baseline[(PtoPTss_baseline['mode'] =='bus')|
                                         (PtoPTss_baseline['mode'] =='subway')|
                                         (PtoPTss_baseline['mode'] =='tram')|
                                         (PtoPTss_baseline['mode'] =='rail')|
                                         (PtoPTss_baseline['mode'] =='cable_car')|
                                         (PtoPTss_baseline['mode'] =='ferry')]
PtoPTss_TR_bus =  PtoPTss_TR[(PtoPTss_TR['mode'] =='bus')|
                                         (PtoPTss_TR['mode'] =='subway')|
                                         (PtoPTss_TR['mode'] =='tram')|
                                         (PtoPTss_TR['mode'] =='rail')|
                                         (PtoPTss_TR['mode'] =='cable_car')|
                                         (PtoPTss_TR['mode'] =='ferry')]   
print(len(PtoPTss_TR_SF_dict))
print(len(PtoPTss_TR_CA_dict))
print(len(PtoPTss_TR_AC_dict))


i=0      
for row in PtoPTss_TR_SF_dict.keys():
    i+=1
    if i%50==0:
        print(i)
        
    try:
        persons_baseline.at[i,'Person'] = row[0]
    except:
        print('Warning', row, 'person')
    try:
        persons_baseline.at[i,'Plan Index'] = int(row[1])
    except:
        print('Warning', row, 'plan_index')
    try:
        persons_baseline.at[i,'Activity From Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From Baseline')
    try:
        persons_baseline.at[i,'Activity To Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To Baseline')
    try:
        persons_baseline.at[i,'Activity From TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From TR')
    try:
        persons_baseline.at[i,'Activity To TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To TR')  
    try:
        persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_baseline')
    try:
        persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_TR')
    try:
        persons_baseline.at[i,'Project Tried'] = 'SF - Central Subway'
    except:
        print('Warning', row, 'project tried')
    try:
        persons_baseline.at[i,'Log Sum Baseline'] = np.mean(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum Baseline')
    try:
        persons_baseline.at[i,'Log Sum TR'] = np.mean(list(person_trips_TR['mode_choice_logsum_y'][(person_trips_TR['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum TR')
    try:
        persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
    except:
        print('Warning', row, 'Diff Log Sum')
    try:
        persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used Baseline')
    try:
        persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used TR')
    try:
        persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used Baseline')
    try:
        persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used TR') 
    try:
        persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used Baseline')
    try:
        persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used TR')
    try:
        persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
    except:
        print('Warning', row, 'TR_length')
    try:
        persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
    except:
        print('Warning', row, 'TR_duration')
    try:
        persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
    except:
        print('Warning', row, 'baseline_length')
    try:
        persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
    except:
        print('Warning', row, 'baseline_duration')
    try:
        persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
    except:
        print('Warning', row, 'diff_length')
    try:
        persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
    except:
        print('Warning', row, 'diff_duration')
    try:
        persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_TR')
    try:
        persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_TR')
    try:
        persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_TR')
    try:
        persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_TR')   
    try:
        persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_Baseline')
    try:
        persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_Baseline')
    try:
        persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_Baseline')
    try:
        persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_Baseline')   
    try:
        persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used baseline')
    try:
        persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used TR')
    try:
        persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'Baseline planned mode')
    try:
        persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'TR planned mode')
    try:
        plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
        persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
    except:
        print('Warning', row, 'mode_choice_baseline')
    try:
        persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
    except:
        print('Warning', row, 'mode_choice_TR')
        
####################################
for row in PtoPTss_TR_CA_dict.keys():
    i+=1
    if i%10==0:
        print(i)
        
        
    try:
        persons_baseline.at[i,'Person'] = row[0]
    except:
        print('Warning', row, 'person')
    try:
        persons_baseline.at[i,'Plan Index'] = int(row[1])
    except:
        print('Warning', row, 'plan_index')
    try:
        persons_baseline.at[i,'Activity From Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From Baseline')
    try:
        persons_baseline.at[i,'Activity To Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To Baseline')
    try:
        persons_baseline.at[i,'Activity From TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From TR')
    try:
        persons_baseline.at[i,'Activity To TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To TR')  
    try:
        persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_baseline')
    try:
        persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_TR')
    try:
        persons_baseline.at[i,'Project Tried'] = 'CA - Electrification Project'
    except:
        print('Warning', row, 'project tried')
    try:
        persons_baseline.at[i,'Log Sum Baseline'] = np.mean(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum Baseline')
    try:
        persons_baseline.at[i,'Log Sum TR'] = np.mean(list(person_trips_TR['mode_choice_logsum_y'][(person_trips_TR['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum TR')
    try:
        persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
    except:
        print('Warning', row, 'Diff Log Sum')
    try:
        persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used Baseline')
    try:
        persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used TR')
    try:
        persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used Baseline')
    try:
        persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used TR')
    try:
        persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used Baseline')
    try:
        persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used TR')  
    try:
        persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
    except:
        print('Warning', row, 'TR_length')
    try:
        persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
    except:
        print('Warning', row, 'TR_duration')
    try:
        persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
    except:
        print('Warning', row, 'baseline_length')
    try:
        persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
    except:
        print('Warning', row, 'baseline_duration')
    try:
        persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
    except:
        print('Warning', row, 'diff_length')
    try:
        persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
    except:
        print('Warning', row, 'diff_duration')
    try:
        persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_TR')
    try:
        persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_TR')
    try:
        persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_TR')
    try:
        persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_TR')   
    try:
        persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_Baseline')
    try:
        persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_Baseline')
    try:
        persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_Baseline')
    try:
        persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_Baseline')   
    try:
        persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used baseline')
    try:
        persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used TR')
    try:
        persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'Baseline planned mode')
    try:
        persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'TR planned mode')
    try:
        plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
        persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
    except:
        print('Warning', row, 'mode_choice_baseline')
    try:
        persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
    except:
        print('Warning', row, 'mode_choice_TR')
        
####################################
for row in PtoPTss_TR_AC_dict.keys():
    i+=1
    if i%10==0:
        print(i) 
        
        
    try:
        persons_baseline.at[i,'Person'] = row[0]
    except:
        print('Warning', row, 'person')
    try:
        persons_baseline.at[i,'Plan Index'] = int(row[1])
    except:
        print('Warning', row, 'plan_index')    
    try:
        persons_baseline.at[i,'Activity From Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From Baseline')
    try:
        persons_baseline.at[i,'Activity To Baseline'] = list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To Baseline')
    try:
        persons_baseline.at[i,'Activity From TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'Activity From TR')
    try:
        persons_baseline.at[i,'Activity To TR'] = list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'Activity To TR')   
    try:
        persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_baseline')
    try:
        persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
    except:
        print('Warning', row, 'plan_dep_time_TR')
    try:
        persons_baseline.at[i,'Project Tried'] = 'AC - 1TEMPO'
    except:
        print('Warning', row, 'project tried')
    try:
        persons_baseline.at[i,'Log Sum Baseline'] = np.mean(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum Baseline')
    try:
        persons_baseline.at[i,'Log Sum TR'] = np.mean(list(person_trips_TR['mode_choice_logsum_y'][(person_trips_TR['person_id']==row[0])]))
    except:
        print('Warning', row, 'Log Sum TR')
    try:
        persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
    except:
        print('Warning', row, 'Diff Log Sum')
    try:
        persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used Baseline')
    try:
        persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicles Used TR')    
    try:
        persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used Baseline')
    try:
        persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Bus agencies Used TR')
    try:
        persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used Baseline')
    try:
        persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
    except:
        print('Warning', row, 'First Bus agency Used TR')
    try:
        persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
    except:
        print('Warning', row, 'TR_length')
    try:
        persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
    except:
        print('Warning', row, 'TR_duration')
    try:
        persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
    except:
        print('Warning', row, 'baseline_length')
    try:
        persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
    except:
        print('Warning', row, 'baseline_duration')
    try:
        persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
    except:
        print('Warning', row, 'diff_length')
    try:
        persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
    except:
        print('Warning', row, 'diff_duration')
    try:
        persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_TR')
    try:
        persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_TR')
    try:
        persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_TR')
    try:
        persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_TR')   
    try:
        persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'x_activity_from_Baseline')
    try:
        persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
    except:
        print('Warning', row, 'y_activity_From_Baseline')
    try:
        persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'x_activity_To_Baseline')
    try:
        persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
    except:
        print('Warning', row, 'y_activity_To_Baseline')   
    try:
        persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used baseline')
    try:
        persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
    except:
        print('Warning', row, 'Vehicle Types Used TR')
    try:
        persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'Baseline planned mode')
    try:
        persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
    except:
        print('Warning', row, 'TR planned mode')
    try:
        plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
        persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
    except:
        print('Warning', row, 'mode_choice_baseline')
    try:
        persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
    except:
        print('Warning', row, 'mode_choice_TR')

        
zipcode = gpd.read_file('/Users/cpoliziani/Downloads/TAZ to ZIP/ZCTA2010.shp')
persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity From TR', 'Y Activity From TR', 'ZIP Departure TR')
persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity To TR', 'Y Activity To TR', 'ZIP Arrival TR')
persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity From Baseline', 'Y Activity From Baseline', 'ZIP Departure Baseline')
persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity To Baseline', 'Y Activity To Baseline', 'ZIP Arrival Baseline')


#Add Switch From Column
switch_type = []
for agencies_baseline, project in zip(persons_baseline['Bus agencies Used Baseline'],persons_baseline['Project Tried']):
    if len(agencies_baseline) == 0:
        switch_type.append('Switch from another mode')
    elif project == 'SF - Central Subway' and 'SF' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    elif project == 'AC - 1TEMPO' and 'AC' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    elif project == 'CA - Electrification Project' and 'ca' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    else:
        switch_type.append('Switch from another transit agency')

persons_baseline['Switch From'] = switch_type

persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')



In [None]:
switch_type = []
for agencies_baseline, project in zip(persons_baseline['Bus agencies Used Baseline'],persons_baseline['Project Tried']):
    if len(agencies_baseline) == 0:
        switch_type.append('Switch from another mode')
    elif project == 'SF - Central Subway' and 'SF' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    elif project == 'AC - 1TEMPO' and 'AC' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    elif project == 'CA - Electrification Project' and 'ca' in agencies_baseline:
        switch_type.append('Switch from same transit agency')
    else:
        switch_type.append('Switch from another transit agency')

persons_baseline['Switch From'] = switch_type

persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')


In [None]:
person_trips_baseline = pd.read_csv('/Users/cpoliziani/Downloads/tourTripsMerged_Baseline.csv')


In [None]:
person_trips_baseline['trip_num'].value_counts()

In [None]:
person_trips_baseline

In [None]:
persons_baseline

In [None]:
events = pd.read_csv('s3://beam-outputs/output/newyork/nyc-2-ridehail-200k__2022-09-29_11-44-14_esl/ITERS/it.0/0.events.csv.gz')




In [None]:
veh = events[['vehicle','type']].dropna()
veh = veh[(veh['vehicle'].str.contains('rideHail'))&(veh['type']=='PathTraversal')]
vei = []
for ve in veh['vehicle']:
    vei.append(ve[-10:])

In [None]:
np.unique(vei, return_counts=True)

In [None]:
pd.read_csv('/Users/cpoliziani/Downloads/0.skimsRidehail (1).csv.gz')

In [None]:

# for row in PtoPTss_TR_CA_dict.keys():
#     i+=1
#     if i%1000==0:
#         print(i)
#     try:
#         persons_baseline.at[i,'person'] = row[0]
#         persons_baseline.at[i,'plan_index'] = row[1]
#         persons_baseline.at[i,'project tried'] = 'CA - Electrification Project'
#         persons_baseline.at[i,'TR_length'] = PtoPTss_TR_dict[row][0]
#         persons_baseline.at[i,'TR_duration'] = PtoPTss_TR_dict[row][1]
#         persons_baseline.at[i,'baseline_length'] = PtoPTss_baseline_dict[row][0]
#         persons_baseline.at[i,'baseline_duration'] = PtoPTss_baseline_dict[row][1]
#         persons_baseline.at[i,'diff_length'] = persons_baseline.at[i,'TR_length']-persons_baseline.at[i,'baseline_length']
#         persons_baseline.at[i,'diff_duration'] = persons_baseline.at[i,'TR_duration']-persons_baseline.at[i,'baseline_duration']
#         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#     except:
#         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#         persons_baseline.at[i,'person'] = row[0]
#         persons_baseline.at[i,'plan_index'] = row[1]
#         persons_baseline.at[i,'project tried'] = 'CA - Electrification Project'
#         print('WARNING:',row[0], row[1]) 


# for row in PtoPTss_TR_AC_dict.keys():
#     i+=1
#     if i%1000==0:
#         print(i)
#     try:
#         persons_baseline.at[i,'person'] = row[0]
#         persons_baseline.at[i,'plan_index'] = row[1]
#         persons_baseline.at[i,'project tried'] = 'AC - 1 Tempo BRT Line'
#         persons_baseline.at[i,'TR_length'] = PtoPTss_TR_dict[row][0]
#         persons_baseline.at[i,'TR_duration'] = PtoPTss_TR_dict[row][1]
#         persons_baseline.at[i,'baseline_length'] = PtoPTss_baseline_dict[row][0]
#         persons_baseline.at[i,'baseline_duration'] = PtoPTss_baseline_dict[row][1]
#         persons_baseline.at[i,'diff_length'] = persons_baseline.at[i,'TR_length']-persons_baseline.at[i,'baseline_length']
#         persons_baseline.at[i,'diff_duration'] = persons_baseline.at[i,'TR_duration']-persons_baseline.at[i,'baseline_duration']
#         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#     except:
#         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#         persons_baseline.at[i,'person'] = row[0]
#         persons_baseline.at[i,'plan_index'] = row[1]
#         persons_baseline.at[i,'project tried'] = 'AC - 1 Tempo BRT Line'
#         print('WARNING:',row[0], row[1])




##############
# trips_TR = pd.concat([line_CA_trips_TR['trip_id'],line_CS_trips_TR['trip_id'],line_1T_trips_TR['trip_id']])
# for vehicle, person, plan_index, vehicle2 in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'],PtoPTss['vehicle2'] ):
#     if int(vehicle.split(':')[1]) in list(trips_TR):
#         i+=1
#         if i%1000==0:
#             print(i)
#         try:

#             persons_baseline.at[i,'person'] = person
#             persons_baseline.at[i,'plan_index'] = plan_index
#             persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
#             persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#             persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#             persons_baseline.at[i,'vehicle_now'] = vehicle
#             if vehicle2 == 'SF':
#                 persons_baseline.at[i,'project'] = 'CS'
#             if vehicle2 == 'Ca':
#                 persons_baseline.at[i,'project'] = 'CA'
#             if vehicle2 == 'AC':
#                 persons_baseline.at[i,'project'] = '1T'

#             persons_baseline.at[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
#             persons_baseline.at[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
#             persons_baseline.at[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
#             persons_baseline.at[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
#             persons_baseline.at[i,'diff_speed'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
#         except:
#             persons_baseline.at[i,'person'] = person
#             persons_baseline.at[i,'plan_index'] = plan_index
#             persons_baseline.at[i,'vehicle_now'] = vehicle
#             if vehicle2 == 'SF':
#                 persons_baseline.at[i,'project'] = 'CS'
#             if vehicle2 == 'Ca':
#                 persons_baseline.at[i,'project'] = 'CA'
#             if vehicle2 == 'AC':
#                 persons_baseline.at[i,'project'] = '1T'

#             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)
#     print('Warning')




# persons_baseline = pd.DataFrame(['person','plan_index','mode_baseline','vehicle_baseline','agency_baseline',
#                                  'x_activity_TR','y_activity_TR','vehicle_now'])
# i = 0
# PtoPTss=pd.concat(PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR)
# for vehicle, person, plan_index in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'] ):
#     if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
#         i+=1
#         persons_baseline.loc[i,'person'] = person
#         persons_baseline.loc[i,'plan_index'] = plan_index
#         persons_baseline.loc[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
#         persons_baseline.loc[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#         persons_baseline.loc[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#         persons_baseline.loc[i,'vehicle_now'] = '1T'
#         try:
#             persons_baseline.loc[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.loc[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
#             persons_baseline.loc[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
#             persons_baseline.loc[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
#             persons_baseline.loc[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
#             persons_baseline.loc[i,'speed_length'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
#         except:
#             persons_baseline.loc[i,'vehicle_baseline'] = None
#             persons_baseline.loc[i,'agency_baseline'] = None
#             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


# persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')

# PtoPTss_baseline


# persons_baseline = pd.DataFrame()
# i = 0
# PtoPTss=pd.concat([PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR])
# for vehicle, person, plan_index, vehicle2 in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'],PtoPTss['vehicle2'] ):
#     if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
#         i+=1

#             persons_baseline.at[i,'person'] = person
#             persons_baseline.at[i,'plan_index'] = plan_index
#             persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
#             persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#             persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
#             persons_baseline.at[i,'vehicle_now'] = vehicle
#             if vehicle2 == 'SF':
#                 persons_baseline.at[i,'project'] = 'CS'
#             if vehicle2 == 'Ca':
#                 persons_baseline.at[i,'project'] = 'CA'
#             if vehicle2 == 'AC':
#                 persons_baseline.at[i,'project'] = '1T'
#         try:

#             persons_baseline.at[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
#             persons_baseline.at[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
#             persons_baseline.at[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
#             persons_baseline.at[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
#             persons_baseline.at[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
#             persons_baseline.at[i,'diff_speed'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
#         except:
#             persons_baseline.at[i,'person'] = person
#             persons_baseline.at[i,'plan_index'] = plan_index
#             persons_baseline.at[i,'vehicle_now'] = vehicle
#             if vehicle2 == 'SF':
#                 persons_baseline.at[i,'project'] = 'CS'
#             if vehicle2 == 'Ca':
#                 persons_baseline.at[i,'project'] = 'CA'
#             if vehicle2 == 'AC':
#                 persons_baseline.at[i,'project'] = '1T'

#             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


# persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')


In [None]:
LIRR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2020LIRR.csv')
LIRR_capacities_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2020LIRR.csv')
LIRR_capacities_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2021LIRR.csv')
LIRR_capacities_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/jan2022LIRR.csv')
LIRR_capacities_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2022LIRR.csv')

MNR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2020MNR.csv')
MNR_capacities_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2020MNR.csv')
MNR_capacities_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2021MNR.csv')
MNR_capacities_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/jan2022MNR.csv')
MNR_capacities_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2022MNR.csv')

# LIRR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/LIRR_capacities.csv')
# MNR_capacities = MNR_capacities.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# LIRR_capacities = LIRR_capacities.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()


capacities_data = [LIRR_capacities_apr2020,
        LIRR_capacities_aug2020,
        LIRR_capacities_aug2021,
        LIRR_capacities_jan2022,
        LIRR_capacities_may2022,
        MNR_capacities_apr2020,
        MNR_capacities_aug2020,
        MNR_capacities_aug2021,
        MNR_capacities_jan2022,
        MNR_capacities_may2022,
        ]

for capacity_data in capacities_data:
    trains = []
    for train in capacity_data['Train']:
        try:
            trains.append(int(train))
        except:
            trains.append(train)
    capacity_data['Train'] = trains
    


LIRR_capacities_apr2020 = LIRR_capacities_apr2020.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
LIRR_capacities_aug2020 = LIRR_capacities_aug2020.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
LIRR_capacities_aug2021 = LIRR_capacities_aug2021.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
LIRR_capacities_jan2022 = LIRR_capacities_jan2022.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
LIRR_capacities_may2022 = LIRR_capacities_may2022.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()

MNR_capacities_apr2020 = MNR_capacities_apr2020.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
MNR_capacities_aug2020 = MNR_capacities_aug2020.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
MNR_capacities_aug2021 = MNR_capacities_aug2021.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
MNR_capacities_jan2022 = MNR_capacities_jan2022.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
MNR_capacities_may2022 = MNR_capacities_may2022.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()


LIRR_trips_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Long_Island_Rail_20200318/trips.txt')
LIRR_trips_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Long_Island_Rail_20200629/trips.txt')
LIRR_trips_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Long_Island_Rail_20210726/trips.txt')
LIRR_trips_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Long_Island_Rail_20211216/trips.txt')
LIRR_trips_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Long_Island_Rail_20220430/trips.txt')

LIRR_trips_apr2020['agency_id']='LI'
LIRR_trips_aug2020['agency_id']='LI'
LIRR_trips_aug2021['agency_id']='LI'
LIRR_trips_jan2022['agency_id']='LI'
LIRR_trips_may2022['agency_id']='LI'

MNR_trips_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Metro-North_Railroad_20200325/trips.txt')
MNR_trips_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Metro-North_Railroad_20200731/trips.txt')
MNR_trips_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Metro-North_Railroad_20210721/trips.txt')
MNR_trips_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Metro-North_Railroad_20211222/trips.txt')
MNR_trips_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Metro-North_Railroad_20220429/trips.txt')

MNR_trips_apr2020['agency_id']='1'
MNR_trips_aug2020['agency_id']='1'
MNR_trips_aug2021['agency_id']='1'
MNR_trips_jan2022['agency_id']='1'
MNR_trips_may2022['agency_id']='1'

# MNR_routes_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Metro-North_Railroad_20200325/routes.txt')
# MNR_routes_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Metro-North_Railroad_20200731/routes.txt')
# MNR_routes_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Metro-North_Railroad_20210721/routes.txt')
# MNR_routes_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Metro-North_Railroad_20211222/routes.txt')
# MNR_routes_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Metro-North_Railroad_20220429/routes.txt')

# trips_data_MNR = [MNR_trips_apr2020,
#         MNR_trips_aug2020,
#         MNR_trips_aug2021,
#         MNR_trips_jan2022,
#         MNR_trips_may2022,
#         ]

# routes_data_MNR = [MNR_routes_apr2020,
#         MNR_routes_aug2020,
#         MNR_routes_aug2021,
#         MNR_routes_jan2022,
#         MNR_routes_may2022,
#         ]

# for trip_data_MNR, route_data_MNR, i  in zip(trips_data_MNR,routes_data_MNR, range(len(trips_data))):
#     agencies =[]
#     for route in trip_data_MNR['route_id']:
#         agencies.append(list(route_data_MNR['agency_id'][route_data_MNR['route_id']==route])[0])
#     trip_data_MNR['agency_id']=agencies


# MNR_trips_apr2020 = trips_data_MNR[0]
# MNR_trips_aug2020 = trips_data_MNR[1]
# MNR_trips_aug2021 = trips_data_MNR[2]
# MNR_trips_jan2022 = trips_data_MNR[3]
# MNR_trips_may2022 = trips_data_MNR[4]

trips_data = [LIRR_trips_apr2020,
        LIRR_trips_aug2020,
        LIRR_trips_aug2021,
        LIRR_trips_jan2022,
        LIRR_trips_may2022,
        MNR_trips_apr2020,
        MNR_trips_aug2020,
        MNR_trips_aug2021,
        MNR_trips_jan2022,
        MNR_trips_may2022,
        ]

capacities_data = [LIRR_capacities_apr2020,
        LIRR_capacities_aug2020,
        LIRR_capacities_aug2021,
        LIRR_capacities_jan2022,
        LIRR_capacities_may2022,
        MNR_capacities_apr2020,
        MNR_capacities_aug2020,
        MNR_capacities_aug2021,
        MNR_capacities_jan2022,
        MNR_capacities_may2022,
        ]

for trip_data, capacity_data in zip(trips_data, capacities_data):

    capacities = []
    wrong_ids = []
    print('number of trip', len(trip_data),'number of capacities', len(capacity_data))
    print('number of unique trip', len(np.unique(trip_data['trip_short_name'])))
    for trip_short_name in trip_data['trip_short_name']:
        try:
            capacities.append(capacity_data[trip_short_name][0][0])
        except:
            try:
                capacities.append(capacity_data[float(trip_short_name)][0][0])
            except:
                wrong_ids.append(trip_short_name)
                capacities.append(np.nan)
    trip_data['capacity'] = capacities
    print('Warning!! Not found',len(wrong_ids))
#     print(trip_data)
    print('######################')



In [None]:
for trip_data in trips_data:
    max_cap = max(list(trip_data['capacity'].dropna()))
    print(max_cap)
    trip_data['capacity'] = trip_data['capacity'].fillna(max_cap)


In [None]:


trips_data_apr2020 = [trips_data[0],trips_data[5]]
trips_data_aug2020 = [trips_data[1],trips_data[6]]
trips_data_aug2021 = [trips_data[2],trips_data[7]]
trips_data_jan2022 = [trips_data[3],trips_data[8]]
trips_data_may2022 = [trips_data[4],trips_data[9]]

transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
                                           'routeId',
                                           'tripId',
                                           'capacity',
                                            'vehicleTypeId',
                                          ])
i=0      
for trip_data, j in zip(trips_data_apr2020,[0,1]):
    for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
        i+=1
        if i%5000==0:
            print(i)
        transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
        transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
        if j == 0:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20200318:'+str(tripID)
        elif j == 1:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20200325:'+str(tripID)
        transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
        transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_apr2020.csv')


transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
                                           'routeId',
                                           'tripId',
                                           'capacity',
                                            'vehicleTypeId',
                                          ])
i=0      
for trip_data, j in zip(trips_data_aug2020,[0,1]):
    for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
        i+=1
        if i%5000==0:
            print(i)
        transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
        transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
        if j == 0:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20200629:'+str(tripID)
        elif j == 1:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20200731:'+str(tripID)
        transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
        transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_aug2020.csv')



transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
                                           'routeId',
                                           'tripId',
                                           'capacity',
                                            'vehicleTypeId',
                                          ])
i=0      
for trip_data, j in zip(trips_data_aug2021,[0,1]):
    for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
        i+=1
        if i%5000==0:
            print(i)
        transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
        transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
        if j == 0:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20210726:'+str(tripID)
        elif j == 1:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20210721:'+str(tripID)
        transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
        transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_aug2021.csv')



transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
                                           'routeId',
                                           'tripId',
                                           'capacity',
                                            'vehicleTypeId',
                                          ])
i=0      
for trip_data, j in zip(trips_data_jan2022,[0,1]):
    for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
        i+=1
        if i%5000==0:
            print(i)
        transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
        transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
        if j == 0:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20211216:'+str(tripID)
        elif j == 1:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20211222:'+str(tripID)
        transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
        transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_jan2022.csv')



transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
                                           'routeId',
                                           'tripId',
                                            'capacity',
                                           'vehicleTypeId',
                                          ])
i=0      
for trip_data, j in zip(trips_data_may2022,[0,1]):
    for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
        i+=1
        if i%5000==0:
            print(i)
        transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
        transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
        if j == 0:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20220430:'+str(tripID)
        elif j == 1:
            transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20220429:'+str(tripID)
        transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
        transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_may2022.csv')



In [None]:
trips_data_apr2020[1]

In [None]:
pd.read_csv('s3://beam-outputs/output/newyork/new-york-jan2022-0-of-10__2022-09-21_17-07-49_qdx/ITERS/it.10/10.events.csv.gz', nrows = 90000)




In [None]:
# #Imports
# import numpy as np
# import pandas as pd
# import geopandas as gpd
# import difflib
# import matplotlib.pyplot as plt
# import time

# def  processEvents(dataFilepath_sim, nrows):
#     PTs = []
#     PEVs = []
#     print('read', dataFilepath_sim)
#     for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=1500000, nrows = nrows):
#         if sum((chunk['type'] == 'PathTraversal')) > 0:
#             chunk['vehicle'] = chunk['vehicle'].astype(str)
#             PT = chunk.loc[(chunk['type'] == 'PathTraversal') & (chunk['length'] > 0)].dropna(how='all', axis=1)
#             PT['departureTime'] = PT['departureTime'].astype(int)
#             PT['arrivalTime'] = PT['arrivalTime'].astype(int)

#             PTs.append(PT[['driver', 'vehicle', 'mode', 'length', 'startX', 'startY', 'endX', 'endY', 'vehicleType',
#                            'arrivalTime', 'departureTime', 'primaryFuel', 'primaryFuelType', 'secondaryFuel',
#                            'secondaryFuelType', 'numPassengers', 'riders','time']])
#             print(chunk.type.value_counts())
#             PEV = chunk.loc[(chunk.type == "PersonEntersVehicle") &
#                             ~(chunk['person'].apply(str).str.contains('Agent').fillna(False)) &
#                             ~(chunk['vehicle'].str.contains('body').fillna(False)), :].dropna(how='all', axis=1)
#             if ~PEV.empty:
#                 PEV['time'] = PEV['time'].astype(int)
#                 PEVs.append(PEV)

#     Events_file_sim_PT = pd.concat(PTs)[['vehicle','time','endY','endX','startY','startX','mode']]
#     Events_file_sim_PE = pd.concat(PEVs)[['vehicle','time',]]
#     print(Events_file_sim_PE)

#     return Events_file_sim_PT, Events_file_sim_PE



# #Filter transit trips
# def filter_transit(Events_file_sim_PT,Events_file_sim_PE):
#     print(Events_file_sim_PT['mode'].value_counts())
#     Events_file_sim_PT = Events_file_sim_PT[(Events_file_sim_PT['mode'] == 'bus')|
#                                             (Events_file_sim_PT['mode'] == 'tram')|
#                                             (Events_file_sim_PT['mode'] == 'subway')|
#                                             (Events_file_sim_PT['mode'] == 'cable_car')|
#                                             (Events_file_sim_PT['mode'] == 'ferry')|
#                                             (Events_file_sim_PT['mode'] == 'rail')]
#     print(Events_file_sim_PT['mode'].value_counts())
#     Events_file_sim_PE = Events_file_sim_PE[Events_file_sim_PE['vehicle'].isin(Events_file_sim_PT['vehicle'])]
    
#     return Events_file_sim_PE

# def guess_agency(Events_file_sim_PE):

#     agencies = []
#     for vehicleID in Events_file_sim_PE['vehicle']:
#         agency = vehicleID.split(':')[0]
#         if agency == 'petalumatransit-petaluma-ca-us':
#             agencies.append('PE')
#         elif agency == 'westcat-ca-us':
#             agencies.append('WC')
#         elif agency == 'caltrain-ca-us':
#             agencies.append('CA')
#         elif agency == 'riovista-ca-us':
#             agencies.append('RV')
#         elif agency == 'unioncity-ca-us':
#             agencies.append('UC')
#         else:
#             if len(agency) == 2:
#                 agencies.append(agency)
#             elif agency == 'Caltrain':
#                 agencies.append('CA')
#             else:
#                 print('Warning, this agency is not recognized:', agency)
#     Events_file_sim_PE['agency'] = agencies

#     print(np.unique(agencies))
    
#     return Events_file_sim_PE

# def guess_route(Events_file_sim_PE, GTFS_filepaths):

#     GTFS_trip_files = {}

#     for GTFS_filepath, GTFS in zip(GTFS_filepaths,GTFSs):
#         GTFS_trip_files[GTFS] = pd.read_csv(GTFS_filepath+'trips.txt')

#     route_ids = []
#     total_routes = len(Events_file_sim_PE['vehicle'])
#     i = 0
#     time_start = time.time()
#     for vehicle, agency in zip(Events_file_sim_PE['vehicle'],Events_file_sim_PE['agency']):
#         i+=1
#         if i%10000 ==0:
#             print(i,'/',total_routes,'. Time = ', time.time()-time_start, '. Estimated remaining time:', (time.time()-time_start)/i*total_routes-(time.time()-time_start))
#         if agency == 'SM':
#             route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][
#                 GTFS_trip_files[agency]['trip_id'].astype(str)==
#                      str(vehicle).split(':')[1]+'|'
#                      +str(vehicle).split(':')[2]+':'
#                      +str(vehicle).split(':')[3]+'|'
#                      +str(vehicle).split(':')[4]+':'
#                      +str(vehicle).split(':')[5]+':'
#                      +str(vehicle).split(':')[6]])[0]))
#         else:
# #             print(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0])
#             try:
#                 route_ids.append(agency+':'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id'].astype(str)==str(vehicle.split(':')[1])])[0]))
#             except:
#                 print('Warning, trip non found for vehicle', vehicle)
#                 route_ids.append('tripID not found')
#     #     elif agency == 'GG':
#     #         route_ids.append('GG:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
#     #     elif agency == 'SF':
#     #         route_ids.append('SF:'+str(list(GTFS_trip_files[agency]['route_id'][GTFS_trip_files[agency]['trip_id']==vehicle.split(':')[1]])[0]))
#     Events_file_sim_PE['route_id'] = route_ids
    
#     return Events_file_sim_PE












# dataFilepath_sim = [
# #                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
# #                 's3://beam-outputs/pilates-outputs/sfbay-AC-SF-CA-20220801/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2019-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2020-iteration-5/ITERS/it.0/0.events.csv.gz',
#                     's3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#                 's3://beam-outputs/pilates-outputs/sfbay-TR-20220812/beam/year-2021-iteration-5/ITERS/it.0/0.events.csv.gz',
#             ]

                    
# years = ['2018','2019','2020','2021']

# names = []
# for year in years:
#     names.append('Baseline'+year)
#     names.append('Future'+year)

# output_filepath = '/Users/cpoliziani/Downloads/Transit Rich/Results/'

# GTFS_filepath = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local/'
# GTFS_filepath2 = '/Users/cpoliziani/Downloads/Data/TR/GTFS/r5-simple-no-local-TR/'

# RA_output = 'agency_ridershipNew3.csv'
# RR_output = 'route_ridershipNew3.csv'

# GTFSs = ['3D','AC','AM','AY','BA','CA','CC','CE','CM','CT','DE','EM','GG','HF','MA','PE','RV', 'SB',
#         'SC','SF','SM','SO','SR','ST','TD','UC','VC','VN','VT','WC','WH']

# GTFS_3D = GTFS_filepath+'3D/'
# GTFS_AC = GTFS_filepath+'AC/'
# GTFS_AM = GTFS_filepath+'AM/'
# GTFS_AY = GTFS_filepath+'AY/'
# GTFS_BA = GTFS_filepath+'BA/'
# GTFS_Caltrain = GTFS_filepath+'Caltrain/'
# GTFS_CC = GTFS_filepath+'CC/'
# GTFS_CE = GTFS_filepath+'CE/'
# GTFS_CM = GTFS_filepath+'CM/'
# GTFS_CT = GTFS_filepath+'CT/'
# GTFS_DE = GTFS_filepath+'DE/'
# GTFS_EM = GTFS_filepath+'EM/'
# GTFS_GG = GTFS_filepath+'GG/'
# GTFS_HF = GTFS_filepath+'HF/'
# GTFS_MA = GTFS_filepath+'MA/'
# GTFS_PE = GTFS_filepath+'PE/'
# GTFS_RV = GTFS_filepath+'RV/'
# GTFS_SB = GTFS_filepath+'SB/'
# GTFS_SC = GTFS_filepath+'SC/'
# GTFS_SF = GTFS_filepath+'SF/'
# GTFS_SM = GTFS_filepath+'SM/'
# GTFS_SO = GTFS_filepath+'SO/'
# GTFS_SR = GTFS_filepath+'SR/'
# GTFS_ST = GTFS_filepath+'ST/'
# GTFS_TD = GTFS_filepath+'TD/'
# GTFS_UC = GTFS_filepath+'UC/'
# GTFS_VC = GTFS_filepath+'VC/'
# GTFS_VN = GTFS_filepath+'VN/'
# GTFS_VT = GTFS_filepath+'VT/'
# GTFS_WC = GTFS_filepath+'WC/'
# GTFS_WH = GTFS_filepath+'WH/'


# GTFS_3D2 = GTFS_filepath2+'3D/'
# GTFS_AC2 = GTFS_filepath2+'AC/'
# GTFS_AM2 = GTFS_filepath2+'AM/'
# GTFS_AY2 = GTFS_filepath2+'AY/'
# GTFS_BA2 = GTFS_filepath2+'BA/'
# GTFS_Caltrain2 = GTFS_filepath2+'Caltrain/'
# GTFS_CC2 = GTFS_filepath2+'CC/'
# GTFS_CE2 = GTFS_filepath2+'CE/'
# GTFS_CM2 = GTFS_filepath2+'CM/'
# GTFS_CT2 = GTFS_filepath2+'CT/'
# GTFS_DE2 = GTFS_filepath2+'DE/'
# GTFS_EM2 = GTFS_filepath2+'EM/'
# GTFS_GG2 = GTFS_filepath2+'GG/'
# GTFS_HF2 = GTFS_filepath2+'HF/'
# GTFS_MA2 = GTFS_filepath2+'MA/'
# GTFS_PE2 = GTFS_filepath2+'PE/'
# GTFS_RV2 = GTFS_filepath2+'RV/'
# GTFS_SB2 = GTFS_filepath2+'SB/'
# GTFS_SC2 = GTFS_filepath2+'SC/'
# GTFS_SF2 = GTFS_filepath2+'SF/'
# GTFS_SM2 = GTFS_filepath2+'SM/'
# GTFS_SO2 = GTFS_filepath2+'SO/'
# GTFS_SR2 = GTFS_filepath2+'SR/'
# GTFS_ST2 = GTFS_filepath2+'ST/'
# GTFS_TD2 = GTFS_filepath2+'TD/'
# GTFS_UC2 = GTFS_filepath2+'UC/'
# GTFS_VC2 = GTFS_filepath2+'VC/'
# GTFS_VN2 = GTFS_filepath2+'VN/'
# GTFS_VT2 = GTFS_filepath2+'VT/'
# GTFS_WC2 = GTFS_filepath2+'WC/'
# GTFS_WH2 = GTFS_filepath2+'WH/'

# GTFS_baseline = [GTFS_3D,
#                     GTFS_AC,
#                     GTFS_AM,
#                     GTFS_AY,
#                     GTFS_BA,
#                     GTFS_Caltrain,
#                     GTFS_CC,
#                     GTFS_CE,
#                     GTFS_CM,
#                     GTFS_CT,
#                     GTFS_DE,
#                     GTFS_EM,
#                     GTFS_GG,
#                     GTFS_HF,
#                     GTFS_MA,
#                     GTFS_PE,
#                     GTFS_RV,
#                     GTFS_SB,
#                     GTFS_SC,
#                     GTFS_SF,
#                     GTFS_SM,
#                     GTFS_SO,
#                     GTFS_SR,
#                     GTFS_ST,
#                     GTFS_TD,
#                     GTFS_UC,
#                     GTFS_VC,
#                     GTFS_VN,
#                     GTFS_VT,
#                     GTFS_WC,
#                     GTFS_WH]

# GTFS_TR = [GTFS_3D2,
#                     GTFS_AC2,
#                     GTFS_AM2,
#                     GTFS_AY2,
#                     GTFS_BA2,
#                     GTFS_Caltrain2,
#                     GTFS_CC2,
#                     GTFS_CE2,
#                     GTFS_CM2,
#                     GTFS_CT2,
#                     GTFS_DE2,
#                     GTFS_EM2,
#                     GTFS_GG2,
#                     GTFS_HF2,
#                     GTFS_MA2,
#                     GTFS_PE2,
#                     GTFS_RV2,
#                     GTFS_SB2,
#                     GTFS_SC2,
#                     GTFS_SF2,
#                     GTFS_SM2,
#                     GTFS_SO2,
#                     GTFS_SR2,
#                     GTFS_ST2,
#                     GTFS_TD2,
#                     GTFS_UC2,
#                     GTFS_VC2,
#                     GTFS_VN2,
#                     GTFS_VT2,
#                     GTFS_WC2,
#                     GTFS_WH2]
# GTFS_filepaths = [GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR,GTFS_baseline, GTFS_TR]



# nrows = None



# #Ridership route
# RR = pd.DataFrame()
# #Ridership agency
# RA = pd.DataFrame()

# for fp, name, GTFS_filepath in zip(dataFilepath_sim,names, GTFS_filepaths):
#     print('evaluate ridership')
#     #import pathtraversal and person enter vehicles
#     PT, PE = processEvents(fp, nrows)
#     #filter PE transit trips from PT
#     PE = filter_transit(PT, PE)
#     #Guess transit agency for each PE
#     PE = guess_agency(PE)
#     #Guess transit route for each PE
#     PE = guess_route(PE, GTFS_filepath)
#     #Route Ridership
#     rr = PE['route_id'].value_counts()
#     sum_agency = 0
#     sum_agency_bsva = 0
#     sum_route = 0
#     for route, count in zip(rr.keys(), rr):
#         RR.at[route, name] = count
#         sum_route += count
#     ra = PE['agency'].value_counts()
#     for agency, count in zip(ra.keys(), ra):
#         RA.at[agency, name] = count
#         sum_agency += count
#         if agency in ['BA', 'SF', 'VT', 'AC']:
#             RA.at[agency, name+' BA-SF-VT-AC'] = count
#             sum_agency_bsva += count
#     for route, count in zip(rr.keys(), rr):
#         RR.at[route, name+' shares'] = count/sum_route
#     ra = PE['agency'].value_counts()
#     for agency, count in zip(ra.keys(), ra):
#         RA.at[agency, name+' shares'] = count/sum_agency
#         if agency in ['BA', 'SF', 'VT', 'AC']:
#             RA.at[agency, name+' shares BA-SF-VT-AC'] = count/sum_agency_bsva
#     RA.to_csv(output_filepath+RA_output)
#     RR.to_csv(output_filepath+RR_output)

# for year in years:
#     diff = []
#     diff_abs = []
#     for baseline, future in zip(RA['Baseline'+year],RA['Future'+year]):
#         diff.append((future-baseline)/baseline)
#         diff_abs.append((future-baseline))
#     RA['Diff %'+year] = diff
#     RA['Diff'+year] = diff_abs

# tot_NTD = 1756364558 + 15283299+5703705+49795740+110802986+7386518+49247910+27027693 + 8437926+50222832 + 2818648
    
# RA.at['BA', 'clipper BA-SF-VT-AC 2016 share target'] = 350485/773719
# RA.at['BA', 'MTC BA-SF-VT-AC 2016 share target'] = 458900/1564500
# RA.at['BA', 'NTD BA-SF-VT-AC 2019 share target'] = (1756364558 + 15283299)/tot_NTD

# RA.at['SF', 'clipper BA-SF-VT-AC 2016 share target'] = 293991/773719
# RA.at['SF', 'MTC BA-SF-VT-AC 2016 share target'] = 777000/1564500
# RA.at['SF', 'NTD BA-SF-VT-AC 2019 share target'] = (5703705+49795740+110802986+7386518+49247910)/tot_NTD

# RA.at['VT', 'clipper BA-SF-VT-AC 2016 share target'] = 43950/773719
# RA.at['VT', 'MTC BA-SF-VT-AC 2016 share target'] = 146700/1564500
# RA.at['VT', 'NTD BA-SF-VT-AC 2019 share target'] = (27027693 + 8437926)/tot_NTD

# RA.at['AC', 'clipper BA-SF-VT-AC 2016 share target'] = 85293/773719
# RA.at['AC', 'MTC BA-SF-VT-AC 2016 share target'] = 181900/1564500
# RA.at['AC', 'NTD BA-SF-VT-AC 2019 share target'] = (50222832 + 2818648)/tot_NTD

# RA.at['BA', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 350485
# RA.at['BA', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 458900
# RA.at['BA', 'NTD BA-SF-VT-AC 2019 ridership target'] = (1756364558 + 15283299)

# RA.at['SF', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 293991
# RA.at['SF', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 777000
# RA.at['SF', 'NTD BA-SF-VT-AC 2019 ridership target'] = (5703705+49795740+110802986+7386518+49247910)

# RA.at['VT', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 43950
# RA.at['VT', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 146700
# RA.at['VT', 'NTD BA-SF-VT-AC 2019 ridership target'] =  (27027693 + 8437926)

# RA.at['AC', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 85293
# RA.at['AC', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 181900
# RA.at['AC', 'NTD BA-SF-VT-AC 2019 ridership target'] = (50222832 + 2818648)

# RA.to_csv(output_filepath+RA_output)
# RR.to_csv(output_filepath+RR_output)






# RA



# RR[RR.index.str.contains('AC:1-142')]



# RR[RR.index.str.contains('AC:1T-142')]





# RR[RR.index.str.contains('CA:')]





# RR[RR.index.str.contains('SF:1000')]



# #The Central Subway Project will improve public transportation in San Francisco by extending the Muni Metro T Third Line through SoMa

# #J
# RR[RR.index.str.contains('SF:12475')]



# #K/T
# RR[RR.index.str.contains('SF:12476')]



# #L
# RR[RR.index.str.contains('SF:12477')]



# #M
# RR[RR.index.str.contains('SF:12478')]



# #N
# RR[RR.index.str.contains('SF:12479')]



# for year in years:
    
#     print('total SF light rail baseline'+year,
#          (list(RR[RR.index.str.contains('SF:12475')]['Baseline'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12476')]['Baseline'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12477')]['Baseline'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12478')]['Baseline'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12479')]['Baseline'+year])[0])
#          )
#     print('total SF light rail future'+year,
#          (list(RR[RR.index.str.contains('SF:12475')]['Future'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12476')]['Future'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12477')]['Future'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12478')]['Future'+year])[0]+
#           list(RR[RR.index.str.contains('SF:12479')]['Future'+year])[0]+
#           list(RR[RR.index.str.contains('SF:1000')]['Future'+year])[0])
#          )

# #Imports
# import numpy as np
# import pandas as pd
# import difflib
# import matplotlib.pyplot as plt
# import time

# #########Analysis of what agents used in the baseline
# index_baseline =6 
# index_future =7
# fp_PtoPTss_baseline = '/Users/cpoliziani/Downloads/personToVehicles.csv.gz'
# fp_PtoPTss_future = '/Users/cpoliziani/Downloads/personToVehicles (1).csv.gz'

# # print('read PtoPTss baseline',fp_PtoPTss_baseline)
# PtoPTss_baseline = pd.read_csv(fp_PtoPTss_baseline)
# # print('read PtoPTss TR', fp_PtoPTss_future)
# PtoPTss_TR = pd.read_csv(fp_PtoPTss_future)
# # print('read plans baseline',dataFilepath_sim[index_baseline][:-13]+'plans.csv.gz')
# plans_baseline = pd.read_csv(dataFilepath_sim[index_baseline][:-13]+'plans.csv.gz')
# # print('read plans TR', dataFilepath_sim[index_future][:-13]+'plans.csv.gz')
# plans_TR = pd.read_csv(dataFilepath_sim[index_future][:-13]+'plans.csv.gz')
# # print('read GTFS trips fpr AC, SF and CA')
# line_1T_trips_TR = pd.read_csv(GTFS_AC2+'trips.txt')
# line_1T_trips_TR = line_1T_trips_TR[line_1T_trips_TR['route_id'].str.contains('1T-142')]
# line_CS_trips_TR = pd.read_csv(GTFS_SF2+'trips.txt')
# line_CS_trips_TR = line_CS_trips_TR[line_CS_trips_TR['route_id']==1000]
# line_CA_trips_TR = pd.read_csv(GTFS_Caltrain2+'trips.txt')
# # print('read mode choice')
# mode_choice_baseline = pd.read_csv(dataFilepath_sim[index_baseline])
# mode_choice_TR = pd.read_csv(dataFilepath_sim[index_future])
# mode_choice_baseline = mode_choice_baseline[mode_choice_baseline['type']=='ModeChoice']
# mode_choice_TR = mode_choice_TR[mode_choice_TR['type']=='ModeChoice']

# PtoPTss_AC_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('AC', na=False)]
# PtoPTss_SF_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('SF', na=False)]
# PtoPTss_CA_TR = PtoPTss_TR[PtoPTss_TR['vehicle2'].str.contains('Ca', na=False)]

# #Filter SF trips - CS only
# is_CS = []
# vehicles = list(line_CS_trips_TR['trip_id'])
# for vehicle in PtoPTss_SF_TR['vehicleID']:
#     if int(vehicle[3:]) in vehicles:
#         is_CS.append(True)
#     else:
#         is_CS.append(False)
# print('Filter CS trips [tot, remained]',len(is_CS),sum(is_CS))
# PtoPTss_SF_TR['is_CS'] = is_CS
# PtoPTss_SF_TR = PtoPTss_SF_TR[PtoPTss_SF_TR['is_CS']==True]


# is_AC = []
# vehicles = list(line_1T_trips_TR['trip_id'])
# for vehicle in PtoPTss_AC_TR['vehicleID']:
#     if int(vehicle[3:]) in vehicles:
#         is_AC.append(True)
#     else:
#         is_AC.append(False)
# print('Filter 1T trips [tot, remained]',len(is_AC),sum(is_AC))
# PtoPTss_AC_TR['is_AC'] = is_AC
# PtoPTss_AC_TR = PtoPTss_AC_TR[PtoPTss_AC_TR['is_AC']==True]

# persons_baseline = pd.DataFrame()
# i = 0
# trips_TR = pd.concat([line_CA_trips_TR['trip_id'],line_CS_trips_TR['trip_id'],line_1T_trips_TR['trip_id']])
# ##############################
# # PtoPTss_TR2=pd.concat([PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR])
# # PtoPTss_TR2_dict = PtoPTss.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
# #                                                     sum(x.duration)]).to_dict()
# # print(len(PtoPTss_CA_TR))
# PtoPTss_TR_CA_dict = PtoPTss_CA_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                     sum(x.duration)]).to_dict()
# # print(len(PtoPTss_TR_CA_dict.keys()))
# # print(len(PtoPTss_SF_TR))
# PtoPTss_TR_SF_dict = PtoPTss_SF_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                     sum(x.duration)]).to_dict()
# # print(len(PtoPTss_TR_SF_dict.keys()))
# # print(len(PtoPTss_AC_TR))
# PtoPTss_TR_AC_dict = PtoPTss_AC_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                     sum(x.duration)]).to_dict()
# # print(len(PtoPTss_TR_AC_dict.keys()))
# # print(len(PtoPTss_baseline))
# PtoPTss_baseline_dict = PtoPTss_baseline.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                     sum(x.duration)]).to_dict()
# # print(len(PtoPTss_baseline_dict.keys()))
# # print(len(PtoPTss_TR))
# PtoPTss_TR_dict = PtoPTss_TR.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
#                                                 sum(x.duration)]).to_dict()
# print(len(PtoPTss_TR_dict.keys()))


# #Add logsum
# person_trips_baseline = pd.read_csv('/Users/cpoliziani/Downloads/tourTripsMerged_Baseline.csv')
# person_trips_baseline['trip_num']=person_trips_baseline['trip_num']*2-1
# person_trips_TR = pd.read_csv('/Users/cpoliziani/Downloads/tourTripsMerged_TR.csv')
# person_trips_TR['trip_num']=person_trips_TR['trip_num']*2-1


# def addGeometryIdToDataFrame(df, gdf, xcol, ycol, idColumn="geometry", df_geom='epsg:32610'): 
#     gdf.set_crs(epsg = "3310", inplace = True)
#     gdf_data = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[xcol], df[ycol]))
#     gdf_data.set_crs(epsg = "32610", inplace = True)
#     joined = gpd.sjoin(gdf_data.to_crs('epsg:26910'), gdf.to_crs('epsg:26910'))
#     gdf_data = gdf_data.merge(joined['ZCTA'], left_index=True, right_index=True, how="left")
#     gdf_data.rename(columns={'ZCTA': idColumn}, inplace=True)
#     df = pd.DataFrame(gdf_data.drop(columns='geometry'))
# #     df.drop(columns=[xcol, ycol], inplace=True)
#     return df.loc[~df.index.duplicated(keep='first'), :]

# persons_baseline = pd.DataFrame(columns = ['Person',
#                                            'Plan Index',
#                                            'Planned Depart Time Baseline',
#                                            'Planned Depart Time TR',
#                                            'Activity From Baseline',
#                                            'Activity To Baseline',
#                                            'Activity From TR',
#                                            'Activity To TR',
#                                            'Project Tried',
#                                            'Vehicles Used Baseline',
#                                            'Vehicle Types Used Baseline',                                          
#                                            'Bus agencies Used Baseline',
#                                            'First Bus agency Used Baseline',
#                                            'Vehicles Used TR',
#                                            'Vehicle Types Used TR',                                          
#                                            'Bus agencies Used TR',
#                                            'First Bus agency Used TR',
#                                            'Switch From',
#                                            'Trip Length Baseline',                       
#                                            'Trip Duration Baseline',
#                                            'Trip Length TR',
#                                            'Trip Duration TR',
#                                            'Diff Length',
#                                            'Diff Duration',
#                                            'Planned Mode Baseline',                                         
#                                            'Planned Mode TR',                                          
#                                            'Chosen Mode Baseline',                                          
#                                            'Chosen Mode TR',
#                                            'X Activity From TR',
#                                            'Y Activity From TR',
#                                            'X Activity To TR',
#                                            'Y Activity To TR',
#                                            'X Activity From Baseline',
#                                            'Y Activity From Baseline',
#                                            'X Activity To Baseline',
#                                            'Y Activity To Baseline',
#                                            'Log Sum Baseline',
#                                            'Log Sum TR',
#                                            'Diff Log Sum TR',
# #                                            'ZIP Departure TR',                                          
# #                                            'ZIP Arrival TR',                                          
# #                                            'ZIP Departure Baseline',                                          
# #                                            'ZIP Arrival Baseline',                                          
#                                           ])

# PtoPTss_baseline_bus =  PtoPTss_baseline[(PtoPTss_baseline['mode'] =='bus')|
#                                          (PtoPTss_baseline['mode'] =='subway')|
#                                          (PtoPTss_baseline['mode'] =='tram')|
#                                          (PtoPTss_baseline['mode'] =='rail')|
#                                          (PtoPTss_baseline['mode'] =='cable_car')|
#                                          (PtoPTss_baseline['mode'] =='ferry')]
# PtoPTss_TR_bus =  PtoPTss_TR[(PtoPTss_TR['mode'] =='bus')|
#                                          (PtoPTss_TR['mode'] =='subway')|
#                                          (PtoPTss_TR['mode'] =='tram')|
#                                          (PtoPTss_TR['mode'] =='rail')|
#                                          (PtoPTss_TR['mode'] =='cable_car')|
#                                          (PtoPTss_TR['mode'] =='ferry')]   
# print(len(PtoPTss_TR_SF_dict))
# print(len(PtoPTss_TR_CA_dict))
# print(len(PtoPTss_TR_AC_dict))


# i=0      
# for row in PtoPTss_TR_SF_dict.keys():
#     i+=1
#     if i%50==0:
#         print(i)
#         break
#     try:
#         persons_baseline.at[i,'Person'] = row[0]
#     except:
#         print('Warning', row, 'person')
#     try:
#         persons_baseline.at[i,'Plan Index'] = int(row[1])
#     except:
#         print('Warning', row, 'plan_index')
#     try:
#         persons_baseline.at[i,'Activity From Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From Baseline')
#     try:
#         persons_baseline.at[i,'Activity To Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To Baseline')
#     try:
#         persons_baseline.at[i,'Activity From TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From TR')
#     try:
#         persons_baseline.at[i,'Activity To TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To TR')  
#     try:
#         persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_baseline')
#     try:
#         persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_TR')
#     try:
#         persons_baseline.at[i,'Project Tried'] = 'SF - Central Subway'
#     except:
#         print('Warning', row, 'project tried')
#     try:
#         persons_baseline.at[i,'Log Sum Baseline'] = np.unique(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])&(person_trips_baseline['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum Baseline')
#     try:
#         persons_baseline.at[i,'Log Sum TR'] = np.unique(list(person_TR['mode_choice_logsum_y'][(person_TR['person_id']==row[0])&(person_TR['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum TR')
#     try:
#         persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
#     except:
#         print('Warning', row, 'Diff Log Sum TR')
#     try:
#         persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used Baseline')
#     try:
#         persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used TR')
#     try:
#         persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used Baseline')
#     try:
#         persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used TR') 
#     try:
#         persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used Baseline')
#     try:
#         persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used TR')
#     try:
#         persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
#     except:
#         print('Warning', row, 'TR_length')
#     try:
#         persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
#     except:
#         print('Warning', row, 'TR_duration')
#     try:
#         persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
#     except:
#         print('Warning', row, 'baseline_length')
#     try:
#         persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
#     except:
#         print('Warning', row, 'baseline_duration')
#     try:
#         persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
#     except:
#         print('Warning', row, 'diff_length')
#     try:
#         persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
#     except:
#         print('Warning', row, 'diff_duration')
#     try:
#         persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_TR')
#     try:
#         persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_TR')
#     try:
#         persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_TR')
#     try:
#         persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_TR')   
#     try:
#         persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_Baseline')
#     try:
#         persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_Baseline')   
#     try:
#         persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used baseline')
#     try:
#         persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used TR')
#     try:
#         persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'Baseline planned mode')
#     try:
#         persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'TR planned mode')
#     try:
#         plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_baseline')
#     try:
#         persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_TR')
        
# ####################################
# for row in PtoPTss_TR_CA_dict.keys():
#     i+=1
#     if i%50==0:
#         print(i)
#         break
        
#     try:
#         persons_baseline.at[i,'Person'] = row[0]
#     except:
#         print('Warning', row, 'person')
#     try:
#         persons_baseline.at[i,'Plan Index'] = int(row[1])
#     except:
#         print('Warning', row, 'plan_index')
#     try:
#         persons_baseline.at[i,'Activity From Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From Baseline')
#     try:
#         persons_baseline.at[i,'Activity To Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To Baseline')
#     try:
#         persons_baseline.at[i,'Activity From TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From TR')
#     try:
#         persons_baseline.at[i,'Activity To TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To TR')  
#     try:
#         persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_baseline')
#     try:
#         persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_TR')
#     try:
#         persons_baseline.at[i,'Project Tried'] = 'CA - Electrification Project'
#     except:
#         print('Warning', row, 'project tried')
#     try:
#         persons_baseline.at[i,'Log Sum Baseline'] = np.unique(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])&(person_trips_baseline['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum Baseline')
#     try:
#         persons_baseline.at[i,'Log Sum TR'] = np.unique(list(person_TR['mode_choice_logsum_y'][(person_TR['person_id']==row[0])&(person_TR['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum TR')
#     try:
#         persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
#     except:
#         print('Warning', row, 'Diff Log Sum TR')
#     try:
#         persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used Baseline')
#     try:
#         persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used TR')
#     try:
#         persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used Baseline')
#     try:
#         persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used TR')
#     try:
#         persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used Baseline')
#     try:
#         persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used TR')  
#     try:
#         persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
#     except:
#         print('Warning', row, 'TR_length')
#     try:
#         persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
#     except:
#         print('Warning', row, 'TR_duration')
#     try:
#         persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
#     except:
#         print('Warning', row, 'baseline_length')
#     try:
#         persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
#     except:
#         print('Warning', row, 'baseline_duration')
#     try:
#         persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
#     except:
#         print('Warning', row, 'diff_length')
#     try:
#         persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
#     except:
#         print('Warning', row, 'diff_duration')
#     try:
#         persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_TR')
#     try:
#         persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_TR')
#     try:
#         persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_TR')
#     try:
#         persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_TR')   
#     try:
#         persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_Baseline')
#     try:
#         persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_Baseline')   
#     try:
#         persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used baseline')
#     try:
#         persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used TR')
#     try:
#         persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'Baseline planned mode')
#     try:
#         persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'TR planned mode')
#     try:
#         plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_baseline')
#     try:
#         persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_TR')
        
# ####################################
# for row in PtoPTss_TR_AC_dict.keys():
#     i+=1
#     if i%50==0:
#         print(i) 
#         break
        
#     try:
#         persons_baseline.at[i,'Person'] = row[0]
#     except:
#         print('Warning', row, 'person')
#     try:
#         persons_baseline.at[i,'Plan Index'] = int(row[1])
#     except:
#         print('Warning', row, 'plan_index')    
#     try:
#         persons_baseline.at[i,'Activity From Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From Baseline')
#     try:
#         persons_baseline.at[i,'Activity To Baseline'] = int(list(plans_baseline['activityType'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To Baseline')
#     try:
#         persons_baseline.at[i,'Activity From TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'Activity From TR')
#     try:
#         persons_baseline.at[i,'Activity To TR'] = int(list(plans_TR['activityType'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0])
#     except:
#         print('Warning', row, 'Activity To TR')   
#     try:
#         persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_baseline')
#     try:
#         persons_baseline.at[i,'Planned Depart Time TR'] = int(list(plans_TR['activityEndTime'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0])
#     except:
#         print('Warning', row, 'plan_dep_time_TR')
#     try:
#         persons_baseline.at[i,'Project Tried'] = 'AC - 1TEMPO'
#     except:
#         print('Warning', row, 'project tried')
#     try:
#         persons_baseline.at[i,'Log Sum Baseline'] = np.unique(list(person_trips_baseline['mode_choice_logsum_y'][(person_trips_baseline['person_id']==row[0])&(person_trips_baseline['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum Baseline')
#     try:
#         persons_baseline.at[i,'Log Sum TR'] = np.unique(list(person_TR['mode_choice_logsum_y'][(person_TR['person_id']==row[0])&(person_TR['trip_num']==row[1])]))
#     except:
#         print('Warning', row, 'Log Sum TR')
#     try:
#         persons_baseline.at[i,'Diff Log Sum'] = persons_baseline.at[i,'Log Sum TR']-persons_baseline.at[i,'Log Sum Baseline']
#     except:
#         print('Warning', row, 'Diff Log Sum TR')
#     try:
#         persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used Baseline')
#     try:
#         persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR['vehicleID'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicles Used TR')    
#     try:
#         persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used Baseline')
#     try:
#         persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Bus agencies Used TR')
#     try:
#         persons_baseline.at[i,'First Bus agency Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used Baseline')
#     try:
#         persons_baseline.at[i,'First Bus agency Used TR'] = np.unique(list(PtoPTss_TR_bus['vehicle2'][(PtoPTss_TR_bus['personID']==row[0])&(PtoPTss_TR_bus['planIndex']==row[1])]))[0]
#     except:
#         print('Warning', row, 'First Bus agency Used TR')
#     try:
#         persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])
#     except:
#         print('Warning', row, 'TR_length')
#     try:
#         persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
#     except:
#         print('Warning', row, 'TR_duration')
#     try:
#         persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])
#     except:
#         print('Warning', row, 'baseline_length')
#     try:
#         persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])
#     except:
#         print('Warning', row, 'baseline_duration')
#     try:
#         persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])
#     except:
#         print('Warning', row, 'diff_length')
#     try:
#         persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
#     except:
#         print('Warning', row, 'diff_duration')
#     try:
#         persons_baseline.at[i,'X Activity From TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_TR')
#     try:
#         persons_baseline.at[i,'Y Activity From TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_TR')
#     try:
#         persons_baseline.at[i,'X Activity To TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_TR')
#     try:
#         persons_baseline.at[i,'Y Activity To TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_TR')   
#     try:
#         persons_baseline.at[i,'X Activity From Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'x_activity_from_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity From Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#     except:
#         print('Warning', row, 'y_activity_From_Baseline')
#     try:
#         persons_baseline.at[i,'X Activity To Baseline'] = list(plans_baseline['activityLocationX'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'x_activity_To_Baseline')
#     try:
#         persons_baseline.at[i,'Y Activity To Baseline'] = list(plans_baseline['activityLocationY'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)])[0]
#     except:
#         print('Warning', row, 'y_activity_To_Baseline')   
#     try:
#         persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline['mode'][(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used baseline')
#     try:
#         persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR['mode'][(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]))
#     except:
#         print('Warning', row, 'Vehicle Types Used TR')
#     try:
#         persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'Baseline planned mode')
#     try:
#         persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]
#     except:
#         print('Warning', row, 'TR planned mode')
#     try:
#         plan_dep_time_baseline = list(plans_baseline['activityEndTime'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)])[0]
#         persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']>=plan_dep_time_baseline-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_baseline')
#     try:
#         persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']>=persons_baseline.at[i,'Planned Depart Time TR']-1)])[0]
#     except:
#         print('Warning', row, 'mode_choice_TR')

        
# zipcode = gpd.read_file('/Users/cpoliziani/Downloads/TAZ to ZIP/ZCTA2010.shp')
# persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity From TR', 'Y Activity From TR', 'ZIP Departure TR')
# persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity To TR', 'Y Activity To TR', 'ZIP Arrival TR')
# persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity From Baseline', 'Y Activity From Baseline', 'ZIP Departure Baseline')
# persons_baseline = addGeometryIdToDataFrame(persons_baseline, zipcode, 'X Activity To Baseline', 'Y Activity To Baseline', 'ZIP Arrival Baseline')


# #Add Switch From Column
# switch_type = []
# for agencies_baseline, project in zip(persons_baseline['Bus agencies Used Baseline'],persons_baseline['Project Tried']):
#     if len(agencies_baseline) == 0:
#         switch_type.append('Switch from another mode')
#     elif project == 'SF - Central Subway' and 'SF' in agencies_baseline:
#         switch_type.append('Switch from same transit agency')
#     elif project == 'AC - 1TEMPO' and 'AC' in agencies_baseline:
#         switch_type.append('Switch from same transit agency')
#     elif project == 'CA - Electrification Project' and 'ca' in agencies_baseline:
#         switch_type.append('Switch from same transit agency')
#     else:
#         switch_type.append('Switch from another transit agency')

# persons_baseline['Switch From'] = switch_type

# persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')


# person_trips_baseline





# persons_baseline_AC = persons_baseline[persons_baseline['Project Tried']=='AC - 1TEMPO']

# persons_baseline_AC.dropna(axis=0)

# sum(persons_baseline_AC['Diff Duration'])

# sum(persons_baseline_AC['Diff Length'])







# events = pd.read_csv('s3://beam-outputs/pilates-outputs/sfbay-baseline-20220816/beam/year-2018-iteration-5/ITERS/it.0/0.events.csv.gz', nrows = 900000)




# veh = events[['vehicle','type']].dropna()
# veh = veh[(veh['vehicle'].str.contains('rideHail'))&(veh['type']=='PathTraversal')]
# vei = []
# for ve in veh['vehicle']:
#     vei.append(ve[-10:])

# np.unique(vei, return_counts=True)

# pd.read_csv('/Users/cpoliziani/Downloads/0.skimsRidehail (1).csv.gz')












# # for row in PtoPTss_TR_CA_dict.keys():
# #     i+=1
# #     if i%1000==0:
# #         print(i)
# #     try:
# #         persons_baseline.at[i,'person'] = row[0]
# #         persons_baseline.at[i,'plan_index'] = row[1]
# #         persons_baseline.at[i,'project tried'] = 'CA - Electrification Project'
# #         persons_baseline.at[i,'TR_length'] = PtoPTss_TR_dict[row][0]
# #         persons_baseline.at[i,'TR_duration'] = PtoPTss_TR_dict[row][1]
# #         persons_baseline.at[i,'baseline_length'] = PtoPTss_baseline_dict[row][0]
# #         persons_baseline.at[i,'baseline_duration'] = PtoPTss_baseline_dict[row][1]
# #         persons_baseline.at[i,'diff_length'] = persons_baseline.at[i,'TR_length']-persons_baseline.at[i,'baseline_length']
# #         persons_baseline.at[i,'diff_duration'] = persons_baseline.at[i,'TR_duration']-persons_baseline.at[i,'baseline_duration']
# #         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
# #     except:
# #         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
# #         persons_baseline.at[i,'person'] = row[0]
# #         persons_baseline.at[i,'plan_index'] = row[1]
# #         persons_baseline.at[i,'project tried'] = 'CA - Electrification Project'
# #         print('WARNING:',row[0], row[1]) 


# # for row in PtoPTss_TR_AC_dict.keys():
# #     i+=1
# #     if i%1000==0:
# #         print(i)
# #     try:
# #         persons_baseline.at[i,'person'] = row[0]
# #         persons_baseline.at[i,'plan_index'] = row[1]
# #         persons_baseline.at[i,'project tried'] = 'AC - 1 Tempo BRT Line'
# #         persons_baseline.at[i,'TR_length'] = PtoPTss_TR_dict[row][0]
# #         persons_baseline.at[i,'TR_duration'] = PtoPTss_TR_dict[row][1]
# #         persons_baseline.at[i,'baseline_length'] = PtoPTss_baseline_dict[row][0]
# #         persons_baseline.at[i,'baseline_duration'] = PtoPTss_baseline_dict[row][1]
# #         persons_baseline.at[i,'diff_length'] = persons_baseline.at[i,'TR_length']-persons_baseline.at[i,'baseline_length']
# #         persons_baseline.at[i,'diff_duration'] = persons_baseline.at[i,'TR_duration']-persons_baseline.at[i,'baseline_duration']
# #         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
# #     except:
# #         persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)])[0]
# #         persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]
# #         persons_baseline.at[i,'person'] = row[0]
# #         persons_baseline.at[i,'plan_index'] = row[1]
# #         persons_baseline.at[i,'project tried'] = 'AC - 1 Tempo BRT Line'
# #         print('WARNING:',row[0], row[1])




# ##############
# # trips_TR = pd.concat([line_CA_trips_TR['trip_id'],line_CS_trips_TR['trip_id'],line_1T_trips_TR['trip_id']])
# # for vehicle, person, plan_index, vehicle2 in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'],PtoPTss['vehicle2'] ):
# #     if int(vehicle.split(':')[1]) in list(trips_TR):
# #         i+=1
# #         if i%1000==0:
# #             print(i)
# #         try:

# #             persons_baseline.at[i,'person'] = person
# #             persons_baseline.at[i,'plan_index'] = plan_index
# #             persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #             persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #             persons_baseline.at[i,'vehicle_now'] = vehicle
# #             if vehicle2 == 'SF':
# #                 persons_baseline.at[i,'project'] = 'CS'
# #             if vehicle2 == 'Ca':
# #                 persons_baseline.at[i,'project'] = 'CA'
# #             if vehicle2 == 'AC':
# #                 persons_baseline.at[i,'project'] = '1T'

# #             persons_baseline.at[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
# #             persons_baseline.at[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
# #             persons_baseline.at[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
# #             persons_baseline.at[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
# #             persons_baseline.at[i,'diff_speed'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
# #         except:
# #             persons_baseline.at[i,'person'] = person
# #             persons_baseline.at[i,'plan_index'] = plan_index
# #             persons_baseline.at[i,'vehicle_now'] = vehicle
# #             if vehicle2 == 'SF':
# #                 persons_baseline.at[i,'project'] = 'CS'
# #             if vehicle2 == 'Ca':
# #                 persons_baseline.at[i,'project'] = 'CA'
# #             if vehicle2 == 'AC':
# #                 persons_baseline.at[i,'project'] = '1T'

# #             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)
# #     print('Warning')




# # persons_baseline = pd.DataFrame(['person','plan_index','mode_baseline','vehicle_baseline','agency_baseline',
# #                                  'x_activity_TR','y_activity_TR','vehicle_now'])
# # i = 0
# # PtoPTss=pd.concat(PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR)
# # for vehicle, person, plan_index in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'] ):
# #     if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
# #         i+=1
# #         persons_baseline.loc[i,'person'] = person
# #         persons_baseline.loc[i,'plan_index'] = plan_index
# #         persons_baseline.loc[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
# #         persons_baseline.loc[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #         persons_baseline.loc[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #         persons_baseline.loc[i,'vehicle_now'] = '1T'
# #         try:
# #             persons_baseline.loc[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.loc[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
# #             persons_baseline.loc[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
# #             persons_baseline.loc[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
# #             persons_baseline.loc[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
# #             persons_baseline.loc[i,'speed_length'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
# #         except:
# #             persons_baseline.loc[i,'vehicle_baseline'] = None
# #             persons_baseline.loc[i,'agency_baseline'] = None
# #             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


# # persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')

# # PtoPTss_baseline


# # persons_baseline = pd.DataFrame()
# # i = 0
# # PtoPTss=pd.concat([PtoPTss_AC_TR,PtoPTss_SF_TR,PtoPTss_CA_TR])
# # for vehicle, person, plan_index, vehicle2 in zip(PtoPTss['vehicleID'],PtoPTss['personID'],PtoPTss['planIndex'],PtoPTss['vehicle2'] ):
# #     if int(vehicle.split(':')[1]) in list(line_1T_trips_TR):
# #         i+=1

# #             persons_baseline.at[i,'person'] = person
# #             persons_baseline.at[i,'plan_index'] = plan_index
# #             persons_baseline.at[i,'mode_baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==person)&(plans_baseline['planElementIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'x_activity_TR'] = list(plans_TR['activityLocationX'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #             persons_baseline.at[i,'y_activity_TR'] = list(plans_TR['activityLocationY'][(plans_TR['personId']==person)&(plans_TR['planElementIndex']==plan_index-1)])[0]
# #             persons_baseline.at[i,'vehicle_now'] = vehicle
# #             if vehicle2 == 'SF':
# #                 persons_baseline.at[i,'project'] = 'CS'
# #             if vehicle2 == 'Ca':
# #                 persons_baseline.at[i,'project'] = 'CA'
# #             if vehicle2 == 'AC':
# #                 persons_baseline.at[i,'project'] = '1T'
# #         try:

# #             persons_baseline.at[i,'vehicle_baseline'] = list(PtoPTss_baseline['vehicleID'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'agency_baseline'] = list(PtoPTss_baseline['vehicle2'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'length_baseline'] = list(PtoPTss_baseline['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'length_TR'] = list(PtoPTss_TR['length'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'duration_baseline'] = list(PtoPTss_baseline['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'duration_TR'] = list(PtoPTss_TR['duration'][(PtoPTss_baseline['personID']==person)&(PtoPTss_baseline['planIndex']==plan_index)])[0]
# #             persons_baseline.at[i,'speed_baseline'] = persons_baseline.loc[i,'length_baseline']/persons_baseline.loc[i,'duration_baseline']
# #             persons_baseline.at[i,'speed_TR'] = persons_baseline.loc[i,'length_TR']/persons_baseline.loc[i,'duration_TR']
# #             persons_baseline.at[i,'diff_length'] = persons_baseline.loc[i,'length_TR']-persons_baseline.loc[i,'length_baseline'] 
# #             persons_baseline.at[i,'diff_duration'] = persons_baseline.loc[i,'duration_TR']-persons_baseline.loc[i,'duration_baseline'] 
# #             persons_baseline.at[i,'diff_speed'] = persons_baseline.loc[i,'speed_TR']-persons_baseline.loc[i,'speed_baseline'] 
# #         except:
# #             persons_baseline.at[i,'person'] = person
# #             persons_baseline.at[i,'plan_index'] = plan_index
# #             persons_baseline.at[i,'vehicle_now'] = vehicle
# #             if vehicle2 == 'SF':
# #                 persons_baseline.at[i,'project'] = 'CS'
# #             if vehicle2 == 'Ca':
# #                 persons_baseline.at[i,'project'] = 'CA'
# #             if vehicle2 == 'AC':
# #                 persons_baseline.at[i,'project'] = '1T'

# #             print('PtoPTss_baseline not found person and plan index',person,plan_index, vehicle)


# # persons_baseline.to_csv('/Users/cpoliziani/Downloads/person_database.csv')








# LIRR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2020LIRR.csv')
# LIRR_capacities_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2020LIRR.csv')
# LIRR_capacities_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2021LIRR.csv')
# LIRR_capacities_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/jan2022LIRR.csv')
# LIRR_capacities_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2022LIRR.csv')

# MNR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2020MNR.csv')
# MNR_capacities_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2020MNR.csv')
# MNR_capacities_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/aug2021MNR.csv')
# MNR_capacities_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/jan2022MNR.csv')
# MNR_capacities_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/apr2022MNR.csv')

# # LIRR_capacities_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/LIRR_capacities.csv')
# # MNR_capacities = MNR_capacities.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# # LIRR_capacities = LIRR_capacities.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()


# capacities_data = [LIRR_capacities_apr2020,
#         LIRR_capacities_aug2020,
#         LIRR_capacities_aug2021,
#         LIRR_capacities_jan2022,
#         LIRR_capacities_may2022,
#         MNR_capacities_apr2020,
#         MNR_capacities_aug2020,
#         MNR_capacities_aug2021,
#         MNR_capacities_jan2022,
#         MNR_capacities_may2022,
#         ]

# for capacity_data in capacities_data:
#     trains = []
#     for train in capacity_data['Train']:
#         try:
#             trains.append(int(train))
#         except:
#             trains.append(train)
#     capacity_data['Train'] = trains
    


# LIRR_capacities_apr2020 = LIRR_capacities_apr2020.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
# LIRR_capacities_aug2020 = LIRR_capacities_aug2020.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
# LIRR_capacities_aug2021 = LIRR_capacities_aug2021.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
# LIRR_capacities_jan2022 = LIRR_capacities_jan2022.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()
# LIRR_capacities_may2022 = LIRR_capacities_may2022.groupby(['Train']).apply(lambda x: [list(x['Total Seats'])]).to_dict()

# MNR_capacities_apr2020 = MNR_capacities_apr2020.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# MNR_capacities_aug2020 = MNR_capacities_aug2020.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# MNR_capacities_aug2021 = MNR_capacities_aug2021.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# MNR_capacities_jan2022 = MNR_capacities_jan2022.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()
# MNR_capacities_may2022 = MNR_capacities_may2022.groupby(['Train']).apply(lambda x: [list(x['Total Seat'])]).to_dict()


# LIRR_trips_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Long_Island_Rail_20200318/trips.txt')
# LIRR_trips_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Long_Island_Rail_20200629/trips.txt')
# LIRR_trips_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Long_Island_Rail_20210726/trips.txt')
# LIRR_trips_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Long_Island_Rail_20211216/trips.txt')
# LIRR_trips_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Long_Island_Rail_20220430/trips.txt')

# LIRR_trips_apr2020['agency_id']='LI'
# LIRR_trips_aug2020['agency_id']='LI'
# LIRR_trips_aug2021['agency_id']='LI'
# LIRR_trips_jan2022['agency_id']='LI'
# LIRR_trips_may2022['agency_id']='LI'

# MNR_trips_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Metro-North_Railroad_20200325/trips.txt')
# MNR_trips_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Metro-North_Railroad_20200731/trips.txt')
# MNR_trips_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Metro-North_Railroad_20210721/trips.txt')
# MNR_trips_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Metro-North_Railroad_20211222/trips.txt')
# MNR_trips_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Metro-North_Railroad_20220429/trips.txt')

# MNR_trips_apr2020['agency_id']='1'
# MNR_trips_aug2020['agency_id']='1'
# MNR_trips_aug2021['agency_id']='1'
# MNR_trips_jan2022['agency_id']='1'
# MNR_trips_may2022['agency_id']='1'

# # MNR_routes_apr2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-1april2020/Metro-North_Railroad_20200325/routes.txt')
# # MNR_routes_aug2020 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5august2020/Metro-North_Railroad_20200731/routes.txt')
# # MNR_routes_aug2021 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-4august2021/Metro-North_Railroad_20210721/routes.txt')
# # MNR_routes_jan2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-5january2022/Metro-North_Railroad_20211222/routes.txt')
# # MNR_routes_may2022 = pd.read_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/r5-prod-11may2022/Metro-North_Railroad_20220429/routes.txt')

# # trips_data_MNR = [MNR_trips_apr2020,
# #         MNR_trips_aug2020,
# #         MNR_trips_aug2021,
# #         MNR_trips_jan2022,
# #         MNR_trips_may2022,
# #         ]

# # routes_data_MNR = [MNR_routes_apr2020,
# #         MNR_routes_aug2020,
# #         MNR_routes_aug2021,
# #         MNR_routes_jan2022,
# #         MNR_routes_may2022,
# #         ]

# # for trip_data_MNR, route_data_MNR, i  in zip(trips_data_MNR,routes_data_MNR, range(len(trips_data))):
# #     agencies =[]
# #     for route in trip_data_MNR['route_id']:
# #         agencies.append(list(route_data_MNR['agency_id'][route_data_MNR['route_id']==route])[0])
# #     trip_data_MNR['agency_id']=agencies


# # MNR_trips_apr2020 = trips_data_MNR[0]
# # MNR_trips_aug2020 = trips_data_MNR[1]
# # MNR_trips_aug2021 = trips_data_MNR[2]
# # MNR_trips_jan2022 = trips_data_MNR[3]
# # MNR_trips_may2022 = trips_data_MNR[4]

# trips_data = [LIRR_trips_apr2020,
#         LIRR_trips_aug2020,
#         LIRR_trips_aug2021,
#         LIRR_trips_jan2022,
#         LIRR_trips_may2022,
#         MNR_trips_apr2020,
#         MNR_trips_aug2020,
#         MNR_trips_aug2021,
#         MNR_trips_jan2022,
#         MNR_trips_may2022,
#         ]

# capacities_data = [LIRR_capacities_apr2020,
#         LIRR_capacities_aug2020,
#         LIRR_capacities_aug2021,
#         LIRR_capacities_jan2022,
#         LIRR_capacities_may2022,
#         MNR_capacities_apr2020,
#         MNR_capacities_aug2020,
#         MNR_capacities_aug2021,
#         MNR_capacities_jan2022,
#         MNR_capacities_may2022,
#         ]

# for trip_data, capacity_data in zip(trips_data, capacities_data):

#     capacities = []
#     wrong_ids = []
#     print('number of trip', len(trip_data),'number of capacities', len(capacity_data))
#     print('number of unique trip', len(np.unique(trip_data['trip_short_name'])))
#     for trip_short_name in trip_data['trip_short_name']:
#         try:
#             capacities.append(capacity_data[trip_short_name][0][0])
#         except:
#             try:
#                 capacities.append(capacity_data[float(trip_short_name)][0][0])
#             except:
#                 wrong_ids.append(trip_short_name)
#                 capacities.append(np.nan)
#     trip_data['capacity'] = capacities
#     print('Warning!! Not found',len(wrong_ids))
# #     print(trip_data)
#     print('######################')



# for trip_data in trips_data:
#     max_cap = max(list(trip_data['capacity'].dropna()))
#     print(max_cap)
#     trip_data['capacity'] = trip_data['capacity'].fillna(max_cap)




# trips_data_apr2020 = [trips_data[0],trips_data[5]]
# trips_data_aug2020 = [trips_data[1],trips_data[6]]
# trips_data_aug2021 = [trips_data[2],trips_data[7]]
# trips_data_jan2022 = [trips_data[3],trips_data[8]]
# trips_data_may2022 = [trips_data[4],trips_data[9]]

# transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
#                                            'routeId',
#                                            'tripId',
#                                            'capacity',
#                                             'vehicleTypeId',
#                                           ])
# i=0      
# for trip_data, j in zip(trips_data_apr2020,[0,1]):
#     for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
#         i+=1
#         if i%5000==0:
#             print(i)
#         transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
#         transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
#         if j == 0:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20200318:'+str(tripID)
#         elif j == 1:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20200325:'+str(tripID)
#         transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
#         transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

# transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_apr2020.csv')


# transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
#                                            'routeId',
#                                            'tripId',
#                                            'capacity',
#                                             'vehicleTypeId',
#                                           ])
# i=0      
# for trip_data, j in zip(trips_data_aug2020,[0,1]):
#     for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
#         i+=1
#         if i%5000==0:
#             print(i)
#         transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
#         transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
#         if j == 0:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20200629:'+str(tripID)
#         elif j == 1:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20200731:'+str(tripID)
#         transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
#         transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

# transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_aug2020.csv')



# transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
#                                            'routeId',
#                                            'tripId',
#                                            'capacity',
#                                             'vehicleTypeId',
#                                           ])
# i=0      
# for trip_data, j in zip(trips_data_aug2021,[0,1]):
#     for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
#         i+=1
#         if i%5000==0:
#             print(i)
#         transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
#         transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
#         if j == 0:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20210726:'+str(tripID)
#         elif j == 1:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20210721:'+str(tripID)
#         transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
#         transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

# transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_aug2021.csv')



# transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
#                                            'routeId',
#                                            'tripId',
#                                            'capacity',
#                                             'vehicleTypeId',
#                                           ])
# i=0      
# for trip_data, j in zip(trips_data_jan2022,[0,1]):
#     for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
#         i+=1
#         if i%5000==0:
#             print(i)
#         transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
#         transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
#         if j == 0:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20211216:'+str(tripID)
#         elif j == 1:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20211222:'+str(tripID)
#         transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
#         transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

# transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_jan2022.csv')



# transitVehicleTypesbyTrip = pd.DataFrame(columns = ['agencyId',
#                                            'routeId',
#                                            'tripId',
#                                             'capacity',
#                                            'vehicleTypeId',
#                                           ])
# i=0      
# for trip_data, j in zip(trips_data_may2022,[0,1]):
#     for agencyID, tripID, routeID, capacity in zip(trip_data['agency_id'], trip_data['trip_id'],trip_data['route_id'],trip_data['capacity']):
#         i+=1
#         if i%5000==0:
#             print(i)
#         transitVehicleTypesbyTrip.at[i,'agencyId'] = agencyID
#         transitVehicleTypesbyTrip.at[i,'routeId'] = routeID
#         if j == 0:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Long_Island_Rail_20220430:'+str(tripID)
#         elif j == 1:
#             transitVehicleTypesbyTrip.at[i,'tripId'] = 'Metro-North_Railroad_20220429:'+str(tripID)
#         transitVehicleTypesbyTrip.at[i,'capacity'] = capacity
#         transitVehicleTypesbyTrip.at[i,'vehicleTypeId'] = 'RAIL-DEFAULT'

# transitVehicleTypesbyTrip.to_csv('/Users/cpoliziani/Downloads/Data/EPI/Capacities/transitVehicleTypesbyTrip_may2022.csv')



# trips_data_apr2020[1]

# pd.read_csv('s3://beam-outputs/output/newyork/new-york-jan2022-0-of-10__2022-09-21_17-07-49_qdx/ITERS/it.10/10.events.csv.gz', nrows = 90000)




