In [13]:
#Imports
import numpy as np
import pandas as pd
import geopandas as gpd
import difflib
import matplotlib.pyplot as plt
import time
import zipfile

def processEvents(dataFilepath_sim, nrows):
    """Stream a gzipped events CSV and pull out the rows needed for ridership.

    The file is read in chunks of 2,500,000 rows. Only chunks that contain at
    least one 'PathTraversal' event are processed; from those chunks two kinds
    of rows are collected:

    * PathTraversal rows with ``length > 0``;
    * PersonEntersVehicle rows whose ``person`` does not contain 'Agent' and
      whose ``vehicle`` does not contain 'body'.

    Parameters
    ----------
    dataFilepath_sim : str
        Path/URL of the gzip-compressed events CSV.
    nrows : int or None
        Maximum number of rows to read (``None`` reads the whole file).

    Returns
    -------
    (Events_file_sim_PT, Events_file_sim_PE) : tuple of DataFrame
        PathTraversal rows with columns vehicle/time/endY/endX/startY/startX/mode
        and PersonEntersVehicle rows with columns vehicle/time (``time`` cast to
        int). Either frame is empty (but has its columns) when no matching
        event was found.
    """
    pt_columns = ['vehicle', 'time', 'endY', 'endX', 'startY', 'startX', 'mode']
    pe_columns = ['vehicle', 'time']

    PTs = []
    PEVs = []
    print('read', dataFilepath_sim)
    reader = pd.read_csv(dataFilepath_sim, compression='gzip', chunksize=2500000,
                         nrows=nrows, usecols=pt_columns + ['length', 'type', 'person'])
    for chunk in reader:
        is_path_traversal = chunk['type'] == 'PathTraversal'
        # NOTE(review): chunks without any PathTraversal are skipped entirely,
        # including their PersonEntersVehicle rows (kept from the original).
        if not is_path_traversal.any():
            continue

        chunk['vehicle'] = chunk['vehicle'].astype(str)
        PT = chunk.loc[is_path_traversal & (chunk['length'] > 0)].dropna(how='all', axis=1)
        # An empty selection loses every column in dropna(); do not collect it.
        if not PT.empty:
            PTs.append(PT)
        print(chunk.type.value_counts())

        is_boarding = chunk['type'] == 'PersonEntersVehicle'
        is_agent = chunk['person'].apply(str).str.contains('Agent', na=False)
        is_body = chunk['vehicle'].str.contains('body', na=False)
        PEV = chunk.loc[is_boarding & ~is_agent & ~is_body, :].dropna(how='all', axis=1)
        # `~PEV.empty` is always truthy for a Python bool (-1 / -2); an empty
        # PEV has no 'time' column left after dropna(), so it must be skipped.
        if not PEV.empty:
            PEVs.append(PEV.assign(time=PEV['time'].astype(int)))

    # pd.concat raises on an empty list, so fall back to empty, typed-by-name frames.
    if PTs:
        Events_file_sim_PT = pd.concat(PTs)[pt_columns]
    else:
        Events_file_sim_PT = pd.DataFrame(columns=pt_columns)
    if PEVs:
        Events_file_sim_PE = pd.concat(PEVs)[pe_columns]
    else:
        Events_file_sim_PE = pd.DataFrame(columns=pe_columns)
    print(Events_file_sim_PE)

    return Events_file_sim_PT, Events_file_sim_PE



#Filter transit trips
def filter_transit(Events_file_sim_PT,Events_file_sim_PE):
    """Restrict both event tables to transit vehicles.

    PathTraversal rows are kept when their ``mode`` is one of the transit
    modes below; PersonEntersVehicle rows are kept when their ``vehicle``
    appears among the remaining PathTraversal vehicles. The mode counts are
    printed before and after filtering.

    Note the return order: (PersonEntersVehicle, PathTraversal), i.e. the
    reverse of the argument order.
    """
    transit_modes = ['bus', 'tram', 'subway', 'cable_car', 'ferry', 'rail']

    print(Events_file_sim_PT['mode'].value_counts())
    is_transit_leg = Events_file_sim_PT['mode'].isin(transit_modes)
    transit_legs = Events_file_sim_PT[is_transit_leg]
    print(transit_legs['mode'].value_counts())

    boards_transit = Events_file_sim_PE['vehicle'].isin(transit_legs['vehicle'])
    transit_boardings = Events_file_sim_PE[boards_transit]

    return transit_boardings, transit_legs

def guess_agency(Events_file_sim_PE):
    """Add an 'agency' column derived from the prefix of each vehicle id.

    The agency prefix is the text before the first ':' in ``vehicle``. A few
    long feed names are mapped to their two-letter code; any two-character
    prefix is taken as the code itself. Prefixes that match neither rule get
    the placeholder 'agency not found' (and one warning per distinct prefix),
    so the new column always has exactly one entry per row.

    The column is written onto the frame that was passed in, and that same
    frame is returned.
    """
    feed_name_to_code = {
        'petalumatransit-petaluma-ca-us': 'PE',
        'westcat-ca-us': 'WC',
        'caltrain-ca-us': 'CA',
        'riovista-ca-us': 'RV',
        'unioncity-ca-us': 'UC',
        'Caltrain': 'CA',
    }

    agencies = []
    already_warned = set()
    for vehicleID in Events_file_sim_PE['vehicle']:
        agency = str(vehicleID).split(':')[0]
        if agency in feed_name_to_code:
            agencies.append(feed_name_to_code[agency])
        elif len(agency) == 2:
            agencies.append(agency)
        else:
            # Previously nothing was appended here, which made `agencies`
            # shorter than the frame and crashed the assignment below.
            if agency not in already_warned:
                print('Warning, this agency is not recognized:', agency)
                already_warned.add(agency)
            agencies.append('agency not found')
    Events_file_sim_PE['agency'] = agencies

    print('AGENCIES',np.unique(agencies))

    return Events_file_sim_PE

def _candidate_trip_ids(vehicle, agency):
    """Return the GTFS trip ids to try for ``vehicle``, most specific first."""
    parts = str(vehicle).split(':')
    if agency != 'SM':
        # Regular feeds: the trip id is the second ':'-separated field.
        return parts[1:2]
    # 'SM' vehicle ids are rebuilt as a|b:c|d:e:f, falling back to a|b:c|d:e.
    candidates = []
    if len(parts) >= 7:
        candidates.append('{}|{}:{}|{}:{}:{}'.format(*parts[1:7]))
    if len(parts) >= 6:
        candidates.append('{}|{}:{}|{}:{}'.format(*parts[1:6]))
    return candidates


def _resolve_route(vehicle, agency, trips_by_agency):
    """Return '<agency>:<route_id>' for ``vehicle`` or 'tripID not found'."""
    trip_to_route = trips_by_agency.get(agency)
    if trip_to_route is not None:
        for trip_id in _candidate_trip_ids(vehicle, agency):
            if trip_id in trip_to_route:
                return agency + ':' + str(trip_to_route[trip_id])
    print('Warning, trip non found for vehicle', vehicle)
    return 'tripID not found'


def guess_route(Events_file_sim_PE, GTFS_filepaths, agency_codes=None):
    """Add a 'route_id' column by looking vehicles up in GTFS ``trips.txt`` files.

    Parameters
    ----------
    Events_file_sim_PE : DataFrame
        Must have 'vehicle' and 'agency' columns. The 'route_id' column is
        written onto this frame, which is also the return value.
    GTFS_filepaths : sequence of str
        One GTFS zip per agency; each must contain ``trips.txt`` with
        ``trip_id`` and ``route_id`` columns.
    agency_codes : sequence of str, optional
        Agency code for each entry of ``GTFS_filepaths`` (paired by position).
        Defaults to the notebook-level ``GTFSs`` list, as before.

    Returns
    -------
    DataFrame
        The input frame with 'route_id' set to '<agency>:<route_id>', or to
        'tripID not found' when the trip cannot be resolved. Every row gets a
        value (the 'SM' branch used to append nothing on failure, which made
        the column assignment fail with a length mismatch).
    """
    if agency_codes is None:
        agency_codes = GTFSs

    GTFS_trip_files = {}
    for GTFS_filepath, GTFS in zip(GTFS_filepaths, agency_codes):
        with zipfile.ZipFile(GTFS_filepath, 'r') as zip_ref:
            with zip_ref.open('trips.txt') as trips_handle:
                trip_file = pd.read_csv(trips_handle)
        GTFS_trip_files[str(GTFS)] = dict(zip(trip_file['trip_id'].astype(str),
                                              trip_file['route_id'].astype(str)))

    # Many rows share a vehicle, so resolve each (vehicle, agency) pair once;
    # this also limits the "not found" warning to one line per distinct pair.
    resolved = {}
    route_ids = []
    total_routes = len(Events_file_sim_PE['vehicle'])
    time_start = time.time()
    pairs = zip(Events_file_sim_PE['vehicle'], Events_file_sim_PE['agency'])
    for i, (vehicle, agency) in enumerate(pairs, start=1):
        if i % 10000 == 0:
            elapsed = time.time() - time_start
            print(i, '/', total_routes, '. Time = ', elapsed,
                  '. Estimated remaining time:', elapsed / i * total_routes - elapsed)
        key = (vehicle, agency)
        if key not in resolved:
            resolved[key] = _resolve_route(vehicle, agency, GTFS_trip_files)
        route_ids.append(resolved[key])

    Events_file_sim_PE['route_id'] = route_ids

    return Events_file_sim_PE


def addGeometryIdToDataFrame(df, gdf, xcol, ycol, idColumn="geometry", df_geom='epsg:32610'): 
    """Attach the 'ZCTA' value of the matching ``gdf`` feature to each row of ``df``.

    Points are built from ``df[xcol]`` / ``df[ycol]``, spatially joined to
    ``gdf`` (geopandas ``sjoin`` with its default arguments), and the joined
    'ZCTA' column is merged back by index and renamed to ``idColumn``.

    Parameters
    ----------
    df : DataFrame holding the coordinate columns.
    gdf : GeoDataFrame; must provide a 'ZCTA' column. Its CRS is set in place.
    xcol, ycol : names of the x / y coordinate columns in ``df``.
    idColumn : name given to the merged 'ZCTA' column.
    df_geom : not used by the body (the point CRS is hard-coded below).

    Returns
    -------
    A plain DataFrame without the 'geometry' column.

    NOTE(review): with the default ``idColumn="geometry"`` the renamed id
    column shares its name with the point geometry and would presumably be
    removed by the final ``drop`` as well - confirm before relying on the default.
    """
    # Labels gdf as EPSG:3310 on the caller's object (in place).
    gdf.set_crs(epsg = "3310", inplace = True)
    gdf_data = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[xcol], df[ycol]))
    # Point coordinates are declared as EPSG:32610, regardless of df_geom.
    gdf_data.set_crs(epsg = "32610", inplace = True)
    # Both layers are reprojected to EPSG:26910 before the spatial join.
    joined = gpd.sjoin(gdf_data.to_crs('epsg:26910'), gdf.to_crs('epsg:26910'))
    # Left merge on the index: rows without a match keep a missing 'ZCTA'.
    # NOTE(review): a point matched by several features would yield several
    # rows here (see the disabled de-duplication on the return line) - confirm.
    gdf_data = gdf_data.merge(joined['ZCTA'], left_index=True, right_index=True, how="left")
    gdf_data.rename(columns={'ZCTA': idColumn}, inplace=True)
    df = pd.DataFrame(gdf_data.drop(columns='geometry'))
#     df.drop(columns=[xcol, ycol], inplace=True)
    return df#.loc[~df.index.duplicated(keep='first'), :]


# Read a shapefile over HTTP (GDAL /vsicurl/ path) into a GeoDataFrame.
# NOTE(review): BGs is not referenced by any of the cells visible here.
BGs = gpd.read_file('/vsicurl/https://github.com/LBNL-UCB-STI/beam-core-analysis/raw/main/Users/Zach/scenario/sfbay-blockgroups-2010/641aa0d4-ce5b-4a81-9c30-8790c4ab8cfb202047-1-wkkklf.j5ouj.shp')


# Maps each scenario's events file (gzipped CSV) to the scenario label used as
# the column name in the ridership tables.
# The insertion order matters: the ridership loop zips these entries
# positionally with GTFS_filepaths, so entry i must match GTFS set i.
dataFilepath_dict = {
        'gs://beam-core-outputs/sfbay-baseline-20230526/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'Baseline_10pct', # other corrections...BEAM 0.9.5
        'gs://beam-core-outputs/sfbay-tr-speed0_25-20230626/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRspeed-0_25',
        'gs://beam-core-outputs/sfbay-tr-speed0_5-20230626/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRspeed-0_5',
        'gs://beam-core-outputs/sfbay-tr-speed1_5-20230623/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRspeed-1_5',
        'gs://beam-core-outputs/sfbay-tr-speed2_0-20230623/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRspeed-2_0',
        'gs://beam-core-outputs/sfbay-tr-discount-100-20230703/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRdisc-100',
        'gs://beam-core-outputs/sfbay-tr-discount-75-20230703/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRdisc-75',
        'gs://beam-core-outputs/sfbay-tr-discount-50-20230703/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRdisc-50',
        'gs://beam-core-outputs/sfbay-tr-discount-25-20230703/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'TRdisc-25',
        'gs://beam-core-outputs/sfbay-tr_capacity_0_25-20230608/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'capacity-0.25',
        'gs://beam-core-outputs/sfbay-tr_capacity_0_5-20230608/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'capacity-0.5',
        'gs://beam-core-outputs/sfbay-tr_capacity_1_5-20230608/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'capacity-1.5',
        'gs://beam-core-outputs/sfbay-tr_capacity_2-20230607/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'capacity-2',
        'gs://beam-core-outputs/sfbay-tr-frequency_0_25-20230614/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'freq-0.25',
        'gs://beam-core-outputs/sfbay-tr-frequency_0_50-20230614/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'freq-0.5',
        'gs://beam-core-outputs/sfbay-tr-frequency_1_5-20230614/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'freq-1.5',
        'gs://beam-core-outputs/sfbay-tr-frequency_2-20230616/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz':'freq-2',

    }

# Dict views: events file paths and scenario labels, in insertion order.
dataFilepath_sim = dataFilepath_dict.keys()
names = dataFilepath_dict.values()

# Directory prefix for every CSV written by the ridership cell.
output_filepath = 'outputs/'

# One GTFS directory per scenario, listed in the same order as the entries of
# dataFilepath_dict (baseline, 4x speed, 4x discount, 4x capacity, 4x frequency).
# Scenarios that do not alter the schedule reuse the baseline feed.
GTFS_filepath1 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath2 = 'inputs/GTFS_Speed025/'
GTFS_filepath3 = 'inputs/GTFS_Speed05/'
GTFS_filepath4 = 'inputs/GTFS_Speed1_5/'
GTFS_filepath5 = 'inputs/GTFS_Speed2/'
GTFS_filepath6 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath7 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath8 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath9 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath10 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath11 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath12 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath13 = 'inputs/GTFS_Baseline10may2023/'
GTFS_filepath14 = 'inputs/GTFS_Freq025/'
GTFS_filepath15 = 'inputs/GTFS_Freq05/'
GTFS_filepath16 = 'inputs/GTFS_Freq1_5/'
GTFS_filepath17 = 'inputs/GTFS_Freq2/'


# File names (inside output_filepath) of the agency / route ridership tables.
RA_output = 'agency_ridershipNewitROI2.csv'
RR_output = 'route_ridershipNewitROI2.csv'

# Two-letter agency codes; each GTFS directory holds one '<code>.zip' per agency.
GTFSs = ['AC', 'AM', 'AY', 'BA', 'CA', 'CC', 'CE', 'CM', 'DE', 'EM', 'FF', 'GG', 'HF', 'MA', 'PE', 'RV', 'SB', 'SC', 'SF', 'SL', 'SM', 'SO', 'SR', 'ST', 'TD', 'UC', 'VC', 'VN', 'VT', 'WC', 'WH']


def _agency_zip_paths(gtfs_dir):
    """Return the per-agency GTFS zip paths inside ``gtfs_dir``, ordered like GTFSs."""
    return [gtfs_dir + f'{agency}.zip' for agency in GTFSs]


# Each GTFS_n is built from its own GTFS_filepathn. GTFS_11..GTFS_17 used to be
# built from GTFS_filepath10, which silently matched the four frequency
# scenarios against the baseline feed.
GTFS_1 = _agency_zip_paths(GTFS_filepath1)
GTFS_2 = _agency_zip_paths(GTFS_filepath2)
GTFS_3 = _agency_zip_paths(GTFS_filepath3)
GTFS_4 = _agency_zip_paths(GTFS_filepath4)
GTFS_5 = _agency_zip_paths(GTFS_filepath5)
GTFS_6 = _agency_zip_paths(GTFS_filepath6)
GTFS_7 = _agency_zip_paths(GTFS_filepath7)
GTFS_8 = _agency_zip_paths(GTFS_filepath8)
GTFS_9 = _agency_zip_paths(GTFS_filepath9)
GTFS_10 = _agency_zip_paths(GTFS_filepath10)
GTFS_11 = _agency_zip_paths(GTFS_filepath11)
GTFS_12 = _agency_zip_paths(GTFS_filepath12)
GTFS_13 = _agency_zip_paths(GTFS_filepath13)
GTFS_14 = _agency_zip_paths(GTFS_filepath14)
GTFS_15 = _agency_zip_paths(GTFS_filepath15)
GTFS_16 = _agency_zip_paths(GTFS_filepath16)
GTFS_17 = _agency_zip_paths(GTFS_filepath17)
GTFS_filepaths = [GTFS_1, GTFS_2, GTFS_3, GTFS_4, GTFS_5, GTFS_6,GTFS_7, GTFS_8, GTFS_9, GTFS_10,
                 GTFS_11, GTFS_12, GTFS_13, GTFS_14, GTFS_15, GTFS_16, GTFS_17]
# Row limit passed to processEvents; None reads every events file completely.
nrows = None

In [14]:
# Ridership per route: one row per '<agency>:<route_id>', one column per scenario.
RR = pd.DataFrame()
# Ridership per agency: one row per agency code, one column per scenario.
RA = pd.DataFrame()

# Events file, scenario label and GTFS zip list are paired by position.
for fp, name, GTFS_filepath in zip(dataFilepath_sim,names, GTFS_filepaths):
    print('evaluate ridership', name)
    # import pathtraversal and person enter vehicles
    PT, PE = processEvents(fp, nrows)
    print('filter PE transit trips from PT')
    # filter_transit returns (PE, PT), i.e. the reverse of its argument order.
    PE, PT = filter_transit(PT, PE)
    print('Guess transit agency for each PE')
    PE = guess_agency(PE)
    PT = guess_agency(PT)
    print('Guess transit route for each PE')
    PE = guess_route(PE, GTFS_filepath)
    PT = guess_route(PT, GTFS_filepath)

    # Vehicle -> route lookup for this scenario, saved next to the other
    # outputs (uses output_filepath instead of a second hard-coded 'outputs/').
    dictionary = dict(zip(PT['vehicle'].values, PT['route_id'].values))
    pd.DataFrame(dictionary.items()).to_csv(output_filepath + 'routetovehicledict%s.csv' % (name))

    print('Route Ridership')
    # Boardings per route for this scenario.
    rr = PE['route_id'].value_counts()
    for route, count in rr.items():
        RR.at[route, name] = count

    # Boardings per agency for this scenario.
    ra = PE['agency'].value_counts()
    for agency, count in ra.items():
        RA.at[agency, name] = count


    # Disabled share columns (sum_route / sum_agency are not defined here):
    # for route, count in zip(rr.keys(), rr):
    #     RR.at[route, name+' shares'] = count/sum_route
    # for agency, count in zip(ra.keys(), ra):
    #     RA.at[agency, name+' shares'] = count/sum_agency

    # Save after every scenario so partial results survive an interrupted run.
    RA.to_csv(output_filepath+RA_output)
    RR.to_csv(output_filepath+RR_output)

# tot_NTD = 1756364558 + 15283299+5703705+49795740+110802986+7386518+49247910+27027693 + 8437926+50222832 + 2818648
    
# RA.at['BA', 'clipper BA-SF-VT-AC 2016 share target'] = 350485/773719
# RA.at['BA', 'MTC BA-SF-VT-AC 2016 share target'] = 458900/1564500
# RA.at['BA', 'NTD BA-SF-VT-AC 2019 share target'] = (1756364558 + 15283299)/tot_NTD

# RA.at['SF', 'clipper BA-SF-VT-AC 2016 share target'] = 293991/773719
# RA.at['SF', 'MTC BA-SF-VT-AC 2016 share target'] = 777000/1564500
# RA.at['SF', 'NTD BA-SF-VT-AC 2019 share target'] = (5703705+49795740+110802986+7386518+49247910)/tot_NTD

# RA.at['VT', 'clipper BA-SF-VT-AC 2016 share target'] = 43950/773719
# RA.at['VT', 'MTC BA-SF-VT-AC 2016 share target'] = 146700/1564500
# RA.at['VT', 'NTD BA-SF-VT-AC 2019 share target'] = (27027693 + 8437926)/tot_NTD

# RA.at['AC', 'clipper BA-SF-VT-AC 2016 share target'] = 85293/773719
# RA.at['AC',
#       'MTC BA-SF-VT-AC 2016 share target'] = 181900/1564500
# RA.at['AC', 'NTD BA-SF-VT-AC 2019 share target'] = (50222832 + 2818648)/tot_NTD

# RA.at['BA', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 350485
# RA.at['BA', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 458900
# RA.at['BA', 'NTD BA-SF-VT-AC 2019 ridership target'] = (1756364558 + 15283299)

# RA.at['SF', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 293991
# RA.at['SF', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 777000
# RA.at['SF', 'NTD BA-SF-VT-AC 2019 ridership target'] = (5703705+49795740+110802986+7386518+49247910)

# RA.at['VT', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 43950
# RA.at['VT', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 146700
# RA.at['VT', 'NTD BA-SF-VT-AC 2019 ridership target'] =  (27027693 + 8437926)

# RA.at['AC', 'clipper BA-SF-VT-AC 2020 Jan av ridership target'] = 85293
# RA.at['AC', 'MTC BA-SF-VT-AC 2016 av ridership target'] = 181900
# RA.at['AC', 'NTD BA-SF-VT-AC 2019 ridership target'] = (50222832 + 2818648)

# Write the final ridership tables. The loop above already saves the same two
# files after each scenario, so this rewrites them with the complete tables.
RA.to_csv(output_filepath+RA_output)
RR.to_csv(output_filepath+RR_output)




evaluate ridership Baseline_10pct
read gs://beam-core-outputs/sfbay-baseline-20230526/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz


  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865440
PathTraversal           429256
PersonEntersVehicle     246693
PersonLeavesVehicle     167846
departure               164767
ModeChoice              120938
actend                  120649
LeavingParkingEvent      96045
arrival                  95668
actstart                 95664
PersonCost               80778
TeleportationEvent       12460
Replanning                2052
ReserveRideHail           1734
ChargingPlugInEvent          4
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64
PathTraversal           596728
PersonEntersVehicle     309515
PersonLeavesVehicle     262055
ModeChoice              201335
actend                  199574
departure               199574
actstart                163054
arrival                 163051
LeavingParkingEvent     137722
PersonCost              121077
ParkingEvent            106967
TeleportationEvent       31809
Replanning                5332
ReserveRideHail           2189
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865378
PathTraversal           433655
PersonEntersVehicle     239514
PersonLeavesVehicle     170579
departure               157418
ModeChoice              122293
actend                  121562
arrival                  97069
actstart                 97068
LeavingParkingEvent      96617
PersonCost               79493
TeleportationEvent       12431
Replanning                4566
ReserveRideHail           2342
ChargingPlugInEvent          5
RefuelSessionEvent           5
ChargingPlugOutEvent         5
Name: type, dtype: int64
PathTraversal           586292
PersonEntersVehicle     306253
PersonLeavesVehicle     266669
ModeChoice              200939
actend                  197861
departure               197861
arrival                 166380
actstart                166380
LeavingParkingEvent     138061
PersonCost              118597
ParkingEvent            108081
TeleportationEvent       31615
Replanning               11319
ReserveRideHail           3670
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            866102
PathTraversal           428538
PersonEntersVehicle     240044
PersonLeavesVehicle     170423
departure               158069
ModeChoice              123004
actend                  122254
arrival                  97326
actstart                 97323
LeavingParkingEvent      97241
PersonCost               80056
TeleportationEvent       12570
Replanning                4539
ReserveRideHail           2501
ChargingPlugInEvent          4
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64
PathTraversal           584496
PersonEntersVehicle     306030
PersonLeavesVehicle     264848
ModeChoice              202625
departure               198813
actend                  198813
actstart                166154
arrival                 166152
LeavingParkingEvent     138336
PersonCost              118557
ParkingEvent            108039
TeleportationEvent       31960
Replanning               11194
ReserveRideHail           3972
RefuelSessionE

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            866580
PathTraversal           418608
PersonEntersVehicle     247898
PersonLeavesVehicle     168613
departure               165940
ModeChoice              122186
actend                  121828
LeavingParkingEvent      97068
actstart                 96648
arrival                  96648
PersonCost               81419
TeleportationEvent       12568
Replanning                2257
ReserveRideHail           1721
ChargingPlugInEvent          6
RefuelSessionEvent           6
ChargingPlugOutEvent         6
Name: type, dtype: int64
PathTraversal           591270
PersonEntersVehicle     310814
PersonLeavesVehicle     259477
ModeChoice              203004
actend                  201136
departure               201136
arrival                 162406
actstart                162398
LeavingParkingEvent     139434
PersonCost              121235
ParkingEvent            107729
TeleportationEvent       32071
Replanning                5629
ReserveRideHail           2246
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            858016
PathTraversal           454917
PersonEntersVehicle     270451
departure               194895
PersonLeavesVehicle     151711
ModeChoice              111258
actend                  111001
LeavingParkingEvent      88124
arrival                  86047
actstart                 86045
PersonCost               73029
TeleportationEvent       11332
Replanning                1650
ReserveRideHail           1505
ChargingPlugInEvent          7
RefuelSessionEvent           6
ChargingPlugOutEvent         6
Name: type, dtype: int64
PathTraversal           665200
PersonEntersVehicle     302289
PersonLeavesVehicle     253844
ModeChoice              193275
actend                  192684
departure               192680
actstart                155747
arrival                 155745
LeavingParkingEvent     133454
PersonCost              117281
ParkingEvent            102776
TeleportationEvent       29582
Replanning                3435
ReserveRideHail           1988
RefuelSessionE

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            864801
PathTraversal           427545
PersonEntersVehicle     247964
PersonLeavesVehicle     167117
departure               165903
ModeChoice              122289
actend                  121784
LeavingParkingEvent      95987
arrival                  94996
actstart                 94992
PersonCost               80467
TeleportationEvent       12261
Replanning                2158
ReserveRideHail           1721
ChargingPlugInEvent          5
RefuelSessionEvent           5
ChargingPlugOutEvent         5
Name: type, dtype: int64
PathTraversal           597225
PersonEntersVehicle     310835
PersonLeavesVehicle     263140
ModeChoice              200838
actend                  199270
departure               199270
actstart                162724
arrival                 162722
LeavingParkingEvent     136756
PersonCost              121391
ParkingEvent            106334
TeleportationEvent       31225
Replanning                5968
ReserveRideHail           2277
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865106
PathTraversal           428681
PersonEntersVehicle     247434
PersonLeavesVehicle     167460
departure               165307
ModeChoice              121442
actend                  121167
LeavingParkingEvent      95940
arrival                  95418
actstart                 95417
PersonCost               80513
TeleportationEvent       12389
Replanning                2090
ReserveRideHail           1627
ChargingPlugInEvent          3
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64
PathTraversal           596230
PersonEntersVehicle     310346
PersonLeavesVehicle     262740
ModeChoice              201573
actend                  199898
departure               199898
arrival                 162704
actstart                162702
LeavingParkingEvent     136848
PersonCost              121385
ParkingEvent            106703
TeleportationEvent       31325
Replanning                5522
ReserveRideHail           2105
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865301
PathTraversal           428744
PersonEntersVehicle     247145
PersonLeavesVehicle     167781
departure               165064
ModeChoice              121290
actend                  120948
LeavingParkingEvent      96111
arrival                  95509
actstart                 95496
PersonCost               80614
TeleportationEvent       12290
Replanning                2094
ReserveRideHail           1600
ChargingPlugInEvent          5
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64
PathTraversal           597232
PersonEntersVehicle     309716
PersonLeavesVehicle     262539
ModeChoice              200900
departure               199296
actend                  199294
actstart                162949
arrival                 162944
LeavingParkingEvent     137354
PersonCost              121599
ParkingEvent            107158
TeleportationEvent       31659
Replanning                5256
ReserveRideHail           2087
RefuelSessionE

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865070
PathTraversal           427482
PersonEntersVehicle     246574
PersonLeavesVehicle     167667
departure               165471
ModeChoice              122816
actend                  121341
actstart                 95577
arrival                  95577
LeavingParkingEvent      95565
PersonCost               80592
TeleportationEvent       12407
Replanning                2104
ReserveRideHail           1723
ChargingPlugInEvent         12
RefuelSessionEvent          11
ChargingPlugOutEvent        11
Name: type, dtype: int64
PathTraversal           596170
PersonEntersVehicle     311614
PersonLeavesVehicle     261251
ModeChoice              200753
departure               200425
actend                  200418
arrival                 162382
actstart                162378
LeavingParkingEvent     138557
PersonCost              120705
ParkingEvent            106608
TeleportationEvent       31440
Replanning                5076
ReserveRideHail           2201
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865187
PathTraversal           429824
PersonEntersVehicle     244216
PersonLeavesVehicle     166601
departure               164145
ModeChoice              123688
actend                  120050
LeavingParkingEvent      95628
arrival                  95623
actstart                 95622
PersonCost               79873
TeleportationEvent       12376
Replanning                5463
ReserveRideHail           1691
ChargingPlugInEvent          5
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64
PathTraversal           597292
PersonEntersVehicle     301870
PersonLeavesVehicle     253158
ModeChoice              215936
departure               199064
actend                  199064
actstart                160084
arrival                 160083
LeavingParkingEvent     136843
PersonCost              116799
ParkingEvent            105697
TeleportationEvent       31237
Replanning               20727
ReserveRideHail           2117
RefuelSessionE

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865173
PathTraversal           428530
PersonEntersVehicle     247048
PersonLeavesVehicle     167159
departure               165080
ModeChoice              122048
actend                  120957
LeavingParkingEvent      96335
arrival                  95230
actstart                 95229
PersonCost               80413
TeleportationEvent       12321
Replanning                2805
ReserveRideHail           1663
ChargingPlugInEvent          3
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64
PathTraversal           598729
PersonEntersVehicle     306466
PersonLeavesVehicle     259469
ModeChoice              205246
departure               198874
actend                  198873
arrival                 162436
actstart                162424
LeavingParkingEvent     136982
PersonCost              119907
ParkingEvent            106588
TeleportationEvent       31779
Replanning               10106
ReserveRideHail           2106
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865185
PathTraversal           428597
PersonEntersVehicle     247166
PersonLeavesVehicle     167630
departure               165202
ModeChoice              121386
actend                  121086
LeavingParkingEvent      96163
arrival                  95509
actstart                 95507
PersonCost               80543
TeleportationEvent       12334
Replanning                2020
ReserveRideHail           1663
ChargingPlugInEvent          3
RefuelSessionEvent           3
ChargingPlugOutEvent         3
Name: type, dtype: int64
PathTraversal           597484
PersonEntersVehicle     310397
PersonLeavesVehicle     262338
ModeChoice              200171
departure               199569
actend                  199569
arrival                 163205
actstart                163187
LeavingParkingEvent     138314
PersonCost              120926
ParkingEvent            106730
TeleportationEvent       31778
Replanning                4230
ReserveRideHail           2065
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            865468
PathTraversal           429005
PersonEntersVehicle     246389
PersonLeavesVehicle     168152
departure               164787
ModeChoice              120928
actend                  120641
actstart                 95973
arrival                  95973
LeavingParkingEvent      95966
PersonCost               80657
TeleportationEvent       12429
Replanning                2000
ReserveRideHail           1625
ChargingPlugInEvent          3
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64
PathTraversal           595434
PersonEntersVehicle     311460
PersonLeavesVehicle     261700
ModeChoice              201160
actend                  200607
departure               200599
arrival                 162772
actstart                162772
LeavingParkingEvent     138049
PersonCost              121052
ParkingEvent            106623
TeleportationEvent       31652
Replanning                4001
ReserveRideHail           2108
RefuelSessionE

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            870674
PathTraversal           381450
PersonEntersVehicle     233767
PersonLeavesVehicle     175083
departure               149581
ModeChoice              138135
actend                  135724
LeavingParkingEvent     105270
arrival                 101818
actstart                101818
PersonCost               84094
TeleportationEvent       13808
Replanning                6433
ReserveRideHail           2327
ChargingPlugInEvent          6
RefuelSessionEvent           6
ChargingPlugOutEvent         6
Name: type, dtype: int64
PathTraversal           543383
PersonEntersVehicle     306568
PersonLeavesVehicle     265115
ModeChoice              212096
departure               202177
actend                  202169
arrival                 169463
actstart                169455
LeavingParkingEvent     141831
PersonCost              122136
ParkingEvent            111621
TeleportationEvent       32887
Replanning               17663
ReserveRideHail           3410
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            868479
PathTraversal           396535
PersonEntersVehicle     240109
PersonLeavesVehicle     172940
departure               155357
ModeChoice              132226
actend                  131306
LeavingParkingEvent     102621
actstart                 99446
arrival                  99446
PersonCost               83024
TeleportationEvent       12992
Replanning                3616
ReserveRideHail           1897
ChargingPlugInEvent          2
RefuelSessionEvent           2
ChargingPlugOutEvent         2
Name: type, dtype: int64
PathTraversal           560879
PersonEntersVehicle     310683
PersonLeavesVehicle     265139
ModeChoice              206982
departure               202118
actend                  202117
arrival                 166920
actstart                166919
LeavingParkingEvent     141003
PersonCost              122354
ParkingEvent            110055
TeleportationEvent       32533
Replanning                9876
ReserveRideHail           2407
ChargingPlugIn

  for chunk in pd.read_csv(dataFilepath_sim, compression = 'gzip',chunksize=2500000, nrows = nrows, usecols = ['vehicle','time','endY','endX','startY','startX','mode', 'length', 'type', 'person']):


ParkingEvent            861549
PathTraversal           440597
PersonEntersVehicle     258572
departure               180726
PersonLeavesVehicle     159495
ModeChoice              116889
actend                  116646
LeavingParkingEvent      91726
arrival                  90846
actstart                 90840
PersonCost               76675
TeleportationEvent       11997
Replanning                1852
ReserveRideHail           1577
ChargingPlugInEvent          5
RefuelSessionEvent           4
ChargingPlugOutEvent         4
Name: type, dtype: int64
PathTraversal           632815
PersonEntersVehicle     305318
PersonLeavesVehicle     258716
ModeChoice              197007
actend                  195007
departure               195007
actstart                159750
arrival                 159746
LeavingParkingEvent     135432
PersonCost              119458
ParkingEvent            104953
TeleportationEvent       30756
Replanning                3972
ReserveRideHail           2048
RefuelSessionE

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [1]:


# Display the agency ridership table.
# NOTE(review): RA only exists after the ridership cell has run in the current
# kernel; the recorded NameError comes from running this cell first.
RA


NameError: name 'RA' is not defined

In [48]:
# Ridership of route SF:18608 across all scenario columns.
RR.loc[RR.index == 'SF:18608']

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:18608,,,7379.0,0.077666,,,,,,,,,,,,,,,7722.0,0.081277


In [49]:
# Ridership of route SF:12327 across all scenario columns.
RR.loc[RR.index == 'SF:12327']

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12327,4634.0,0.048774,,,4700.0,0.049469,4517.0,0.047543,4575.0,0.048153,4899.0,0.051564,4606.0,0.04848,4605.0,0.048469,,,,


In [None]:
# Route 30 Stockton: exact-match lookup of route id 'SF:12311' across all scenario columns.
RR[RR.index=='SF:12311']

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12311,3520.0,0.037049,3261.0,0.034323,3471.0,0.036533,3681.0,0.038744,3504.0,0.036881,3623.0,0.038133,3512.0,0.036965,3580.0,0.037681,3313.0,0.03487,3556.0,0.037428


In [51]:
# Route 45: exact-match lookup of route id 'SF:12324' across all scenario columns.
RR[RR.index=='SF:12324']

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12324,2266.0,0.02385,2254.0,0.023724,2369.0,0.024934,2367.0,0.024913,2254.0,0.023724,2333.0,0.024556,2262.0,0.023808,2363.0,0.024871,2129.0,0.022408,2276.0,0.023956


In [52]:
# Rows whose route id contains 'AC:1-142'. str.contains is a regex/substring
# match, so any longer id embedding this text would be returned as well.
RR[RR.index.str.contains('AC:1-142')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
AC:1-142,4364.0,0.045932,,,4373.0,0.046027,4411.0,0.046427,4589.0,0.048301,4210.0,0.044312,,,4492.0,0.04728,4372.0,0.046017,4276.0,0.045006


In [53]:
# Rows whose route id contains 'AC:1T-142' (labelled '1TEMPO' in Route_names).
# Substring match, not an exact-id match.
RR[RR.index.str.contains('AC:1T-142')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
AC:1T-142,,,5135.0,0.054048,,,,,,,,,5189.0,0.054616,,,,,,


In [54]:
# All rows whose route id contains the 'CA:' agency prefix
# (labelled 'CA' / 'CALTRAIN' in Route_names).
RR[RR.index.str.contains('CA:')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
CA:12868,2979.0,0.031355,3287.0,0.034597,2924.0,0.030776,2868.0,0.030187,2977.0,0.031334,2941.0,0.030955,2932.0,0.03086,3304.0,0.034776,3010.0,0.031681,2929.0,0.030829
CA:12869,1814.0,0.019093,2136.0,0.022482,1802.0,0.018967,1804.0,0.018988,1818.0,0.019135,1721.0,0.018114,1729.0,0.018198,2183.0,0.022977,1759.0,0.018514,1767.0,0.018598
CA:12867,934.0,0.009831,961.0,0.010115,848.0,0.008925,829.0,0.008725,838.0,0.00882,874.0,0.009199,910.0,0.009578,1063.0,0.011188,900.0,0.009473,956.0,0.010062


In [55]:
# T line, TR-scenario route id (Route_names labels 'SF:1001' as 'SF:T').
# Substring match: any route id containing 'SF:1001' is returned.
RR[RR.index.str.contains('SF:1001')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:1001,,,3846.0,0.04048,,,,,,,,,,,,,3888.0,0.040922,,


In [56]:
# K line, TR-scenario route id. Substring match: any route id containing 'SF:1002' is returned.
RR[RR.index.str.contains('SF:1002')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:1002,,,4970.0,0.052311,,,,,,,,,,,,,5114.0,0.053826,,


In [57]:
# T-OWL: rows whose route id contains 'SF:1106' (substring match).
RR[RR.index.str.contains('SF:1106')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares


In [58]:
# K-OWL: rows whose route id contains 'SF:1102' (substring match).
RR[RR.index.str.contains('SF:1102')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:1102,25.0,0.000263,,,25.0,0.000263,16.0,0.000168,31.0,0.000326,25.0,0.000263,31.0,0.000326,22.0,0.000232,,,17.0,0.000179


In [59]:
# L-OWL: rows whose route id contains 'SF:1103' (substring match).
RR[RR.index.str.contains('SF:1103')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:1103,,,,,1.0,1.1e-05,,,,,,,,,1.0,1.1e-05,,,,


In [60]:
# M-OWL: rows whose route id contains 'SF:1104' (substring match).
RR[RR.index.str.contains('SF:1104')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares


In [61]:
# N-OWL: rows whose route id contains 'SF:1105' (substring match).
RR[RR.index.str.contains('SF:1105')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:1105,,,,,,,3.0,3.2e-05,3.0,3.2e-05,,,2.0,2.1e-05,1.0,1.1e-05,1.0,1.1e-05,2.0,2.1e-05


In [62]:
# E line: rows whose route id contains 'SF:12344' (substring match).
RR[RR.index.str.contains('SF:12344')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12344,249.0,0.002621,315.0,0.003315,250.0,0.002631,279.0,0.002937,270.0,0.002842,266.0,0.0028,277.0,0.002916,259.0,0.002726,291.0,0.003063,255.0,0.002684


In [63]:
# NX line: rows whose route id contains 'SF:12351' (substring match).
RR[RR.index.str.contains('SF:12351')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12351,313.0,0.003294,310.0,0.003263,293.0,0.003084,294.0,0.003094,285.0,0.003,293.0,0.003084,313.0,0.003294,314.0,0.003305,333.0,0.003505,283.0,0.002979


In [64]:
# J line: rows whose route id contains 'SF:12475' (substring match).
RR[RR.index.str.contains('SF:12475')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12475,3001.0,0.031586,2751.0,0.028955,3113.0,0.032765,2973.0,0.031292,2998.0,0.031555,3123.0,0.032871,3008.0,0.03166,3036.0,0.031955,2917.0,0.030702,2855.0,0.03005


In [65]:
# Combined K/T line: rows whose route id contains 'SF:12476' (substring match).
RR[RR.index.str.contains('SF:12476')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12476,8651.0,0.091055,,,8506.0,0.089528,8621.0,0.090739,8564.0,0.090139,8576.0,0.090265,8678.0,0.091339,8580.0,0.090307,,,8564.0,0.090139


In [66]:
# L line: rows whose route id contains 'SF:12477' (substring match).
RR[RR.index.str.contains('SF:12477')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12477,6098.0,0.064183,6035.0,0.06352,6061.0,0.063794,6074.0,0.063931,5945.0,0.062573,5935.0,0.062468,5979.0,0.062931,6053.0,0.06371,6138.0,0.064604,6005.0,0.063205


In [67]:
# M line: rows whose route id contains 'SF:12478' (substring match).
RR[RR.index.str.contains('SF:12478')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12478,5239.0,0.055142,4813.0,0.050658,5086.0,0.053532,5270.0,0.055468,5124.0,0.053932,5290.0,0.055679,5298.0,0.055763,5240.0,0.055153,4945.0,0.052048,5374.0,0.056563


In [68]:
# N line: rows whose route id contains 'SF:12479' (substring match).
RR[RR.index.str.contains('SF:12479')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
SF:12479,9250.0,0.097359,10265.0,0.108042,9435.0,0.099306,9457.0,0.099538,9393.0,0.098864,9542.0,0.100433,9571.0,0.100738,9477.0,0.099748,10281.0,0.108211,9299.0,0.097875


In [69]:
# All rows whose route id contains the 'BA:' agency prefix (labelled 'BART' in Route_names).
RR[RR.index.str.contains('BA:')]

Unnamed: 0,Baseline,Baseline shares,TR,TR shares,TR-BA,TR-BA shares,TR-BSVI,TR-BSVI shares,TR-BSVII,TR-BSVII shares,TR-CORE,TR-CORE shares,TR-1T,TR-1T shares,TR-CA,TR-CA shares,TR-CS,TR-CS shares,TR-VN,TR-VN shares
BA:1,47235.0,0.497163,24130.0,0.253976,24236.0,0.255092,47290.0,0.497742,47094.0,0.495679,24118.0,0.25385,47299.0,0.497837,47299.0,0.497837,47101.0,0.495753,47320.0,0.498058
BA:7,40534.0,0.426633,19575.0,0.206033,19232.0,0.202423,40337.0,0.42456,40328.0,0.424465,19398.0,0.20417,40295.0,0.424118,40209.0,0.423213,40466.0,0.425918,40515.0,0.426433
BA:11,29621.0,0.31177,13433.0,0.141387,13231.0,0.13926,29504.0,0.310539,29591.0,0.311455,13292.0,0.139903,29459.0,0.310065,29380.0,0.309234,29735.0,0.31297,29698.0,0.312581
BA:3,20145.0,0.212033,14459.0,0.152186,14421.0,0.151786,21229.0,0.223442,25450.0,0.267869,10871.0,0.114421,19907.0,0.209528,20031.0,0.210833,20150.0,0.212085,20333.0,0.214011
BA:5,17193.0,0.180962,11123.0,0.117073,11046.0,0.116263,18058.0,0.190066,19783.0,0.208222,10090.0,0.1062,17043.0,0.179383,17137.0,0.180372,17476.0,0.18394,17371.0,0.182835
BA:13,17.0,0.000179,,,,,21.0,0.000221,23.0,0.000242,,,20.0,0.000211,20.0,0.000211,18.0,0.000189,11.0,0.000116
BA:19,9.0,9.5e-05,,,,,15.0,0.000158,9.0,9.5e-05,,,6.0,6.3e-05,3.0,3.2e-05,7.0,7.4e-05,6.0,6.3e-05
BA:2,,,21173.0,0.222853,21147.0,0.222579,,,,,21245.0,0.22361,,,,,,,,
BA:8,,,16913.0,0.178015,16974.0,0.178657,,,,,17012.0,0.179057,,,,,,,,
BA:12,,,15861.0,0.166942,15684.0,0.165079,,,,,15781.0,0.1661,,,,,,,,


# **Create Person Experience Table**

In [1]:
#Imports
import numpy as np
import pandas as pd
import difflib
import matplotlib.pyplot as plt
import time
import traceback
from tqdm import tqdm
import time
from tqdm.notebook import tqdm
import sys



######### Analysis of what agents used in the baseline
#
# Scenario configuration. Every list below holds one entry per scenario, in the
# order given by `names`; index 0 is the baseline run and indices 1..9 are the
# project scenarios that the analysis loop iterates over (it slices with [1:]).

# Short scenario labels.
names = ['Baseline','TR','TR-BA','TR-BSVI','TR-BSVII','TR-CORE','TR-1T','TR-CA','TR-CS','TR-VN']

# Bucket holding the simulation outputs, and the iteration folder used by
# every path below.
OUTPUT_ROOT = 'gs://beam-core-outputs'
ITERATION_DIR = 'year-2018-iteration-10'

# Run folder of each scenario under OUTPUT_ROOT (same order as `names`).
RUN_FOLDERS = [
    'sfbay-baseline2018-30pct-20230825',
    'sfbay-tr-30pct-20231014',
    'sfbay-tr-BARTBSV-30pct-20230825',
    'sfbay-tr-BARTBSVI-30pct-20230828',
    'sfbay-tr-BARTBSVII-30pct-20230828',
    'sfbay-tr-BARTCORE-30pct-20230828',
    'sfbay-tr-TempoBRT-30pct-20230825',
    'sfbay-tr-CAElectrification-30pct-20230828',
    'sfbay-tr-SF_CS-30pct-20231014',
    'sfbay-tr-SF_VN-30pct-20231014',
]

# Events file of iteration 0 of each run.
dataFilepath_PT = [
    f'{OUTPUT_ROOT}/{run}/beam/{ITERATION_DIR}/ITERS/it.0/0.events.csv.gz'
    for run in RUN_FOLDERS
]

# Plans file of iteration 0 of each run.
dataFilepath_Plan = [
    f'{OUTPUT_ROOT}/{run}/beam/{ITERATION_DIR}/ITERS/it.0/0.plans.csv.gz'
    for run in RUN_FOLDERS
]

# Post-processed person-to-vehicle table of each run.
dataFilepath_PTO = [
    f'{OUTPUT_ROOT}/{run}/postprocessOutputs/{ITERATION_DIR}/personToVehicles.csv'
    for run in RUN_FOLDERS
]

# INEXUS file of each run. The file names follow no single pattern (the label
# and date stamp differ from the run folder), so they are listed explicitly.
INEXUS_FILES = [
    'sfbay_baseline_base-1.0_2018__20230825.csv.gz',
    'sfbay_baseline_TR-1.0_2018__20231014.csv.gz',
    'sfbay_baseline_BART-BSV-1.0_2018__20230824.csv.gz',
    'sfbay_baseline_BSVI-1.0_2018__20230826.csv.gz',
    'sfbay_baseline_BSVII-1.0_2018__20230826.csv.gz',
    'sfbay_baseline_CORE-1.0_2018__20230826.csv.gz',
    'sfbay_baseline_1T-1.0_2018__20230825.csv.gz',
    'sfbay_baseline_CA-electr-1.0_2018__20230826.csv.gz',
    'sfbay_baseline_CS-1.0_2018__20231014.csv.gz',
    'sfbay_baseline_VN-1.0_2018__20231014.csv.gz',
]
dataFilepath_inex = [
    f'{OUTPUT_ROOT}/{run}/inexus/{inexus_file}'
    for run, inexus_file in zip(RUN_FOLDERS, INEXUS_FILES)
]

# Agency prefixes of interest per scenario. The baseline entry [':'] is a
# placeholder that the analysis loop never reads.
AgencyIDs = [[':'],                        # Baseline
             ['BA:','AC:','CA:','SF:'],    # TR
             ['BA:'],                      # TR-BA
             ['BA:'],                      # TR-BSVI
             ['BA:'],                      # TR-BSVII
             ['BA:'],                      # TR-CORE
             ['AC:'],                      # TR-1T
             ['CA:'],                      # TR-CA
             ['SF:'],                      # TR-CS
             ['SF:']]                      # TR-VN

# Route-id prefixes analysed per scenario; paired element-wise with Route_names.
RouteIDs = [[':'],                                                 # Baseline
            ['BA:','AC:1T-142','CA:','SF:1001', 'SF:18608'],       # TR
            ['BA:'],                                               # TR-BA
            ['BA:3','BA:4','BA:5','BA:6'],                         # TR-BSVI
            ['BA:3','BA:4','BA:5','BA:6'],                         # TR-BSVII
            ['BA:'],                                               # TR-CORE
            ['AC:1T-142'],                                         # TR-1T
            ['CA:'],                                               # TR-CA
            ['SF:1001'],                                           # TR-CS
            ['SF:18608']]                                          # TR-VN

# Human-readable label of each RouteIDs entry (written to 'Project Tried').
# The last TR-BSVII label used to read 'BA:6V', a truncated form of the
# 'BA:n - BSVII' pattern used by its siblings; it is spelled out here.
Route_names = [[':'],                                                              # Baseline
               ['BART - TR','1TEMPO - TR','CA - TR','SF:T - TR', 'SF:49 - TR'],    # TR
               ['BART - BSV CORE'],                                                # TR-BA
               ['BA:3 - BSVI','BA:4 - BSVI','BA:5 - BSVI','BA:6 - BSVI'],          # TR-BSVI
               ['BA:3 - BSVII','BA:4 - BSVII','BA:5 - BSVII','BA:6 - BSVII'],      # TR-BSVII
               ['BART - CORE'],                                                    # TR-CORE
               ['1TEMPO - 1T'],                                                    # TR-1T
               ['CALTRAIN - CA'],                                                  # TR-CA
               ['SF:T - CS'],                                                      # TR-CS
               ['SF:49 - VN']]                                                     # TR-VN

# Locally stored vehicle-to-route lookup tables, one file per scenario label.
dataFilepath_VtoR = [f'outputs/routetovehicledict{scenario}.csv' for scenario in names]


In [None]:
# Load every baseline input once. This cell defines:
#   persons_baseline, person_trips_baseline, plans_baseline, events_baseline,
#   mode_choice_baseline, PTs_baseline, dict_time_baseline,
#   PtoPTss_baseline, PtoPTss_baseline_bus, PtoPTss_baseline_dict

# Empty result table that the scenario loop fills cell by cell with `.at`.
# (ZIP departure/arrival columns for TR and Baseline are intentionally left out.)
persons_baseline = pd.DataFrame(columns=[
    'Person',
    'Plan Index',
    'Planned Depart Time Baseline',
    'Planned Depart Time TR',
    'Activity From Baseline',
    'Activity To Baseline',
    'Activity From TR',
    'Activity To TR',
    'Project Tried',
    'Vehicles Used Baseline',
    'Vehicle Types Used Baseline',
    'Bus agencies Used Baseline',
    'First Bus agency Used Baseline',
    'Vehicles Used TR',
    'Vehicle Types Used TR',
    'Bus agencies Used TR',
    'First Bus agency Used TR',
    'Switch From',
    'Trip Length Baseline',
    'Trip Duration Baseline',
    'Trip Length TR',
    'Trip Duration TR',
    'Diff Length',
    'Diff Duration',
    'Planned Mode Baseline',
    'Planned Mode TR',
    'Chosen Mode Baseline',
    'Chosen Mode TR',
    'X Activity From TR',
    'Y Activity From TR',
    'X Activity To TR',
    'Y Activity To TR',
    'X Activity From Baseline',
    'Y Activity From Baseline',
    'X Activity To Baseline',
    'Y Activity To Baseline',
    'Log Sum Baseline',
    'Log Sum TR',
    'Diff Log Sum',
])

# Per-person logsum measures from the baseline INEXUS file.
print('Read person_trips_baseline...')
person_trips_baseline = pd.read_csv(
    dataFilepath_inex[0],
    compression='gzip',
    usecols=[
        'logsum_trip_Potential_INEXUS',
        'person_id',
        'destination_logsum_x',
        'destination_logsum_y',
        'logsum_tours_mode_AS_tours',
        'workplace_location_logsum',
    ],
)

# Baseline plans, restricted to the rows flagged as the selected plan.
print('Read plans_baseline...')
plans_baseline = pd.read_csv(
    dataFilepath_Plan[0],
    usecols=[
        'activityType',
        'personId',
        'planElementIndex',
        'activityEndTime',
        'activityLocationX',
        'activityLocationY',
        'legMode',
        'planSelected',
    ],
)
plans_baseline = plans_baseline.loc[plans_baseline['planSelected'] == True]

# Baseline events, split into ModeChoice and PathTraversal rows.
print('Read events_baseline...')
events_baseline = pd.read_csv(
    dataFilepath_PT[0],
    usecols=['type', 'time', 'departureTime', 'mode', 'person', 'length', 'duration'],
)
mode_choice_baseline = events_baseline.loc[events_baseline['type'] == 'ModeChoice']
PTs_baseline = events_baseline.loc[events_baseline['type'] == 'PathTraversal']
# Row label of each PathTraversal event in the events frame -> its departureTime.
dict_time_baseline = dict(zip(PTs_baseline.index, PTs_baseline['departureTime']))

print('Read PtoPTss_Baseline...')
PtoPTss_baseline = pd.read_csv(dataFilepath_PTO[0])
# Transit-mode legs only. This subset is taken before 'departureTime' is
# attached below, so it does not carry that column.
PtoPTss_baseline_bus = PtoPTss_baseline.loc[
    PtoPTss_baseline['mode'].isin(['bus', 'subway', 'tram', 'rail', 'cable_car', 'ferry'])
]
# assumes pathTraversalID holds row labels of the events file - TODO confirm
PtoPTss_baseline['departureTime'] = PtoPTss_baseline['pathTraversalID'].map(dict_time_baseline)

# (personID, planIndex) -> [total length, total duration, earliest departureTime]
print('Create PTO dict...')
PtoPTss_baseline_dict = (
    PtoPTss_baseline
    .groupby(['personID', 'planIndex'])
    .apply(lambda legs: [sum(legs.length), sum(legs.duration), min(legs.departureTime)])
    .to_dict()
)


Read person_trips_baseline...
Read plans_baseline...


In [None]:
i = 0

#SCENARIO
for dt_fp, pto_fp, plan_fp, inex_fp, vtoR_fp, agID, rouIDs, names in zip(dataFilepath_PT[1:], dataFilepath_PTO[1:], dataFilepath_Plan[1:], dataFilepath_inex[1:], dataFilepath_VtoR[1:], AgencyIDs[1:], RouteIDs[1:], Route_names[1:]):
    
    print('Analyzing', names)
    print('Read person_trips_baseline...')
    person_trips_TR = pd.read_csv(inex_fp, compression = 'gzip', 
                                    usecols = ['logsum_trip_Potential_INEXUS','person_id', 'destination_logsum_x',
                                               'destination_logsum_y','logsum_tours_mode_AS_tours','workplace_location_logsum'])
    print('Read plans_baseline...')
    plans_TR = pd.read_csv(plan_fp, usecols = ['activityType','personId','planElementIndex','activityEndTime','activityLocationX','activityLocationY','legMode', 'planSelected'])
    plans_TR = plans_TR[plans_TR.planSelected == True]
    print('Read events_baseline...')
    events_TR = pd.read_csv(dt_fp, usecols = ['type','time','departureTime','mode','person','length','duration'])
    mode_choice_TR = events_TR[events_TR['type']=='ModeChoice']
    PTs_TR = events_TR[events_TR['type']=='PathTraversal'] 
    dict_time_TR = dict(zip(PTs_TR.index, PTs_TR.departureTime))
    print('Read PtoPTss_Baseline...')
    PtoPTss_TR = pd.read_csv(pto_fp) 
    VtoR_TR = pd.read_csv(vtoR_fp)
    VtoR_TR = VtoR_TR.set_index('0')['1'].to_dict()
    PtoPTss_TR['route_id'] =  PtoPTss_TR['vehicleID'].map(VtoR_TR)
    PtoPTss_TR['departureTime'] =  PtoPTss_TR['pathTraversalID'].map(dict_time_TR)

    
 
    #ROUTE
    for rt, nm in zip(rouIDs, names):
        
        print('Analyze', nm)
                      
        print('Filter PtoPTss_TR_rt...')
        PtoPTss_TR_rt =  PtoPTss_TR[(PtoPTss_TR['mode'] =='bus')|
                                     (PtoPTss_TR['mode'] =='subway')|
                                     (PtoPTss_TR['mode'] =='tram')|
                                     (PtoPTss_TR['mode'] =='rail')|
                                     (PtoPTss_TR['mode'] =='cable_car')|
                                     (PtoPTss_TR['mode'] =='ferry')]
        PtoPTss_TR_rt = PtoPTss_TR_rt[PtoPTss_TR_rt['vehicle2'].isin(agID)]
        PtoPTss_TR_rt = PtoPTss_TR_rt[PtoPTss_TR_rt['vehicleID'].str.startswith(tuple(rouIDs))]
        
        print('Create PTO dict...')
        PtoPTss_TR_dict = PtoPTss_TR_rt.groupby(['personID', 'planIndex']).apply(lambda x: [sum(x.length),
                                                    sum(x.duration),min(x.departureTime)]).to_dict()
        
        #PTO
        total_rows = len(PtoPTss_TR_dict.keys())
        print('Number of rows:',total_rows)
        
        
        with tqdm(total=total_rows, position=0, file=sys.stdout, leave=True) as pbar:

            start_time = time.time()
            
            for row in PtoPTss_TR_dict.keys():

                pbar.update(1)
                i+=1
                # if i%50==0:
                #     print(i)


                try:

                    Plan_minus1 = plans_baseline[(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]-1)]
                    Plan_minus1_TR = plans_TR[(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]-1)]
                    Plan_plus1 = plans_baseline[(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1]+1)]
                    Plan_plus1_TR = plans_TR[(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1]+1)]
                    person_trips_baseline_person = person_trips_baseline[(person_trips_baseline['person_id']==row[0])]
                    person_trips_TR_person = person_trips_TR[(person_trips_TR['person_id']==row[0])]
                    PtoPTss_baseline_person = PtoPTss_baseline[(PtoPTss_baseline['personID']==row[0])&(PtoPTss_baseline['planIndex']==row[1])]
                    PtoPTss_TR_person = PtoPTss_TR[(PtoPTss_TR['personID']==row[0])&(PtoPTss_TR['planIndex']==row[1])]


                    persons_baseline.at[i,'Person'] = row[0]

                    persons_baseline.at[i,'Plan Index'] = int(row[1])

                    persons_baseline.at[i,'Activity From Baseline'] = list(Plan_minus1['activityType'])[0]

                    persons_baseline.at[i,'Activity To Baseline'] = list(Plan_plus1['activityType'])[0]

                    persons_baseline.at[i,'Activity From TR'] = list(Plan_minus1_TR['activityType'])[0]

                    persons_baseline.at[i,'Activity To TR'] = list(Plan_plus1_TR['activityType'])[0]

                    persons_baseline.at[i,'Planned Depart Time Baseline'] = int(list(Plan_minus1['activityEndTime'])[0])

                    persons_baseline.at[i,'Planned Depart Time TR'] = int(list(Plan_minus1_TR['activityEndTime'])[0])
                    
                    #### We sure mode choice was before the end of previous activityi???
                    mode_choice_baseline_person = mode_choice_baseline[(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']<=persons_baseline.at[i,'Planned Depart Time Baseline'] +1)]
                    mode_choice_TR_person = mode_choice_TR[(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']<=persons_baseline.at[i,'Planned Depart Time TR'] +1)]

                    persons_baseline.at[i,'Project Tried'] = nm

                    persons_baseline.at[i,'Potential INEXUS Baseline'] = np.mean(list(person_trips_baseline_person['logsum_trip_Potential_INEXUS']))

                    persons_baseline.at[i,'Potential INEXUS TR'] = np.mean(list(person_trips_TR_person['logsum_trip_Potential_INEXUS']))

                    persons_baseline.at[i,'Destination LOG SUM X Baseline'] = np.mean(list(person_trips_baseline_person['destination_logsum_x']))

                    persons_baseline.at[i,'Destination LOG SUM X TR'] = np.mean(list(person_trips_TR_person['destination_logsum_x']))

                    persons_baseline.at[i,'Destination LOG SUM Y Baseline'] = np.mean(list(person_trips_baseline_person['destination_logsum_y']))

                    persons_baseline.at[i,'Destination LOG SUM Y TR'] = np.mean(list(person_trips_TR_person['destination_logsum_y']))

                    persons_baseline.at[i,'LOG SUM Tours Mode AS Baseline'] = np.mean(list(person_trips_baseline_person['logsum_tours_mode_AS_tours']))

                    persons_baseline.at[i,'LOG SUM Tours Mode AS TR'] = np.mean(list(person_trips_TR_person['logsum_tours_mode_AS_tours']))

                    persons_baseline.at[i,'LOG SUM Work PLace Location Baseline'] = np.mean(list(person_trips_baseline_person['workplace_location_logsum']))

                    persons_baseline.at[i,'LOG SUM Work PLace Location TR'] = np.mean(list(person_trips_TR_person['workplace_location_logsum']))

                    persons_baseline.at[i,'Diff Potential INEXUS'] = persons_baseline.at[i,'Potential INEXUS TR']-persons_baseline.at[i,'Potential INEXUS Baseline']

                    persons_baseline.at[i,'Diff Destination LOG SUM X'] = persons_baseline.at[i,'Destination LOG SUM X TR']-persons_baseline.at[i,'Destination LOG SUM X Baseline']

                    persons_baseline.at[i,'Diff Destination LOG SUM Y'] = persons_baseline.at[i,'Destination LOG SUM Y TR']-persons_baseline.at[i,'Destination LOG SUM Y Baseline']

                    persons_baseline.at[i,'Diff LOG SUM Tours Mode AS'] = persons_baseline.at[i,'LOG SUM Tours Mode AS TR']-persons_baseline.at[i,'LOG SUM Tours Mode AS Baseline']

                    persons_baseline.at[i,'Diff LOG SUM Work PLace Location'] = persons_baseline.at[i,'LOG SUM Work PLace Location TR']-persons_baseline.at[i,'LOG SUM Work PLace Location Baseline']

                    persons_baseline.at[i,'Vehicles Used Baseline'] = np.unique(list(PtoPTss_baseline_person['vehicleID']))

                    persons_baseline.at[i,'Vehicles Used TR'] = np.unique(list(PtoPTss_TR_person['vehicleID']))

                    persons_baseline.at[i,'Bus agencies Used Baseline'] = np.unique(list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])]))

                    persons_baseline.at[i,'Bus agencies Used TR'] = np.unique(list(PtoPTss_TR_rt['vehicle2'][(PtoPTss_TR_rt['personID']==row[0])&(PtoPTss_TR_rt['planIndex']==row[1])]))

                    first_bus_agency_baseline = list(PtoPTss_baseline_bus['vehicle2'][(PtoPTss_baseline_bus['personID']==row[0])&(PtoPTss_baseline_bus['planIndex']==row[1])])
                    persons_baseline.at[i,'First Bus agency Used Baseline'] = first_bus_agency_baseline[0] if first_bus_agency_baseline else ''

                    persons_baseline.at[i,'First Bus agency Used TR'] = list(PtoPTss_TR_rt['vehicle2'][(PtoPTss_TR_rt['personID']==row[0])&(PtoPTss_TR_rt['planIndex']==row[1])])[0]

                    #### GEt that from MODE CHOICE
                    
#                     persons_baseline.at[i,'Trip Length TR'] = int(PtoPTss_TR_dict[row][0])

#                     persons_baseline.at[i,'Trip Duration TR'] = int(PtoPTss_TR_dict[row][1])
                        
                    persons_baseline.at[i,'X Activity From TR'] = list(Plan_minus1_TR['activityLocationX'])[0]

                    persons_baseline.at[i,'Y Activity From TR'] = list(Plan_minus1_TR['activityLocationY'])[0]

                    persons_baseline.at[i,'X Activity To TR'] = list(Plan_plus1_TR['activityLocationX'])[0]

                    persons_baseline.at[i,'Y Activity To TR'] = list(Plan_plus1_TR['activityLocationY'])[0]

                    persons_baseline.at[i,'X Activity From Baseline'] = list(Plan_minus1['activityLocationX'])[0]

                    persons_baseline.at[i,'Y Activity From Baseline'] = list(Plan_minus1['activityLocationY'])[0]

                    persons_baseline.at[i,'X Activity To Baseline'] = list(Plan_plus1['activityLocationX'])[0]

                    persons_baseline.at[i,'Y Activity To Baseline'] = list(Plan_plus1['activityLocationY'])[0]

                    persons_baseline.at[i,'Vehicle Types Used Baseline'] = np.unique(list(PtoPTss_baseline_person['mode']))

                    persons_baseline.at[i,'Vehicle Types Used TR'] = np.unique(list(PtoPTss_TR_person['mode']))

                    persons_baseline.at[i,'Planned Mode Baseline'] = list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0]

                    persons_baseline.at[i,'Planned Mode TR'] = list(plans_TR['legMode'][(plans_TR['personId']==row[0])&(plans_TR['planElementIndex']==row[1])])[0]

                    persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline_person['mode'])[-1]

                    persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR_person['mode'])[-1]
                    
                    
                    persons_baseline.at[i,'Trip Length Baseline'] = list(mode_choice_baseline_person['length'])[-1]

                    persons_baseline.at[i,'Trip Duration Baseline'] = list(mode_choice_baseline_person['duration'])[-1]
                    
                    persons_baseline.at[i,'Trip Length TR'] = list(mode_choice_TR_person['length'])[-1]

                    persons_baseline.at[i,'Trip Duration TR'] = list(mode_choice_TR_person['duration'])[-1]
                    
                    persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])

                    persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])
                    
                    
                    
                    
#                     persons_baseline.at[i,'Chosen Mode Baseline'] = list(mode_choice_baseline['mode'][(mode_choice_baseline['person']==row[0])&(mode_choice_baseline['time']<=PtoPTss_baseline_dict[row][2]+1)])[-1]

#                     persons_baseline.at[i,'Chosen Mode TR'] = list(mode_choice_TR['mode'][(mode_choice_TR['person']==row[0])&(mode_choice_TR['time']<=PtoPTss_TR_dict[row][2]+1)])[-1]
                    
                    
#                     #Not for people that were using HOV
#                     try:
#                         persons_baseline.at[i,'Trip Length Baseline'] = int(PtoPTss_baseline_dict[row][0])

#                         persons_baseline.at[i,'Trip Duration Baseline'] = int(PtoPTss_baseline_dict[row][1])

#                     except:
#                         print('HoV', PtoPTss_TR_dict[row], row, list(plans_baseline['legMode'][(plans_baseline['personId']==row[0])&(plans_baseline['planElementIndex']==row[1])])[0])
                            
#                             persons_baseline.at[i,'Diff Length'] = int(persons_baseline.at[i,'Trip Length TR']-persons_baseline.at[i,'Trip Length Baseline'])

#                             persons_baseline.at[i,'Diff Duration'] = int(persons_baseline.at[i,'Trip Duration TR']-persons_baseline.at[i,'Trip Duration Baseline'])                            
                            
#                         continue
                    
                    
                    

                except Exception as e:
                    # Print the error message to stderr
                    print(f'Error type: {type(e)}', file=sys.stderr)
                    print(f'Error message: {str(e)}', file=sys.stderr)

                    # Print the traceback to stderr
                    traceback.print_exc(file=sys.stderr)

                    print('Warning', row, PtoPTss_TR_dict[row], file=sys.stderr)
                    continue

                # Calculate and display progress information (rows done, percent, estimated time left)
                processed_rows = pbar.n
                remaining_rows = total_rows - processed_rows
                progress_percent = (processed_rows / total_rows) * 100

                # Estimate the remaining time based on the processing rate
                elapsed_time = time.time() - start_time
                rows_per_second = processed_rows / elapsed_time
                remaining_time_seconds = remaining_rows / rows_per_second
                remaining_time_hours = remaining_rows / rows_per_second / 3600

                pbar.set_postfix_str(f"Processed {processed_rows}/{total_rows} rows ({progress_percent:.2f}%), "
                              f"Estimated Remaining Time: {remaining_time_seconds:.2f} seconds"
                            f" or {remaining_time_hours:.2f} hours", 
                                    refresh=True)

        persons_baseline.to_csv('outputs/person_databaseTR.csv')

# Persist the per-person comparison table to disk (same target file as the write just above).
persons_baseline.to_csv(path_or_buf='outputs/person_databaseTR.csv')

                

        
        


In [6]:
# Provenance note, kept commented out; plans_baseline is presumably already in memory
# from an earlier cell — confirm before relying on this path.
# plans_baseline = pd.read_csv('gs://beam-core-outputs/sfbay-baseline2018-30pct-20230825/beam/year-2018-iteration-10/ITERS/it.0/0.plans.csv.gz', usecols = ['activityType','personId','planElementIndex','activityEndTime','activityLocationX','activityLocationY','legMode','planSelected'])

# Show every plan element stored for person 46419 in the baseline plans.
plans_baseline.loc[plans_baseline['personId'] == 46419]

Unnamed: 0,personId,planSelected,planElementIndex,activityType,activityLocationX,activityLocationY,activityEndTime,legMode
22212872,46419,False,0,Home,563023.283987,4.186661e+06,18482.0,
22212873,46419,False,1,,,,,walk_transit
22212874,46419,False,2,work,551061.842321,4.180518e+06,21536.0,
22212875,46419,False,3,,,,,walk_transit
22212876,46419,False,4,work,550798.154015,4.181218e+06,28292.0,
...,...,...,...,...,...,...,...,...
22212959,46419,True,18,Home,561311.817495,4.184990e+06,58874.4,
22212960,46419,True,19,,,,,walk_transit
22212961,46419,True,20,othdiscr,552687.406690,4.182358e+06,68806.8,
22212962,46419,True,21,,,,,walk_transit


In [22]:
# Count how often each value of the 'mode' column occurs in PtoPTss_baseline.
PtoPTss_baseline.loc[:, 'mode'].value_counts()

car                   7330285
walk                  6209170
bus                   4258767
car_hov2              1053447
car_hov3               754981
subway                 625754
car_emer               414094
tram                   365671
car_hov2_emer          128687
car_RideHail           111586
bike                   105773
car_hov3_emer           89647
cable_car               41051
rail                    21705
car_RideHail_empty       1036
ferry                      39
bus_empty                  17
Name: mode, dtype: int64

In [11]:
# Show every plan element stored for person 317 in the baseline plans.
plans_baseline.loc[plans_baseline['personId'] == 317]

Unnamed: 0,personId,planSelected,planElementIndex,activityType,activityLocationX,activityLocationY,activityEndTime,legMode
45133305,317,True,0,Home,567054.747829,4190684.0,26060.4,
45133306,317,True,1,,,,,walk_transit
45133307,317,True,2,work,551778.854568,4182412.0,65430.0,
45133308,317,True,3,,,,,walk_transit
45133309,317,True,4,Home,567054.747829,4190684.0,66337.2,
45133310,317,True,5,,,,,hov2_teleportation
45133311,317,True,6,othdiscr,570433.395782,4190380.0,66391.2,
45133312,317,True,7,,,,,hov2_teleportation
45133313,317,True,8,Home,567054.747829,4190684.0,-inf,


In [12]:
# Show every plan element stored for person 317 in the TR plans,
# for side-by-side comparison with the baseline plans of the same person.
plans_TR.loc[plans_TR['personId'] == 317]

Unnamed: 0,personId,planSelected,planElementIndex,activityType,activityLocationX,activityLocationY,activityEndTime,legMode
45132831,317,True,0,Home,567054.747829,4190684.0,26899.2,
45132832,317,True,1,,,,,walk_transit
45132833,317,True,2,work,552059.356848,4182366.0,65552.4,
45132834,317,True,3,,,,,walk_transit
45132835,317,True,4,Home,567054.747829,4190684.0,66502.8,
45132836,317,True,5,,,,,hov2_teleportation
45132837,317,True,6,othdiscr,571240.239682,4189103.0,68234.4,
45132838,317,True,7,,,,,hov2_teleportation
45132839,317,True,8,Home,567054.747829,4190684.0,-inf,


In [None]:
import pandas as pd

# Keep, for every person, the plan elements whose PlanIndex directly follows the
# previous element of the SAME person (PlanIndex == previous PlanIndex + 1), then
# build a lookup (PersonID, PlanIndex) -> ActivityType from those rows.
# NOTE(review): plans_df is not created in the visible part of this notebook, and
# its column names (PersonID / PlanIndex / ActivityType) differ from the
# personId / planElementIndex / activityType columns shown for plans_baseline —
# confirm which frame this cell is meant to run on.
plans_df = plans_df.sort_values(by=['PersonID', 'PlanIndex'])

# Row-wise predecessor in the sorted order; the very first row becomes NaN and
# therefore never satisfies the comparisons below.
plans_df_shifted = plans_df.shift(periods=1)

# A plain shift also pairs the first row of a person with the last row of the
# previous person. Require both rows to belong to the same person so that a
# match across a person boundary is never reported as "consecutive".
same_person = plans_df['PersonID'] == plans_df_shifted['PersonID']
filtered_rows = same_person & (plans_df['PlanIndex'] == (plans_df_shifted['PlanIndex'] + 1))
result_df = plans_df[filtered_rows]

# If a (PersonID, PlanIndex) pair occurs more than once, the last row wins.
result_dict = dict(zip(zip(result_df['PersonID'], result_df['PlanIndex']), result_df['ActivityType']))


In [6]:
# Peek at the 'vehicleID' column of PtoPTss_TR (object dtype in the output shown below).
PtoPTss_TR.loc[:, 'vehicleID']

0                                        0
1                                        0
2                                        0
3                                        0
4                                        1
                         ...              
21473553    westcat-ca-us:160436A2376B3650
21473554    westcat-ca-us:160436A2376B3650
21473555    westcat-ca-us:160436A2376B3650
21473556    westcat-ca-us:160436A2376B3650
21473557    westcat-ca-us:160436A2376B3650
Name: vehicleID, Length: 21473558, dtype: object

In [7]:
# Display the VtoR_TR table.
# NOTE(review): judging by the values shown, column '0' looks like a vehicle/trip id
# and column '1' like the matching route id — confirm against the cell that builds it.
VtoR_TR

Unnamed: 0.1,Unnamed: 0,0,1
0,0,AC:5593910,AC:851-142
1,1,AC:5593819,AC:802-142
2,2,AC:5573008,AC:840-142
3,3,AC:5572515,AC:800-142
4,4,SM:43:10748241:T1:15:00,SM:43
...,...,...,...
41465,41465,SF:7596614,SF:12297
41466,41466,SF:7605335,SF:12306
41467,41467,SF:7600910,SF:12303
41468,41468,SF:7605295,SF:12306
