In [1]:
# import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import gzip
import time
from itertools import groupby
import geopandas as gpd
import pathlib
from pathlib import Path
import os
import seaborn as sns
import datetime
import shapely
from shapely.geometry import shape, Point, LineString, Polygon
import warnings
from google.cloud import storage


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
'''
Setting the current working directory. Ideally the tree should follow look the following
BEAM
- Data
- Notebooks
    - Google Cloud Scripts
    - Local Scripts
- Output
    - City Name
        - Passenger
            - simulation_name        
        - Freight
            - simulation_name
'''

# This will return the folder where the script is stored
BASE_DIR = Path.cwd()
print(BASE_DIR)

# set the project directory (two-levels up, ideally)
project_folder = BASE_DIR.parent.parent
print(project_folder)

# General Declaration
gc_url = f"https://storage.googleapis.com/beam-core-outputs/"
iteration_no = "0" #change if we refer different simulation iteration
len_id_transit = 3 # I dont know why its been used, but copied from CP script
# conversion units
meter_to_mile = 0.000621371
percent_of_samples = 0.15
mps_to_mph = 2.23694

/home/jovyan/beam_root/jupyter/jupyter_home_old
/home/jovyan/beam_root


In [3]:
def get_filenames(variable_filename):
    simulation_data = {
            f"pilates-austin-baseline-calibrated-v3/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "Baseline",
            # Transit Capacity Scenarios
            f"pilates-austin-baseline-calibrated-v3-tr-capacity-025/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Cap_25pct",
            f"pilates-austin-baseline-calibrated-v3-tr-capacity-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Cap_50pct",
            f"pilates-austin-baseline-calibrated-v3-tr-capacity-150/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Cap_150pct",
            f"pilates-austin-baseline-calibrated-v3-tr-capacity-200/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Cap_200pct",
            # # Transit Speed Scenarios
            f"pilates-austin-baseline-calibrated-v3-tr-spd-050-attmpt-4/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Spd_50pct",
            f"pilates-austin-baseline-calibrated-v3-tr-spd-066-attmpt-4/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Spd_66pct",
            f"pilates-austin-baseline-calibrated-v3-tr-spd-200/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Spd_200pct",
            f"pilates-austin-baseline-calibrated-v3-tr-spd-400/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Spd_400pct",
            # # Transit Frequency Scenarios
            f"pilates-austin-baseline-calibrated-v3-tr-freq-025/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Frq_25pct",
            f"pilates-austin-baseline-calibrated-v3-tr-freq-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Frq_50pct",
            f"pilates-austin-baseline-calibrated-v3-tr-freq-150/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Frq_150pct",
            f"pilates-austin-baseline-calibrated-v3-tr-freq-200/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Frq_200pct",
            # # Transit Fare Reduction Scenarios
            f"pilates-austin-baseline-calibrated-v3-tr-incentive-0c/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Inc_0c",
            f"pilates-austin-baseline-calibrated-v3-tr-incentive-25c/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Inc_25c",
            f"pilates-austin-baseline-calibrated-v3-tr-incentive-50c/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Inc_50c",
            f"pilates-austin-baseline-calibrated-v3-tr-incentive-75c/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "TR_Inc_75c",
            # # Ridehail price Scenarios
            f"pilates-austin-baseline-calibrated-v3-rh-price-0125/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_price_12.5pct",
            f"pilates-austin-baseline-calibrated-v3-rh-price-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_price_50pct",
            f"pilates-austin-baseline-calibrated-v3-rh-price-200/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_price_200pct",
            f"pilates-austin-baseline-calibrated-v3-rh-price-500/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_price_500pct",
            # # # Ridehail fleet size
            f"pilates-austin-baseline-calibrated-v3-rh-fleetsize-0125/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_fleetsize_12.5pct",
            f"pilates-austin-baseline-calibrated-v3-rh-fleetsize-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_fleetsize_50pct",
            f"pilates-austin-baseline-calibrated-v3-rh-fleetsize-300-attmpt-2/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_fleetsize_300pct",
            f"pilates-austin-baseline-calibrated-v3-rh-fleetsize-1000/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_fleetsize_1000pct",
            # # # Ridehail fleet reposition
            f"pilates-austin-baseline-calibrated-v3-rh-reposition-0125/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_reposition_12.5pct",
            f"pilates-austin-baseline-calibrated-v3-rh-reposition-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_reposition_50pct",
            f"pilates-austin-baseline-calibrated-v3-rh-reposition-300/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_reposition_300pct",
            f"pilates-austin-baseline-calibrated-v3-rh-reposition-1000/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_reposition_1000pct",
            # # # Ridehail fleet detour
            f"pilates-austin-baseline-calibrated-v3-rh-detour-0125/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_detour_12.5pct",
            f"pilates-austin-baseline-calibrated-v3-rh-detour-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_detour_50pct",
            f"pilates-austin-baseline-calibrated-v3-rh-detour-300/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_detour_300pct",
            f"pilates-austin-baseline-calibrated-v3-rh-detour-1000/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "RH_detour_1000pct",
            # # # NMT Incentives
            f"pilates-austin-baseline-calibrated-v3-nmt-incentive-025/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "NMT_Inc_25c",
            f"pilates-austin-baseline-calibrated-v3-nmt-incentive-050/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "NMT_Inc_50c",
            f"pilates-austin-baseline-calibrated-v3-nmt-incentive-100/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "NMT_Inc_100c",
            f"pilates-austin-baseline-calibrated-v3-nmt-incentive-200/beam/year-2020-iteration-4/ITERS/it.{iteration_no}/{iteration_no}.{variable_filename}": "NMT_Inc_200c",
        }
    return simulation_data
    

In [4]:
# Path Traversal Columns
PTsColumns = [
    'vehicle', 'time', 'type', 'mode', 'length', 'vehicleType', 'arrivalTime', 'departureTime',
    'capacity', 'secondaryFuel', 'primaryFuelType', 'secondaryFuelType', 'numPassengers', 'primaryFuel', 'startX',
    'startY', 'endX', 'endY'
]

MCsColumns = ['person', 'time', 'type', 'mode', 'length', 'legModes']
MCsColumns_no_leg_modes = ['person', 'time', 'type', 'mode', 'length']

# modes associated with PT travel
PTsModes = np.array(['walk', 'bike', 'bike_Sharing', 'bike_Sharing_empty', 'car', 'car_emer', 'car_hov2_emer',
                     'car_hov3_emer', 'car_RideHail', 'car_RideHail_empty', 'car_RideHail_WC', 'car_RideHail_WC_empty',
                     'car_CAV', 'car_hov2', 'car_hov3', 'bus', 'tram', 'rail', 'subway', 'cable_car', 'ferry',
                     'bus_empty', 'tram_empty', 'rail_empty',
                     'subway_empty', 'cable_car_empty', 'ferry_empty'])
PTsModesNames = ['Walk', 'Bike', 'Bike Sharing', 'Empty Bike Sharing', 'Car', 'Car Emergency', 'Car HOV2 Emergency',
                 'Car HOV3 Emergency', 'Ride Hail', 'Empty Ride Hail', 'Ride Hail WC', 'Empty Ride Hail WC', 'CAV',
                 'Car HOV2', 'Car HOV3', 'Bus', 'Tram', 'Rail', 'Subway', 'Cable Car', 'Ferry', 'Empty Bus',
                 'Empty Tram', 'Empty Rail', 'Empty Subway', 'Empty Cable Car', 'Empty Ferry', ]

# Available Transits
transit_modes = ['bus', 'subway', 'tram', 'rail', 'cable_car', 'ferry']
transit_MCmodes = ['bus', 'subway', 'tram', 'rail', 'walk_transit', 'ride_hail_transit', 'drive_transit',
                   'cable_car', 'bike_transit']
# Mode Choices available
MCsModes = np.array(['bus', 'subway', 'tram', 'rail', 'car', 'hov3_teleportation', 'bike', 'hov2_teleportation',
                     'walk', 'car_hov2', 'car_hov3', 'walk_transit', 'ride_hail', 'ride_hail_transit',
                     'ride_hail_pooled', 'drive_transit', 'cable_car', 'bike_transit'])
MCsModesNames = ['Bus', 'Subway', 'Tram', 'Rail', 'Car', 'HOV3 Passenger', 'Bike', 'HOV2 Passenger', 'Walk',
                 'HOV2 Driver', 'HOV3 Driver', 'Walk-Transit', 'Ride Hail', 'Ride Hail-Transit', 'Ride Hail Pooled',
                 'Drive-Transit', 'Cable Car', 'Bike-Transit']

# Fuel type available in BEAM
primaryFuelTypes = ['Biodiesel', 'Diesel', 'Gasoline', 'Electricity', 'Food']

In [5]:
def readEvents(directory):
    '''
    Reads the events file for the given simulation scenario
    filters the events file into Path Traversal, Person Entering Vehicles, Mode Choices, Replanning and ActStart
    returns them as lists variables PT, PEVs, PLVs, MCs, RPs, AS
    :param directory:
    :return PT, PEVs, PLVs, MCs, RPs, AS:
    '''
    # fullPath = directory + 'ITERS/it.0/0.events.csv.gz'
    PTs = list()  # Path Traversal
    PEVs = list()  # Person Entering Vehicles
    PLVs = list()  # PersonLeavesVehicle
    MCs = list()  # Mode Choices
    RPs = list()  # Replanning
    ASs = list()  # actstart

    print('Reading ', directory)
    readEvents_time = time.time()
    for chunk in pd.read_csv(directory, chunksize=4000000):
        if sum((chunk['type'] == 'PathTraversal')) > 0:
            chunk['vehicle'] = chunk['vehicle'].astype(str)
            # PT = Path Traversal events
            # print(len(chunk.loc[(chunk['type'] == 'PathTraversal')]), ': len chunk PT')
            PT = chunk.loc[(chunk['type'] == 'PathTraversal') & (chunk['length'] > 0)].dropna(how='all', axis=1)
            PT['links'] = PT['links'].fillna('0')
            PT['departureTime'] = PT['departureTime'].astype(int)
            PT['arrivalTime'] = PT['arrivalTime'].astype(int)
            PTs.append(PT[PTsColumns])
            # print(len(PT), ': after filtering zero-length PT')
            # PEV = Person Entering Vehicle
            # print(len(chunk.loc[(chunk['type'] == 'PersonEntersVehicle')]), ': len chunk PEV')
            #             PEV = chunk.loc[(chunk.type == "PersonEntersVehicle") &
            #                             ~(chunk['person'].apply(str).str.contains('Agent').fillna(False)) &
            #                             ~(chunk['vehicle'].str.contains('body').fillna(False)), :].dropna(how='all', axis=1)
            # PEV = persons entering vehicle
            PEV = chunk.loc[(chunk.type == "PersonEntersVehicle") &
                            ~(chunk['person'].apply(str).str.contains('Agent').fillna(False))
            , :].dropna(how='all', axis=1)
            # print(len(PEV), ': after filtering drivers')

            # PLV = Passenger Leaving Vehicle
            if len(PEV) > 0:
                PEV['person'] = PEV['person'].astype(int)
                PEV['time'] = PEV['time'].astype(int)
                PEVs.append(PEV)

            # PLV
            #             PLV = chunk.loc[(chunk.type == "PersonLeavesVehicle") &
            #                             ~(chunk['person'].apply(str).str.contains('Agent').fillna(False)) &
            #                             ~(chunk['vehicle'].str.contains('body').fillna(False)), :].dropna(how='all', axis=1)
            # PLV = persons leaving vehicles
            # print(len(chunk.loc[(chunk['type'] == 'PersonLeavesVehicle')]), ': len chunk PLV')
            PLV = chunk.loc[(chunk.type == "PersonLeavesVehicle") &
                            ~(chunk['person'].apply(str).str.contains('Agent').fillna(False))
            , :].dropna(how='all', axis=1)
            # print(len(PLV), ': after filtering drivers')
            if len(PLV) > 0:
                PLV['person'] = PLV['person'].astype(int)
                PLV['time'] = PLV['time'].astype(int)
                PLVs.append(PLV)
        if sum((chunk['type'] == 'ModeChoice')) > 0:
            # MC = Mode Choice
            MC = chunk.loc[(chunk['type'] == 'ModeChoice') & (chunk['length'] > 0)].dropna(how='all', axis=1)
            try:
                MCs.append(MC[MCsColumns])
            except:
                MCs.append(MC[MCsColumns_no_leg_modes])
                print('WARNING: probably no legModes')

        if sum((chunk['type'] == 'Replanning')) > 0:
            # RP = Replanning
            RP = chunk.loc[(chunk['type'] == 'Replanning')].dropna(how='all', axis=1)
            RPs.append(RP)

        if sum((chunk['type'] == 'actstart')) > 0:
            # AS = actstart
            AS = chunk.loc[(chunk['type'] == 'actstart')].dropna(how='all', axis=1)
            ASs.append(AS)

        # print(chunk['type'].value_counts())
    # print(len(pd.concat(PEVs)), ':len PEVs')
    # print(len(pd.concat(PLVs)), ':len PLVs')

    PEVs = pd.concat(PEVs)  # PEVs = persons entering vehicles events
    PLVs = pd.concat(PLVs)  # PLVs = persons leaving vehicles events
    PTs = pd.concat(PTs)  # PT = Path Traversal events
    MCs = pd.concat(MCs)  # MC = mode choice events
    RPs = pd.concat(RPs)  # RP = replanning events
    ASs = pd.concat(ASs)  # AS = activity start events

    # print(len(PTs), ':len PTs')
    # print(len(MCs), ':len MCs')
    # print(len(RPs), ':len RPs')
    # print(len(ASs), ':len ASs')
    print(f"Completed reading events file in "
          f"{time.strftime('%H:%M:%S'.format(str((time.time() - readEvents_time) % 1)), time.gmtime((time.time() - readEvents_time)))}")
    return MCs, PTs, PEVs, PLVs, RPs, ASs

In [6]:
def fixData(Mcs, PTs, PEVs, PLVs, len_id_transit):
    '''
    :param Mcs: list variable containing all events where events[type] = ModeChoice
    :param PTs: list variable containing all events where events[type] = PathTraversals
    :param PEVs: list variable containing all events where events[type] = PersonEnterVehicles
    :param PLVs: list variable containing all events where events[type] = PersonLeavesVehicles
    :param len_id_transit: globally declared =3
    :return:
    '''
    fixdata_time = time.time()
    PTs['duration'] = PTs['arrivalTime'] - PTs['departureTime']
    PTs['gallonsGasoline'] = 0
    PTs.loc[PTs['primaryFuelType'] == 'Gasoline', 'gallonsGasoline'] += (
            PTs.loc[PTs['primaryFuelType'] == 'Gasoline', 'primaryFuel'] * 8.3141841e-9)
    PTs.loc[PTs['secondaryFuelType'] == 'Gasoline', 'gallonsGasoline'] += (
            PTs.loc[PTs['secondaryFuelType'] == 'Gasoline', 'secondaryFuel'] * 8.3141841e-9)
    PTs['occupancy'] = PTs['numPassengers']

    PTs['isCAV'] = PTs['vehicleType'].str.contains('L5')
    PTs['isRH'] = PTs['vehicle'].str.contains('rideHail')
    PTs['isBS'] = PTs['vehicle'].str.contains('bay_wheels')  # ??? needed?
    PTs['isRH_WC'] = PTs['vehicleType'].str.contains('RH_Car-wheelchair')  # ??? needed?
    PTs['is_empty'] = PTs['numPassengers'] == 0
    PTs['is_RHempty'] = PTs['isRH'] & PTs['is_empty']
    PTs['is_car_emer'] = PTs['vehicle'].str.contains('emergency')

    PTs.loc[PTs['mode'] == 'car', 'capacity'] += 1
    PTs.loc[PTs['mode'] == 'car_hov2', 'capacity'] += 1
    PTs.loc[PTs['mode'] == 'car_hov3', 'capacity'] += 1
    PTs.loc[PTs['mode'] == 'walk', 'capacity'] = 1
    PTs.loc[PTs['mode'] == 'bike', 'capacity'] = 1

    PTs.loc[PTs['isRH'], 'mode'] += '_RideHail'
    PTs.loc[PTs['isBS'], 'mode'] += '_Sharing'
    PTs.loc[PTs['isRH_WC'], 'mode'] += '_WC'
    PTs.loc[PTs['isCAV'], 'mode'] += '_CAV'
    PTs.loc[PTs['is_RHempty'], 'mode'] += '_empty'
    PTs.loc[PTs['is_car_emer'], 'mode'] += '_emer'

    PTs.loc[PTs['mode'] == 'car', 'occupancy'] = 1
    PTs.loc[PTs['mode'] == 'car_emer', 'occupancy'] = 1
    PTs.loc[PTs['mode'] == 'car_hov2', 'occupancy'] += 1
    PTs.loc[PTs['mode'] == 'car_hov3', 'occupancy'] += 1
    PTs.loc[PTs['mode'] == 'car_hov3_emer', 'occupancy'] = 1
    PTs.loc[PTs['mode'] == 'walk', 'occupancy'] = 1
    PTs.loc[PTs['mode'] == 'bike', 'occupancy'] = 1

    PTs['vehicleMiles'] = PTs['length'] / 1609.34  # meters to miles
    PTs['passengerMiles'] = (PTs['length'] * PTs['occupancy']) / 1609.34
    PTs['totalEnergyInJoules'] = PTs['primaryFuel'] + PTs['secondaryFuel']

    # print("PT[is_empty]")
    # print(PTs['is_empty'])
    PTs['is_transit'] = 0
    for tm in transit_modes:  # Tag all public transport events within PathTraversal events file
        PTs['is' + tm] = PTs['mode'].str.contains(tm)
        PTs['is_' + tm + '_empty'] = PTs['is' + tm] & PTs['is_empty']
        PTs['is_transit'] += PTs['is' + tm]
        PTs.loc[PTs['is_' + tm + '_empty'], 'mode'] += '_empty'
        PTs.drop(columns=['is' + tm])
        PTs.drop(columns=['is_' + tm + '_empty'])
    #
    PTs.drop(columns=['isCAV', 'is_empty', 'is_RHempty', 'isRH_WC', 'is_car_emer'])
    #
    vehicles_2 = list()
    vehicles = PTs['vehicle']
    for vehicle in vehicles:
        vehicles_2.append(vehicle[:len_id_transit])
    vehicles_2 = np.array(vehicles_2)
    PTs['vehicle2'] = vehicles_2

    vehicles_2 = list()
    vehicles = PEVs['vehicle']
    for vehicle in vehicles:
        vehicles_2.append(vehicle[:len_id_transit])
    PEVs['vehicle2'] = vehicles_2

    vehicles_2 = list()
    vehicles = PLVs['vehicle']
    for vehicle in vehicles:
        vehicles_2.append(vehicle[:len_id_transit])
    PLVs['vehicle2'] = vehicles_2

    print(f"Fixed data in {time.strftime('%H:%M:%S'.format(str((time.time() - fixdata_time) % 1)), time.gmtime((time.time() - fixdata_time)))}")
    return Mcs, PTs, PEVs, PLVs

In [7]:
# Get trips by modes

# get linkstats file
linkstats_filename = "linkstats.csv.gz"
variable_filename = linkstats_filename
linkstats_simulation_data = get_filenames(variable_filename)
linstats_data_names = linkstats_simulation_data.keys()

# get events file
events_filename = "events.csv.gz"
variable_filename = events_filename
events_simulation_data = get_filenames(variable_filename)
events_data_names = events_simulation_data.keys()

# get plans file
plans_filename = "plans.csv.gz"
variable_filename = plans_filename
plans_simulation_data = get_filenames(variable_filename)
plans_data_names = plans_simulation_data.keys()


ModeChoices_dict = dict() #ModeChoice
PathTraversals_dict = dict() #PathTraversal
PersonEnteringVehicles_dict = dict() #PersonEnteringVehicle
PersonLeavingVehicles_dict = dict() #PersonLeavingVehicle
PersonToPathTraversals_dict = dict() #PersonToPathTraversals
Replannings_dict= dict() # Replanning
ActivityStarts_dict = dict() # Activities

for event_file, plan_file in zip(events_data_names, plans_data_names):
    simulation_name = event_file.split("/")[0]
    print(f"Simulation name: {simulation_name}")
    ModeChoices = list() #ModeChoice
    PathTraversals = list() #PathTraversal
    PersonEnteringVehicles = list() #PersonEnteringVehicle
    PersonLeavingVehicles = list() #PersonLeavingVehicle
    PersonToPathTraversals = list() #PersonToPathTraversals
    Replannings = list()
    ActivityStarts = list()
    
    ModeChoices, PathTraversals, PersonEnteringVehicles, PersonLeavingVehicles, Replannings, ActivityStarts = readEvents(f"{gc_url}{event_file}")  # get filtered events files
    ModeChoices, PathTraversals, PersonEnteringVehicles, PersonLeavingVehicles = fixData(ModeChoices, PathTraversals, PersonEnteringVehicles, PersonLeavingVehicles, len_id_transit)  # fix transit modes name
    
    ModeChoices_dict[simulation_name] = ModeChoices
    PathTraversals_dict[simulation_name] = PathTraversals
    PersonEnteringVehicles_dict[simulation_name] = PersonEnteringVehicles
    PersonLeavingVehicles_dict[simulation_name] = PersonLeavingVehicles
    Replannings_dict[simulation_name] = Replannings
    ActivityStarts_dict[simulation_name] = ActivityStarts


Simulation name: pilates-austin-baseline-calibrated-v3
Reading  https://storage.googleapis.com/beam-core-outputs/pilates-austin-baseline-calibrated-v3/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz


  for chunk in pd.read_csv(directory, chunksize=4000000):
  for chunk in pd.read_csv(directory, chunksize=4000000):
  for chunk in pd.read_csv(directory, chunksize=4000000):
  for chunk in pd.read_csv(directory, chunksize=4000000):
  for chunk in pd.read_csv(directory, chunksize=4000000):


Completed reading events file in 00:02:57
Fixed data in 00:00:32
Simulation name: pilates-austin-baseline-calibrated-v3-tr-capacity-025
Reading  https://storage.googleapis.com/beam-core-outputs/pilates-austin-baseline-calibrated-v3-tr-capacity-025/beam/year-2020-iteration-4/ITERS/it.0/0.events.csv.gz


KeyboardInterrupt: 

In [None]:
listSimulationNames = linkstats_simulation_data.values()
print(pd.DataFrame(listSimulationNames))
# dfTe

In [None]:
# trips counts according to modechoice events
modechoice_counts = pd.DataFrame()
for event_file in events_data_names:
    simulation_name = event_file.split("/")[0]
    trip_counts = ModeChoices_dict[simulation_name]["mode"].value_counts()
    print(trip_counts)
    teleportation = trip_counts["hov2_teleportation"] + trip_counts["hov3_teleportation"]
    car = trip_counts["car"]
    bus = trip_counts["walk_transit"] + trip_counts["drive_transit"] + trip_counts["bike_transit"]
    ridehail = trip_counts["ridehail"]
    ridehail_pool = trip_counts["ridehail_pooled"]
    bike = trip_counts["bike"]    
    break # get the baseline data
    

In [None]:
# trips counts according to personenteringvehicles events
personenteringvehicles_counts = pd.DataFrame()
for event_file in events_data_names:
    simulation_name = event_file.split("/")[0]
    trip_counts = PersonEnteringVehicles_dict[simulation_name]["mode"].value_counts()
    print(trip_counts)
    # teleportation = trip_counts["hov2_teleportation"] + trip_counts["hov3_teleportation"]
    # car = trip_counts["car"]
    # bus = trip_counts["walk_transit"] + trip_counts["drive_transit"] + trip_counts["bike_transit"]
    # ridehail = trip_counts["ridehail"]
    # ridehail_pool = trip_counts["ridehail_pooled"]
    # bike = trip_counts["bike"]    
    break

In [None]:
dfModeChoice = ModeChoices_dict["pilates-austin-baseline-calibrated-v3"]
dfPathTraversal = PathTraversals_dict["pilates-austin-baseline-calibrated-v3"]
dfPEV = PersonEnteringVehicles_dict["pilates-austin-baseline-calibrated-v3"]
dfPLV = PersonLeavingVehicles_dict["pilates-austin-baseline-calibrated-v3"]
dfRP = Replannings_dict["pilates-austin-baseline-calibrated-v3"]
dfActivities = ActivityStarts_dict["pilates-austin-baseline-calibrated-v3"]

# dfPEV.loc[dfPEV["vehicle"].str.contains("cap",na=False)]

In [None]:
modes_available = ['bus_empty', 'car_RideHail_empty', 'car', 'car_hov3', 'car_hov2',
       'walk', 'bike', 'car_RideHail', 'bus', 'car_emer', 'car_hov2_emer',
       'car_hov3_emer', 'tram_empty', 'tram']

for mode in modes_available:
    avg_trip_length = dfPathTraversal.loc[(dfPathTraversal["mode"]==mode)]["trip_length_in_miles"].sum()/len(dfPathTraversal.loc[(dfPathTraversal["mode"]==mode)])
    print(f"Average {mode} trip length: {avg_trip_length}")    

In [None]:
# plot average trip length by modes
modes_available = ['bus_empty', 'car_RideHail_empty', 'car', 'car_hov3', 'car_hov2',
       'walk', 'bike', 'car_RideHail', 'bus', 'car_emer', 'car_hov2_emer',
       'car_hov3_emer', 'tram_empty', 'tram']
simulation_name_list = list()
car_excluding_hovs_list = list()
walk_list = list()
RH_list = list()

for event_file in events_data_names:
    simulation_name = event_file.split("/")[0]
    simulation_name_list.append(simulation_name)
    dfPathTraversals = PathTraversals_dict[simulation_name]
    dfPathTraversals["trip_length_in_miles"] = dfPathTraversals["length"]*0.00062137
    dfPathTraversals["trip_length_in_miles"] = dfPathTraversals["trip_length_in_miles"].replace([np.nan, np.inf, -np.inf],0)
    # check if the 'mode' column contains any of the modes we are interested in
    mode = "car"
    avg_car_trip_length = np.divide(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]["trip_length_in_miles"].sum(), len(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]))
    car_excluding_hovs_list.append(avg_car_trip_length)
    mode = "walk"
    avg_walk_trip_length = np.divide(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]["trip_length_in_miles"].sum(), len(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]))
    walk_list.append(avg_walk_trip_length)
    mode = "car_RideHail"
    avg_RH_trip_length = np.divide(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]["trip_length_in_miles"].sum(), len(dfPathTraversals.loc[(dfPathTraversals["mode"]==mode)]))
    RH_list.append(avg_RH_trip_length)
    
dictAvgTripLength = {
    "simulation_name":simulation_name_list,
    "avg_car_trip_miles":car_excluding_hovs_list,
    "avg_walk_trip_miles":walk_list,
    "avg_RH_trip_miles":RH_list,    
}

dfAvgTripLength = pd.DataFrame().from_dict(dictAvgTripLength)
dfAvgTripLength

In [None]:
baseline_cartriplength = dfAvgTripLength.loc[0,"avg_car_trip_miles"]
dfAvgTripLength["Baseline_CarTripLength"] = baseline_cartriplength
baseline_walktriplength = dfAvgTripLength.loc[0,"avg_walk_trip_miles"]
dfAvgTripLength["Baseline_WalkTripLength"] = baseline_walktriplength
baseline_RHtriplength = dfAvgTripLength.loc[0,"avg_RH_trip_miles"]
dfAvgTripLength["Baseline_RHTripLength"] = baseline_RHtriplength

# calculate perecentage change in vMT
dfAvgTripLength["pct_change_cartrip"] = ((dfAvgTripLength["avg_car_trip_miles"] - dfAvgTripLength["Baseline_CarTripLength"])*100/dfAvgTripLength["Baseline_CarTripLength"])
dfAvgTripLength["pct_change_walktrip"] = ((dfAvgTripLength["avg_walk_trip_miles"] - dfAvgTripLength["Baseline_WalkTripLength"])*100/dfAvgTripLength["Baseline_WalkTripLength"])
dfAvgTripLength["pct_change_rhtrip"] = ((dfAvgTripLength["avg_RH_trip_miles"] - dfAvgTripLength["Baseline_RHTripLength"])*100/dfAvgTripLength["Baseline_RHTripLength"])
# dfAvgTripLength

In [None]:
dfAvgTripLength.to_csv(project_folder.joinpath("BEAM-Analysis", "Output", "austin", "Passenger", "Austin_SummaryTables", "AvgTripLength.csv"))


In [None]:
# CarTripLength Comparision #

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

# mark color red, if drop in % wrt Baseline other green
colormat=np.where(dfAvgTripLength["pct_change_cartrip"]>0, 'g','r')

# Horizontal barplot for VMT
sns.barplot(y = dfAvgTripLength["simulation_name"], x = dfAvgTripLength["pct_change_cartrip"], palette=colormat)

# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.45)

#add plot title
plt.title('PCT Change in Car Trip length (miles) w.r.t baseline', fontsize=16)

#add axis labels
plt.xlabel('% change', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("Change_in_CarTripLength.jpg", dpi=600)
plt.show()

In [None]:
# WalkTripLength Comparision #

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

# mark color red, if drop in % wrt Baseline other green
colormat=np.where(dfAvgTripLength["pct_change_walktrip"]>0, 'g','r')

# Horizontal barplot for VMT
sns.barplot(y = dfAvgTripLength["simulation_name"], x = dfAvgTripLength["pct_change_walktrip"], palette=colormat)
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.45)

#add plot title
plt.title('PCT Change in Walk Trip length (miles) w.r.t baseline', fontsize=16)

#add axis labels
plt.xlabel('% change', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("Change_in_WalkTripLength.jpg", dpi=600)
plt.show()

In [None]:
# RHTripLength Comparision #

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

# mark color red, if drop in % wrt Baseline other green
colormat=np.where(dfAvgTripLength["pct_change_rhtrip"]>0, 'g','r')

# Horizontal barplot for VMT
sns.barplot(y = dfAvgTripLength["simulation_name"], x = dfAvgTripLength["pct_change_rhtrip"], palette=colormat)
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.45)

#add plot title
plt.title('PCT Change in RH Trip length (miles) w.r.t baseline', fontsize=16)

#add axis labels
plt.xlabel('% change', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

# plt.xlabel("sepal_length", fontsize=40)
# plt.ylabel("petal_length", fontsize=40)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("Change_in_RHTripLength.jpg", dpi=600)
plt.show()

In [None]:
# RH trips empty vs pooled vs single occupancy

# get pooled vs non-pooled trips
rhwaittime_filename = "rideHailIndividualWaitingTimes.csv"
variable_filename = rhwaittime_filename
rhwaittime_data = get_filenames(variable_filename) 

# trips counts according to personenteringvehicles events
sensitivity_name_list = list()
Total_RH_Trips_list = list()
Nonempty_RHTrips_list = list()
Empty_RHTrips_list = list()
Nonpooled_RHTrips_list = list()
Pooled_RHTrips_list = list()
avg_RHTrips_waitTimeInSeconds_list = list()
avg_RHTrips_pooled_waitTimeInSeconds_list = list()
avg_distance_RHTrips_list = list()

for event_file, rhwaittime in zip(events_data_names, rhwaittime_data):
    simulation_name = event_file.split("/")[0]
    sensitivity_name_list.append(simulation_name)
    dfRH = PathTraversals_dict[simulation_name]
    
    Total_RH_Trips_list.append(len(dfRH.loc[(dfRH["isRH"]==True)]))    
    nonempty_RHTrips = len(dfRH.loc[(dfRH["isRH"]==True)]) - len(dfRH.loc[(dfRH["isRH"]==True) & (dfRH["mode"].str.contains("empty",na=True))])
    Nonempty_RHTrips_list.append(nonempty_RHTrips)    
    Empty_RHTrips_list.append(len(dfRH.loc[(dfRH["isRH"]==True) & (dfRH["mode"].str.contains("empty",na=True))]))
    
    # Pooled vs non-pooled
    dfRHWaitTime = pd.read_csv(f"{gc_url}{rhwaittime}")
    Pooled_RHTrips_list.append(len(dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]=="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)]))
    Nonpooled_RHTrips_list.append(len(dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]!="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)]))
    
    # Average WaitingTimeInSeconds
    avg_RHTrips_waitTimes = dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]!="ride_hail_pooled")&(dfRHWaitTime["waitingTimeInSeconds"]>0)]["waitingTimeInSeconds"].sum()/len(dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]!="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)])
    avg_RHTrips_waitTimeInSeconds_list.append(avg_RHTrips_waitTimes)
    
    avg_RHTrips_waitTimes = dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]=="ride_hail_pooled")&(dfRHWaitTime["waitingTimeInSeconds"]>0)]["waitingTimeInSeconds"].sum()/len(dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]=="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)])
    avg_RHTrips_pooled_waitTimeInSeconds_list.append(avg_RHTrips_waitTimes)
    
    # Average Distance RHTrip makes 
    avg_distance_RHTrips = dfRH.loc[(dfRH["isRH"]==True) & (~dfRH["mode"].str.contains("empty",na=True))]["length"].sum()/len(dfRH.loc[(dfRH["isRH"]==True) & (~dfRH["mode"].str.contains("empty",na=True))])
    avg_distance_RHTrips *= 0.000621371
    avg_distance_RHTrips_list.append(avg_distance_RHTrips)
    
dictRHTable = {
                "simulation_name":sensitivity_name_list,
                "Total_RHTrips": Total_RH_Trips_list,
                "Empty_RHTrips": Empty_RHTrips_list,
                "NonEmpty_RHTrips": Nonempty_RHTrips_list,
                "Nonpooled_RHTrips": Nonpooled_RHTrips_list,
                "pooled_RHTrips": Pooled_RHTrips_list,
                "nonpooled_avg_waitTimeInSec": avg_RHTrips_waitTimeInSeconds_list,
                "pooled_avg_waitTimeInSec": avg_RHTrips_pooled_waitTimeInSeconds_list,
                "avg_miles_nonempty_RHTrips": avg_distance_RHTrips_list
                }

dfRHTable = pd.DataFrame.from_dict(dictRHTable)
dfRHTable.to_csv(project_folder.joinpath("BEAM-Analysis", "Output", "austin", "Passenger", "Austin_SummaryTables", "RidehailrelatedStats.csv"))

In [None]:
dfComparisionTable = pd.read_csv(project_folder.joinpath("BEAM-Analysis", "Output", "austin", "Passenger", "Austin_SummaryTables", "VMT_VHT_Transit.csv"))

In [None]:
# Total RH Trips

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

sns.barplot(y = dfRHTable["simulation_name"], x = dfRHTable["Total_RHTrips"]/1000)
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.55)

#add plot title
plt.title('Total RH Trips', fontsize=16)

#add axis labels
plt.xlabel('Number of Trips (in thousands)', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("RH_Trips.jpg", dpi=600)

In [None]:
# Empty RH Trips

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

sns.barplot(y = dfRHTable["simulation_name"], x = dfRHTable["NonEmpty_RHTrips"]/1000)
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.55)

#add plot title
plt.title('Non Empty RH Trips', fontsize=16)

#add axis labels
plt.xlabel('Number of Trips (in thousands)', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("Non Empty_RH_Trips.jpg", dpi=600)

In [None]:
# Pooled RH Trips

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

sns.barplot(y = dfRHTable["simulation_name"], x = dfRHTable["pooled_RHTrips"])
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.55)

#add plot title
plt.title('Pooled RH Trips', fontsize=16)

#add axis labels
plt.xlabel('Number of Trips', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("Pooled RH Trips.jpg", dpi=600)

In [None]:
# Avg RH Trip length

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

sns.barplot(y = dfRHTable["simulation_name"], x = dfRHTable["avg_miles_nonempty_RHTrips"])
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.55)

#add plot title
plt.title('Avg non-empty RH Trips length (in miles)', fontsize=16)

#add axis labels
plt.xlabel('Trip length (in miles)', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("avg_miles_nonempty_RHTrips.jpg", dpi=600)

In [None]:
# NonEmpty vs Empty RH Trips

# plot figure size (Width x Height (in inches))
plt.figure(figsize=(8,7)) # default plotsize = width of 6.4 inches and a height of 4.8 inches

sns.barplot(y = dfRHTable["simulation_name"], x = dfRHTable["NonEmpty_RHTrips"]*100/dfRHTable["Total_RHTrips"])
# change specifically label font sizes using plt.xlabel()
sns.set(font_scale=0.55)

#add plot title
plt.title('PCT of non RH Trips of Total', fontsize=16)

#add axis labels
plt.xlabel('% of non-empty RH Trips compared to total RH trips', fontsize=12)
plt.ylabel('Sensitivity Scenarios', fontsize=12)

plt.tight_layout()

# save the plot as JPG file
plt.savefig("pct_nonempty_RH_Trips.jpg", dpi=600)

In [None]:
# dfRHTable

In [None]:
# dfRHWaitTime = pd.read_csv("https://storage.googleapis.com/beam-core-outputs/pilates-austin-baseline-calibrated-v3/beam/year-2020-iteration-4/ITERS/it.0/0.rideHailIndividualWaitingTimes.csv")
# dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]!="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)]["waitingTimeInSeconds"].sum()/len(dfRHWaitTime.loc[(dfRHWaitTime["modeChoice"]!="ride_hail_pooled") & (dfRHWaitTime["waitingTimeInSeconds"]>0)])

In [None]:
# dfRHWaitTime["modeChoice"].unique()