In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import h5py
import boto.s3
import glob

In [4]:
# Base URL of the iteration-0 output folder (ITERS/it.0) from which the events file is read below.
# NOTE(review): the variable name says "2023" but the run folder in the URL is
# "gemini-base-2035-activitysim" — confirm which scenario year is intended.
gemini_base_2023_events_loc = "https://beam-outputs.s3.amazonaws.com/output/sfbay/gemini-base-2035-activitysim__2022-05-27_18-37-20_sjx/ITERS/it.0/"

In [5]:
%%time
# Read the gzip-compressed iteration-0 events table directly from the output folder URL.
events_csv_url = gemini_base_2023_events_loc + '0.events.csv.gz'
gemini_base_2023_events = pd.read_csv(events_csv_url, compression='gzip')



CPU times: total: 23min 55s
Wall time: 25min 14s


In [7]:
# Lift pandas' display limits so every column and every row is rendered.
# Both options are set to None, i.e. "no limit".
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

In [10]:
# Preview the first rows of the loaded events table to inspect its columns.
gemini_base_2023_events.head()

Unnamed: 0,person,vehicle,time,type,link,legMode,mode,incentive,tollCost,netCost,departTime,startX,startY,endX,endY,parkingTaz,chargingPointType,pricingModel,parkingType,score,driver,secondaryFuelLevel,price,locationY,locationX,primaryFuelLevel,facility,actType,riders,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,capacity,arrivalTime,departureTime,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,currentTourMode,vehicleType,links,numPassengers,length,primaryFuel,shiftStatus,parkingZoneId,fuel,duration,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,tripId,cost,reason
0,TransitDriverAgent-CC:111002011_merged_421004994,,0.0,departure,,be_a_transit_driver,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,TransitDriverAgent-BA:23R11,,0.0,departure,,be_a_transit_driver,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,TransitDriverAgent-SF:7600620,,0.0,departure,,be_a_transit_driver,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,TransitDriverAgent-ST:9568838-124-D-Weekday-54,,0.0,departure,,be_a_transit_driver,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,TransitDriverAgent-SF:7600620,SF:7600620,0.0,PersonEntersVehicle,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
# Rename the "mode" column to "modeBEAM".
# The rename is done in place on the frame itself to avoid copying the full events table.
gemini_base_2023_events.rename(columns={"mode": "modeBEAM"}, inplace=True)
# Replace "Work" with "work" in the "actType" column.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on gemini_base_2023_events["actType"]: that chained
# form mutates a temporary Series, which under pandas Copy-on-Write leaves the
# frame unchanged (and triggers a chained-assignment warning on pandas 2.x).
gemini_base_2023_events["actType"] = gemini_base_2023_events["actType"].replace({"Work": "work"})

In [21]:
# Work on the first N_EVENTS_SUBSET rows of the events table only.
N_EVENTS_SUBSET = 2_000_000
eventsSF = gemini_base_2023_events.head(N_EVENTS_SUBSET)

In [22]:
# Drop rows whose "person" value contains "Agent" (TransitDriver / RidehailDriver
# agents), because those rows carry no agent information.
# Rows with a missing "person" are kept (na=False).
person_is_driver_agent = eventsSF["person"].str.contains("Agent", na=False)
eventsSF = eventsSF.loc[~person_is_driver_agent].reset_index(drop=True)

In [23]:
# Helper columns used to build the IDMerged column.
# UniqueID: plain copy of the "person" column.
eventsSF['UniqueID'] = eventsSF['person']

# personID: the person id where it also occurs somewhere in the "driver" column, else NaN.
person_seen_as_driver = eventsSF['person'].isin(eventsSF['driver'])
eventsSF['personID'] = np.where(person_seen_as_driver, eventsSF['person'], np.nan)

# driverID: the driver id where it also occurs somewhere in the "person" column, else NaN.
driver_seen_as_person = eventsSF['driver'].isin(eventsSF['person'])
eventsSF['driverID'] = np.where(driver_seen_as_person, eventsSF['driver'], np.nan)

In [24]:
# Merge the person and driver ids into a single IDMerged column.
# Priority per row: UniqueID first, then personID, then driverID.
person_or_driver_id = eventsSF['personID'].combine_first(eventsSF['driverID'])
eventsSF['IDMerged'] = eventsSF['UniqueID'].combine_first(person_or_driver_id)

In [25]:
# The helper id columns are no longer needed once IDMerged exists.
id_helper_columns = ['personID', 'driverID', 'UniqueID']
eventsSF = eventsSF.drop(columns=id_helper_columns)

In [26]:
%%time
# Split the colon-separated "riders" column and replicate the row for every rider.
eventsSF['riders'] = eventsSF['riders'].str.split(':')
# explode() repeats the original row label for each rider, which would leave
# duplicate index labels in the frame. Reset to a fresh 0..n-1 index so that
# later index-aligned operations work on unique labels; row order is unchanged.
eventsSF = eventsSF.explode('riders').reset_index(drop=True)

CPU times: total: 5.73 s
Wall time: 5.74 s


In [27]:
# Fold rider ids into IDMerged.
# riderID: the rider id where it also occurs somewhere in the "person" column, else NaN.
rider_seen_as_person = eventsSF['riders'].isin(eventsSF['person'])
eventsSF['riderID'] = np.where(rider_seen_as_person, eventsSF['riders'], np.nan)
# A non-missing riderID takes precedence; otherwise the existing IDMerged is kept.
merged_with_riders = eventsSF['riderID'].combine_first(eventsSF['IDMerged'])
eventsSF['IDMerged'] = merged_with_riders

In [28]:
# riderID has been merged into IDMerged, so the helper column can go.
eventsSF = eventsSF.drop(columns=['riderID'])

In [29]:
# Drop rows whose driver is a TransitDriver / RidehailDriver agent AND whose
# IDMerged is missing, because those rows carry no agent information.
# Equivalently: keep a row if its driver is not an "Agent" or it has an IDMerged.
driver_is_agent = eventsSF["driver"].str.contains("Agent", na=False)
has_merged_id = eventsSF["IDMerged"].notna()
eventsSF = eventsSF.loc[~driver_is_agent | has_merged_id].reset_index(drop=True)

In [30]:
%%time
# Fill missing ids for charging-related events: within each vehicle, propagate
# IDMerged forward first and then backward, and store the result as chargeID.
def _fill_forward_then_backward(ids):
    """Return `ids` with gaps filled forward, then any leading gaps filled backward."""
    return ids.ffill().bfill()

ids_by_vehicle = eventsSF.groupby('vehicle')['IDMerged']
eventsSF["chargeID"] = ids_by_vehicle.transform(_fill_forward_then_backward)

CPU times: total: 53.7 s
Wall time: 54.7 s
