In [1]:
import os
import pandas as pd
import numpy as np
import geopandas as gpd
import h5py
import boto.s3
import glob
import boto3
from zipfile import ZipFile
import shutil
import gcsfs

#### Read the files directly from S3

In [2]:
%%time
dtypes = {
    "time": "float32",
    "type": "category",
    "legMode": "category",
    "actType": "category", 
    "primaryFuelLevel": "float64",
    "legMode": "category",
    "chargingPointType":"category",
    "pricingModel":"category",
    "parkingType":"category",
    "mode":"category",
    "personalVehicleAvailable": "category",
    "person": "object",
    "driver": "object",
    "riders": "object",
    'primaryFuelType': "category",
    'secondaryFuelType': 'category',
    'currentTourMode': 'category',
    'currentActivity': 'category',
    'nextActivity': 'category'    
}
eventsSF = pd.read_csv('gs://beam-core-outputs/sfbay-2010-base-20221122/beam/year-2015-iteration-3/ITERS/it.0/0.events.csv.gz', compression = 'gzip', dtype = dtypes)



CPU times: total: 3min 44s
Wall time: 5min 25s


In [205]:
eventsSF.head()

Unnamed: 0,IDMerged,person,driver,riders,reason,time,type,vehicle,actType,vehicleType,price,fuel,shiftStatus,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,parkingZoneId,duration,currentTourMode,endY,endX,startY,startX,arrivalTime,departureTime,link,facility,departTime,requireWheelchair,modeBEAM,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,tripId,score,primaryFuelLevel,secondaryFuelLevel,cost,links,numPassengers,primaryFuel,secondaryFuelType,primaryFuelType,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,capacity,linkTravelTime,secondaryFuel,incentive,tollCost,cost_BEAM,legMode,scenario,lever,year,lever_position,modeBEAM_rh,actEndTime,actStartTime,duration_travelling,distance_travelling,distance_mode_choice,duration_walking,distance_walking,duration_on_bike,distance_bike,duration_in_ridehail,distance_ridehail,duration_in_privateCar,distance_privateCar,duration_in_transit,distance_transit,replanningTime,tourIndex_fixed,fuelFood,emissionFood,fuelElectricity,emissionElectricity,fuelDiesel,emissionDiesel,fuelBiodiesel,emissionBiodiesel,fuel_not_Food,fuelGasoline,emissionGasoline,fuel_marginal,emission_marginal,actEndType,actStartType,tripIndex,mode_choice_actual_BEAM,mode_choice_planned_BEAM,replanning_status,transit_bus,transit_subway,transit_tram,transit_rail,transit_cable_car,ride_hail_pooled
0,4,4.0,,,,57128.0,ModeChoice,,,,,,,,,,,,,,,car,,,,,,,,,,,car,,CAR,62008.0,True,59024.56,1.0,"WALK,CAR,CAR,WALK","body-4,358660,358660,body-4",Home,othdiscr,1513.0,,,,,,,,,,,,,,,,,,,,,baseline,default,2015,1,car,,,,,59024.56,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,1513.0,car,car,0,0,0,0,0,0,0
1,4,4.0,,,,57128.0,actend,,Home,,,,,,,,,,,,,,,,,,,,62008.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,baseline,default,2015,1,,57128.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Home,,1513.0,,,0,0,0,0,0,0,0
2,4,4.0,,,,57128.0,departure,,,,,,,,,,,,,,,,,,,,,,62008.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,car,baseline,default,2015,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1513.0,,,0,0,0,0,0,0,0
3,4,4.0,,,,57128.0,PersonEntersVehicle,body-4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,baseline,default,2015,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1513.0,,,0,0,0,0,0,0,0
4,4,,4.0,4.0,,57128.0,PathTraversal,body-4,,BODY-TYPE-DEFAULT,,,,,,,,,,,,car,37.854097,-122.227937,37.854097,-122.227937,57128.0,57128.0,,,,,walk,,,,,0.0,,,,,,,,2210000.0,0.0,,,1.0,0.0,,Food,,,0.0,0.0,0.0,,0.0,,,,,baseline,default,2015,1,walk,,,0.0,0.0,,0.0,0.0,,,,,,,,,,,0.0,0.0,,,,,,,,,,0.0,0.0,,,1513.0,,,0,0,0,0,0,0,0


In [3]:
# Show all columns and rows
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [4]:
# Showing the entire number in dataframe
pd.set_option('float_format', '{:f}'.format)

In [5]:
# Adding scenario info
eventsSF['scenario'] = "baseline"
eventsSF['scenario'] = eventsSF['scenario'].astype("category")
eventsSF['lever'] = "default"
eventsSF['lever'] = eventsSF['lever'].astype("category")
eventsSF['year'] = 2015
eventsSF['lever_position'] = 1

In [6]:
# Rename the "mode" column
eventsSF.rename(columns={"mode":"modeBEAM"}, inplace=True) 
# Replace "Work" with "work" in the "actType" column
eventsSF["actType"].replace({"Work": "work"}, inplace=True)

In [7]:
eventsSF = eventsSF.copy()

In [8]:
# Remove person = TransitDriver or RidehailDriver because there are no agent information in these rows
eventsSF = eventsSF[~eventsSF.person.str.contains("Agent", na=False)].reset_index(drop=True)

#### New

In [9]:
eventsSF = eventsSF[~((eventsSF.type.str.contains("ParkingEvent", na=False))&(eventsSF['time']==0))].reset_index(drop=True)

In [10]:
# shift column 'person' to first position
first_column = eventsSF.pop('person')
second_column = eventsSF.pop('driver')
third_column = eventsSF.pop('riders')
# insert column using insert(position,column_name,first_column) function
eventsSF.insert(0, 'person', first_column)
eventsSF.insert(1, 'driver', second_column)
eventsSF.insert(2, 'riders', third_column)

In [11]:
# Adding the IDMerged Column
eventsSF['UniqueID'] = eventsSF['person'] #make a copy of the person column
eventsSF['personID'] = np.where(eventsSF['person'].isin(eventsSF['driver']), eventsSF['person'], np.nan) 
eventsSF['driverID'] = np.where(eventsSF['driver'].isin(eventsSF['person']), eventsSF['driver'], np.nan)

In [12]:
# Merging person and driver ids in one column
eventsSF['IDMerged'] = eventsSF['personID'].combine_first(eventsSF['driverID'])
eventsSF['IDMerged'] = eventsSF['UniqueID'].combine_first(eventsSF['IDMerged'])

In [13]:
# Dropping unused columns
eventsSF = eventsSF.drop(['personID','driverID','UniqueID'], axis=1) 

In [14]:
# Shift column 'IDMerged' to first position
first_column = eventsSF.pop('IDMerged')
# Insert column using insert(position,column_name,first_column) function
eventsSF.insert(0, 'IDMerged', first_column)

In [15]:
%%time
# Split the "riders' column and replicated rows for every rider
eventsSF['riders'] = eventsSF['riders'].str.split(':')
eventsSF = eventsSF.explode('riders')

CPU times: total: 1min 47s
Wall time: 1min 47s


In [16]:
# Combine riderID with IDMerged
eventsSF['riderID'] = np.where(eventsSF['riders'].isin(eventsSF['person']), eventsSF['riders'], np.nan)
eventsSF['IDMerged'] = eventsSF['riderID'].combine_first(eventsSF['IDMerged'])

In [17]:
# Dropping unused columns
eventsSF = eventsSF.drop(['riderID'], axis=1) 

In [18]:
# Remove driver = TransitDriver or RidehailDriver for IDMerged = NAN because there are no agent information in these rows 
eventsSF = eventsSF[~((eventsSF.driver.str.contains("Agent", na=False))&(eventsSF.IDMerged.isna()))].reset_index(drop=True)

In [19]:
#IDnan = eventsSF[eventsSF['IDMerged'].isna()]
#IDnan.shape

In [20]:
%%time
# Filling NANs in ID related to charging events
eventsSF["chargeID"] = eventsSF.groupby('vehicle')['IDMerged'].transform(lambda x: x.ffill().bfill())

CPU times: total: 4min 28s
Wall time: 4min 28s


In [21]:
# Combining chargeID with IDMerged so no NANs anymore
eventsSF['IDMerged'] = eventsSF['chargeID'].combine_first(eventsSF['IDMerged'])

In [22]:
#IDnan = eventsSF[eventsSF['IDMerged'].isna()]
#IDnan.shape

In [23]:
# Dropping unused columns
eventsSF = eventsSF.drop(['chargeID'], axis=1) 

In [24]:
%%time
# Change the IDMerged column type to numeric
eventsSF["IDMerged"] = pd.to_numeric(eventsSF.IDMerged)

CPU times: total: 17.7 s
Wall time: 17.7 s


In [25]:
# Sort by IDMerged and time columns
eventsSF = eventsSF.sort_values(['IDMerged','time']).reset_index(drop=True)

In [26]:
# We assume that the number of passengers is 1 for ride_hail_pooled
eventsSF['modeBEAM_rh'] = np.where(eventsSF.driver.str.contains("rideHailAgent", na=False), 'ride_hail' , eventsSF['modeBEAM'])

In [27]:
# Adding teleportation mode to the type = TeleportationEvent row 
eventsSF["modeBEAM_rh"] = np.where(eventsSF['type']=='TeleportationEvent', eventsSF.modeBEAM_rh.fillna(method='ffill'), eventsSF["modeBEAM_rh"])

In [28]:
eventsSF['modeBEAM_rh_pooled'] = np.where((eventsSF['type'] == 'PersonCost') & (eventsSF['modeBEAM'] == 'ride_hail_pooled'), 'ride_hail_pooled', np.nan)

In [29]:
eventsSF['modeBEAM_rh_ride_hail_transit'] = np.where((eventsSF['type'] == 'PersonCost') & (eventsSF['modeBEAM'] == 'ride_hail_transit'), 'ride_hail_transit', np.nan)

In [30]:
eventsSF['modeBEAM_rh_pooled'] = eventsSF['modeBEAM_rh_pooled'].shift(+1)

In [31]:
eventsSF['modeBEAM_rh_ride_hail_transit'] = eventsSF['modeBEAM_rh_ride_hail_transit'].shift(+1)

In [32]:
eventsSF['modeBEAM_rh'] = np.where((eventsSF['type'] == 'PathTraversal') & (eventsSF['modeBEAM'] == 'car') & (eventsSF['driver'].str.contains("rideHailAgent", na=False)) & (eventsSF['modeBEAM_rh_pooled'] != 'nan'), eventsSF['modeBEAM_rh_pooled'], eventsSF['modeBEAM_rh'])

In [33]:
# We don't know if ridehail_transit is ride_hail or ride_hail_pooled
eventsSF['modeBEAM_rh'] = np.where((eventsSF['type'] == 'PathTraversal') & (eventsSF['modeBEAM'] == 'car') & (eventsSF['driver'].str.contains("rideHailAgent", na=False)) & (eventsSF['modeBEAM_rh_ride_hail_transit'] != 'nan'), eventsSF['modeBEAM_rh_ride_hail_transit'], eventsSF['modeBEAM_rh'])

In [34]:
# Dropping the temporary columns
eventsSF = eventsSF.drop(['modeBEAM_rh_pooled'], axis=1)
eventsSF = eventsSF.drop(['modeBEAM_rh_ride_hail_transit'], axis=1)

#### Adding the census blocks

In [35]:
def addGeometryIdToDataFrame(df, gdf, xcol, ycol, idColumn="geometry", df_geom='epsg:4326'):
    gdf_data = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[xcol], df[ycol]))
    gdf_data.crs = {'init': df_geom}
    joined = gpd.sjoin(gdf_data.to_crs('epsg:26910'), gdf.to_crs('epsg:26910'))
    gdf_data = gdf_data.merge(joined['blkgrpid'], left_index=True, right_index=True, how="left")
    gdf_data.rename(columns={'blkgrpid': idColumn}, inplace=True)
    df = pd.DataFrame(gdf_data.drop(columns='geometry'))
    df.drop(columns=[xcol, ycol], inplace=True)
    return df.loc[~df.index.duplicated(keep='first'), :]

In [36]:
# census_2010 = "https://beam-core-act.s3.amazonaws.com/deepDive/RawData/Census_Blocks/Scenario_2010_shp/"
BGs = gpd.read_file('/vsicurl/https://github.com/LBNL-UCB-STI/beam-core-analysis/raw/main/Users/Zach/scenario/sfbay-blockgroups-2010/641aa0d4-ce5b-4a81-9c30-8790c4ab8cfb202047-1-wkkklf.j5ouj.shp')

In [37]:
%%time
eventsSF = addGeometryIdToDataFrame(eventsSF, BGs, 'startX', 'startY', 'BlockGroupStart')

Exception ignored in: <function BaseGeometry.__del__ at 0x0000011FE360F280>
Traceback (most recent call last):
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 242, in __del__
    self.empty(val=None)
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 227, in empty
    self._lgeos.GEOSGeom_destroy(self.__geom__)
KeyboardInterrupt: 


KeyboardInterrupt: 

In [38]:
%%time
eventsSF = addGeometryIdToDataFrame(eventsSF, BGs, 'endX', 'endY', 'BlockGroupEnd')

Exception ignored in: <function BaseGeometry.__del__ at 0x0000011FE360F280>
Traceback (most recent call last):
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 242, in __del__
    self.empty(val=None)
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 227, in empty
    self._lgeos.GEOSGeom_destroy(self.__geom__)
KeyboardInterrupt: 
Exception ignored in: <function BaseGeometry.__del__ at 0x0000011FE360F280>
Traceback (most recent call last):
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 242, in __del__
    self.empty(val=None)
  File "C:\Users\nazanin\.conda\envs\geo_env\lib\site-packages\shapely\geometry\base.py", line 227, in empty
    self._lgeos.GEOSGeom_destroy(self.__geom__)
KeyboardInterrupt: 
Exception ignored in: <function BaseGeometry.__del__ at 0x0000011FE360F280>
Traceback (most recent call last):
  File "C:\Users\nazanin\.conda\envs\geo_

KeyboardInterrupt: 

#### Adding new columns

In [85]:
eventsSF['actEndTime'] = np.where(eventsSF['type']=='actend'
                     , eventsSF['time'], np.nan)

In [86]:
eventsSF['actStartTime'] = np.where(eventsSF['type']=='actstart'
                     , eventsSF['time'], np.nan)    

In [87]:
eventsSF['duration_travelling'] = np.where((eventsSF['type']=='PathTraversal')|(eventsSF['type']=='TeleportationEvent')
                     , eventsSF['arrivalTime'] - eventsSF['departureTime'], np.nan)

In [88]:
eventsSF['distance_travelling'] = np.where((eventsSF['type']=='PathTraversal')|((eventsSF['type']=='ModeChoice')&((eventsSF['modeBEAM']=='hov2_teleportation')|(eventsSF['modeBEAM']=='hov3_teleportation'))), eventsSF['length'], np.nan)

In [89]:
eventsSF['distance_mode_choice'] = np.where(eventsSF['type']=='ModeChoice', eventsSF['length'], np.nan)

In [90]:
eventsSF['duration_walking'] = np.where(eventsSF['modeBEAM']=='walk', eventsSF['duration_travelling'], np.nan)

In [91]:
eventsSF['distance_walking'] = np.where(eventsSF['modeBEAM']=='walk', eventsSF['distance_travelling'], np.nan)

In [92]:
eventsSF['duration_on_bike'] = np.where(eventsSF['modeBEAM']=='bike', eventsSF['duration_travelling'], np.nan)

In [93]:
eventsSF['distance_bike'] = np.where(eventsSF['modeBEAM']=='bike', eventsSF['distance_travelling'], np.nan)

In [94]:
eventsSF['duration_in_ridehail'] = np.where((eventsSF['modeBEAM_rh']=='ride_hail')|(eventsSF['modeBEAM_rh']=='ride_hail_pooled')|(eventsSF['modeBEAM_rh']=='ride_hail_transit'), 
                                            eventsSF['duration_travelling'], np.nan)

In [95]:
eventsSF['distance_ridehail'] = np.where((eventsSF['modeBEAM_rh']=='ride_hail')|(eventsSF['modeBEAM_rh']=='ride_hail_pooled')|(eventsSF['modeBEAM_rh']=='ride_hail_transit'), eventsSF['distance_travelling'], np.nan)

In [96]:
eventsSF['duration_in_privateCar'] = np.where((eventsSF['modeBEAM_rh']=='car')|(eventsSF['modeBEAM_rh']=='car_hov3')|(eventsSF['modeBEAM_rh']=='car_hov2')|
                                              (eventsSF['modeBEAM_rh']=='hov2_teleportation')|(eventsSF['modeBEAM_rh']=='hov3_teleportation') 
                                              , eventsSF['duration_travelling'], np.nan)

In [97]:
eventsSF['distance_privateCar'] = np.where((eventsSF['modeBEAM_rh']=='car')|(eventsSF['modeBEAM_rh']=='car_hov3')|(eventsSF['modeBEAM_rh']=='car_hov2')|
                                              (eventsSF['modeBEAM_rh']=='hov2_teleportation')|(eventsSF['modeBEAM_rh']=='hov3_teleportation'), eventsSF['distance_travelling'], np.nan)

In [98]:
eventsSF['duration_in_transit'] = np.where((eventsSF['modeBEAM']=='bike_transit')|(eventsSF['modeBEAM']=='drive_transit')|
                                           (eventsSF['modeBEAM']=='walk_transit')|(eventsSF['modeBEAM']=='bus')|
                                           (eventsSF['modeBEAM']=='tram')|(eventsSF['modeBEAM']=='subway')|
                                           (eventsSF['modeBEAM']=='rail')|(eventsSF['modeBEAM']=='cable_car')|
                                           (eventsSF['modeBEAM']=='ride_hail_transit'), eventsSF['duration_travelling'], np.nan)

In [99]:
eventsSF['distance_transit'] = np.where((eventsSF['modeBEAM']=='bike_transit')|(eventsSF['modeBEAM']=='drive_transit')|
                                        (eventsSF['modeBEAM']=='walk_transit')|(eventsSF['modeBEAM']=='bus')|
                                        (eventsSF['modeBEAM']=='tram')|(eventsSF['modeBEAM']=='subway')|
                                        (eventsSF['modeBEAM']=='rail')|(eventsSF['modeBEAM']=='cable_car')|
                                        (eventsSF['modeBEAM']=='ride_hail_transit'), eventsSF['distance_travelling'], np.nan)

In [100]:
# Removing the extra tour index happening after replanning events
eventsSF['replanningTime'] = np.where(eventsSF['type'] == 'Replanning', eventsSF['time'], np.nan)
eventsSF['replanningTime'] = eventsSF['replanningTime'].shift(+1)
eventsSF['tourIndex_fixed'] = np.where((eventsSF['type'] == 'ModeChoice')&(eventsSF['replanningTime'].notna()), np.nan, eventsSF['tourIndex'])

In [101]:
#eventsSF = eventsSF.set_index('IDMerged')

In [102]:
eventsSF['fuelFood'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Food'), 
                                eventsSF['primaryFuel'], np.nan)

In [103]:
eventsSF['emissionFood'] = eventsSF['fuelFood'] * 8.3141841e-9 * 0

In [104]:
eventsSF['fuelElectricity'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Electricity'), 
                                eventsSF['primaryFuel'], np.nan)

In [105]:
eventsSF['emissionElectricity'] = eventsSF['fuelElectricity'] * 2.77778e-10 * 947.2 * 0.0005

In [106]:
eventsSF['fuelDiesel'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Diesel'), 
                                eventsSF['primaryFuel'], np.nan)

In [107]:
eventsSF['emissionDiesel'] = eventsSF['fuelDiesel'] * 8.3141841e-9 * 10.180e-3

In [108]:
eventsSF['fuelBiodiesel'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Biodiesel'), 
                                eventsSF['primaryFuel'], np.nan)

In [109]:
eventsSF['emissionBiodiesel'] = eventsSF['fuelBiodiesel'] * 8.3141841e-9 * 10.180e-3

In [110]:
eventsSF['fuel_not_Food'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']!='Food')
                            , eventsSF['primaryFuel']+eventsSF['secondaryFuel'], np.nan)

In [111]:
eventsSF['fuelGasoline'] = np.where((eventsSF['type']=='PathTraversal')&((eventsSF['primaryFuelType']=='Gasoline')|(eventsSF['secondaryFuelType']=='Gasoline')), 
                           eventsSF['primaryFuel']+eventsSF['secondaryFuel'], np.nan)

In [112]:
eventsSF['emissionGasoline'] = eventsSF['fuelGasoline'] * 8.3141841e-9 * 8.89e-3

In [113]:
# Marginal fuel
conditions  = [(eventsSF['modeBEAM_rh'] == 'ride_hail_pooled'), 
               (eventsSF['modeBEAM_rh'] == 'walk_transit') | (eventsSF['modeBEAM_rh'] == 'drive_transit')|
               (eventsSF['modeBEAM_rh'] == 'ride_hail_transit')|(eventsSF['modeBEAM_rh'] == 'bus')|(eventsSF['modeBEAM_rh'] == 'subway')|
               (eventsSF['modeBEAM_rh'] == 'rail')|(eventsSF['modeBEAM_rh'] == 'tram')|(eventsSF['modeBEAM_rh'] == 'cable_car')|
               (eventsSF['modeBEAM_rh'] == 'bike_transit'),
               (eventsSF['modeBEAM_rh'] == 'walk')|(eventsSF['modeBEAM_rh'] == 'bike'),
               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')]
choices = [eventsSF['fuel_not_Food']/eventsSF['numPassengers'], 0 , eventsSF['fuelFood'], eventsSF['fuel_not_Food']]

In [114]:
eventsSF['fuel_marginal'] = np.select(conditions, choices, default=np.nan)

In [115]:
# Marginal emission
conditions1  = [(eventsSF['modeBEAM_rh'] == 'ride_hail_pooled') & (eventsSF['fuelElectricity'].notna() != 0), 
               (eventsSF['modeBEAM_rh'] == 'ride_hail_pooled') & (eventsSF['fuelGasoline'].notna() != 0),
               (eventsSF['modeBEAM_rh'] == 'ride_hail_pooled') & (eventsSF['fuelBiodiesel'].notna() != 0),
               (eventsSF['modeBEAM_rh'] == 'ride_hail_pooled') & (eventsSF['fuelDiesel'].notna() != 0),             
               (eventsSF['modeBEAM_rh'] == 'walk_transit') | (eventsSF['modeBEAM_rh'] == 'drive_transit')|
               (eventsSF['modeBEAM_rh'] == 'ride_hail_transit')|(eventsSF['modeBEAM_rh'] == 'bus')|(eventsSF['modeBEAM_rh'] == 'subway')|
               (eventsSF['modeBEAM_rh'] == 'rail')|(eventsSF['modeBEAM_rh'] == 'tram')|(eventsSF['modeBEAM_rh'] == 'cable_car')|
               (eventsSF['modeBEAM_rh'] == 'bike_transit'),

               (eventsSF['modeBEAM_rh'] == 'walk')|(eventsSF['modeBEAM_rh'] == 'bike'),
               
               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')&
               (eventsSF['fuelElectricity'].notna() != 0),
              
               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')&
               (eventsSF['fuelGasoline'].notna() != 0),           
              
               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')&
               (eventsSF['fuelBiodiesel'].notna() != 0),   
               
               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')&
               (eventsSF['fuelDiesel'].notna() != 0),

               (eventsSF['modeBEAM_rh'] == 'ride_hail')|(eventsSF['modeBEAM_rh'] == 'car')| 
               (eventsSF['modeBEAM_rh'] == 'car_hov2')| (eventsSF['modeBEAM_rh'] == 'car_hov3')|
               (eventsSF['modeBEAM_rh'] == 'hov2_teleportation')| (eventsSF['modeBEAM_rh'] == 'hov3_teleportation')&
               (eventsSF['fuelFood'].notna() != 0)]

choices1 = [eventsSF['emissionElectricity']/eventsSF['numPassengers'],
           eventsSF['emissionGasoline']/eventsSF['numPassengers'],
           eventsSF['emissionBiodiesel']/eventsSF['numPassengers'],
           eventsSF['emissionDiesel']/eventsSF['numPassengers'],           
           0 , 
           eventsSF['emissionFood'], 
           eventsSF['emissionElectricity'],
           eventsSF['emissionGasoline'],
           eventsSF['emissionBiodiesel'],
           eventsSF['emissionDiesel'],
           eventsSF['emissionFood']]

In [116]:
eventsSF['emission_marginal'] = np.select(conditions1, choices1, default=np.nan)

In [117]:
eventsSF['actEndType'] = np.where(eventsSF['type']=='actend', eventsSF['actType'], "")

In [118]:
eventsSF['actStartType'] = np.where(eventsSF['type']=='actstart', eventsSF['actType'], "")

#### Trip Index

In [119]:
#eventsSF["tripIndex"] = eventsSF.groupby("IDMerged")["tourIndex_fixed"].rank(method="first", ascending=True)
eventsSF["tripIndex"] = eventsSF.tripId.fillna(method='ffill')

#### Mode Choice planned and actual

In [120]:
%%time
eventsSF['mode_choice_actual_BEAM'] = eventsSF.groupby(['IDMerged','tripId', 'type'])['modeBEAM'].transform('last')

CPU times: total: 1min 24s
Wall time: 1min 24s


In [121]:
%%time
eventsSF['mode_choice_planned_BEAM'] = eventsSF.groupby(['IDMerged','tripId', 'type'])['modeBEAM'].transform('first')

CPU times: total: 1min 23s
Wall time: 1min 23s


In [122]:
eventsSF['mode_choice_actual_BEAM'] = np.where(eventsSF['type'] != 'ModeChoice' , np.nan, eventsSF['mode_choice_actual_BEAM'])

In [123]:
eventsSF['mode_choice_planned_BEAM'] = np.where(eventsSF['type'] != 'ModeChoice' , np.nan, eventsSF['mode_choice_planned_BEAM'])

In [124]:
# Rename the "netCost" column
eventsSF.rename(columns={"netCost":"cost_BEAM"}, inplace=True) 

In [125]:
# Replanning events = 1, the rest = 0
eventsSF['replanning_status'] = np.where(eventsSF['type']=='Replanning', 1, 0)

In [126]:
%%time
eventsSF['reason'].replace('nan', np.NaN)

CPU times: total: 1.81 s
Wall time: 1.81 s


In [127]:
eventsSF['transit_bus'] = np.where(eventsSF['modeBEAM_rh']=='bus', 1, 0)
eventsSF['transit_subway'] = np.where(eventsSF['modeBEAM_rh']=='subway', 1, 0)
eventsSF['transit_tram'] = np.where(eventsSF['modeBEAM_rh']=='tram', 1, 0)
eventsSF['transit_rail'] = np.where(eventsSF['modeBEAM_rh']=='rail', 1, 0)
eventsSF['transit_cable_car'] = np.where(eventsSF['modeBEAM_rh']=='cable_car', 1, 0)

In [128]:
eventsSF['ride_hail_pooled'] = np.where(eventsSF['modeBEAM_rh']=='ride_hail_pooled', 1, 0)

In [129]:
%%time
Person_Trip_eventsSF = pd.pivot_table(
   eventsSF,
   index=['IDMerged','tripIndex'],
   aggfunc={'actStartTime': np.sum, 
            'actEndTime': np.sum, 
            'duration_travelling': np.sum, 
            'cost_BEAM': np.sum, 
            'actStartType': np.sum, 
            'actEndType': np.sum, 
            'duration_walking': np.sum, 
            'duration_in_privateCar': np.sum, 
            'duration_on_bike': np.sum, 
            'duration_in_ridehail': np.sum, 
            'distance_travelling': np.sum, 
            'duration_in_transit': np.sum, 
            'distance_walking': np.sum, 
            'distance_bike': np.sum, 
            'distance_ridehail': np.sum, 
            'distance_privateCar': np.sum, 
            'distance_transit': np.sum, 
            'legVehicleIds': np.sum, 
            'mode_choice_planned_BEAM':lambda x: ', '.join(set(x.dropna().astype(str))),
            'mode_choice_actual_BEAM':lambda x: ', '.join(set(x.dropna().astype(str))),
            'vehicle': lambda x: ', '.join(set(x.dropna().astype(str))),
            'numPassengers': lambda x: ', '.join(list(x.dropna().astype(str))),
            'distance_mode_choice': np.sum,
            'replanning_status': np.sum,
            'reason': lambda x: ', '.join(list(x.dropna().astype(str))),
            'parkingType': lambda x: ', '.join(list(x.dropna().astype(str))),
            'transit_bus': np.sum, 
            'transit_subway': np.sum, 
            'transit_tram': np.sum, 
            'transit_cable_car': np.sum,
            'ride_hail_pooled': np.sum, 
            'transit_rail': np.sum,
            'year': lambda x: ', '.join(set(x.dropna().astype(str))),
            'lever_position': lambda x: ', '.join(set(x.dropna().astype(str))),
            'scenario': lambda x: ', '.join(set(x.dropna().astype(str))),
            'fuelFood': np.sum, 
            'fuelElectricity': np.sum, 
            'fuelBiodiesel': np.sum, 
            'fuelDiesel': np.sum, 
            'fuel_not_Food': np.sum, 
            'fuelGasoline': np.sum, 
            'fuel_marginal': np.sum,
            'emissionFood': np.sum, 
            'emissionElectricity': np.sum, 
            'emissionDiesel': np.sum, 
            'emissionGasoline': np.sum,
            'emissionBiodiesel': np.sum, 
            'emission_marginal': np.sum,
            'lever': lambda x: ', '.join(set(x.dropna().astype(str)))
           }).reset_index() 

#'numPassengers': lambda x: ', '.join(set(x.dropna().astype(str))) 
#'mode_choice_actual_BEAM':lambda x: ', '.join(set(x.dropna().astype(str))) #
#'modeBEAM_rh': lambda x: ', '.join(list(x.dropna().astype(str))), 

CPU times: total: 48min 37s
Wall time: 48min 37s


In [130]:
Person_Trip_eventsSF['duration_door_to_door'] = Person_Trip_eventsSF['actStartTime'] - Person_Trip_eventsSF['actEndTime'] 

In [131]:
Person_Trip_eventsSF['waitTime'] = Person_Trip_eventsSF['duration_door_to_door'] - Person_Trip_eventsSF['duration_travelling'] 

In [132]:
Person_Trip_eventsSF['actPurpose'] = Person_Trip_eventsSF['actEndType'].astype(str) + "_to_" + Person_Trip_eventsSF['actStartType'].astype(str)

In [133]:
Person_Trip_eventsSF.rename(columns={"legVehicleIds":"vehicleIds_estimate"}, inplace=True) 

In [134]:
Person_Trip_eventsSF.rename(columns={"vehicle":"vehicleIds"}, inplace=True) 

#### New

In [135]:
# Column with five summarized modes
conditions  = [(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail_pooled'), 
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'walk_transit') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'drive_transit')| (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail_transit')|(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'bike_transit'),
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'walk'), (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'bike'),
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car_hov2')| (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car_hov3')|(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'hov2_teleportation')| (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'hov3_teleportation')]
choices = [ 'ride_hail', 'transit', 'walk', 'bike', 'car']

In [136]:
Person_Trip_eventsSF['mode_choice_actual_5'] = np.select(conditions, choices, default= np.nan)

In [137]:
# Column with six summarized modes
conditions  = [(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail_pooled'), 
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'walk_transit') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'drive_transit')|(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'bike_transit'),
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'walk'), (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'bike'),
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car') | (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car_hov2')| (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'car_hov3')|(Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'hov2_teleportation')| (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'hov3_teleportation'),
               (Person_Trip_eventsSF['mode_choice_actual_BEAM'] == 'ride_hail_transit')]
choices = [ 'ride_hail', 'transit', 'walk', 'bike', 'car', 'ride_hail_transit']

In [138]:
Person_Trip_eventsSF['mode_choice_actual_6'] = pd.Series(np.select(conditions, choices, default= np.nan)).replace({'nan':np.nan})

In [139]:
# Column with four summarized modes
Person_Trip_eventsSF['mode_choice_actual_4']  = np.where((Person_Trip_eventsSF['mode_choice_actual_5'] == 'walk')|(Person_Trip_eventsSF['mode_choice_actual_5'] == 'bike'),
                                                        'walk/bike', Person_Trip_eventsSF['mode_choice_actual_5'])

In [140]:
Person_Trip_eventsSF = Person_Trip_eventsSF.drop(Person_Trip_eventsSF[Person_Trip_eventsSF.duration_door_to_door < 0].index)

In [141]:
#Person_Trip_eventsSF.to_csv('C:/Shared-Work/Data/CleanData/sf_2018_base_core_act.csv', index = False)

#### Merging with ActivitySim files

In [142]:
households = pd.read_csv('gs://beam-core-outputs/sfbay-2010-base-20221122/activitysim/year-2015-iteration-3/households.csv.gz', compression = 'gzip', dtype = dtypes)

In [143]:
persons = pd.read_csv('gs://beam-core-outputs/sfbay-2010-base-20221122/activitysim/year-2015-iteration-3/persons.csv.gz', compression = 'gzip', dtype = dtypes)

In [144]:
tours = pd.read_csv('gs://beam-core-outputs/sfbay-2010-base-20221122/activitysim/year-2015-iteration-3/final_tours.csv.gz', compression = 'gzip', dtype = dtypes)

In [145]:
trips = pd.read_csv('gs://beam-core-outputs/sfbay-2010-base-20221122/activitysim/year-2015-iteration-3/final_trips.csv.gz', compression = 'gzip', dtype = dtypes)

In [146]:
# Merge BEAM households and persons 
persons = persons.sort_values(by=['household_id']).reset_index(drop=True)
households = households.sort_values(by=['household_id']).reset_index(drop=True)
hhpersons = pd.merge(left=persons, right=households, how='left', on='household_id')
#hhpersons = pd.merge(left=persons, right=households, how='left', on='household_id', suffixes=('', '_drop'))
#hhpersons.drop([col for col in hhpersons.columns if 'drop' in col], axis=1, inplace=True)

In [147]:
# Merge tours, households and persons
tours = tours.sort_values(by=['person_id']).reset_index(drop=True)
hhpersons = hhpersons.sort_values(by=['person_id']).reset_index(drop=True)
hhperTours = pd.merge(left=tours, right=hhpersons, how='left', on='person_id')
#hhperTours = pd.merge(left=tours, right=hhpersons, how='left', on='person_id', suffixes=('', '_drop'))
#hhperTours.drop([col for col in hhperTours.columns if 'drop' in col], axis=1, inplace=True)

In [148]:
# Merge trips, tours, households and persons
trips = trips.sort_values(by=['person_id', 'tour_id']).reset_index(drop=True)
hhperTours = hhperTours.sort_values(by=['person_id','tour_id']).reset_index(drop=True)
tourTripsMerged = pd.merge(left=trips, right=hhperTours, how='left', on=['person_id','tour_id'])
#tourTripsMerged = pd.merge(left=trips, right=hhperTours, how='left', on=['person_id','tour_id'], suffixes=('', '_drop'))
#tourTripsMerged.drop([col for col in tourTripsMerged.columns if 'drop' in col], axis=1, inplace=True)

In [149]:
# Concat mode_choice_utilities files
path = "C:/Users/nazanin/Downloads/trip_mode_choice/trip_mode_choice/" #the path should be updated
all_files = glob.glob(path + "*utilities.csv")
li_mapper = map(lambda filename: pd.read_csv(filename, index_col = None, header = 0), all_files)
li2 = list(li_mapper)
SFmode_choice_utilities = pd.concat(li2, axis = 0, ignore_index=True)

In [150]:
#just utilities
# Merge trips, tours, households, persons, trip_mode_choice_raw, and utilities
tourTripsMerged = tourTripsMerged.sort_values(by=['trip_id'])
SFmode_choice_utilities = SFmode_choice_utilities.sort_values(by=['trip_id'])
SFActMerged= pd.merge(left=tourTripsMerged, right=SFmode_choice_utilities, how='left', on=['trip_id'])

In [151]:
# Both raw and utilities
# Merge trips, tours, households, persons, trip_mode_choice_raw, and utilities
#tourTripsMerged = tourTripsMerged.sort_values(by=['trip_id'])
#rawUtil = rawUtil.sort_values(by=['trip_id'])
#SFActMerged= pd.merge(left=tourTripsMerged, right=rawUtil, how='left', on=['trip_id'])

In [152]:
# Merge person_trip level BEAM with activity sim merged files
SFActMerged = SFActMerged.sort_values(by=['person_id', 'trip_id']).reset_index(drop=True)
Person_Trip_eventsSF = Person_Trip_eventsSF.sort_values(by=['IDMerged','tripIndex']).reset_index(drop=True)
eventsASim = pd.merge(left=Person_Trip_eventsSF, right=SFActMerged, how='left', left_on=["IDMerged", 'tripIndex'], right_on=['person_id', 'trip_id'])
#eventsASim = pd.merge(left=Person_Trip_eventsSF, right=tourTripsMerged, how='left',left_on = ["IDMerged", 'tripId'] , right_on=['person_id', 'trip_id'], suffixes=('', '_drop'))
#eventsASim.drop([col for col in eventsASim.columns if 'drop' in col], axis=1, inplace=True)

#### Updated the cells below - Might consider adding the INEXUS metrics as well

In [153]:
eventsASim.rename(columns={"mode_choice_logsum_y":"logsum_tours_mode_AS_tours"}, inplace=True)

In [154]:
eventsASim.rename(columns={"tour_mode":"tour_mode_AS_tours"}, inplace=True)

In [155]:
eventsASim.rename(columns={"mode_choice_logsum_x":"logsum_trip_Potential_INEXUS"}, inplace=True)

In [156]:
eventsASim.rename(columns={"trip_mode":"trip_mode_AS_trips"}, inplace=True)

In [157]:
# Add a column of income quartiles
quartiles = eventsASim['income'].quantile([0,.25, .5, .75,1]).tolist()

In [158]:
# Add income deciles
conditions  = [(eventsASim['income'] >= quartiles[0]) & (eventsASim['income'] < quartiles[1]), 
               (eventsASim['income'] >= quartiles[1]) & (eventsASim['income'] < quartiles[2]),
               (eventsASim['income'] >=  quartiles[2]) & (eventsASim['income'] < quartiles[3]),
               (eventsASim['income'] >= quartiles[3]) & (eventsASim['income'] <= quartiles[4])]

choices = [ '1stQ', '2ndQ', '3rdQ', '4thD']

In [159]:
eventsASim['income_quartiles'] = np.select(conditions, choices, default=None)

In [160]:
# Add a column of income deciles
deciles = eventsASim['income'].quantile([0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1]).tolist()

In [161]:
# Add income deciles
conditions  = [(eventsASim['income'] >= deciles[0]) & (eventsASim['income'] < deciles[1]), 
               (eventsASim['income'] >= deciles[1]) & (eventsASim['income'] < deciles[2]),
               (eventsASim['income'] >=  deciles[2]) & (eventsASim['income'] < deciles[3]),
               (eventsASim['income'] >= deciles[3]) & (eventsASim['income'] < deciles[4]), 
               (eventsASim['income'] >=  deciles[4]) & (eventsASim['income'] < deciles[5]),
               (eventsASim['income'] >=  deciles[5]) & (eventsASim['income'] < deciles[6]),
               (eventsASim['income'] >=  deciles[6]) & (eventsASim['income'] < deciles[7]),
               (eventsASim['income'] >=  deciles[7]) & (eventsASim['income'] < deciles[8]),
               (eventsASim['income'] >=  deciles[8]) & (eventsASim['income'] < deciles[9]),
               (eventsASim['income'] >=  deciles[9]) & (eventsASim['income'] <= deciles[10])]

choices = [ '1stD', '2ndD', '3rdD', 
           '4thD', '5thD', '6thD', '7thD', '8thD', '9thD','10thD']

In [162]:
eventsASim['income_deciles'] = np.select(conditions, choices, default=None)

In [163]:
eventsASim[eventsASim['logsum_tours_mode_AS_tours'].isna()].shape

(315372, 256)

In [152]:
# Save the output file to S3
eventsASim.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Baseline/sf_2015_baseline_test.csv', index=False)  #the path should be updated

#### INEXUS

In [164]:
# Mapping BEAM and ASIM modes
conditions  = [(eventsASim['mode_choice_actual_BEAM'] == 'walk')&(eventsASim['trip_mode_AS_trips'] == 'WALK'),
               (eventsASim['mode_choice_actual_BEAM'] == 'bike')&(eventsASim['trip_mode_AS_trips'] == 'BIKE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car')&(eventsASim['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'car')&(eventsASim['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim['trip_mode_AS_trips'] == 'WALK_LRF'),
               (eventsASim['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim['trip_mode_AS_trips'] == 'WALK_LOC'),
               (eventsASim['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim['trip_mode_AS_trips'] == 'WALK_EXP'),
               (eventsASim['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim['trip_mode_AS_trips'] == 'WALK_HVY'),
               (eventsASim['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (eventsASim['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (eventsASim['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim['trip_mode_AS_trips'] == 'TAXI'),
               (eventsASim['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim['trip_mode_AS_trips'] == 'WALK_COM'),
               (eventsASim['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (eventsASim['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (eventsASim['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (eventsASim['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (eventsASim['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim['trip_mode_AS_trips'] == 'DRIVE_HVY')]


choices = [eventsASim['WALK'], eventsASim['BIKE'], eventsASim['SHARED3FREE'], eventsASim['SHARED3PAY'],
           eventsASim['SHARED3FREE'], eventsASim['SHARED3PAY'], eventsASim['SHARED2FREE'], 
           eventsASim['SHARED2PAY'], eventsASim['SHARED2PAY'], eventsASim['SHARED2FREE'], 
           eventsASim['DRIVEALONEFREE'], eventsASim['DRIVEALONEPAY'], eventsASim['WALK_LRF'], 
           eventsASim['WALK_LOC'], eventsASim['WALK_EXP'], eventsASim['WALK_HVY'], eventsASim['TNC_SINGLE'], 
           eventsASim['TNC_SHARED'], eventsASim['TAXI'], eventsASim['WALK_COM'], 
           eventsASim['DRIVE_COM'], eventsASim['DRIVE_LRF'], eventsASim['DRIVE_LOC'], eventsASim['DRIVE_EXP'],
           eventsASim['DRIVE_HVY']]

In [166]:
eventsASim['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

In [263]:
eventsASim_drop = eventsASim.drop(eventsASim[eventsASim['TAXI'].isna()].index)

In [298]:
eventsASim_temp = eventsASim_drop[['IDMerged', 'tripIndex','actEndTime','actEndType','actStartTime','actStartType',
                                   'mode_choice_planned_BEAM','mode_choice_actual_BEAM','trip_mode_AS_trips',
                                  'DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY','WALK',
                                   'BIKE','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF',
                                   'DRIVE_EXP','DRIVE_HVY','DRIVE_COM','TAXI','TNC_SINGLE','TNC_SHARED']]

In [299]:
# Mapping BEAM and ASIM modes
conditions  = [(eventsASim_temp['mode_choice_actual_BEAM'] == 'walk')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'bike')&(eventsASim_temp['trip_mode_AS_trips'] == 'BIKE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK_LRF'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK_LOC'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK_EXP'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK_HVY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim_temp['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim_temp['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail')&(eventsASim_temp['trip_mode_AS_trips'] == 'TAXI'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'WALK_COM'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVE_HVY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(eventsASim_temp['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(eventsASim_temp['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(eventsASim_temp['trip_mode_AS_trips'] == 'TAXI'),
               
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation')&((eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE')|
               (eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY')|(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation')&((eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY')|
               (eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY')|(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE')),
               (eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&((eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE')|(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE')|
               (eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY')|(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'))]
                                                                      
                                                                                                                 
                                                                      
choices = [eventsASim_temp['WALK'], eventsASim_temp['BIKE'], eventsASim_temp['SHARED3FREE'], eventsASim_temp['SHARED3PAY'],
           eventsASim_temp['SHARED3FREE'], eventsASim_temp['SHARED3PAY'], eventsASim_temp['SHARED2FREE'], 
           eventsASim_temp['SHARED2PAY'], eventsASim_temp['SHARED2PAY'], eventsASim_temp['SHARED2FREE'], 
           eventsASim_temp['DRIVEALONEFREE'], eventsASim_temp['DRIVEALONEPAY'], eventsASim_temp['WALK_LRF'], 
           eventsASim_temp['WALK_LOC'], eventsASim_temp['WALK_EXP'], eventsASim_temp['WALK_HVY'], eventsASim_temp['TNC_SINGLE'], 
           eventsASim_temp['TNC_SHARED'], eventsASim_temp['TAXI'], eventsASim_temp['WALK_COM'], 
           eventsASim_temp['DRIVE_COM'], eventsASim_temp['DRIVE_LRF'], eventsASim_temp['DRIVE_LOC'], eventsASim_temp['DRIVE_EXP'],
           eventsASim_temp['DRIVE_HVY'], eventsASim_temp['TNC_SINGLE'], eventsASim_temp['TNC_SHARED'], eventsASim_temp['TAXI'],
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
           eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1)]

In [300]:
           #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
           #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2')&(eventsASim_temp['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
           #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3FREE'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED3PAY'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2FREE'),
               #(eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3')&(eventsASim_temp['trip_mode_AS_trips'] == 'SHARED2PAY')]

          #eventsASim_temp['SHARED2FREE'], eventsASim_temp['SHARED2PAY'], 
          # eventsASim_temp['DRIVEALONEFREE'], eventsASim_temp['DRIVEALONEPAY'], 
          # eventsASim_temp['SHARED2PAY'], eventsASim_temp['SHARED2FREE'], eventsASim_temp['DRIVEALONEFREE'], 
          # eventsASim_temp['DRIVEALONEPAY'], eventsASim_temp['SHARED3PAY'], eventsASim_temp['SHARED3FREE'], 
          # eventsASim_temp['SHARED3FREE'], eventsASim_temp['SHARED3PAY']]

In [301]:
eventsASim_temp['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)


In [254]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [303]:
eventsASim_temp[(eventsASim_temp['Realized_INEXUS'] < -900)].shape

(0, 31)

In [302]:
eventsASim_temp[(eventsASim_temp['Realized_INEXUS'].isna())].shape

(389268, 31)

In [257]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [259]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail_pooled') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'ride_hail_pooled') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [204]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'hov3_teleportation') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [205]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'hov2_teleportation') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [368]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov2') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [369]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'car_hov3') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [373]:
#eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'walk_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [230]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'walk') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'walk') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [231]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'bike') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['BIKE','WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'bike') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['BIKE','WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [233]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'bike_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['BIKE','WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'bike_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['BIKE','WALK_LOC','WALK','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [234]:
eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  eventsASim_temp['Realized_INEXUS'] = np.where((eventsASim_temp['mode_choice_actual_BEAM'] == 'drive_transit') & (eventsASim_temp['Realized_INEXUS'].isna()), eventsASim_temp[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED2PAY','SHARED3FREE','SHARED3PAY']].max(axis=1), eventsASim_temp['Realized_INEXUS'])


In [213]:
eventsASim_temp.head()

Unnamed: 0,IDMerged,tripIndex,actEndTime,actEndType,actStartTime,actStartType,mode_choice_planned_BEAM,mode_choice_actual_BEAM,trip_mode_AS_trips,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED,Realized_INEXUS
0,4,1513.0,57128.0,Home,59289.0,othdiscr,car,car,DRIVEALONEFREE,-1.753259,-1.765936,-1000.753259,-1000.760503,-1000.753259,-1000.75833,-65.051352,-1019.726757,-1001.60477,-1998.379995,-1998.379995,-2000.403421,-1998.379995,-1000.161266,-1998.426867,-1998.426867,-2000.846282,-1998.426867,-19.500145,-13.346279,-7.273501,-1.753259
1,4,1517.0,61163.0,othdiscr,63873.0,social,car,car,DRIVEALONEPAY,-1.670445,-1.682576,-1000.657486,-1000.664418,-1000.652302,-1000.657155,-56.402102,-1015.402128,-1998.970552,-1998.379995,-1998.379995,-1998.979183,-1998.379995,-1998.426867,-1998.426867,-1998.426867,-1998.426867,-1998.426867,-18.474693,-12.531355,-6.861858,-1.682576
3,8,2889.0,59007.0,Home,59690.0,shopping,car,car,DRIVEALONEFREE,-0.817971,-0.83221,0.558529,0.550393,-999.817971,-999.823666,-13.503713,-1000.857441,-1001.839628,-1998.310958,-1998.310958,-1999.995114,-1998.310958,-1000.318479,-1998.35783,-1998.35783,-1998.35783,-1998.35783,-14.50144,-13.517265,-11.583302,-0.817971
4,8,2893.0,59840.0,shopping,60679.0,Home,car,car,SHARED2FREE,-2.913043,-2.92049,-0.650436,-0.654692,-1000.672493,-1000.675472,-16.840786,-1001.191148,-1000.703633,-1998.310958,-1998.310958,-1998.310958,-1998.310958,-1998.35783,-1998.35783,-1998.35783,-1998.35783,-1998.35783,-16.014501,-14.187358,-11.485441,-0.650436
5,18,6169.0,38566.0,Home,40957.0,shopping,walk_transit,walk_transit,WALK_LOC,-999.568777,-999.571043,-5.314877,-5.316172,-5.688577,-5.689483,-8.532381,-1000.416482,-0.452994,-998.843351,-998.717751,-998.842251,-998.869651,-999.803487,-1998.429423,-1998.429423,-1998.429423,-1998.429423,-9.13062,-8.235243,-6.894363,-0.452994


In [214]:
eventsASim_temp[(eventsASim_temp['Realized_INEXUS'].isna())].shape

(198139, 31)

In [215]:
eventsASim_temp[(eventsASim_temp['Realized_INEXUS'] < -900)].shape

(41987, 31)

In [398]:
eventsASim_temp[(eventsASim_temp['Realized_INEXUS'] < -900)].head(10)

Unnamed: 0,IDMerged,tripIndex,actEndTime,actEndType,actStartTime,actStartType,mode_choice_planned_BEAM,mode_choice_actual_BEAM,trip_mode_AS_trips,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED,Realized_INEXUS
119,257,84329.0,36194.0,work,37316.0,atwork,car,car,WALK,-1998.231232,-1998.231379,-999.231232,-999.231316,-999.231232,-999.231291,-0.393102,-1998.313522,-999.388514,-1998.098654,-1998.098654,-1998.098654,-1998.098654,-1998.266738,-1998.266738,-1998.266738,-1998.266738,-1998.266738,-8.131405,-8.237605,-8.361245,-999.231232
120,257,84333.0,37466.0,atwork,37641.0,work,car,car,WALK,-1998.231232,-1998.231379,-999.231232,-999.231316,-999.231232,-999.231291,-0.393102,-1998.313522,-999.388514,-1998.098654,-1998.098654,-1998.098654,-1998.098654,-1998.266738,-1998.266738,-1998.266738,-1998.266738,-1998.266738,-8.131405,-8.237605,-8.361245,-999.231232
323,700,229633.0,37346.0,work,39662.0,atwork,car,car,WALK,-999.088188,-999.088283,-999.088188,-999.088242,-999.088188,-999.088226,-0.341314,-1998.21095,-999.766602,-1997.945298,-1997.945298,-1997.945298,-1997.945298,-1998.222061,-1998.222061,-1998.222061,-1998.222061,-1998.222061,-7.795621,-7.513685,-7.804696,-999.088188
324,700,229637.0,47563.0,atwork,48377.0,work,car,car,WALK,-999.896211,-999.896406,-999.617344,-999.617456,-999.505798,-999.505876,-0.403872,-1998.242229,-1000.241696,-1997.945298,-1997.945298,-1997.945298,-1997.945298,-1998.222061,-1998.222061,-1998.222061,-1998.222061,-1998.222061,-8.708262,-8.297894,-8.131228,-999.505798
394,856,280969.0,131800.0,Home,132517.0,othdiscr,hov3_teleportation,hov3_teleportation,BIKE,-1000.42265,-1000.514278,-1000.42265,-1000.475009,-1000.42265,-1000.459302,-27.429583,-3.145419,-1002.437409,-1998.333171,-1998.333171,-1999.616409,-1998.333171,-999.806252,-1998.380043,-1998.380043,-1998.380043,-1998.380043,-22.579017,-18.741263,-13.346698,-1000.42265
395,856,280973.0,132667.0,othdiscr,133487.0,Home,hov3_teleportation,hov3_teleportation,BIKE,-1001.423043,-1001.470952,-1000.791505,-1000.818881,-1000.538889,-1000.558053,-25.284398,-2.930901,-1998.911885,-1998.333171,-1998.333171,-1998.333171,-1998.333171,-1998.380043,-1998.380043,-1998.380043,-1998.380043,-1998.380043,-17.699244,-15.262166,-12.13813,-1000.538889
505,1259,412985.0,36835.0,work,37968.0,othmaint,car,car,WALK,-999.735985,-999.736369,-999.448407,-999.448627,-999.333376,-999.33353,-0.583679,-1998.357858,-999.339269,-1997.996749,-1997.996749,-1997.996749,-1997.996749,-1998.164833,-1998.164833,-1998.164833,-1998.164833,-1998.164833,-8.366296,-9.080646,-8.47881,-999.333376
506,1259,412986.0,38118.0,othmaint,38193.0,escort,car,car,WALK,-1000.417593,-1000.417739,-999.845128,-999.845211,-999.616142,-999.6162,0.067953,-1997.9836,-999.62122,-1997.899864,-1998.178864,-1997.899864,-1997.899864,-1998.090338,-1998.090338,-1998.090338,-1998.090338,-1998.090338,-8.224274,-9.147165,-8.510017,-999.616142
507,1259,412987.0,38973.0,escort,39008.0,atwork,car,car,WALK,-999.683543,-999.683635,-999.398655,-999.398708,-999.2847,-999.284737,0.290658,-1997.790129,-999.567445,-1998.437122,-1998.014627,-1997.735627,-1997.735627,-1998.149586,-1998.149586,-1998.149586,-1998.149586,-1998.149586,-7.917034,-8.786085,-8.201712,-999.2847
508,1259,412989.0,54241.0,atwork,54546.0,work,car,car,WALK,-1006.626007,-1006.629844,-1003.876212,-1003.878404,-1002.776294,-1002.777829,-2.152651,-1998.509576,-999.434695,-1998.521803,-1997.981502,-1997.981502,-1997.981502,-1998.149586,-1998.149586,-1998.149586,-1998.149586,-1998.149586,-14.337606,-14.245464,-11.630568,-1002.776294


In [226]:
eventsASim_temp[eventsASim_temp['Realized_INEXUS'].isna()].head(1000).to_csv("C:/Shared-Work/Data/CleanData/sample_test.csv", index =False)

In [256]:
eventsASim_temp.head(10000).to_csv("C:/Shared-Work/Data/CleanData/sample_test_2.csv", index =False)

In [122]:
# Delete the utilities files downloaded and saved in the system
os.remove('C:/Users/nazanin/Documents/beam-core-analysis/Users/Nazanin/trip_mode_choice.zip')   #the path should be updated
shutil.rmtree('C:/Users/nazanin/Documents/beam-core-analysis/Users/Nazanin/trip_mode_choice')   #the path should be updated