In [1]:
import pandas as pd
import numpy as np

In [2]:
# Base URL of the BEAM iteration output on S3.
# Update this address to point at the run (version) being analysed.
loc_2018_tripid_modified = (
    "https://beam-outputs.s3.amazonaws.com/pilates-outputs/sfbay-2018-base-20220327/beam/year-2018-iteration-5/ITERS/it.0/"
)

In [3]:
%%time
# Column dtypes used when reading the BEAM events file.
# Repeated text columns are read as "category"; the identifier columns
# (person, driver, riders) are kept as plain objects.
# Each column is listed once: the original literal declared "legMode" twice,
# where the second entry silently replaced the first.
dtypes = {
    "time": "float32",
    "type": "category",
    "legMode": "category",
    "actType": "category",
    "primaryFuelLevel": "float64",
    "chargingPointType": "category",
    "pricingModel": "category",
    "parkingType": "category",
    "mode": "category",
    "personalVehicleAvailable": "category",
    "person": "object",
    "driver": "object",
    "riders": "object",
}

# Read the gzipped events CSV of this iteration with the dtypes declared above.
# Every column is loaded; no `usecols` filter is applied here.
events_url = loc_2018_tripid_modified + '0.events.csv.gz'
eventsSF = pd.read_csv(events_url, dtype=dtypes, compression='gzip')



Wall time: 3min 33s


In [4]:
#plansSF = pd.read_csv(loc_2018_tripid + '0.plans.csv.gz', compression = 'gzip', dtype = dtypes)

In [5]:
#plansSF.sort_values(by = "personId").head()

In [4]:
# Lift pandas' display truncation: None means "no limit" for columns and rows.
for display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(display_option, None)

In [5]:
# Rename the "mode" column to "modeBEAM".
eventsSF = eventsSF.rename(columns={"mode": "modeBEAM"})

# Normalise the activity label "Work" to "work".
# The result is assigned back to the frame explicitly: calling
# .replace(..., inplace=True) on a column selection acts on an intermediate
# object and is not guaranteed to update the DataFrame (it does not under
# pandas Copy-on-Write). The column is converted to object for the
# replacement and back to "category" afterwards so its dtype is unchanged.
eventsSF["actType"] = (
    eventsSF["actType"]
    .astype(object)
    .replace({"Work": "work"})
    .astype("category")
)

In [6]:
# Drop events whose `person` value contains "Agent" (per the original note:
# transit / ride-hail driver rows, which carry no agent information).
# Rows with a missing `person` are kept (na=False).
person_is_agent = eventsSF['person'].str.contains("Agent", na=False)
eventsSF = eventsSF.loc[~person_is_agent].reset_index(drop=True)

In [7]:
# Keep an independent (deep) copy of the filtered events before the frame is edited further.
eventsSFCopy = eventsSF.copy(deep=True)

In [8]:
# Move the three identifier columns to the front, in the order person, driver, riders.
for target_position, id_column in enumerate(('person', 'driver', 'riders')):
    eventsSF.insert(target_position, id_column, eventsSF.pop(id_column))

# Candidate IDs:
#   - a `person` value that also occurs somewhere in the `driver` column
#   - a `driver` value that also occurs somewhere in the `person` column
# Non-matching rows become NaN.
person_also_drives = eventsSF['person'].isin(eventsSF['driver'])
driver_also_person = eventsSF['driver'].isin(eventsSF['person'])
person_candidate = eventsSF['person'].where(person_also_drives)
driver_candidate = eventsSF['driver'].where(driver_also_person)

# IDMerged takes `person` when present, otherwise the person candidate,
# otherwise the driver candidate.
fallback_id = person_candidate.combine_first(driver_candidate)
merged_id = eventsSF['person'].combine_first(fallback_id)

# Place IDMerged as the first column of the frame.
eventsSF.insert(0, 'IDMerged', merged_id)

In [13]:
%%time
# Split the colon-separated "riders" string into a list and emit one row per rider.
eventsSF['riders'] = eventsSF['riders'].str.split(':')
# explode() repeats the original index label on every row produced from the same
# event. Reset the index so each row has a unique label again; the following
# cells combine columns by index label (combine_first), which is only
# well-defined on a unique index.
eventsSF = eventsSF.explode('riders').reset_index(drop=True)

Wall time: 1min 42s


In [14]:
# A rider value counts as an agent ID only when it also occurs in the `person` column.
rider_is_person = eventsSF['riders'].isin(eventsSF['person'])
rider_candidate = eventsSF['riders'].where(rider_is_person)

# The rider ID takes precedence; rows without one keep their existing IDMerged.
eventsSF['IDMerged'] = rider_candidate.combine_first(eventsSF['IDMerged'])

In [16]:
# Rows that still have no merged ID (per the original note: charging events,
# or ride-hail / transit driver rows).
IDnan = eventsSF.loc[eventsSF['IDMerged'].isna()]

# Drop the ID-less rows whose driver contains "Agent"; they carry no agent information.
driver_is_agent = eventsSF['driver'].str.contains("Agent", na=False)
lacks_merged_id = eventsSF['IDMerged'].isna()
eventsSF = eventsSF.loc[~(driver_is_agent & lacks_merged_id)].reset_index(drop=True)

# Recompute the ID-less rows after the filter and show how many remain.
IDnan = eventsSF.loc[eventsSF['IDMerged'].isna()]
IDnan.shape

(3442, 63)

In [19]:
%%time
def _fill_both_directions(ids):
    """Forward-fill, then back-fill, the missing values of one group's IDs."""
    return ids.ffill().bfill()


# For each vehicle, propagate the known IDMerged values onto that vehicle's
# rows that have none (per the original note: the charging events).
eventsSF["chargeID"] = eventsSF.groupby('vehicle')['IDMerged'].transform(_fill_both_directions)

Wall time: 4min 35s


In [20]:
# Take the temporary chargeID column out of the frame and fold it into IDMerged:
# chargeID wins where present, the existing IDMerged is kept elsewhere.
charge_id = eventsSF.pop('chargeID')
eventsSF['IDMerged'] = charge_id.combine_first(eventsSF['IDMerged'])

In [22]:
# Distinct values found in the currentTourMode column.
tour_modes_seen = eventsSF['currentTourMode'].unique().tolist()
print(tour_modes_seen)

['car', 'hov2_teleportation', nan, 'car_hov3', 'bike', 'walk', 'hov3_teleportation', 'car_hov2', 'walk_transit', 'ride_hail', 'drive_transit', 'ride_hail_pooled', 'ride_hail_transit', 'bike_transit']


In [23]:
# Distinct values found in the modeBEAM column.
beam_modes_seen = eventsSF['modeBEAM'].unique().tolist()
print(beam_modes_seen)

['car', 'hov2_teleportation', nan, 'walk', 'car_hov3', 'bike', 'hov3_teleportation', 'car_hov2', 'walk_transit', 'ride_hail', 'drive_transit', 'ride_hail_pooled', 'ride_hail_transit', 'bus', 'tram', 'subway', 'rail', 'cable_car', 'bike_transit']


In [24]:
# Distinct event types found in the type column.
event_types_seen = eventsSF['type'].unique().tolist()
print(event_types_seen)

['ModeChoice', 'actend', 'departure', 'PersonEntersVehicle', 'TeleportationEvent', 'arrival', 'actstart', 'PathTraversal', 'LeavingParkingEvent', 'ParkingEvent', 'PersonCost', 'PersonLeavesVehicle', 'ReserveRideHail', 'ChargingPlugInEvent', 'RefuelSessionEvent', 'ChargingPlugOutEvent', 'Replanning']


In [25]:
# Convert IDMerged to a numeric dtype so it sorts numerically.
eventsSF["IDMerged"] = pd.to_numeric(eventsSF["IDMerged"])

# Order the events by agent and then by time, with a fresh 0..n-1 index.
eventsSF = (
    eventsSF
    .sort_values(by=['IDMerged', 'time'])
    .reset_index(drop=True)
)

In [27]:
# ---- Derive modeBEAM_rh: modeBEAM with ride-hail vehicle legs labelled explicitly ----
#
# Fixes relative to the previous version of these cells:
#   * np.where(cond, 'ride_hail_pooled', np.nan) builds a *string* array in which
#     the missing value is the text 'nan'. After the shift, .notna() was therefore
#     true on almost every row, so ride-hail car legs were overwritten with 'nan'
#     and the pooled label was clobbered by the transit step. Boolean flags are
#     used instead.
#   * Ride-hail legs were labelled 'ridehail', but the ride-hail duration and
#     distance columns built later in this notebook select
#     modeBEAM_rh == 'ride_hail', so they never matched. The label is now 'ride_hail'.
#   * fillna(method='ffill') is deprecated in pandas; .ffill() is the replacement.

# Rows whose driver id contains "rideHailAgent" are legs driven by a ride-hail vehicle.
# We assume that the number of passengers is 1 for ride_hail_pooled.
driven_by_ridehail = eventsSF['driver'].str.contains("rideHailAgent", na=False)
eventsSF['modeBEAM_rh'] = np.where(driven_by_ridehail, 'ride_hail', eventsSF['modeBEAM'])

# TeleportationEvent rows take the most recent non-missing mode above them.
is_teleportation = eventsSF['type'] == 'TeleportationEvent'
eventsSF['modeBEAM_rh'] = np.where(
    is_teleportation,
    eventsSF['modeBEAM_rh'].ffill(),
    eventsSF['modeBEAM_rh'],
)

# A ride-hail car leg is relabelled when the row immediately before it is a
# PersonCost event of the given mode. ride_hail_transit is applied last, so it
# wins if both match (same precedence as before).
# We don't know if ride_hail_transit is ride_hail or ride_hail_pooled.
ridehail_car_leg = (
    (eventsSF['type'] == 'PathTraversal')
    & (eventsSF['modeBEAM'] == 'car')
    & driven_by_ridehail
)
for special_mode in ('ride_hail_pooled', 'ride_hail_transit'):
    person_cost_hit = (eventsSF['type'] == 'PersonCost') & (eventsSF['modeBEAM'] == special_mode)
    # Shift down one row: True on the row that directly follows the PersonCost event.
    follows_person_cost = person_cost_hit.shift(1, fill_value=False)
    eventsSF.loc[ridehail_car_leg & follows_person_cost, 'modeBEAM_rh'] = special_mode

In [38]:
#eventsSF[eventsSF['IDMerged'] == 11024]

In [39]:
# Write the first 55,000 events to a local CSV for manual inspection.
dist = eventsSF.iloc[:55000]
dist.to_csv('C:/Shared-Work/Data/CleanData/dist.csv', index=False)

In [56]:
#eventsSF[eventsSF['modeBEAM'] == "ride_hail_pooled"].head()

#### Adding new columns

In [36]:
# Event-type masks shared by the columns below.
event_type = eventsSF['type']
is_path_traversal = event_type == 'PathTraversal'
is_mode_choice = event_type == 'ModeChoice'

# Activity end / start times: the event time on actend / actstart rows, NaN elsewhere.
eventsSF['actEndTime'] = np.where(event_type == 'actend', eventsSF['time'], np.nan)
eventsSF['actStartTime'] = np.where(event_type == 'actstart', eventsSF['time'], np.nan)

# Travel time: arrival minus departure on PathTraversal and TeleportationEvent rows.
leg_duration = eventsSF['arrivalTime'] - eventsSF['departureTime']
is_travel_row = is_path_traversal | (event_type == 'TeleportationEvent')
eventsSF['travelTime'] = np.where(is_travel_row, leg_duration, np.nan)

# Travel distance: `length` on PathTraversal rows, and on ModeChoice rows whose
# mode is one of the two teleportation modes.
is_hov_teleportation = eventsSF['modeBEAM'].isin(['hov2_teleportation', 'hov3_teleportation'])
has_distance = is_path_traversal | (is_mode_choice & is_hov_teleportation)
eventsSF['travelDistance'] = np.where(has_distance, eventsSF['length'], np.nan)

# `length` as reported on ModeChoice rows.
eventsSF['length_mode_choice'] = np.where(is_mode_choice, eventsSF['length'], np.nan)

In [41]:
# Mode sets used to split travel time / distance by mode group.
private_car_modes = ['car', 'car_hov3', 'car_hov2', 'hov2_teleportation', 'hov3_teleportation']
transit_modes = ['bike_transit', 'drive_transit', 'walk_transit', 'bus', 'tram',
                 'subway', 'rail', 'cable_car', 'ride_hail_transit']

# (duration column, distance column, row mask), in the order the columns are created.
# Walk, bike and transit are selected on modeBEAM; ride-hail and private car on modeBEAM_rh.
mode_groups = [
    ('duration_walking', 'distance_walking', eventsSF['modeBEAM'] == 'walk'),
    ('duration_on_bike', 'distance_bike', eventsSF['modeBEAM'] == 'bike'),
    ('duration_in_ridehail', 'distance_ridehail', eventsSF['modeBEAM_rh'] == 'ride_hail'),
    ('duration_in_privateCar', 'distance_privateCar', eventsSF['modeBEAM_rh'].isin(private_car_modes)),
    ('duration_in_transit', 'distance_transit', eventsSF['modeBEAM'].isin(transit_modes)),
]

# Each column holds travelTime / travelDistance on the rows of its group and NaN elsewhere.
for duration_column, distance_column, in_group in mode_groups:
    eventsSF[duration_column] = np.where(in_group, eventsSF['travelTime'], np.nan)
    eventsSF[distance_column] = np.where(in_group, eventsSF['travelDistance'], np.nan)

In [51]:
# Blank the tour index of a ModeChoice row that directly follows a Replanning row,
# so the re-planned choice does not register as an additional tour index.
is_replanning = eventsSF['type'] == 'Replanning'
eventsSF['replanningTime'] = np.where(is_replanning, eventsSF['time'], np.nan)
# Shift down one row: the Replanning time now sits on the row after the Replanning event.
eventsSF['replanningTime'] = eventsSF['replanningTime'].shift(1)

mode_choice_after_replanning = (eventsSF['type'] == 'ModeChoice') & eventsSF['replanningTime'].notna()
eventsSF['tourIndex_fixed'] = np.where(mode_choice_after_replanning, np.nan, eventsSF['tourIndex'])

In [56]:
#eventsSF['fuelFood'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Food'), 
                                #eventsSF['primaryFuel'], np.nan)

In [52]:
#eventsSF['fuelElectricity'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Electricity'), 
                                #eventsSF['primaryFuel'], np.nan)

In [53]:
#eventsSF['fuelDiesel'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Diesel'), 
                                #eventsSF['primaryFuel'], np.nan)

In [54]:
#eventsSF['fuelBiodiesel'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']=='Biodiesel'), 
                                #eventsSF['primaryFuel'], np.nan)

In [55]:
#eventsSF['fuel_not_Food'] = np.where((eventsSF['type']=='PathTraversal')&(eventsSF['primaryFuelType']!='Food')
                            #, eventsSF['primaryFuel']+eventsSF['secondaryFuel'], np.nan)

In [57]:
#eventsSF['fuelGasoline'] = np.where((eventsSF['type']=='PathTraversal')&((eventsSF['primaryFuelType']=='Gasoline')|(eventsSF['secondaryFuelType']=='Gasoline')), 
                           #eventsSF['primaryFuel']+eventsSF['secondaryFuel'], np.nan)

In [52]:
# Activity type on actend / actstart rows; an empty string on every other row.
for target_column, wanted_event in (('actEndType', 'actend'), ('actStartType', 'actstart')):
    eventsSF[target_column] = np.where(eventsSF['type'] == wanted_event, eventsSF['actType'], "")

In [60]:
#eventsSF[(eventsSF['modeBEAM_rh'] == 'ride_hail_pooled')|(eventsSF['type'] == 'Replanning')].head()

#### Trip Index

In [54]:
# Number the rows that carry a tour index within each agent: ordered by
# tourIndex_fixed, ties broken by row order. Rows without a tour index get NaN.
eventsSF["tripIndex"] = eventsSF.groupby("IDMerged")["tourIndex_fixed"].rank(method="first", ascending=True)

# Carry the trip number forward onto the following rows of the SAME agent.
# The fill is grouped by IDMerged so that an agent's rows that precede its first
# numbered row stay NaN instead of inheriting the previous agent's last trip
# number (which a global forward fill would do). .ffill() also replaces the
# deprecated fillna(method='ffill').
eventsSF["tripIndex"] = eventsSF.groupby("IDMerged")["tripIndex"].ffill()

In [55]:
# Two-digit running count of each event type within an (agent, trip) pair: "01", "02", ...
s = (
    eventsSF
    .groupby(['IDMerged', 'tripIndex', 'type'])
    .cumcount()
    .add(1)
    .astype(str)
    .str.zfill(2)
)

# Event type followed by its running count, e.g. "ModeChoice01".
eventsSF['type_number'] = eventsSF['type'].astype(str) + s

# Mode on the first ModeChoice of a trip (planned) and on the second one (re-planned).
type_number = eventsSF['type_number']
eventsSF['mode_choice_planned_BEAM'] = np.where(type_number == 'ModeChoice01', eventsSF['modeBEAM_rh'], np.nan)
eventsSF['mode_choice_replanned_BEAM'] = np.where(type_number == 'ModeChoice02', eventsSF['modeBEAM_rh'], np.nan)

# 1 on Replanning rows, 0 elsewhere.
eventsSF['replanning_status'] = np.where(eventsSF['type'] == 'Replanning', 1, 0)

In [None]:
#eventsSF[eventsSF['type'] == "Replanning"].tail(16)

In [None]:
# Write all events of agent 280 to a local CSV for manual inspection.
agent_280_rows = eventsSF['IDMerged'] == 280
num_pass = eventsSF.loc[agent_280_rows]
num_pass.to_csv('C:/Shared-Work/Data/CleanData/num_pass.csv', index=False)

In [128]:
#eventsSF["mode_choice_replanned_BEAM"] = eventsSF.groupby(['IDMerged','tripIndex'])['mode_choice_replanned_BEAM'].transform(lambda x: x.bfill())

In [68]:
# Actual mode: the re-planned mode where one exists, otherwise the planned mode.
replanned_mode = eventsSF['mode_choice_replanned_BEAM']
planned_mode = eventsSF['mode_choice_planned_BEAM']
eventsSF['mode_choice_actual_BEAM'] = replanned_mode.where(replanned_mode.notna(), planned_mode)

In [82]:
#eventsSF['mode_choice_actual_BEAM'] = eventsSF['mode_choice_actual_BEAM'].combine_first(eventsSF['modeBEAM_rh'])

In [None]:
#eventsSF['mode_planned_tour_ActSim'] = np.where(eventsSF['type'] == 'PathTraversal', eventsSF['currentTourMode'], np.nan)

In [175]:
# Write the first 10,000 events to a local CSV for manual inspection.
events_sample = eventsSF.iloc[:10000]
events_sample.to_csv('C:/Shared-Work/Data/CleanData/eventsNew_check.csv', index=False)

In [98]:
#eventsSF['numPassengers_agg'] = eventsSF.groupby(['IDMerged','tripIndex'], as_index=False).numPassengers.agg(lambda x: ', '.join(set(x.dropna().astype(str))))

In [60]:
# Rename the "netCost" column to "cost_BEAM".
eventsSF = eventsSF.rename(columns={"netCost": "cost_BEAM"})

In [None]:
#Change column type to string
#eventsSF['reason'] = eventsSF['reason'].astype(str)

In [None]:
%%time
#eventsSF['reason'].replace('nan', np.NaN)

Wall time: 880 ms


In [70]:
# Distinct values of the `reason` column (displayed as the cell output).
eventsSF['reason'].unique()

array([nan, 'ResourceCapacityExhausted WALK_TRANSIT',
       'ResourceUnavailable RIDE_HAIL',
       'ResourceUnavailable RIDE_HAIL_POOLED',
       'ResourceCapacityExhausted RIDE_HAIL_TRANSIT',
       'MissedTransitPickup DRIVE_TRANSIT',
       'ResourceCapacityExhausted DRIVE_TRANSIT',
       'ResourceUnavailable RIDE_HAIL_TRANSIT',
       'MissedTransitPickup RIDE_HAIL_TRANSIT'], dtype=object)

In [61]:
%%time
def _join_unique(values):
    """Comma-join the distinct non-missing values, sorted so the text is reproducible."""
    return ', '.join(sorted(set(values.dropna().astype(str))))


def _join_all(values):
    """Comma-join every non-missing value, keeping row order and duplicates."""
    return ', '.join(list(values.dropna().astype(str)))


# Columns that are summed per (agent, trip). For the text columns
# (actStartType, actEndType, legVehicleIds, mode_choice_planned_BEAM) the sum
# concatenates the values.
summed_columns = [
    'actStartTime', 'actEndTime', 'travelTime', 'cost_BEAM', 'actStartType',
    'actEndType', 'duration_walking', 'duration_in_privateCar', 'duration_on_bike',
    'duration_in_ridehail', 'travelDistance', 'duration_in_transit', 'distance_walking',
    'distance_bike', 'distance_ridehail', 'distance_privateCar', 'distance_transit',
    'legVehicleIds', 'mode_choice_planned_BEAM',
    'length_mode_choice', 'replanning_status',
]

# The string "sum" is the aggregation pandas already applied for np.sum; passing
# the name avoids the deprecation warning for NumPy callables.
trip_aggregations = {column: "sum" for column in summed_columns}
trip_aggregations.update({
    # tripId is an identifier used as a merge key further down, so it must not be
    # added up: a trip with more than one row carrying the id would otherwise get
    # a multiple of it. 'first' takes the first non-missing id of the trip.
    'tripId': 'first',
    'vehicle': _join_unique,
    'numPassengers': _join_all,
    'reason': _join_all,
})

# One row per (agent, trip) with the aggregated trip attributes.
Person_Trip_eventsSF = pd.pivot_table(
    eventsSF,
    index=['IDMerged', 'tripIndex'],
    aggfunc=trip_aggregations,
).reset_index()

#'numPassengers': lambda x: ', '.join(set(x.dropna().astype(str)))
#'mode_choice_actual_BEAM':lambda x: ', '.join(set(x.dropna().astype(str))) #
#'fuelFood': np.sum, 'fuelElectricity': np.sum, 'fuelBiodiesel': np.sum, 'fuelDiesel': np.sum, 'fuel_not_Food': np.sum, 'fuelGasoline': np.sum,
#'modeBEAM_rh': lambda x: ', '.join(list(x.dropna().astype(str))), 

Wall time: 12min 19s


In [62]:
# Per-trip derived columns:
#   door_to_door_time = actStartTime - actEndTime
#   waitTime          = door_to_door_time - travelTime
#   actPurpose        = "<actEndType>_to_<actStartType>"
Person_Trip_eventsSF = Person_Trip_eventsSF.assign(
    door_to_door_time=lambda trip: trip['actStartTime'] - trip['actEndTime'],
    waitTime=lambda trip: trip['door_to_door_time'] - trip['travelTime'],
    actPurpose=lambda trip: trip['actEndType'].astype(str) + "_to_" + trip['actStartType'].astype(str),
)

In [71]:
#Person_Trip_eventsSF.rename(columns={"modeBEAM_rh":"mode_BEAM_list"}, inplace=True) 

In [65]:
# Rename the two vehicle-related columns of the trip table.
Person_Trip_eventsSF = Person_Trip_eventsSF.rename(
    columns={
        "legVehicleIds": "legVehicleIds_estimate",
        "vehicle": "vehicleIds",
    }
)

In [89]:
# Show the first five Replanning events.
eventsSF.loc[eventsSF['type'] == 'Replanning'].head(n=5)

Unnamed: 0,IDMerged,person,driver,riders,vehicle,time,type,modeBEAM,currentTourMode,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,tripId,reason,link,legMode,facility,actType,price,shiftStatus,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,parkingZoneId,fuel,duration,vehicleType,endY,endX,startY,startX,arrivalTime,departureTime,secondaryFuelLevel,primaryFuelLevel,cost,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,capacity,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,links,numPassengers,primaryFuel,departTime,score,incentive,tollCost,cost_BEAM,modeBEAM_rh,actEndTime,actStartTime,travelTime,travelDistance,duration_walking,distance_walking,duration_on_bike,distance_bike,duration_in_ridehail,distance_ridehail,duration_in_privateCar,distance_privateCar,duration_in_transit,distance_transit,replanningTime,tourIndex_fixed,fuelFood,fuelElectricity,fuelDiesel,fuelBiodiesel,fuel_not_Food,fuelGasoline,actEndType,actStartType,tripIndex,type_number,mode_choice_planned_BEAM,mode_choice_replanned_BEAM,mode_choice_actual_BEAM
519,109,109,,,,42447.0,Replanning,,,,,,,,,,,,,,ResourceCapacityExhausted WALK_TRANSIT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,Replanning01,,walk_transit,walk_transit
522,109,109,,,,42453.0,Replanning,,,,,,,,,,,,,,ResourceCapacityExhausted WALK_TRANSIT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,Replanning02,,,
525,109,109,,,,42459.0,Replanning,,,,,,,,,,,,,,ResourceCapacityExhausted WALK_TRANSIT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,Replanning03,,,
1299,319,319,,,,30720.0,Replanning,,,,,,,,,,,,,,ResourceCapacityExhausted WALK_TRANSIT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,Replanning01,,walk_transit,walk_transit
1302,319,319,,,,30976.0,Replanning,,,,,,,,,,,,,,ResourceCapacityExhausted WALK_TRANSIT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,Replanning02,,,


In [111]:
# Trips of agent 108, kept for manual inspection.
agent_108_trips = Person_Trip_eventsSF['IDMerged'] == 108
b = Person_Trip_eventsSF.loc[agent_108_trips]

# Save the full person-trip table.
Person_Trip_eventsSF.to_csv(
    'C:/Shared-Work/Data/CleanData/PivotTables/PersonTrip_SF_Baseline_2018.csv',
    index=False,
)

# Events that report Gasoline as the primary fuel while the mode is bike.
gasoline_rows = eventsSF['primaryFuelType'] == "Gasoline"
bike_rows = eventsSF['modeBEAM'] == "bike"
sample_bike2 = eventsSF.loc[gasoline_rows & bike_rows]

# Save the agent-108 trips.
b.to_csv('C:/Shared-Work/Data/CleanData/b.csv', index=False)

#### Merging with ActivitySim persons and households files

In [67]:
# Location of the ActivitySim outputs of the same run on S3.
actloc_2018 = "https://beam-outputs.s3.amazonaws.com/pilates-outputs/sfbay-2018-base-20220327/activitysim/"

# Read the five ActivitySim result tables, in this order.
households, persons, tours, plans, trips = (
    pd.read_csv(actloc_2018 + table_file)
    for table_file in (
        'final_households.csv',
        'final_persons.csv',
        'final_tours.csv',
        'final_plans.csv',
        'final_trips.csv',
    )
)

In [73]:
# Attach household attributes to every person (left join on household_id).
persons = persons.sort_values(by='household_id')
households = households.sort_values(by='household_id')
hhpersons = persons.merge(households, on='household_id', how='left')
#hhpersons = pd.merge(left=persons, right=households, how='left', on='household_id', suffixes=('', '_drop'))
#hhpersons.drop([col for col in hhpersons.columns if 'drop' in col], axis=1, inplace=True)

In [80]:
# Attach person + household attributes to every tour (left join on person_id).
tours = tours.sort_values(by='person_id')
hhpersons = hhpersons.sort_values(by='person_id')
hhperTours = tours.merge(hhpersons, on='person_id', how='left')
#hhperTours = pd.merge(left=tours, right=hhpersons, how='left', on='person_id', suffixes=('', '_drop'))
#hhperTours.drop([col for col in hhperTours.columns if 'drop' in col], axis=1, inplace=True)

In [81]:
# Attach tour + person + household attributes to every trip
# (left join on person_id and tour_id).
trip_tour_keys = ['person_id', 'tour_id']
trips = trips.sort_values(by=trip_tour_keys)
hhperTours = hhperTours.sort_values(by=trip_tour_keys)
tourTripsMerged = trips.merge(hhperTours, on=trip_tour_keys, how='left')
#tourTripsMerged = pd.merge(left=trips, right=hhperTours, how='left', on=['person_id','tour_id'], suffixes=('', '_drop'))
#tourTripsMerged.drop([col for col in tourTripsMerged.columns if 'drop' in col], axis=1, inplace=True)

In [84]:
# Preview the first five ActivitySim trips.
trips.head(n=5)

Unnamed: 0,trip_id,person_id,household_id,tour_id,primary_purpose,trip_num,outbound,trip_count,purpose,destination,origin,destination_logsum,depart,trip_mode,mode_choice_logsum
508286,577,1,1735309,72,school,1,True,2,othmaint,583,641,6.776413,8.0,SHARED3FREE,-1.948476
508287,578,1,1735309,72,school,2,True,2,school,634,583,,8.0,SHARED2FREE,-1.628539
508288,581,1,1735309,72,school,1,False,1,Home,641,634,,15.0,SHARED3FREE,-1.691244
508289,689,2,1735309,86,atwork,1,True,1,atwork,700,700,,12.0,DRIVEALONEFREE,-0.328882
508290,693,2,1735309,86,atwork,1,False,1,Work,700,700,,13.0,DRIVEALONEPAY,-0.329928


In [86]:
# Join the BEAM person-trip table to the ActivitySim trip table.
# Keys: BEAM (IDMerged, tripId) <-> ActivitySim (person_id, trip_id); left join,
# so every BEAM trip is kept whether or not it finds a match.
tourTripsMerged = tourTripsMerged.sort_values(by=['person_id', 'trip_id'])
Person_Trip_eventsSF = Person_Trip_eventsSF.sort_values(by=['IDMerged', 'tripId'])
eventsASim = Person_Trip_eventsSF.merge(
    tourTripsMerged,
    how='left',
    left_on=["IDMerged", 'tripId'],
    right_on=['person_id', 'trip_id'],
)
#eventsASim = pd.merge(left=Person_Trip_eventsSF, right=tourTripsMerged, how='left',left_on = ["IDMerged", 'tripId'] , right_on=['person_id', 'trip_id'], suffixes=('', '_drop'))
#eventsASim.drop([col for col in eventsASim.columns if 'drop' in col], axis=1, inplace=True)

In [94]:
# Preview the first five rows of the merged BEAM / ActivitySim table.
eventsASim.head(n=5)

Unnamed: 0,IDMerged,tripIndex,actEndTime,actEndType,actStartTime,actStartType,cost_BEAM,distance_bike,distance_privateCar,distance_ridehail,distance_transit,distance_walking,duration_in_privateCar,duration_in_ridehail,duration_in_transit,duration_on_bike,duration_walking,legVehicleIds_estimate,length_mode_choice,mode_choice_planned_BEAM,numPassengers,reason,replanning_status,travelDistance,travelTime,tripId,vehicleIds,door_to_door_time,waitTime,actPurpose,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_num,outbound,trip_count,purpose,destination_x,origin_x,destination_logsum_x,depart,trip_mode_AS_trips,logsum_trip_mode_AS_trips,tour_type,tour_type_count,tour_type_num,tour_num,tour_count,tour_category,number_of_participants,destination_y,origin_y,household_id_x,tdd,start,end,duration,composition,destination_logsum_y,tour_mode_AS_tours,logsum_tours_mode_AS_tours,atwork_subtour_frequency,parent_tour_id,stop_frequency,primary_purpose_y,earning,worker,student,hispanic.1,person_sex,PNUM,sex,household_id_y,hours,MAR,person_age,edu,school_zone_id,race_id,race,work_at_home,age,work_zone_id,p_hispanic,relate,hispanic,TAZ_x,ptype,pemploy,pstudent,home_x,home_y,age_16_to_19,age_16_p,adult,male,female,has_non_worker,has_retiree,has_preschool_kid,has_driving_kid,has_school_kid,has_full_time,has_part_time,has_university,student_is_employed,nonstudent_to_school,is_student,is_gradeschool,is_highschool,is_university,school_segment,is_worker,home_taz,value_of_time,school_taz,distance_to_school,roundtrip_auto_time_to_school,workplace_taz,workplace_location_logsum,distance_to_work,workplace_in_cbd,work_taz_area_type,roundtrip_auto_time_to_work,work_auto_savings,work_auto_savings_ratio,free_parking_at_work,cdap_activity,travel_active,under16_not_at_school,has_preschool_kid_at_home,has_school_kid_at_home,mandatory_tour_frequency,work_and_school_and_worker,work_and_school_and_student,num_mand,num_work_tours,num_joint_tours,non_mandatory_tour_frequency,num_non_mand,num_escort_
tours,num_eatout_tours,num_shop_tours,num_maint_tours,num_discr_tours,num_social_tours,num_non_escort_tours,gt55,seniors,VEHICL,lcm_county_id,hh_children,block_id,gt2,hispanic_head,age_of_head,race_of_head,tenure_mover,hh_size,sf_detached,tenure,hh_cars,income,hh_age_of_head,serialno,num_workers,hh_race_of_head,hh_income,recent_mover,hh_workers,hispanic_status_of_head,hh_seniors,hhsize,hh_type,TAZ_y,HHT,sample_rate,chunk_id,income_in_thousands,income_segment,median_value_of_time,hh_value_of_time,num_non_workers,num_drivers,num_adults,num_children,num_young_children,num_children_5_to_15,num_children_16_to_17,num_college_age,num_young_adults,non_family,family,home_is_urban,home_is_rural,auto_ownership,hh_work_auto_savings_ratio,num_under16_not_at_school,num_travel_active,num_travel_active_adults,num_travel_active_preschoolers,num_travel_active_children,num_travel_active_non_preschoolers,participates_in_jtf_model,joint_tour_frequency,num_hh_joint_tours
0,1,1.0,29142.0,Home,29396.0,othmaint,0.0,0.0,4021.422,0.0,0.0,0.0,254.0,0.0,0.0,0.0,0.0,"body-1,teleportationSharedVehicle-1-1,body-1",4021.422,hov3_teleportation,,,0,4021.422,254.0,577.0,,254.0,0.0,Home_to_othmaint,577.0,1.0,1735309.0,72.0,school,1.0,True,2.0,othmaint,583.0,641.0,6.776413,8.0,SHARED3FREE,-1.948476,school,1.0,1.0,1.0,1.0,mandatory,1.0,634.0,641.0,1735309.0,61.0,8.0,15.0,7.0,,,SHARED3FREE,-0.87567,,,1out_0in,school,0.0,0.0,1.0,0.0,male,3.0,1.0,1735309.0,0.0,5.0,19 and under,1.0,633.0,6.0,asian,0.0,3.0,-1.0,no,2.0,0.0,641.0,8.0,4.0,1.0,-121.80644,37.350304,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,True,True,False,False,1.0,False,641.0,19.798008,634.0,1.505897,21.066668,-1.0,,,False,,0.0,0.0,0.0,False,M,True,False,False,False,school1,False,False,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6085.0,yes,60855040000000.0,1.0,no,35.0,6.0,own not recent,four or more,yes,1.0,two or more,240000.0,gt35-lt65,2010000000000.0,2.0,asian,gt150,0.0,two or more,0.0,no,4.0,3.0,641.0,4.0,0.097,60138.0,240.0,4.0,12.86,29.682171,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,1.0,True,False,False,False,2.0,1.392788,0.0,4.0,2.0,2.0,2.0,2.0,True,0_tours,0.0
1,1,2.0,31741.0,othmaint,31867.0,school,0.0,0.0,2169.162,0.0,0.0,0.0,126.0,0.0,0.0,0.0,0.0,"body-1,teleportationSharedVehicle-2-1,body-1",2169.162,hov3_teleportation,,,0,2169.162,126.0,578.0,,126.0,0.0,othmaint_to_school,578.0,1.0,1735309.0,72.0,school,2.0,True,2.0,school,634.0,583.0,,8.0,SHARED2FREE,-1.628539,school,1.0,1.0,1.0,1.0,mandatory,1.0,634.0,641.0,1735309.0,61.0,8.0,15.0,7.0,,,SHARED3FREE,-0.87567,,,1out_0in,school,0.0,0.0,1.0,0.0,male,3.0,1.0,1735309.0,0.0,5.0,19 and under,1.0,633.0,6.0,asian,0.0,3.0,-1.0,no,2.0,0.0,641.0,8.0,4.0,1.0,-121.80644,37.350304,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,True,True,False,False,1.0,False,641.0,19.798008,634.0,1.505897,21.066668,-1.0,,,False,,0.0,0.0,0.0,False,M,True,False,False,False,school1,False,False,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6085.0,yes,60855040000000.0,1.0,no,35.0,6.0,own not recent,four or more,yes,1.0,two or more,240000.0,gt35-lt65,2010000000000.0,2.0,asian,gt150,0.0,two or more,0.0,no,4.0,3.0,641.0,4.0,0.097,60138.0,240.0,4.0,12.86,29.682171,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,1.0,True,False,False,False,2.0,1.392788,0.0,4.0,2.0,2.0,2.0,2.0,True,0_tours,0.0
2,1,3.0,52106.0,school,52298.0,Home,0.0,0.0,2680.151,0.0,0.0,0.0,192.0,0.0,0.0,0.0,0.0,"body-1,teleportationSharedVehicle-3-1,body-1",2680.151,hov3_teleportation,,,0,2680.151,192.0,581.0,,192.0,0.0,school_to_Home,581.0,1.0,1735309.0,72.0,school,1.0,False,1.0,Home,641.0,634.0,,15.0,SHARED3FREE,-1.691244,school,1.0,1.0,1.0,1.0,mandatory,1.0,634.0,641.0,1735309.0,61.0,8.0,15.0,7.0,,,SHARED3FREE,-0.87567,,,1out_0in,school,0.0,0.0,1.0,0.0,male,3.0,1.0,1735309.0,0.0,5.0,19 and under,1.0,633.0,6.0,asian,0.0,3.0,-1.0,no,2.0,0.0,641.0,8.0,4.0,1.0,-121.80644,37.350304,False,False,False,True,False,False,False,True,False,False,True,False,False,False,False,True,True,False,False,1.0,False,641.0,19.798008,634.0,1.505897,21.066668,-1.0,,,False,,0.0,0.0,0.0,False,M,True,False,False,False,school1,False,False,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6085.0,yes,60855040000000.0,1.0,no,35.0,6.0,own not recent,four or more,yes,1.0,two or more,240000.0,gt35-lt65,2010000000000.0,2.0,asian,gt150,0.0,two or more,0.0,no,4.0,3.0,641.0,4.0,0.097,60138.0,240.0,4.0,12.86,29.682171,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,1.0,True,False,False,False,2.0,1.392788,0.0,4.0,2.0,2.0,2.0,2.0,True,0_tours,0.0
3,2,2.0,44157.0,work,46155.0,atwork,3.757355,0.0,34257.62,0.0,0.0,786.423,1393.0,0.0,0.0,0.0,605.0,"body-2,539168,539168,body-2",35044.043,car,"1.0, 0.0, 0.0, 1.0",,0,35044.043,1998.0,689.0,"539168, body-2",1998.0,0.0,work_to_atwork,689.0,2.0,1735309.0,86.0,atwork,1.0,True,1.0,atwork,700.0,700.0,,12.0,DRIVEALONEFREE,-0.328882,eat,1.0,1.0,1.0,1.0,atwork,1.0,700.0,700.0,1735309.0,113.0,12.0,13.0,1.0,,16.164404,DRIVEALONEFREE,-0.243054,,121.0,0out_0in,atwork,77000.0,1.0,0.0,0.0,female,2.0,2.0,1735309.0,40.0,1.0,20 to 35,21.0,-1.0,6.0,asian,0.0,30.0,700.0,no,1.0,0.0,641.0,1.0,1.0,3.0,-121.80644,37.350304,False,True,True,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,0.0,True,641.0,29.682171,-1.0,,0.0,700.0,13.76163,17.63173,False,4.0,54.783333,725.3979,1.0,False,M,True,False,False,False,work1,False,False,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6085.0,yes,60855040000000.0,1.0,no,35.0,6.0,own not recent,four or more,yes,1.0,two or more,240000.0,gt35-lt65,2010000000000.0,2.0,asian,gt150,0.0,two or more,0.0,no,4.0,3.0,641.0,4.0,0.097,60138.0,240.0,4.0,12.86,29.682171,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,1.0,True,False,False,False,2.0,1.392788,0.0,4.0,2.0,2.0,2.0,2.0,True,0_tours,0.0
4,2,3.0,47556.0,atwork,49279.0,work,0.690007,0.0,6291.127,0.0,0.0,1791.239,345.0,0.0,0.0,0.0,1378.0,"body-2,539168,539168,body-2",8082.366,car,"1.0, 0.0, 0.0, 1.0",,0,8082.366,1723.0,693.0,"539168, body-2",1723.0,0.0,atwork_to_work,693.0,2.0,1735309.0,86.0,atwork,1.0,False,1.0,Work,700.0,700.0,,13.0,DRIVEALONEPAY,-0.329928,eat,1.0,1.0,1.0,1.0,atwork,1.0,700.0,700.0,1735309.0,113.0,12.0,13.0,1.0,,16.164404,DRIVEALONEFREE,-0.243054,,121.0,0out_0in,atwork,77000.0,1.0,0.0,0.0,female,2.0,2.0,1735309.0,40.0,1.0,20 to 35,21.0,-1.0,6.0,asian,0.0,30.0,700.0,no,1.0,0.0,641.0,1.0,1.0,3.0,-121.80644,37.350304,False,True,True,False,True,False,False,True,False,False,True,False,False,False,False,False,False,False,False,0.0,True,641.0,29.682171,-1.0,,0.0,700.0,13.76163,17.63173,False,4.0,54.783333,725.3979,1.0,False,M,True,False,False,False,work1,False,False,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6085.0,yes,60855040000000.0,1.0,no,35.0,6.0,own not recent,four or more,yes,1.0,two or more,240000.0,gt35-lt65,2010000000000.0,2.0,asian,gt150,0.0,two or more,0.0,no,4.0,3.0,641.0,4.0,0.097,60138.0,240.0,4.0,12.86,29.682171,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,1.0,True,False,False,False,2.0,1.392788,0.0,4.0,2.0,2.0,2.0,2.0,True,0_tours,0.0


In [90]:
# Give the ActivitySim mode / logsum columns explicit names. The _x / _y suffixes
# were added by the merges above where both sides had a column of the same name.
eventsASim = eventsASim.rename(
    columns={
        "mode_choice_logsum_y": "logsum_tours_mode_AS_tours",
        "tour_mode": "tour_mode_AS_tours",
        "mode_choice_logsum_x": "logsum_trip_mode_AS_trips",
        "trip_mode": "trip_mode_AS_trips",
    }
)

# Save the merged table locally.
eventsASim.to_csv(
    'C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged_Baseline_2018.csv',
    index=False,
)

In [97]:
from io import StringIO # python3; python2: BytesIO 
import boto  # legacy SDK; no longer used by this cell, import kept for other cells
import boto3

# Upload the merged table to S3 as 'eventsASim.csv'.
# boto3's Object() takes the bucket *name* as a string. The previous version
# passed a Bucket object obtained from the legacy `boto` connection, which
# boto3 cannot use, so the upload is now done with boto3 only.
s3_bucket_name = 'beam-core-act'

# Serialise the frame to CSV text in memory.
csv_buffer = StringIO()
eventsASim.to_csv(csv_buffer)

s3_resource = boto3.resource('s3', region_name='us-east-2')
s3_resource.Object(s3_bucket_name, 'eventsASim.csv').put(Body=csv_buffer.getvalue())

KeyboardInterrupt: 