In [1]:
import pandas as pd
import numpy as np

In [2]:
# S3 folders holding the BEAM outputs (update both when switching to another code version / run)
loc_2018 = (
    "https://beam-outputs.s3.amazonaws.com/pilates-outputs/"
    "sfbay-2018-base-20220306/beam/year-2018-iteration-5/ITERS/it.0/"
)
loc_2018_tripid = (
    "https://beam-outputs.s3.amazonaws.com/pilates-outputs/"
    "sfbay-2018-base-20220316/beam/year-2018-iteration-2/ITERS/it.0/"
)

In [3]:
%%time
# Column dtypes applied when reading the BEAM CSV outputs.
# Low-cardinality text columns are read as "category" to save memory;
# id-like columns stay "object" so they can be matched as strings later.
dtypes = {
    "time": "float32",
    "type": "category",
    "legMode": "category",  # listed once: a repeated key in a dict literal silently overrides the earlier one
    "actType": "category",
    "reason": "category",
    "primaryFuelLevel": "float64",
    "chargingPointType": "category",
    "pricingModel": "category",
    "parkingType": "category",
    "mode": "category",
    "personalVehicleAvailable": "category",
    "person": "object",
    "driver": "object",
    "riders": "object",
}

# Load the gzipped events CSV of the tripId run with the dtypes declared in this cell
events_url = loc_2018_tripid + '0.events.csv.gz'
eventsSF = pd.read_csv(events_url, dtype=dtypes, compression='gzip')



Wall time: 3min 34s


In [4]:
# Load the gzipped plans CSV from the same run folder (same dtype mapping is passed)
plans_url = loc_2018_tripid + '0.plans.csv.gz'
plansSF = pd.read_csv(plans_url, dtype=dtypes, compression='gzip')

In [5]:
# Preview the first five plan rows after ordering by person id
plansSF.sort_values("personId").head(5)

Unnamed: 0,tripId,personId,planIndex,planScore,planSelected,planElementType,planElementIndex,activityType,activityLocationX,activityLocationY,activityEndTime,legMode,legDepartureTime,legTravelTime,legRouteType,legRouteStartLink,legRouteEndLink,legRouteTravelTime,legRouteDistance,legRouteLinks
1175340,,1,0,277.215965,True,activity,0,Home,605711.160337,4134402.0,26748.0,,,,,,,,,
1175341,577.0,1,0,277.215965,True,leg,1,,,,,hov3_teleportation,-inf,-inf,,,,,,
1175342,,1,0,277.215965,True,activity,2,othmaint,604273.529654,4131071.0,26866.8,,,,,,,,,
1175343,578.0,1,0,277.215965,True,leg,3,,,,,hov2_teleportation,-inf,-inf,,,,,,
1175344,,1,0,277.215965,True,activity,4,school,604436.586319,4135123.0,54720.0,,,,,,,,,


In [6]:
# Lift pandas' display limits so every column and every row is rendered
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [7]:
# Rename the "mode" column to "modeBEAM"
eventsSF.rename(columns={"mode": "modeBEAM"}, inplace=True)
# Replace "Work" with "work" in the "actType" column.
# The result is assigned back to the frame: calling replace(..., inplace=True) on
# eventsSF["actType"] acts on an intermediate object and leaves the frame untouched
# when pandas Copy-on-Write is active. Going through object dtype merges the two
# labels without relying on replace() semantics for categoricals.
act_type = eventsSF["actType"].astype("object").replace({"Work": "work"})
eventsSF["actType"] = act_type.astype("category")

In [8]:
# Drop rows whose person id contains "Agent" (transit / ride-hail driver agents,
# which carry no traveller information), then renumber the index
person_is_agent = eventsSF['person'].str.contains("Agent", na=False)
eventsSF = eventsSF.loc[~person_is_agent].reset_index(drop=True)

In [9]:
# Keep an independent (deep) snapshot of the events at this stage
eventsSFCopy = eventsSF.copy(deep=True)

In [10]:
# Move the id columns to the front of the frame, in the order person, driver, riders.
# All three are popped first and then re-inserted at positions 0, 1 and 2.
popped = {name: eventsSF.pop(name) for name in ('person', 'driver', 'riders')}
for position, (name, values) in enumerate(popped.items()):
    eventsSF.insert(position, name, values)

In [11]:
# Helper columns used to build IDMerged
# UniqueID: plain copy of the person column
eventsSF['UniqueID'] = eventsSF['person']
# personID: person ids that also occur somewhere in the driver column
person_seen_as_driver = eventsSF['person'].isin(eventsSF['driver'])
eventsSF['personID'] = np.where(person_seen_as_driver, eventsSF['person'], np.nan)
# driverID: driver ids that also occur somewhere in the person column
driver_seen_as_person = eventsSF['driver'].isin(eventsSF['person'])
eventsSF['driverID'] = np.where(driver_seen_as_person, eventsSF['driver'], np.nan)

In [12]:
# Collapse the id columns into IDMerged with priority UniqueID > personID > driverID:
# each later column only fills rows where the earlier ones are missing
person_or_driver = eventsSF['personID'].combine_first(eventsSF['driverID'])
eventsSF['IDMerged'] = eventsSF['UniqueID'].combine_first(person_or_driver)

In [13]:
# The helper id columns are no longer needed once IDMerged exists
eventsSF = eventsSF.drop(columns=['personID', 'driverID', 'UniqueID'])

In [14]:
# Make IDMerged the first column: pop it out and re-insert it at position 0
eventsSF.insert(0, 'IDMerged', eventsSF.pop('IDMerged'))

In [15]:
%%time
# Split the ':'-separated "riders" string into a list and replicate the row once per rider.
# explode() repeats the original index label on every generated row, so the index is
# rebuilt here to keep labels unique for the label-aligned operations that follow.
eventsSF['riders'] = eventsSF['riders'].str.split(':')
eventsSF = eventsSF.explode('riders').reset_index(drop=True)

Wall time: 1min 37s


In [16]:
# Rider ids that also occur in the person column take precedence over the existing IDMerged
rider_is_person = eventsSF['riders'].isin(eventsSF['person'])
eventsSF['riderID'] = np.where(rider_is_person, eventsSF['riders'], np.nan)
merged_with_riders = eventsSF['riderID'].combine_first(eventsSF['IDMerged'])
eventsSF['IDMerged'] = merged_with_riders

In [17]:
# riderID has been folded into IDMerged and can go
eventsSF = eventsSF.drop(columns=['riderID'])

In [18]:
# Rows still lacking an IDMerged: either charging events or ride-hail / transit driver rows
missing_id = eventsSF['IDMerged'].isna()
IDnan = eventsSF.loc[missing_id]

In [19]:
# Drop rows that have no IDMerged and whose driver id contains "Agent"
# (transit / ride-hail drivers without traveller information), then renumber the index
driver_is_agent = eventsSF['driver'].str.contains("Agent", na=False)
no_merged_id = eventsSF['IDMerged'].isna()
eventsSF = eventsSF.loc[~(driver_is_agent & no_merged_id)].reset_index(drop=True)

In [20]:
# Re-collect the rows without an IDMerged and report how many remain
still_missing_id = eventsSF['IDMerged'].isna()
IDnan = eventsSF.loc[still_missing_id]
IDnan.shape

(6329, 63)

In [21]:
%%time
# Fill missing ids (charging events) from the other events of the same vehicle
def _fill_ids_within_vehicle(ids):
    """Forward-fill, then back-fill, the ids of one vehicle's events."""
    return ids.ffill().bfill()

eventsSF["chargeID"] = eventsSF.groupby('vehicle')['IDMerged'].transform(_fill_ids_within_vehicle)

Wall time: 4min 23s


In [22]:
# Prefer the vehicle-filled chargeID; fall back to the existing IDMerged where it is missing
filled_ids = eventsSF['chargeID']
eventsSF['IDMerged'] = filled_ids.combine_first(eventsSF['IDMerged'])

In [23]:
# chargeID has been folded into IDMerged and can go
eventsSF = eventsSF.drop(columns=['chargeID'])

In [24]:
# Distinct values of currentTourMode
tour_modes = eventsSF['currentTourMode'].unique().tolist()
print(tour_modes)

['hov2_teleportation', 'hov3_teleportation', 'car', nan, 'bike', 'walk', 'car_hov2', 'car_hov3', 'walk_transit', 'drive_transit', 'ride_hail', 'ride_hail_pooled', 'ride_hail_transit', 'bike_transit']


In [25]:
# Distinct values of modeBEAM
beam_modes = eventsSF['modeBEAM'].unique().tolist()
print(beam_modes)

['hov2_teleportation', 'hov3_teleportation', 'car', nan, 'bike', 'walk', 'car_hov2', 'car_hov3', 'walk_transit', 'drive_transit', 'ride_hail', 'ride_hail_pooled', 'ride_hail_transit', 'bus', 'tram', 'subway', 'cable_car', 'rail', 'bike_transit']


In [26]:
# Distinct event types
event_types = eventsSF['type'].unique().tolist()
print(event_types)

['ModeChoice', 'actend', 'departure', 'PersonEntersVehicle', 'TeleportationEvent', 'arrival', 'actstart', 'PathTraversal', 'LeavingParkingEvent', 'PersonLeavesVehicle', 'ParkingEvent', 'PersonCost', 'ReserveRideHail', 'Replanning', 'ChargingPlugInEvent', 'RefuelSessionEvent', 'ChargingPlugOutEvent']


In [27]:
# Convert IDMerged to a numeric dtype (non-numeric values raise, as by default)
eventsSF["IDMerged"] = pd.to_numeric(eventsSF["IDMerged"], errors="raise")

In [28]:
# Order the events by person (IDMerged) and then by time, and renumber the index
sort_keys = ['IDMerged', 'time']
eventsSF = eventsSF.sort_values(by=sort_keys).reset_index(drop=True)

In [29]:
# modeBEAM_rh: 'ride_hail' for every row driven by a rideHailAgent, otherwise modeBEAM.
# (Assumption carried over from the original note: ride_hail_pooled counts one passenger.)
driven_by_ridehail_agent = eventsSF['driver'].str.contains("rideHailAgent", na=False)
eventsSF['modeBEAM_rh'] = np.where(driven_by_ridehail_agent, 'ride_hail', eventsSF['modeBEAM'])

In [30]:
# Give TeleportationEvent rows the mode of the closest preceding row that has one;
# all other rows keep their own modeBEAM_rh.
# Series.ffill() replaces fillna(method='ffill'), whose `method` argument is deprecated.
is_teleportation = eventsSF['type'] == 'TeleportationEvent'
eventsSF["modeBEAM_rh"] = np.where(is_teleportation, eventsSF['modeBEAM_rh'].ffill(), eventsSF["modeBEAM_rh"])

In [150]:
#eventsSF[eventsSF['modeBEAM'] == "ride_hail_pooled"].head()

In [68]:
# Events of the person with IDMerged 3, kept as a sample for manual inspection
sample_car = eventsSF.loc[eventsSF['IDMerged'] == 3]

In [69]:
# Write the sample to CSV without the index column
sample_car_path = 'C:/Shared-Work/Data/CleanData/sample_car.csv'
sample_car.to_csv(sample_car_path, index=False)

#### Adding new columns

In [31]:
# actEndTime: the event time on 'actend' rows, NaN elsewhere
is_actend = eventsSF['type'] == 'actend'
eventsSF['actEndTime'] = np.where(is_actend, eventsSF['time'], np.nan)

In [32]:
# actStartTime: the event time on 'actstart' rows, NaN elsewhere
is_actstart = eventsSF['type'] == 'actstart'
eventsSF['actStartTime'] = np.where(is_actstart, eventsSF['time'], np.nan)

In [33]:
# travelTime: arrivalTime - departureTime on PathTraversal and TeleportationEvent rows, NaN elsewhere
is_movement = eventsSF['type'].isin(['PathTraversal', 'TeleportationEvent'])
elapsed = eventsSF['arrivalTime'] - eventsSF['departureTime']
eventsSF['travelTime'] = np.where(is_movement, elapsed, np.nan)

In [34]:
# travelDistance: 'length' on PathTraversal rows and on ModeChoice rows of the two
# teleportation modes, NaN elsewhere
is_path_traversal = eventsSF['type'] == 'PathTraversal'
is_teleport_choice = (eventsSF['type'] == 'ModeChoice') & eventsSF['modeBEAM'].isin(
    ['hov2_teleportation', 'hov3_teleportation']
)
eventsSF['travelDistance'] = np.where(is_path_traversal | is_teleport_choice, eventsSF['length'], np.nan)

In [35]:
# Travel time of rows whose modeBEAM is 'walk', NaN elsewhere
is_walk = eventsSF['modeBEAM'] == 'walk'
eventsSF['duration_walking'] = np.where(is_walk, eventsSF['travelTime'], np.nan)

In [36]:
# Travel distance of rows whose modeBEAM is 'walk', NaN elsewhere
is_walk = eventsSF['modeBEAM'] == 'walk'
eventsSF['distance_walking'] = np.where(is_walk, eventsSF['travelDistance'], np.nan)

In [37]:
# Travel time of rows whose modeBEAM is 'bike', NaN elsewhere
is_bike = eventsSF['modeBEAM'] == 'bike'
eventsSF['duration_on_bike'] = np.where(is_bike, eventsSF['travelTime'], np.nan)

In [38]:
# Travel distance of rows whose modeBEAM is 'bike', NaN elsewhere
is_bike = eventsSF['modeBEAM'] == 'bike'
eventsSF['distance_bike'] = np.where(is_bike, eventsSF['travelDistance'], np.nan)

In [39]:
# Travel time of rows whose modeBEAM_rh is 'ride_hail', NaN elsewhere
is_ride_hail = eventsSF['modeBEAM_rh'] == 'ride_hail'
eventsSF['duration_in_ridehail'] = np.where(is_ride_hail, eventsSF['travelTime'], np.nan)

In [40]:
# Travel distance of rows whose modeBEAM_rh is 'ride_hail', NaN elsewhere
is_ride_hail = eventsSF['modeBEAM_rh'] == 'ride_hail'
eventsSF['distance_ridehail'] = np.where(is_ride_hail, eventsSF['travelDistance'], np.nan)

In [41]:
# Travel time of rows whose modeBEAM_rh is one of the private-car modes, NaN elsewhere
private_car_modes = ['car', 'car_hov3', 'car_hov2', 'hov2_teleportation', 'hov3_teleportation']
is_private_car = eventsSF['modeBEAM_rh'].isin(private_car_modes)
eventsSF['duration_in_privateCar'] = np.where(is_private_car, eventsSF['travelTime'], np.nan)

In [42]:
# Travel distance of rows whose modeBEAM_rh is one of the private-car modes, NaN elsewhere
private_car_modes = ['car', 'car_hov3', 'car_hov2', 'hov2_teleportation', 'hov3_teleportation']
is_private_car = eventsSF['modeBEAM_rh'].isin(private_car_modes)
eventsSF['distance_privateCar'] = np.where(is_private_car, eventsSF['travelDistance'], np.nan)

In [43]:
# Travel time of rows whose modeBEAM is one of the transit modes, NaN elsewhere
transit_modes = [
    'bike_transit', 'drive_transit', 'walk_transit', 'bus', 'tram',
    'subway', 'rail', 'cable_car', 'ride_hail_transit',
]
is_transit = eventsSF['modeBEAM'].isin(transit_modes)
eventsSF['duration_in_transit'] = np.where(is_transit, eventsSF['travelTime'], np.nan)

In [44]:
# Travel distance of rows whose modeBEAM is one of the transit modes, NaN elsewhere
transit_modes = [
    'bike_transit', 'drive_transit', 'walk_transit', 'bus', 'tram',
    'subway', 'rail', 'cable_car', 'ride_hail_transit',
]
is_transit = eventsSF['modeBEAM'].isin(transit_modes)
eventsSF['distance_transit'] = np.where(is_transit, eventsSF['travelDistance'], np.nan)

In [45]:
# Blank out the tour index of a ModeChoice row that directly follows a Replanning row.
# replanningTime holds the Replanning event time, moved one row down so that it sits
# on the row immediately after the Replanning event.
is_replanning = eventsSF['type'] == 'Replanning'
eventsSF['replanningTime'] = np.where(is_replanning, eventsSF['time'], np.nan)
eventsSF['replanningTime'] = eventsSF['replanningTime'].shift(1)
choice_after_replanning = (eventsSF['type'] == 'ModeChoice') & eventsSF['replanningTime'].notna()
eventsSF['tourIndex_fixed'] = np.where(choice_after_replanning, np.nan, eventsSF['tourIndex'])

In [46]:
# primaryFuel of PathTraversal rows whose primary fuel type is 'Food', NaN elsewhere
uses_food = (eventsSF['type'] == 'PathTraversal') & (eventsSF['primaryFuelType'] == 'Food')
eventsSF['fuelFood'] = np.where(uses_food, eventsSF['primaryFuel'], np.nan)

In [98]:
# primaryFuel of PathTraversal rows whose primary fuel type is 'Electricity', NaN elsewhere
uses_electricity = (eventsSF['type'] == 'PathTraversal') & (eventsSF['primaryFuelType'] == 'Electricity')
eventsSF['fuelElectricity'] = np.where(uses_electricity, eventsSF['primaryFuel'], np.nan)

In [99]:
# primaryFuel of PathTraversal rows whose primary fuel type is 'Diesel', NaN elsewhere
uses_diesel = (eventsSF['type'] == 'PathTraversal') & (eventsSF['primaryFuelType'] == 'Diesel')
eventsSF['fuelDiesel'] = np.where(uses_diesel, eventsSF['primaryFuel'], np.nan)

In [100]:
# primaryFuel of PathTraversal rows whose primary fuel type is 'Biodiesel', NaN elsewhere
uses_biodiesel = (eventsSF['type'] == 'PathTraversal') & (eventsSF['primaryFuelType'] == 'Biodiesel')
eventsSF['fuelBiodiesel'] = np.where(uses_biodiesel, eventsSF['primaryFuel'], np.nan)

In [120]:
# primaryFuel + secondaryFuel of PathTraversal rows whose primary fuel type is not 'Food', NaN elsewhere
not_food_powered = (eventsSF['type'] == 'PathTraversal') & (eventsSF['primaryFuelType'] != 'Food')
total_fuel = eventsSF['primaryFuel'] + eventsSF['secondaryFuel']
eventsSF['fuel_not_Food'] = np.where(not_food_powered, total_fuel, np.nan)

In [127]:
# fuelGasoline: gasoline consumed on PathTraversal rows, NaN on every other row.
# Only the fuel whose own type is 'Gasoline' is counted. Adding primaryFuel and
# secondaryFuel whenever either type is gasoline would book the other fuel of a
# dual-fuel vehicle as gasoline (and count it a second time next to its own column).
is_path_traversal = eventsSF['type'] == 'PathTraversal'
primary_is_gasoline = eventsSF['primaryFuelType'] == 'Gasoline'
secondary_is_gasoline = eventsSF['secondaryFuelType'] == 'Gasoline'
gasoline_used = (
    np.where(primary_is_gasoline, eventsSF['primaryFuel'], 0.0)
    + np.where(secondary_is_gasoline, eventsSF['secondaryFuel'], 0.0)
)
eventsSF['fuelGasoline'] = np.where(
    is_path_traversal & (primary_is_gasoline | secondary_is_gasoline), gasoline_used, np.nan
)

In [79]:
# Activity type on 'actend' rows, empty string elsewhere
ends_activity = eventsSF['type'] == 'actend'
eventsSF['actEndType'] = np.where(ends_activity, eventsSF['actType'], "")

In [80]:
# Activity type on 'actstart' rows, empty string elsewhere
starts_activity = eventsSF['type'] == 'actstart'
eventsSF['actStartType'] = np.where(starts_activity, eventsSF['actType'], "")

In [153]:
# First 58 rows that are either pooled ride-hail (modeBEAM_rh) or Replanning events
is_pooled = eventsSF['modeBEAM_rh'] == 'ride_hail_pooled'
is_replanning_event = eventsSF['type'] == 'Replanning'
eventsSF.loc[is_pooled | is_replanning_event].head(58)

Unnamed: 0,IDMerged,person,driver,riders,primaryFuelLevel,vehicle,secondaryFuelLevel,price,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,time,type,modeBEAM,incentive,tollCost,costBEAM,currentTourMode,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,tripId,link,legMode,facility,actType,departTime,startX,startY,endX,endY,arrivalTime,departureTime,vehicleType,shiftStatus,parkingZoneId,fuel,duration,links,numPassengers,primaryFuel,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,capacity,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,score,reason,cost,modeBEAM_rh,actEndTime,actStartTime,travelTime,travelDistance,duration_walking,distance_walking,duration_on_bike,distance_bike,duration_in_ridehail,distance_ridehail,duration_in_privateCar,distance_privateCar,duration_in_transit,distance_transit,replanningTime,tourIndex_fixed,fuelFood,tripIndex,actEndType,actStartType,fuelElectricity,fuelDiesel,fuelBiodiesel,fuel_not_Food,fuelGasoline
6729,1246,1246,,,,,,,,,,,,,71700.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,
13757,3089,3089,,,,,,,,,,,,,23100.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,
15087,3284,3284,,,,,,,,,,,,,25873.0,ModeChoice,ride_hail_pooled,,,,,,RIDE_HAIL:RIDE_HAIL_POOLED:WALK,163902.0,False,108246.799,1.0,"WALK,CAR,WALK","body-3284,rideHailVehicle-5535320@default,body...",escort,work,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ride_hail_pooled,,,,,,,,,,,,,,,,1.0,,2.0,,,,,,,
15093,3284,3284,,,,,,,,,,,,,26400.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL_POOLED,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,
21854,4722,4722,,,,,,,,,,,,,64500.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,
21858,4722,4722,,,,,,,,,,,,,64800.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,
25977,5674,5674,,,,,,,,,,,,,64500.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,
26537,5733,5733,,,,,,,,,,,,,77700.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,5.0,,,,,,,
28683,6149,6149,,,,,,,,,,,,,39900.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,
28701,6149,6149,,,,,,,,,,,,,67200.0,Replanning,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ResourceUnavailable RIDE_HAIL,,,,,,,,,,,,,,,,,,,,2.0,,,,,,,


In [154]:
# Events of the person with IDMerged 16204, kept as a sample for manual inspection
replan3 = eventsSF.loc[eventsSF['IDMerged'] == 16204]

In [155]:
# Write the sample to CSV without the index column
replan3_path = 'C:/Shared-Work/Data/CleanData/replan3.csv'
replan3.to_csv(replan3_path, index=False)

In [None]:
# modeBEAM on ModeChoice rows, NaN elsewhere
is_mode_choice = eventsSF['type'] == 'ModeChoice'
eventsSF['mode_choice_planned_BEAM'] = np.where(is_mode_choice, eventsSF['modeBEAM'], np.nan)

In [None]:
# modeBEAM on ModeChoice rows, NaN elsewhere
# NOTE(review): currently the same expression as mode_choice_planned_BEAM - confirm the intended difference
is_mode_choice = eventsSF['type'] == 'ModeChoice'
eventsSF['mode_choice_actual_BEAM'] = np.where(is_mode_choice, eventsSF['modeBEAM'], np.nan)

In [None]:
# currentTourMode on ModeChoice rows, NaN elsewhere
is_mode_choice = eventsSF['type'] == 'ModeChoice'
eventsSF['mode_planned_tour_ActSim'] = np.where(is_mode_choice, eventsSF['currentTourMode'], np.nan)

##### ridehail_pooled in modeBEAM_rh column
##### finding first mode choice of each trip index
##### finding last mode choice of each trip index

#### Trip Index

In [128]:
# tripIndex: per person, rank the non-missing tourIndex_fixed values (ties broken by
# row order), then carry each rank forward onto the following rows of the same person.
# The forward fill is grouped by IDMerged so that a person's rows before their first
# ranked row are not given the previous person's last trip index. GroupBy.ffill() also
# replaces fillna(method='ffill'), whose `method` argument is deprecated.
eventsSF["tripIndex"] = eventsSF.groupby("IDMerged")["tourIndex_fixed"].rank(method="first", ascending=True)
eventsSF["tripIndex"] = eventsSF.groupby("IDMerged")["tripIndex"].ffill()

In [None]:
# All events of the person with IDMerged 181
eventsSF.loc[eventsSF['IDMerged'] == 181]

In [131]:
# Rename the "netCost" column to "costBEAM".
# The target name must be "costBEAM": that is the column the trip-level pivot table
# aggregates, so renaming to any other name makes the pivot fail with a KeyError
# when the notebook is run from a fresh kernel.
eventsSF.rename(columns={"netCost": "costBEAM"}, inplace=True)

In [132]:
# Aggregate the events to one row per (person, trip): every listed column is reduced with np.sum
trip_sum_columns = [
    'actStartTime', 'actEndTime', 'travelTime', 'costBEAM', 'actStartType', 'actEndType',
    'duration_walking', 'duration_in_privateCar', 'duration_on_bike', 'duration_in_ridehail',
    'travelDistance', 'duration_in_transit', 'distance_walking', 'distance_bike',
    'distance_ridehail', 'distance_privateCar', 'distance_transit', 'legVehicleIds',
    'fuelFood', 'fuelElectricity', 'fuelBiodiesel', 'fuelDiesel', 'fuel_not_Food',
    'fuelGasoline',
]
trip_aggregations = dict.fromkeys(trip_sum_columns, np.sum)
Person_Trip_eventsSF = pd.pivot_table(
    eventsSF,
    index=['IDMerged', 'tripIndex'],
    aggfunc=trip_aggregations,
).reset_index()

In [133]:
# Door-to-door time: summed activity start time minus summed activity end time of the trip
trip_start_of_next_activity = Person_Trip_eventsSF['actStartTime']
trip_end_of_previous_activity = Person_Trip_eventsSF['actEndTime']
Person_Trip_eventsSF['door_to_door_time'] = trip_start_of_next_activity - trip_end_of_previous_activity

In [134]:
# Wait time: the part of the door-to-door time that is not travel time
door_to_door = Person_Trip_eventsSF['door_to_door_time']
Person_Trip_eventsSF['waitTime'] = door_to_door - Person_Trip_eventsSF['travelTime']

In [135]:
# Trip purpose label "<activity ended>_to_<activity started>"
ended_activity = Person_Trip_eventsSF['actEndType'].astype(str)
started_activity = Person_Trip_eventsSF['actStartType'].astype(str)
Person_Trip_eventsSF['actPurpose'] = ended_activity.str.cat(started_activity, sep="_to_")

In [138]:
# Write the first 10000 trip rows to CSV (no index column) for a manual check
pivot_check_path = 'C:/Shared-Work/Data/CleanData/pivot_check.csv'
Person_Trip_eventsSF.iloc[:10000].to_csv(pivot_check_path, index=False)

In [137]:
# Trip rows of the person with IDMerged 18379
Person_Trip_eventsSF.loc[Person_Trip_eventsSF['IDMerged'] == 18379]

Unnamed: 0,IDMerged,tripIndex,actEndTime,actEndType,actStartTime,actStartType,costBEAM,distance_bike,distance_privateCar,distance_ridehail,distance_transit,distance_walking,duration_in_privateCar,duration_in_ridehail,duration_in_transit,duration_on_bike,duration_walking,fuelBiodiesel,fuelDiesel,fuelElectricity,fuelFood,fuelGasoline,fuel_not_Food,legVehicleIds,travelDistance,travelTime,door_to_door_time,waitTime,actPurpose
6183,18379,1.0,26755.0,Home,27046.0,social,0.48126,0.0,4387.878,0.0,0.0,0.0,291.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5954271.0,5954271.0,"body-18379,18379-emergency-0,18379-emergency-0...",4387.878,291.0,291.0,0.0,Home_to_social
6184,18379,2.0,64389.0,social,67063.0,Home,0.000189,0.0,30.261,0.0,0.0,3473.507,2.0,0.0,0.0,0.0,2672.0,0.0,0.0,10547.724113,184095.871,0.0,10547.72,"body-18379,423164,423164,body-18379",3503.768,2674.0,2674.0,0.0,social_to_Home


In [116]:
# Bike rows whose primary fuel type is reported as "Gasoline" (collected for inspection)
fuel_is_gasoline = eventsSF['primaryFuelType'] == "Gasoline"
mode_is_bike = eventsSF['modeBEAM'] == "bike"
sample_bike2 = eventsSF.loc[fuel_is_gasoline & mode_is_bike]

In [120]:
# Preview the first five rows of the sample
sample_bike2.head(5)

Unnamed: 0,IDMerged,person,driver,riders,vehicleType,price,shiftStatus,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,parkingZoneId,fuel,duration,vehicle,actType,time,type,link,legMode,primaryFuelLevel,secondaryFuelLevel,facility,score,modeBEAM,currentTourMode,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,reason,links,numPassengers,primaryFuel,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,endY,endX,startY,startX,capacity,arrivalTime,departureTime,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,incentive,tollCost,netCost,departTime,cost
248,468282,,468282,,BIKE-DEFAULT,,,,,,,,,,,,49567,,18146.0,PathTraversal,,,2203726.0,0.0,,,bike,bike,,,,,591.851,,,,,,,7179671799290231374313742,0.0,6273.6206,,,2.0,0.0,37.337513,-122.057953,37.337514,-122.060283,2.0,18146.0,18028.0,"27.254,13.274,0.638,52.229,52.229",0.0,,Gasoline,,,,,
316,2925369,,2925369,,BIKE-DEFAULT,,,,,,,,,,,,734823,,18095.0,PathTraversal,,,2205912.0,0.0,,,bike,bike,,,,,385.622,,,,,,,1018810189,0.0,4087.5932,,,2.0,0.0,37.967986,-121.785963,37.967986,-121.785963,2.0,18095.0,18018.0,"77.124,77.124",0.0,,Gasoline,,,,,
16062,2643355,,2643355,,BIKE-DEFAULT,,,,,,,,,,,,202653,,18250.0,PathTraversal,,,2197108.0,0.0,,,bike,bike,,,,,1216.258,,,,,,,68460824556902969022,0.0,12892.3348,,,2.0,0.0,37.839078,-122.275288,37.828203,-122.275229,2.0,18250.0,18007.0,"67.23,7.505,56.199,179.548",0.0,,Gasoline,,,,,
18520,2008662,,2008662,,BIKE-DEFAULT,,,,,,,,,,,,737344,,18291.0,PathTraversal,,,2209720.0,0.0,,,bike,bike,,,,,26.422,,,,,,,162585162584,0.0,280.0732,,,2.0,0.0,37.47375,-122.23224,37.47375,-122.23224,2.0,18291.0,18286.0,"5.284,5.284",0.0,,Gasoline,,,,,
26940,3707112,,3707112,,BIKE-DEFAULT,,,,,,,,,,,,52636,,18416.0,PathTraversal,,,2206459.0,0.0,,,bike,bike,,,,,334.028,,,,,,,3537735376,0.0,3540.6968,,,2.0,0.0,37.502374,-122.270943,37.502374,-122.270943,2.0,18416.0,18349.0,"66.806,66.806",0.0,,Gasoline,,,,,


In [109]:
# Write the sample to CSV without the index column
sample_bike2_path = 'C:/Shared-Work/Data/CleanData/sample_bike2.csv'
sample_bike2.to_csv(sample_bike2_path, index=False)

#### Merging with the ActivitySim persons and households files

In [None]:
# S3 folder holding the ActivitySim outputs of the 20220306 base run
actloc_2018 = "https://beam-outputs.s3.amazonaws.com/pilates-outputs/sfbay-2018-base-20220306/activitysim/"

# Final household and person tables from that folder
households_url = f"{actloc_2018}final_households.csv"
persons_url = f"{actloc_2018}final_persons.csv"
households = pd.read_csv(households_url)
persons = pd.read_csv(persons_url)