In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
sns.set_theme(style="darkgrid", color_codes=True)
sns.set(font_scale=1.35, style="ticks") #set styling preferences
import statsmodels.api as sm
from scipy import stats
import math
from math import pi
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from scipy.spatial.distance import cdist
from scipy.cluster.vq import kmeans2,vq, whiten
import geopandas as gpd
import h5py
import boto.s3
import glob
import boto3
from zipfile import ZipFile
import shutil
import dask.dataframe as dd
from dask.delayed import delayed

In [2]:
# Show all columns and rows
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### Baseline

In [3]:
cols_to_use = ['IDMerged', 'tripIndex', 'actStartTime', 'actEndTime','duration_travelling', 'cost_BEAM', 'actStartType', 
               'actEndType', 'duration_walking', 'duration_in_privateCar', 'duration_on_bike', 'duration_in_ridehail', 
              'distance_travelling', 'duration_in_transit', 'distance_walking','distance_bike','distance_ridehail', 
              'distance_privateCar', 'distance_transit', 'mode_choice_planned_BEAM','mode_choice_actual_BEAM',
              'vehicleIds', 'distance_mode_choice', 'replanning_status', 'reason', 'fuel_marginal','BlockGroupStart',
              'startX', 'startY', 'bgid_start', 'tractid_start', 'juris_name_start', 'county_name_start', 'mpo_start', 
               'BlockGroupEnd', 'endX', 'endY', 'bgid_end', 'tractid_end', 'juris_name_end', 'county_name_end', 'mpo_end', 
               'emission_marginal', 'duration_door_to_door', 'waitTime_no_replanning', 'waitTime_replanning', 'actPurpose', 
               'mode_choice_actual_6', 'mode_choice_actual_5', 'mode_choice_actual_4', 'trip_mode_AS_trips', 'logsum_trip_Potential_INEXUS',
               'age', 'income', 'hh_cars', 'TAZ_x', 'home_taz', 'auto_ownership', 'home_is_urban', 'home_is_rural', 'DRIVEALONEFREE',
               'DRIVEALONEPAY', 'SHARED2FREE', 'SHARED2PAY', 'SHARED3FREE', 'SHARED3PAY', 'WALK', 'BIKE', 'WALK_LOC', 'WALK_LRF', 
               'WALK_EXP', 'WALK_HVY', 'WALK_COM', 'DRIVE_LOC', 'DRIVE_LRF', 'DRIVE_EXP', 'DRIVE_HVY', 'DRIVE_COM', 'TAXI',
               'TNC_SINGLE', 'TNC_SHARED', 'income_quartiles', 'income_deciles' ] # Specify the columns to read

In [4]:
%%time
s3 = boto3.client("s3")
key = "pilates-outputs/sfbay_baseline_telecommuting_20230221/inexus/sfbay_baseline_default-1.0_2020__20230220.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sfbase = pd.read_csv(obj['Body'], compression = 'gzip', index_col='Unnamed: 0')



CPU times: total: 1min 39s
Wall time: 1min 39s


In [7]:
sfbase.IDMerged.unique()

array([      4,       8,      18, ..., 6808781, 6808782, 6808834],
      dtype=int64)

In [6]:
sfbase.head(2)

Unnamed: 0,IDMerged,tripIndex,actStartTime,actEndTime,duration_travelling,cost_BEAM,actStartType,actEndType,duration_walking,duration_in_privateCar,duration_on_bike,duration_in_ridehail,distance_travelling,duration_in_transit,distance_walking,distance_bike,distance_ridehail,distance_privateCar,distance_transit,vehicleIds_estimate,mode_choice_planned_BEAM,mode_choice_actual_BEAM,vehicleIds,numPassengers,distance_mode_choice,replanning_status,reason,parkingType,transit_bus,transit_subway,transit_tram,transit_cable_car,ride_hail_pooled,transit_rail,fuelFood,fuelElectricity,fuelBiodiesel,fuelDiesel,fuel_not_Food,fuelGasoline,fuel_marginal,BlockGroupStart,startX,startY,bgid_start,tractid_start,juris_name_start,county_name_start,mpo_start,BlockGroupEnd,endX,endY,bgid_end,tractid_end,juris_name_end,county_name_end,mpo_end,emissionFood,emissionElectricity,emissionDiesel,emissionGasoline,emissionBiodiesel,emission_marginal,duration_door_to_door,waitTime_no_replanning,waitTime_replanning,actPurpose,mode_choice_actual_5,mode_choice_actual_6,mode_choice_actual_4,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_num,outbound,trip_count,purpose,destination_x,origin_x,destination_logsum_x,depart,trip_mode_AS_trips,logsum_trip_Potential_INEXUS,tour_type,tour_type_count,tour_type_num,tour_num,tour_count,tour_category,number_of_participants,destination_y,origin_y,household_id_x,tdd,start,end,duration,composition,destination_logsum_y,tour_mode_AS_tours,logsum_tours_mode_AS_tours,atwork_subtour_frequency,parent_tour_id,stop_frequency,primary_purpose_y,age,earning,edu,hispanic,hours,PNUM,race_id,relate,sex,student,work_at_home,worker,household_id_y,person_age,person_sex,race,hispanic.1,p_hispanic,MAR,TAZ_x,ptype,pemploy,pstudent,home_x,home_y,age_16_to_19,age_16_p,adult,male,female,has_non_worker,has_retiree,has_preschool_kid,has_driving_kid,has_school_kid,has_full_time,has_part_time,has_university,student_is_employed,nonstudent_to_school,is_student,is_gradeschool,is_highschool,is_university,school_segment,is_worker,home_taz,value_of_time,school_taz,distance_to_school,roundtrip_auto_time_to_school,workplace_taz,workplace_location_logsum,distance_to_work,workplace_in_cbd,work_taz_area_type,roundtrip_auto_time_to_work,work_auto_savings,work_auto_savings_ratio,free_parking_at_work,cdap_activity,travel_active,under16_not_at_school,has_preschool_kid_at_home,has_school_kid_at_home,mandatory_tour_frequency,work_and_school_and_worker,work_and_school_and_student,num_mand,num_work_tours,num_joint_tours,non_mandatory_tour_frequency,num_non_mand,num_escort_tours,num_eatout_tours,num_shop_tours,num_maint_tours,num_discr_tours,num_social_tours,num_non_escort_tours,serialno,VEHICL,income,race_of_head,age_of_head,num_workers,hispanic_status_of_head,tenure,recent_mover,sf_detached,hh_age_of_head,hh_race_of_head,hispanic_head,hh_size,hh_cars,hh_children,seniors,hh_income,hh_workers,tenure_mover,hh_seniors,block_id,lcm_county_id,hhsize,gt55,gt2,hh_type,TAZ_y,HHT,sample_rate,chunk_id,income_in_thousands,income_segment,median_value_of_time,hh_value_of_time,num_non_workers,num_drivers,num_adults,num_children,num_young_children,num_children_5_to_15,num_children_16_to_17,num_college_age,num_young_adults,non_family,family,home_is_urban,home_is_rural,auto_ownership,hh_work_auto_savings_ratio,num_under16_not_at_school,num_travel_active,num_travel_active_adults,num_travel_active_preschoolers,num_travel_active_children,num_travel_active_non_preschoolers,participates_in_jtf_model,joint_tour_frequency,num_hh_joint_tours,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED,income_quartiles,income_deciles
0,4,1513,55457.0,54777.0,680.0,1.609457,othdiscr,Home,0.0,680.0,0.0,0.0,14674.192,0.0,0.0,0.0,0.0,14674.192,0.0,"body-4,358556,358556,body-4",car,car,"body-4, 358556","1.0, 0.0, 0.0, 1.0",14674.192,0,,"Residential, Public",0,0,0,0,0,0,0.0,0.0,0.0,0.0,24050180.0,24050180.0,24050180.0,60014000000.0,-122.227937,37.854097,60014000000.0,6001400000.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014100000.0,-122.17196,37.750722,60014100000.0,6001410000.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.001778,0.0,0.0,680.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,True,1.0,othdiscr,891.0,1005.0,,15.0,DRIVEALONEFREE,-1.014988,othdiscr,1.0,1.0,1.0,1.0,non_mandatory,1.0,891.0,1005.0,865.0,146.0,15.0,16.0,1.0,,12.843532,DRIVEALONEFREE,-1.458456,,,0out_0in,othdiscr,68.0,0.0,22.0,0.0,0.0,1.0,2.0,0.0,2.0,0.0,0.0,0.0,865.0,above 60,female,black,0.0,no,1.0,1005.0,5.0,3.0,3.0,-122.227937,37.854097,False,True,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,0.0,False,1005.0,2.81873,-1.0,,0.0,-1.0,,,False,,0.0,0.0,0.0,False,N,True,False,False,False,,False,False,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2012000000000.0,3.0,70550.0,2.0,68.0,0.0,0.0,1.0,0.0,yes,gt65,black,no,two,two or more,no,2.0,gt60-lt100,none,own not recent,yes,60014000000000.0,6001.0,2.0,1.0,1.0,4.0,1005.0,4.0,0.105,73380.0,70.55,3.0,10.44,2.81873,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,True,False,False,False,1.0,0.0,0.0,2.0,2.0,0.0,0.0,2.0,True,0_tours,0.0,-1.184862,-1.194734,-1000.184862,-1000.190503,-1000.184862,-1000.188811,-57.772093,-1016.087313,-1998.380369,-1998.380369,-1998.380369,-2000.092182,-1998.380369,-1001.227631,-1998.427241,-1998.427241,-2000.036363,-1998.427241,-16.603018,-10.597699,-10.606597,2ndQ,4thD
1,4,1517,61999.0,61185.0,814.0,1.685569,Home,othdiscr,0.0,814.0,0.0,0.0,15368.148,0.0,0.0,0.0,0.0,15368.148,0.0,"body-4,358556,358556,body-4",car,car,"body-4, 358556","1.0, 0.0, 0.0, 1.0",15368.148,0,,"Public, Residential",0,0,0,0,0,0,0.0,0.0,0.0,0.0,25818970.0,25818970.0,25818970.0,60014100000.0,-122.17196,37.750722,60014100000.0,6001410000.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014040000.0,-122.226711,37.849953,60014040000.0,6001404000.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.001908,0.0,0.0,814.0,0.0,0.0,othdiscr_to_Home,car,car,car,1517.0,4.0,865.0,189.0,othdiscr,1.0,False,1.0,Home,1005.0,891.0,,16.0,DRIVEALONEPAY,-1.415673,othdiscr,1.0,1.0,1.0,1.0,non_mandatory,1.0,891.0,1005.0,865.0,146.0,15.0,16.0,1.0,,12.843532,DRIVEALONEFREE,-1.458456,,,0out_0in,othdiscr,68.0,0.0,22.0,0.0,0.0,1.0,2.0,0.0,2.0,0.0,0.0,0.0,865.0,above 60,female,black,0.0,no,1.0,1005.0,5.0,3.0,3.0,-122.227937,37.854097,False,True,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,0.0,False,1005.0,2.81873,-1.0,,0.0,-1.0,,,False,,0.0,0.0,0.0,False,N,True,False,False,False,,False,False,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2012000000000.0,3.0,70550.0,2.0,68.0,0.0,0.0,1.0,0.0,yes,gt65,black,no,two,two or more,no,2.0,gt60-lt100,none,own not recent,yes,60014000000000.0,6001.0,2.0,1.0,1.0,4.0,1005.0,4.0,0.105,73380.0,70.55,3.0,10.44,2.81873,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,True,False,False,False,1.0,0.0,0.0,2.0,2.0,0.0,0.0,2.0,True,0_tours,0.0,-1.58506,-1.595994,-1000.580569,-1000.586817,-1000.578772,-1000.583146,-65.175868,-1019.7892,-1998.380369,-1998.380369,-1998.380369,-1998.978355,-1998.380369,-1998.427241,-1998.427241,-1998.427241,-1998.427241,-1998.427241,-18.407549,-14.571624,-10.43257,2ndQ,4thD


In [12]:
sfbase.loc[(sfbase['income'] < 50000) & (sfbase['age'] < 25) & ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

7529

In [13]:
sfbase.loc[(sfbase['income'] < 50000) & ((sfbase['age'] >= 25)|(sfbase['age'] < 45)) &((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

57780

In [14]:
sfbase.loc[(sfbase['income'] < 50000) & (sfbase['age'] >= 45) & ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

25662

In [15]:
sfbase.loc[((sfbase['income'] >= 50000)|(sfbase['income'] < 150000)) & ((sfbase['age'] >= 25)|(sfbase['age'] < 45)) & 
          ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

320733

In [16]:
sfbase.loc[((sfbase['income'] >= 50000)|(sfbase['income'] < 150000)) & (sfbase['age'] < 25) & 
           ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

28285

In [17]:
sfbase.loc[((sfbase['income'] >= 50000)|(sfbase['income'] < 150000)) & (sfbase['age'] >= 45) & 
           ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

156627

In [18]:
sfbase.loc[(sfbase['income'] >= 150000) & (sfbase['age'] >= 45) & 
           ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

52252

In [20]:
sfbase.loc[(sfbase['income'] >= 150000) & ((sfbase['age'] >= 25)|(sfbase['age'] < 45)) & 
           ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

98264

In [21]:
sfbase.loc[(sfbase['income'] >= 150000) & (sfbase['age'] < 25)& 
           ((sfbase['pemploy'] == 1)|(sfbase['pemploy'] == 2)|(sfbase['pemploy'].isna()))]['IDMerged'].nunique()

5800

In [17]:
%%time
s3 = boto3.client("s3")
key = "pilates-outputs/sfbay-baseline-2022124/inexus/sfbay_baseline_default-1.0_2020__20221224.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key) 
sfbase_rh = pd.read_csv(obj['Body'], compression = 'gzip', index_col='Unnamed: 0')



CPU times: total: 1min 43s
Wall time: 1min 43s


In [21]:
col_tele = sfbase.columns.tolist()

In [22]:
col_norm = sfbase_rh.columns.tolist()

In [23]:
if col_tele == col_norm:
    print ("equal")
else:
    print ("different")

equal


In [14]:
col_tele = set(sfbase.columns)
col_norm = set(sfbase_rh.columns)

diff_cols1 = col_tele - col_norm
diff_cols2 = col_norm - col_tele

In [16]:
if len(diff_cols1) > 0:
    print("Columns in dataset 1 but not in dataset 2:", diff_cols1)

Columns in dataset 1 but not in dataset 2: {'num_social_tours', 'num_non_escort_tours', 'origin_y', 'recent_mover', 'hh_work_auto_savings_ratio', 'tour_type', 'num_hh_joint_tours', 'emissionGasoline', 'has_preschool_kid', 'has_preschool_kid_at_home', 'is_worker', 'income_segment', 'median_value_of_time', 'joint_tour_frequency', 'transit_cable_car', 'trip_num', 'cdap_activity', 'travel_active', 'num_non_workers', 'serialno', 'num_travel_active', 'student_is_employed', 'num_maint_tours', 'tdd', 'has_full_time', 'female', 'transit_rail', 'start', 'chunk_id', 'gt55', 'age_16_to_19', 'household_id', 'end', 'trip_count', 'non_mandatory_tour_frequency', 'workplace_taz', 'workplace_in_cbd', 'purpose', 'tour_id', 'primary_purpose_y', 'hh_seniors', 'transit_bus', 'duration', 'work_and_school_and_worker', 'has_school_kid_at_home', 'emissionBiodiesel', 'PNUM', 'hispanic_head', 'num_escort_tours', 'person_id', 'num_children', 'hours', 'num_children_5_to_15', 'lcm_county_id', 'num_under16_not_at_sch

####  Ridehail pooled riders match

In [22]:
sfbase.shape

(7435159, 83)

In [248]:
pooled.shape

(4545, 270)

In [249]:
sfbase[(sfbase['mode_choice_actual_BEAM'] == "ride_hail_pooled")|(sfbase['mode_choice_actual_BEAM'] == "ride_hail")].shape

(32412, 269)

In [275]:
pooled = sfbase[sfbase['mode_choice_actual_BEAM'] == "ride_hail_pooled"]

In [276]:
pooled['group'] = pooled['vehicleIds'].str.extract(r'(rideHail.*)')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pooled['group'] = pooled['vehicleIds'].str.extract(r'(rideHail.*)')


In [277]:
pooled['group'] = pooled['group'].str.split(',').str[0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pooled['group'] = pooled['group'].str.split(',').str[0]


In [278]:
#pooled['end_time'] = pd.to_datetime(pooled['actStartTime'], unit='s')
#pooled['start_time'] = pd.to_datetime(pooled['actEndTime'], unit='s')

In [279]:
#pooled['start_time'] = pooled['start_time'].dt.time

In [280]:
#pooled['end_time'] = pooled['end_time'].dt.time

In [281]:
pooled = pooled.sort_values(['actEndTime', 'actStartTime'], ascending=[True, False])

In [282]:
%%time
mutual_pooled = pd.pivot_table(
   pooled,
   index=['group'],
   aggfunc={'IDMerged': lambda x: ', '.join(list(x.dropna().astype(str))),
            'tripIndex': lambda x: ', '.join(list(x.dropna().astype(str))),
            'actPurpose': lambda x: ', '.join(list(x.dropna().astype(str))),
            'mode_choice_planned_BEAM': lambda x: ', '.join(list(x.dropna().astype(str))),
            'mode_choice_actual_BEAM': lambda x: ', '.join(list(x.dropna().astype(str))),
            'actStartTime': lambda x: ', '.join(list(x.dropna().astype(str))),
            'actEndTime': lambda x: ', '.join(list(x.dropna().astype(str)))}).reset_index() 

CPU times: total: 1.19 s
Wall time: 1.16 s


In [283]:
mutual_pooled.head()

Unnamed: 0,group,IDMerged,actEndTime,actPurpose,actStartTime,mode_choice_actual_BEAM,mode_choice_planned_BEAM,tripIndex
0,rideHailVehicle-1007433@Uber,"923408, 600180, 486348","60713.0, 60900.0, 63671.0","othmaint_to_Home, shopping_to_Home, othmaint_t...","61754.0, 62845.0, 64111.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","302878053, 196859309, 159522381"
1,rideHailVehicle-1013349@Lyft,"1599568, 1599568, 1620634","21024.0, 22324.0, 24768.0","Home_to_othdiscr, othdiscr_to_Home, Home_to_work","22174.0, 23633.0, 26318.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","524658505, 524658509, 531568265"
2,rideHailVehicle-1017430@Uber,1215476,29379.0,Home_to_shopping,32282.0,ride_hail_pooled,ride_hail_pooled,398678905
3,rideHailVehicle-1023032@Uber,"1014815, 5948265, 4659245, 5932906","18090.0, 25790.0, 33354.0, 34628.0","Home_to_escort, Home_to_work, Home_to_othdiscr...","18541.0, 26526.0, 34509.0, 36342.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","332859393, 1951031233, 1528232561, 1945993393"
4,rideHailVehicle-1029101@Lyft,"3214829, 3131730","38905.0, 38955.0","Home_to_othmaint, Home_to_othmaint","40190.0, 39706.0","ride_hail_pooled, ride_hail_pooled","ride_hail_pooled, ride_hail_pooled","1054464137, 1027207665"


In [225]:
def get_shared_persons(row):
    persons = row['IDMerged'].split(', ')
    start_times = [float(x) for x in row['actEndTime'].split(', ')]
    end_times = [float(x) for x in row['actStartTime'].split(', ')]
    shared_persons = []
    for i in range(len(persons)):
        for j in range(i+1, len(persons)):
            if start_times[i] <= end_times[j] and start_times[j] <= end_times[i]:
                shared_persons.append((persons[i], persons[j]))
    return shared_persons

In [284]:
def get_shared_trips(row):
    persons = row['tripIndex'].split(', ')
    start_times = [float(x) for x in row['actEndTime'].split(', ')]
    end_times = [float(x) for x in row['actStartTime'].split(', ')]
    shared_pairs = set()
    for i in range(len(persons)):
        for j in range(i+1, len(persons)):
            if start_times[i] <= end_times[j] and start_times[j] <= end_times[i]:
                shared_pairs.add(frozenset([persons[i], persons[j]]))
    shared_persons = set()
    for pair in shared_pairs:
        group = set(pair)
        for i in range(len(persons)):
            if persons[i] in group:
                continue
            overlaps = True
            for person in group:
                if start_times[i] > end_times[persons.index(person)] or end_times[i] < start_times[persons.index(person)]:
                    overlaps = False
                    break
            if overlaps:
                group.add(persons[i])
        shared_persons.add(tuple(sorted(group)))
    return shared_persons

In [285]:
mutual_pooled['shared_trip_pairs'] = mutual_pooled.apply(get_shared_trips, axis=1)

In [286]:
def count_shared_pairs(x, pairs):
    return len([p for p in pairs if p in x])

In [287]:
all_pairs = set(mutual_pooled['shared_trip_pairs'].explode())
mutual_pooled['shared_trip_count'] = mutual_pooled['shared_trip_pairs'].apply(lambda x: count_shared_pairs(all_pairs, x))

In [288]:
mutual_pooled.tail()

Unnamed: 0,group,IDMerged,actEndTime,actPurpose,actStartTime,mode_choice_actual_BEAM,mode_choice_planned_BEAM,tripIndex,shared_trip_pairs,shared_trip_count
1893,rideHailVehicle-972250@Uber,"3910620, 5845419, 3604955, 3051058","48254.0, 48326.0, 52381.0, 54468.0","othmaint_to_Home, Home_to_othmaint, shopping_t...","49545.0, 49136.0, 54594.0, 55095.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","1282683589, 1917297657, 1182425509, 1000747249","{(1282683589, 1917297657), (1000747249, 118242...",2
1894,rideHailVehicle-978015@Uber,"623979, 549144, 926374, 4883843, 5143692","38209.0, 38991.0, 41880.0, 45460.0, 45460.0","shopping_to_Home, Home_to_eatout, Home_to_othm...","39069.0, 40429.0, 43510.0, 46525.0, 46335.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","204665342, 180119497, 303850897, 1601900705, 1...","{(180119497, 204665342), (1601900705, 16871312...",2
1895,rideHailVehicle-980128@Lyft,"541256, 612807, 265892","66571.0, 69897.0, 73011.0","Home_to_eatout, escort_to_Home, Home_to_othdiscr","67308.0, 70414.0, 73938.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","177532201, 201000773, 87212865",{},0
1896,rideHailVehicle-983163@Uber,"1926570, 2159820","67485.0, 75513.0","Home_to_shopping, shopping_to_Home","70908.0, 76962.0","ride_hail_pooled, ride_hail_pooled","ride_hail_pooled, ride_hail_pooled","631915113, 708425053",{},0
1897,rideHailVehicle-986278@Uber,"1085804, 1100524","64706.0, 65037.0","Home_to_othmaint, Home_to_shopping","65321.0, 65792.0","ride_hail_pooled, ride_hail_pooled","ride_hail_pooled, ride_hail_pooled","356143937, 360972137","{(356143937, 360972137)}",1


In [289]:
mutual_pooled[mutual_pooled['shared_trip_count']>0].shape

(739, 10)

In [290]:
mutual_pooled['shared_trip_count'].sum()

961

In [265]:
mutual_pooled.shape

(1898, 10)

In [266]:
pooled.shape

(4545, 270)

In [267]:
mutual_pooled.head()

Unnamed: 0,group,IDMerged,actEndTime,actPurpose,actStartTime,mode_choice_actual_BEAM,mode_choice_planned_BEAM,tripIndex,shared_person_pairs,shared_person_count
0,rideHailVehicle-1007433@Uber,"923408, 600180, 486348","60713.0, 60900.0, 63671.0","othmaint_to_Home, shopping_to_Home, othmaint_t...","61754.0, 62845.0, 64111.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","302878053, 196859309, 159522381","{(196859309, 302878053)}",1
1,rideHailVehicle-1013349@Lyft,"1599568, 1599568, 1620634","21024.0, 22324.0, 24768.0","Home_to_othdiscr, othdiscr_to_Home, Home_to_work","22174.0, 23633.0, 26318.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","524658505, 524658509, 531568265",{},0
2,rideHailVehicle-1017430@Uber,1215476,29379.0,Home_to_shopping,32282.0,ride_hail_pooled,ride_hail_pooled,398678905,{},0
3,rideHailVehicle-1023032@Uber,"1014815, 5948265, 4659245, 5932906","18090.0, 25790.0, 33354.0, 34628.0","Home_to_escort, Home_to_work, Home_to_othdiscr...","18541.0, 26526.0, 34509.0, 36342.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","332859393, 1951031233, 1528232561, 1945993393",{},0
4,rideHailVehicle-1029101@Lyft,"3214829, 3131730","38905.0, 38955.0","Home_to_othmaint, Home_to_othmaint","40190.0, 39706.0","ride_hail_pooled, ride_hail_pooled","ride_hail_pooled, ride_hail_pooled","1054464137, 1027207665","{(1027207665, 1054464137)}",1


In [270]:
mutual_pooled[mutual_pooled['group']=='rideHailVehicle-1934875@Uber']

Unnamed: 0,group,IDMerged,actEndTime,actPurpose,actStartTime,mode_choice_actual_BEAM,mode_choice_planned_BEAM,tripIndex,shared_person_pairs,shared_person_count
261,rideHailVehicle-1934875@Uber,"1255664, 1244065, 141574, 748614, 141574","19447.0, 19479.0, 22190.0, 23112.0, 23264.0","Home_to_social, Home_to_work, Home_to_othdiscr...","20788.0, 22241.0, 23114.0, 23853.0, 23539.0","ride_hail_pooled, ride_hail_pooled, ride_hail_...","ride_hail_pooled, ride_hail_pooled, ride_hail_...","411858081, 408053633, 46436393, 245545705, 464...","{(245545705, 46436394), (408053633, 411858081)...",4


In [291]:
mutual_pooled.to_csv('C:/Shared-Work/Data/CleanData/rider_count.csv', index = False)

In [55]:
#mutual_pooled = mutual_pooled.drop(['comma_count'], axis=1)

In [56]:
#mutual_pooled['comma_count'] = mutual_pooled['IDMerged'].str.count(',')

In [57]:
#df2 = pd.DataFrame(mutual_pooled['IDMerged'].str.split(',').values.tolist())
#df2.columns += 1
#df2 = df1.add_prefix('person_')

In [591]:
#mutual_pooled = mutual_pooled.join(mutual_pooled['actEndTime'].str.split(',', expand=True).add_prefix('endT_').fillna(np.nan))

In [592]:
#mutual_pooled = mutual_pooled.join(mutual_pooled['actStartTime'].str.split(',', expand=True).add_prefix('startT_').fillna(np.nan))

In [593]:
#mutual_pooled = mutual_pooled.join(mutual_pooled['IDMerged'].str.split(',', expand=True).add_prefix('person_').fillna(np.nan))

In [594]:
# convert the startT_1, startT_0, and endT_0 columns to numeric, and handle NaN values
#endT_columns = [col for col in mutual_pooled.columns if col.startswith("endT_")]

#for col in endT_columns:
    #mutual_pooled[col] = pd.to_numeric(mutual_pooled[col], errors='coerce')

In [595]:
#startT_columns = [col for col in mutual_pooled.columns if col.startswith("startT_")]

#for col in startT_columns:
    #mutual_pooled[col] = pd.to_numeric(mutual_pooled[col], errors='coerce')

In [597]:
# create an empty list to store mutual riders
#mutual_riders = []

# loop through each row of the dataframe
#for i, row in mutual_pooled.iterrows():
    # create a list to store the mutual riders for this row
    #this_row_riders = [] 
    # loop through each pair of persons
    #for j in range(11):
        #for k in range(j+1, 11):
            # check if the time intervals overlap
            #if (row[f'endT_{k}'] <= row[f'startT_{j}'] and row[f'startT_{j}'] >= row[f'endT_{k}']) or (row[f'endT_{k}'] <= row[f'startT_{j}'] and row[f'startT_{k}'] >= row[f'endT_{j}']):
                # if the intervals overlap, add both persons to the mutual_riders list
                #this_row_riders.extend([row[f'person_{j}'], row[f'person_{k}']])
    
    # remove any duplicates from the list of mutual riders and join the remaining rider ids into a single string
    #this_row_mutual_riders = ', '.join(set(this_row_riders))
    
    # add the mutual riders string as a new column to the data frame
    #mutual_pooled.loc[i, 'mutual_riders'] = this_row_mutual_riders

In [598]:
#mutual_pooled['rider_count'] = mutual_pooled['mutual_riders'].str.count(',')+1

In [554]:
# Select columns that start with 'startT_' or 'endT_'
#cols_to_drop = mutual_pooled.filter(regex='^startT_|^endT_|^person_').columns

# Drop the selected columns from the DataFrame
#mutual_pooled = mutual_pooled.drop(columns=cols_to_drop)

In [555]:
mutual_pooled.to_csv('C:/Shared-Work/Data/CleanData/rider_count.csv', index = False)

#### rest of the stacked code

In [15]:
# Adding scenario info
sfbase['scenario'] = "baseline"
sfbase['scenario'] = sfbase['scenario'].astype("category")
sfbase['lever'] = "default"
sfbase['lever'] = sfbase['lever'].astype("category")
sfbase['year'] = 2020
sfbase['lever_position_fltsz'] = 1
sfbase['lever_position_price'] = 1
sfbase['lever_n_fleets'] = 2
sfbase['fleetsize_uber'] = 0.00287777
sfbase['fleetsize_lyft'] = 0.00184043
sfbase['fleetsize_cruise'] = 0
sfbase['fleetsize_flywheel'] = 0
sfbase['fleetsize_waymo'] = 0

In [10]:
sfbase[sfbase['person_sex'].isna()].shape

(21407, 279)

In [11]:
#baseline2018 = "https://beam-core-act.s3.amazonaws.com/beam-outputs/pilates-outputs/sfbay-baseline-20221220/inexus"
#sfbase = pd.read_csv(baseline2018 + 'sfbay_baseline_default-1.0_2012__20221219.csv.gz')

In [12]:
# Showing the entire number in dataframe
pd.set_option('float_format', '{:f}'.format)

In [None]:
sfbase['incomeInThousands'] = sfbase['income']/1000

In [None]:
person_income = pd.pivot_table(
   small_stacked_rh_price,
   index=['IDMerged'],
   aggfunc={'incomeInThousands': lambda x: ', '.join(set(x.dropna().astype(str)))}).reset_index() 

In [8]:
# Add incomeInThousands twenty_one_ranks
conditions  = [(person_income['incomeInThousands'] >= twenty_one_ranks[0]) & (person_income['incomeInThousands'] < twenty_one_ranks[1]), 
               (person_income['incomeInThousands'] >= twenty_one_ranks[1]) & (person_income['incomeInThousands'] < twenty_one_ranks[2]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[2]) & (person_income['incomeInThousands'] < twenty_one_ranks[3]),
               (person_income['incomeInThousands'] >= twenty_one_ranks[3]) & (person_income['incomeInThousands'] < twenty_one_ranks[4]), 
               (person_income['incomeInThousands'] >=  twenty_one_ranks[4]) & (person_income['incomeInThousands'] < twenty_one_ranks[5]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[5]) & (person_income['incomeInThousands'] < twenty_one_ranks[6]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[6]) & (person_income['incomeInThousands'] < twenty_one_ranks[7]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[7]) & (person_income['incomeInThousands'] < twenty_one_ranks[8]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[8]) & (person_income['incomeInThousands'] < twenty_one_ranks[9]),
               (person_income['incomeInThousands'] >=  twenty_one_ranks[9]) & (person_income['incomeInThousands'] <= twenty_one_ranks[10]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[10]) & (person_income['incomeInThousands'] <= twenty_one_ranks[11]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[11]) & (person_income['incomeInThousands'] <= twenty_one_ranks[12]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[12]) & (person_income['incomeInThousands'] <= twenty_one_ranks[13]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[13]) & (person_income['incomeInThousands'] <= twenty_one_ranks[14]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[14]) & (person_income['incomeInThousands'] <= twenty_one_ranks[15]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[15]) & (person_income['incomeInThousands'] <= twenty_one_ranks[16]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[16]) & (person_income['incomeInThousands'] <= twenty_one_ranks[17]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[17]) & (person_income['incomeInThousands'] <= twenty_one_ranks[18]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[18]) & (person_income['incomeInThousands'] <= twenty_one_ranks[19]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[19]) & (person_income['incomeInThousands'] <= twenty_one_ranks[20]),
              (person_income['incomeInThousands'] >=  twenty_one_ranks[20]) & (person_income['incomeInThousands'] <= twenty_one_ranks[21])]


choices = [0, 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]

In [9]:
person_income['incRank'] = np.select(conditions, choices, default=None)

#### Ridehail Fleet Size Scenarios

In [13]:
%%time
key = "pilates-outputs/sfbay_5fleets_100price_100fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-100_2020__20230209.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_1 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 1min 52s
Wall time: 1min 52s


In [16]:
sf_rh_5flt_1[sf_rh_5flt_1['trip_mode_AS_trips'].isna()].shape

(21150, 269)

In [21]:
# Adding scenario info
sf_rh_5flt_1['scenario'] = "ridehail"
sf_rh_5flt_1['scenario'] = sf_rh_5flt_1['scenario'].astype("category")
sf_rh_5flt_1['lever'] = "fleet_size_price_comb"
sf_rh_5flt_1['lever'] = sf_rh_5flt_1['lever'].astype("category")
sf_rh_5flt_1['year'] = 2020
sf_rh_5flt_1['lever_position_fltsz'] = 1
sf_rh_5flt_1['lever_position_price'] = 1
sf_rh_5flt_1['lever_n_fleets'] = 5
sf_rh_5flt_1['fleetsize_uber'] = 0.00175524
sf_rh_5flt_1['fleetsize_lyft'] = 0.00112253
sf_rh_5flt_1['fleetsize_cruise'] = 0.00092022
sf_rh_5flt_1['fleetsize_flywheel'] = 0.00061348
sf_rh_5flt_1['fleetsize_waymo'] = 0.00030674

In [18]:
%%time
key = "pilates-outputs/sfbay_5fleets_100price_164fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-164_2020__20230209.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_1p64 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 5s
Wall time: 2min 5s


In [22]:
# Adding scenario info
sf_rh_5flt_1p64['scenario'] = "ridehail"
sf_rh_5flt_1p64['scenario'] = sf_rh_5flt_1p64['scenario'].astype("category")
sf_rh_5flt_1p64['lever'] = "fleet_size_price_comb"
sf_rh_5flt_1p64['lever'] = sf_rh_5flt_1p64['lever'].astype("category")
sf_rh_5flt_1p64['year'] = 2020
sf_rh_5flt_1p64['lever_position_fltsz'] = 1.64
sf_rh_5flt_1p64['lever_position_price'] = 1
sf_rh_5flt_1p64['lever_n_fleets'] = 5
sf_rh_5flt_1p64['fleetsize_uber'] = 0.00287777
sf_rh_5flt_1p64['fleetsize_lyft'] = 0.00184043
sf_rh_5flt_1p64['fleetsize_cruise'] = 0.00150873
sf_rh_5flt_1p64['fleetsize_flywheel'] = 0.00100582
sf_rh_5flt_1p64['fleetsize_waymo'] = 0.00050291

In [20]:
%%time
key = "pilates-outputs/sfbay_5fleets_100price_200fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-200_2020__20230209.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_2 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 13s
Wall time: 2min 14s


In [23]:
# Adding scenario info
sf_rh_5flt_2['scenario'] = "ridehail"
sf_rh_5flt_2['scenario'] = sf_rh_5flt_2['scenario'].astype("category")
sf_rh_5flt_2['lever'] = "fleet_size_price_comb"
sf_rh_5flt_2['lever'] = sf_rh_5flt_2['lever'].astype("category")
sf_rh_5flt_2['year'] = 2020
sf_rh_5flt_2['lever_position_fltsz'] = 2
sf_rh_5flt_2['lever_position_price'] = 1
sf_rh_5flt_2['lever_n_fleets'] = 5
sf_rh_5flt_2['fleetsize_uber'] = 0.00351047
sf_rh_5flt_2['fleetsize_lyft'] = 0.00224507
sf_rh_5flt_2['fleetsize_cruise'] = 0.00184043
sf_rh_5flt_2['fleetsize_flywheel'] = 0.00122696
sf_rh_5flt_2['fleetsize_waymo'] = 0.00061348

In [13]:
%%time
key = "pilates-outputs/.............../inexus/..............."  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_4 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 7s
Wall time: 2min 8s


In [16]:
# Adding scenario info
sf_rh_5flt_4['scenario'] = "ridehail"
sf_rh_5flt_4['scenario'] = sf_rh_5flt_4['scenario'].astype("category")
sf_rh_5flt_4['lever'] = "fleet_size_price_comb"
sf_rh_5flt_4['lever'] = sf_rh_5flt_4['lever'].astype("category")
sf_rh_5flt_4['year'] = 2020
sf_rh_5flt_4['lever_position_fltsz'] = 4
sf_rh_5flt_4['lever_position_price'] = 1
sf_rh_5flt_4['lever_n_fleets'] = 5
sf_rh_5flt_4['fleetsize_uber'] = 0.00702094
sf_rh_5flt_4['fleetsize_lyft'] = 0.00449014
sf_rh_5flt_4['fleetsize_cruise'] = 0.00368087
sf_rh_5flt_4['fleetsize_flywheel'] = 0.00245391
sf_rh_5flt_4['fleetsize_waymo'] = 0.00122696

In [24]:
%%time
key = "pilates-outputs/sfbay_5fleets_100price_1000fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-1000_2020__20230209.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_10 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 21s
Wall time: 2min 21s


In [25]:
# Adding scenario info
sf_rh_5flt_10['scenario'] = "ridehail"
sf_rh_5flt_10['scenario'] = sf_rh_5flt_10['scenario'].astype("category")
sf_rh_5flt_10['lever'] = "fleet_size_price_comb"
sf_rh_5flt_10['lever'] = sf_rh_5flt_10['lever'].astype("category")
sf_rh_5flt_10['year'] = 2020
sf_rh_5flt_10['lever_position_fltsz'] = 10
sf_rh_5flt_10['lever_position_price'] = 1
sf_rh_5flt_10['lever_n_fleets'] = 5
sf_rh_5flt_10['fleetsize_uber'] = 0.01755235
sf_rh_5flt_10['fleetsize_lyft'] = 0.01122534
sf_rh_5flt_10['fleetsize_cruise'] = 0.00920217
sf_rh_5flt_10['fleetsize_flywheel'] = 0.00613478
sf_rh_5flt_10['fleetsize_waymo'] = 0.00306739

In [26]:
frames = [sfbase, sf_rh_5flt_1, sf_rh_5flt_1p64, sf_rh_5flt_2,
          sf_rh_5flt_10]
sf_stacked = pd.concat(frames)

In [27]:
sf_stacked = sf_stacked.drop(['has_school_kid','has_preschool_kid','has_retiree','has_non_worker','adult','hispanic','relate',
                             'pstudent','pemploy','ptype','p_hispanic','age_16_p','age_16_to_19',
                             'person_age','MAR','hours','hispanic.1','student',
                             'worker','stop_frequency','composition','tdd','number_of_participants','tour_type_count',
                             'tour_type_num','tour_num','tour_count', 'outbound', 'trip_num','num_under16_not_at_school',
                             'family','non_family','num_young_adults','num_college_age','num_children_16_to_17',
                              'num_children_5_to_15','num_young_children'], axis=1)

In [28]:
sf_stacked = sf_stacked.drop(['vehicleIds_estimate','parent_tour_id','atwork_subtour_frequency','household_id_y',
                              'male','female', 'has_driving_kid','has_full_time','has_part_time',
                              'has_university','student_is_employed', 'nonstudent_to_school', 'is_student','is_gradeschool',
                              'is_gradeschool','is_highschool','is_university','school_segment','is_worker',  
                              'distance_to_school','roundtrip_auto_time_to_school',
                              'distance_to_work','workplace_in_cbd', 'work_taz_area_type', 'hh_race_of_head',
                             'roundtrip_auto_time_to_work','work_auto_savings_ratio','cdap_activity', 'travel_active', 
                              'under16_not_at_school','has_preschool_kid_at_home', 'gt2','hispanic_head',
                             'has_school_kid_at_home','work_and_school_and_worker', 'age_of_head', 'race_of_head',
                              'work_and_school_and_student', 'VEHICL','hh_children','hh_age_of_head','num_workers',
                              'gt55','seniors', 'recent_mover', 'hh_workers', 'hispanic_status_of_head', 'hh_seniors', 
                             'hh_type', 'HHT', 'sample_rate', 'chunk_id', 'income_segment', 'num_non_workers', 
                             'num_drivers', 'num_adults', 'num_children', 'hh_work_auto_savings_ratio', 'num_travel_active', 
                             'num_travel_active_adults','num_travel_active_preschoolers', 'num_travel_active_children', 
                              'num_travel_active_non_preschoolers', 'participates_in_jtf_model', 'joint_tour_frequency',
                              'num_hh_joint_tours'], axis=1)

In [29]:
sf_stacked = sf_stacked.drop(['serialno'], axis=1)

#### Stacked File

In [30]:
#stacked_tr_fr_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Stacked/"
#sf_stacked_tr_fr = pd.read_csv(stacked_tr_fr_path + 'sf_2018_stacked_tr_fr.csv')

In [31]:
sf_stacked = sf_stacked.sort_values(by=['IDMerged', 'tripIndex', 'lever_position_fltsz']).reset_index(drop=True)

In [32]:
# shift column 'person' to first position
#first_column = sf_stacked.pop('scenario')
#second_column = sf_stacked.pop('lever')
#third_column = sf_stacked.pop('lever_position')
#fourth_column = sf_stacked.pop('logsum_trip_mode_AS_trips')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(2, 'scenario', first_column)
#sf_stacked.insert(3, 'lever', second_column)
#sf_stacked.insert(4, 'lever_position', third_column)
#sf_stacked.insert(5, 'logsum_trip_mode_AS_trips', fourth_column)

In [36]:
# Adding Price lever position
#conditions  = [(sf_stacked['lever_position'] == 0.125),
#               (sf_stacked['lever_position'] == 0.25),
#               (sf_stacked['lever_position'] == 0.5),
#               (sf_stacked['lever_position'] == 1),
#               (sf_stacked['lever_position'] == 2),
#               (sf_stacked['lever_position'] == 4),
#               (sf_stacked['lever_position'] == 10),
#               (sf_stacked['lever_position'] == 27.5)]
#
#choices = [0.125, 0.25, 0.5, 1, 2, 4, 10, 27.5]

In [37]:
#sf_stacked['lever_position_fleetsize'] = np.select(conditions, choices, default=np.nan)

In [38]:
#sf_stacked['lever_position_price'] = 1

In [39]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['logsum_trip_mode_AS_trips'].apply(lambda x: x.diff())

In [40]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr['logsum_trip_mode_AS_trips'] - sf_stacked_tr_fr.loc[sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['lever_position'].transform('idxmin'), 'logsum_trip_mode_AS_trips'].values

In [37]:
%%time
sf_stacked['logsum_relative_to_baseline'] = sf_stacked['logsum_trip_Potential_INEXUS'] - (sf_stacked['logsum_trip_Potential_INEXUS'].where((sf_stacked['lever_position_fltsz'].eq(1.0)) & (sf_stacked['lever_n_fleets'] == 2))
                                                                                          .groupby([sf_stacked['IDMerged'] , sf_stacked['tripIndex']])
                                                                                          .transform('first')
                                                                                          .convert_dtypes())

CPU times: total: 3.72 s
Wall time: 3.5 s


In [38]:
sf_stacked['door_to_door_time_relative_to_baseline'] = sf_stacked['duration_door_to_door'] - (sf_stacked['duration_door_to_door'].where((sf_stacked['lever_position_fltsz'].eq(1.0)) & (sf_stacked['lever_n_fleets'] == 2))
                                                                                          .groupby([sf_stacked['IDMerged'] , sf_stacked['tripIndex']])
                                                                                          .transform('first')
                                                                                          .convert_dtypes())

In [39]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['sf_detached', 'household_id_x','destination_logsum_y','logsum_tours_mode_AS_tours',
                              'primary_purpose_y','PNUM','work_auto_savings','mandatory_tour_frequency'], axis=1)

In [43]:
sf_stacked[sf_stacked['replanning_status'] > 0].shape

(119291, 171)

In [44]:
# Add the mandatory category column
mandatory = ['work' , 'univ', 'school']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType.isin(mandatory)), 'from_M_to_M' , None)

In [45]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType == 'Home') & (sf_stacked.actStartType.isin(mandatory)), 'from_H_to_M' , sf_stacked['mandatoryCat'])

In [46]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType == "Home"), 'from_M_to_H' , sf_stacked['mandatoryCat'])

In [47]:
non_mandatory = ['othmaint' , 'othdiscr', 'escort', 'eatout', 'social', 'shopping', 'atwork']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(non_mandatory)) & (sf_stacked.actStartType.isin(non_mandatory)), 'from_N_to_N' , sf_stacked['mandatoryCat'])

In [48]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['num_work_tours','num_joint_tours', 'non_mandatory_tour_frequency',
                                          'num_non_mand','num_escort_tours','num_eatout_tours','num_shop_tours',
                                          'num_maint_tours','num_discr_tours','num_social_tours','num_non_escort_tours'], axis=1)

In [53]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].head(10000).to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

In [54]:
#%%time
#sf_stacked_tr_fr.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_tr_fr_v3.csv', index=False)

In [53]:
sf_stacked[sf_stacked['SHARED3FREE'].isna()].shape

(0, 161)

In [51]:
sf_stacked = sf_stacked[sf_stacked['person_sex'].notna()]

In [67]:
# Mapping BEAM and ASIM modes
conditions  = [(sf_stacked['mode_choice_actual_BEAM'] == 'walk')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike')&(sf_stacked['trip_mode_AS_trips'] == 'BIKE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
       
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),   
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&((sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               
               ((sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')|(sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail'))
               &(~((sf_stacked['trip_mode_AS_trips'] == 'TAXI')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|
                (sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'))),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(~((sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK'))),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike_transit'),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|
               (sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))]
 
                                                                      
                                                                                                                 
                                                                      
choices = [sf_stacked['WALK'], sf_stacked['BIKE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['SHARED2PAY'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['WALK_LRF'], 
           sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'], sf_stacked['WALK_HVY'], sf_stacked['WALK_COM'],
           sf_stacked['TNC_SINGLE'], 
           sf_stacked['TNC_SHARED'], sf_stacked['TAXI'],
           sf_stacked['DRIVE_COM'], sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'],
           sf_stacked['DRIVE_HVY'], sf_stacked['TNC_SINGLE'], sf_stacked['TNC_SHARED'], sf_stacked['TAXI'],
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),
          sf_stacked[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['BIKE','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1)]

In [68]:
sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

In [None]:
#sf_stacked = sf_stacked.drop(['Realized_INEXUS'], axis=1)

In [69]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'car')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')), 
                                           sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1), sf_stacked['Realized_INEXUS'])

In [70]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'walk')|
                                         (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')), 
                                          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),sf_stacked['Realized_INEXUS'])

In [71]:
sf_stacked['Realized_INEXUS'] = np.where(sf_stacked['Realized_INEXUS'].isna()&(sf_stacked['mode_choice_actual_BEAM'] == 'bike'), 
                                         sf_stacked[['BIKE','WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),sf_stacked['Realized_INEXUS'])

In [72]:
sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

(0, 162)

In [74]:
sf_stacked[sf_stacked['Realized_INEXUS']<-1000].shape

(8658, 162)

In [None]:
#sf_stacked_tr_fr['Realized_INEXUS'] = np.where((sf_stacked_tr_fr['Realized_INEXUS'].isna())&
                                               #(sf_stacked_tr_fr['mode_choice_actual_BEAM'] == 'walk_transit')&
                                               #(sf_stacked_tr_fr['vehicleIds'].str.contains('(caltrain)')), 
                                               #sf_stacked_tr_fr['WALK_COM'], sf_stacked_tr_fr['Realized_INEXUS'])

In [75]:
sf_stacked = sf_stacked[sf_stacked['Realized_INEXUS'].notna()]

In [76]:
%%time
sf_stacked['realized_relative_to_baseline'] = sf_stacked['Realized_INEXUS'] - (sf_stacked['Realized_INEXUS'].where((sf_stacked['lever_position_fltsz'].eq(1.0)) & (sf_stacked['lever_n_fleets'] == 2))
                                                                                          .groupby([sf_stacked['IDMerged'] , sf_stacked['tripIndex']])
                                                                                          .transform('first')
                                                                                          .convert_dtypes())

CPU times: total: 1min 29s
Wall time: 1min 30s


In [None]:
#sf_stacked_tr_fr[sf_stacked_tr_fr['Realized_INEXUS'].isna()].to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

#### Social INEXUS

In [None]:
# add a lower and upper range for the bins in pd.cut
#quartiles = [0] + quartiles + [float('inf')]

In [None]:
# add a quartiles column to groups, using pd.cut
#sf_stacked['income_quartiles'] = pd.cut(sf_stacked['income'], bins=quartiles, labels=['1stQ', '2ndQ', '3rdQ', '4thQ'])

In [79]:
sf_stacked['alpha'] = 1

In [80]:
sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal']*185 # 51 or 102

In [81]:
sf_stacked['Social_INEXUS'] = (sf_stacked['Realized_INEXUS']/sf_stacked['alpha']) - sf_stacked['socialCarbonCost']

In [82]:
sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS']/sf_stacked['alpha']

In [83]:
sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS']/sf_stacked['alpha']

#### The following chunks are useful but should be checked in the next runs (they resulted in exact same realized and potential inexus in dollar

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Social_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(6, 'Social_INEXUS', first_column)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Potential_INEXUS_in_dollar')
#second_column = sf_stacked.pop('Realized_INEXUS_in_dollar')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(7, 'Potential_INEXUS_in_dollar', first_column)
#sf_stacked.insert(8, 'Realized_INEXUS_in_dollar', first_column)

In [103]:
sf_stacked = sf_stacked.drop(['Realized_INEXUS_in_dollar','Potential_INEXUS_in_dollar','alpha','hh_value_of_time','lcm_county_id',
                              'tenure_mover','hh_income','median_value_of_time','hhsize','hh_size',
                              'tenure','num_mand','free_parking_at_work',
                             'workplace_location_logsum','workplace_taz','school_taz','TAZ_x',
                             'work_at_home','sex','race_id','edu','earning','duration',
                             'tour_type','depart','destination_x', 'origin_x','purpose',
                             'trip_count','primary_purpose_x'], axis=1)

In [106]:
sf_stacked = sf_stacked.drop(['DRIVEALONEPAY','DRIVEALONEFREE',
                              'SHARED2FREE','SHARED2PAY',
                             'SHARED3FREE','SHARED3PAY','WALK','BIKE',
                             'WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC',
                             'DRIVE_LRF','DRIVE_EXP','DRIVE_HVY', 'DRIVE_COM','TAXI',
                             'TNC_SINGLE','TNC_SHARED'], axis=1)

In [107]:
%%time
sf_stacked.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2020_stacked_rh_5fleet_price_2_9_23.csv', index=False)

CPU times: total: 16min 53s
Wall time: 21min 49s


In [88]:
%%time
key = "pilates-outputs/sfbay_5fleets_100price_164fleet_20230209/beam/year-2020-iteration-3/ITERS/it.0/0.events.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_5flt_1p64_events = pd.read_csv(obj['Body'], compression = 'gzip')



CPU times: total: 10min 26s
Wall time: 13min 46s


In [99]:
sf_rh_5flt_1p64_events['mode'].unique()

array([nan, 'bus', 'tram', 'subway', 'rail', 'car', 'hov3_teleportation',
       'car_hov2', 'hov2_teleportation', 'walk', 'car_hov3', 'bike',
       'walk_transit', 'ride_hail', 'ride_hail_pooled', 'drive_transit',
       'cable_car', 'bike_transit', 'ferry'], dtype=object)

In [101]:
sf_rh_5flt_1p64_events[sf_rh_5flt_1p64_events['mode']=='ride_hail'].head()

Unnamed: 0,person,link,legMode,tripId,time,type,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,cost,driver,vehicle,primaryFuelLevel,secondaryFuelLevel,price,mode,currentTourMode,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,vehicleType,shiftStatus,parkingZoneId,fuel,duration,actType,links,numPassengers,primaryFuel,riders,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,endY,endX,startY,startX,capacity,arrivalTime,departureTime,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,facility,incentive,tollCost,netCost,departTime,requireWheelchair,reason,score
895282,3510755.0,,,1151527865.0,18000.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,141206.0,False,5609.112,1.0,"WALK,CAR,WALK","body-3510755,rideHailVehicle-3247829@Lyft,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896435,6028935.0,,,1977490905.0,18090.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,55718.0,False,3755.244,1.0,"WALK,CAR,WALK","body-6028935,rideHailVehicle-6037643@Uber,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896436,4794778.0,,,1572687409.0,18054.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,121066.0,False,1567.269,1.0,"WALK,CAR,WALK","body-4794778,rideHailVehicle-4780935@Lyft,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896492,1118143.0,,,366750977.0,18072.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,107216.0,False,3261.645,1.0,"WALK,CAR,WALK","body-1118143,rideHailVehicle-1127566@Uber,body...",Home,escort,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896522,4424703.0,,,1451302633.0,18093.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,63520.0,False,1416.933,1.0,"WALK,CAR,WALK","body-4424703,rideHailVehicle-4339463@Cruise,bo...",Home,eatout,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
# Reducing the size of the stacked file
dtypes = sf_stacked_rh_price.dtypes
for column in sf_stacked_rh_price.columns:
    if str(dtypes[column]) == 'int64':
        sf_stacked_rh_price[column] = sf_stacked_rh_price[column].astype(np.int32)
    elif str(dtypes[column]) == 'float64':
        sf_stacked_rh_price[column] = sf_stacked_rh_price[column].astype(np.float32)