In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
# Configure seaborn once. The notebook used to call set_theme(style="darkgrid") and then
# sns.set(font_scale=1.35, style="ticks"); sns.set is an alias of set_theme and re-applies
# every theme parameter, so only the second call's settings ever took effect.
sns.set_theme(style="ticks", font_scale=1.35, color_codes=True)  # set styling preferences
import statsmodels.api as sm
from scipy import stats
import math
from math import pi
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from scipy.spatial.distance import cdist
from scipy.cluster.vq import kmeans2,vq, whiten
import geopandas as gpd
import h5py
import boto.s3
import glob
import boto3
from zipfile import ZipFile
import shutil

In [2]:
# Lift pandas' display truncation so that every column and every row is rendered
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, None)

#### Baseline

In [8]:
%%time
# One S3 client for the notebook; the later loading cells reuse `s3`
s3 = boto3.client("s3")
# Object key of the gzipped baseline INEXUS table -- the path should be updated
key = "pilates-outputs/sfbay_baseline_20230209/inexus/sfbay_baselineJan2023_Baseline-0_2020__20230209.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
# Stream the object body straight into pandas; the first CSV column holds the saved index
sfbase = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 1min 39s
Wall time: 1min 39s


In [15]:
# Adding scenario info
# Every row of the baseline run gets the same constant descriptors (insertion order = column order)
scenario_info = {
    'scenario': "baseline",
    'lever': "default",
    'year': 2020,
    'lever_position_fltsz': 1,
    'lever_position_price': 1,
    'lever_n_fleets': 2,
    'fleetsize_uber': 0.00287777,
    'fleetsize_lyft': 0.00184043,
    'fleetsize_cruise': 0,
    'fleetsize_flywheel': 0,
    'fleetsize_waymo': 0,
}
for info_column, info_value in scenario_info.items():
    sfbase[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sfbase[label_column] = sfbase[label_column].astype("category")

In [10]:
# Shape of the subset of baseline rows that have no person_sex value
sfbase.loc[sfbase['person_sex'].isna()].shape

(21407, 279)

In [11]:
#baseline2018 = "https://beam-core-act.s3.amazonaws.com/beam-outputs/pilates-outputs/sfbay-baseline-20221220/inexus"
#sfbase = pd.read_csv(baseline2018 + 'sfbay_baseline_default-1.0_2012__20221219.csv.gz')

In [12]:
# Display floats in plain fixed-point notation instead of scientific notation
pd.options.display.float_format = '{:f}'.format

In [8]:
#describe = sfbase.describe()

In [9]:
#describe.to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/describe.csv')

#### Ridehail Fleet Size Scenarios

In [13]:
%%time
# Object key of the 5-fleet, fleet-size-100 INEXUS table -- the path should be updated
key = "pilates-outputs/sfbay_5fleets_100price_100fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-100_2020__20230209.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_1 = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 1min 52s
Wall time: 1min 52s


In [16]:
# Shape of the subset of rows that have no ActivitySim trip mode
sf_rh_5flt_1.loc[sf_rh_5flt_1['trip_mode_AS_trips'].isna()].shape

(21150, 269)

In [21]:
# Adding scenario info
# Constant descriptors for the 5-fleet run at fleet-size position 1 (insertion order = column order)
scenario_info = {
    'scenario': "ridehail",
    'lever': "fleet_size_price_comb",
    'year': 2020,
    'lever_position_fltsz': 1,
    'lever_position_price': 1,
    'lever_n_fleets': 5,
    'fleetsize_uber': 0.00175524,
    'fleetsize_lyft': 0.00112253,
    'fleetsize_cruise': 0.00092022,
    'fleetsize_flywheel': 0.00061348,
    'fleetsize_waymo': 0.00030674,
}
for info_column, info_value in scenario_info.items():
    sf_rh_5flt_1[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sf_rh_5flt_1[label_column] = sf_rh_5flt_1[label_column].astype("category")

In [18]:
%%time
# Object key of the 5-fleet, fleet-size-164 INEXUS table -- the path should be updated
key = "pilates-outputs/sfbay_5fleets_100price_164fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-164_2020__20230209.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_1p64 = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 2min 5s
Wall time: 2min 5s


In [22]:
# Adding scenario info
# Constant descriptors for the 5-fleet run at fleet-size position 1.64 (insertion order = column order)
scenario_info = {
    'scenario': "ridehail",
    'lever': "fleet_size_price_comb",
    'year': 2020,
    'lever_position_fltsz': 1.64,
    'lever_position_price': 1,
    'lever_n_fleets': 5,
    'fleetsize_uber': 0.00287777,
    'fleetsize_lyft': 0.00184043,
    'fleetsize_cruise': 0.00150873,
    'fleetsize_flywheel': 0.00100582,
    'fleetsize_waymo': 0.00050291,
}
for info_column, info_value in scenario_info.items():
    sf_rh_5flt_1p64[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sf_rh_5flt_1p64[label_column] = sf_rh_5flt_1p64[label_column].astype("category")

In [20]:
%%time
# Object key of the 5-fleet, fleet-size-200 INEXUS table -- the path should be updated
key = "pilates-outputs/sfbay_5fleets_100price_200fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-200_2020__20230209.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_2 = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 2min 13s
Wall time: 2min 14s


In [23]:
# Adding scenario info
# Constant descriptors for the 5-fleet run at fleet-size position 2 (insertion order = column order)
scenario_info = {
    'scenario': "ridehail",
    'lever': "fleet_size_price_comb",
    'year': 2020,
    'lever_position_fltsz': 2,
    'lever_position_price': 1,
    'lever_n_fleets': 5,
    'fleetsize_uber': 0.00351047,
    'fleetsize_lyft': 0.00224507,
    'fleetsize_cruise': 0.00184043,
    'fleetsize_flywheel': 0.00122696,
    'fleetsize_waymo': 0.00061348,
}
for info_column, info_value in scenario_info.items():
    sf_rh_5flt_2[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sf_rh_5flt_2[label_column] = sf_rh_5flt_2[label_column].astype("category")

In [13]:
%%time
# NOTE(review): this key is still a dotted placeholder, not a real object path, so the
# request below cannot succeed until it is filled in -- the path should be updated
key = "pilates-outputs/.............../inexus/..............."
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_4 = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 2min 7s
Wall time: 2min 8s


In [16]:
# Adding scenario info
# Constant descriptors for the 5-fleet run at fleet-size position 4 (insertion order = column order)
scenario_info = {
    'scenario': "ridehail",
    'lever': "fleet_size_price_comb",
    'year': 2020,
    'lever_position_fltsz': 4,
    'lever_position_price': 1,
    'lever_n_fleets': 5,
    'fleetsize_uber': 0.00702094,
    'fleetsize_lyft': 0.00449014,
    'fleetsize_cruise': 0.00368087,
    'fleetsize_flywheel': 0.00245391,
    'fleetsize_waymo': 0.00122696,
}
for info_column, info_value in scenario_info.items():
    sf_rh_5flt_4[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sf_rh_5flt_4[label_column] = sf_rh_5flt_4[label_column].astype("category")

In [24]:
%%time
# Object key of the 5-fleet, fleet-size-1000 INEXUS table -- the path should be updated
key = "pilates-outputs/sfbay_5fleets_100price_1000fleet_20230209/inexus/sfbay_5_fleets_scenario_fleet_size-1000_2020__20230209.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_10 = pd.read_csv(
    obj['Body'],
    index_col='Unnamed: 0',
    compression='gzip',
)



CPU times: total: 2min 21s
Wall time: 2min 21s


In [25]:
# Adding scenario info
# Constant descriptors for the 5-fleet run at fleet-size position 10 (insertion order = column order)
scenario_info = {
    'scenario': "ridehail",
    'lever': "fleet_size_price_comb",
    'year': 2020,
    'lever_position_fltsz': 10,
    'lever_position_price': 1,
    'lever_n_fleets': 5,
    'fleetsize_uber': 0.01755235,
    'fleetsize_lyft': 0.01122534,
    'fleetsize_cruise': 0.00920217,
    'fleetsize_flywheel': 0.00613478,
    'fleetsize_waymo': 0.00306739,
}
for info_column, info_value in scenario_info.items():
    sf_rh_5flt_10[info_column] = info_value
# The two label columns are stored as categoricals
for label_column in ('scenario', 'lever'):
    sf_rh_5flt_10[label_column] = sf_rh_5flt_10[label_column].astype("category")

In [26]:
# Stack the baseline and the ride-hail scenario tables row-wise into one frame.
# NOTE(review): sf_rh_5flt_4 is loaded above but is not part of this list -- confirm that is intended.
frames = [
    sfbase,
    sf_rh_5flt_1,
    sf_rh_5flt_1p64,
    sf_rh_5flt_2,
    sf_rh_5flt_10,
]
sf_stacked = pd.concat(frames, axis=0)

In [27]:
# First batch of columns removed from the stacked frame
first_drop_batch = [
    'has_school_kid', 'has_preschool_kid', 'has_retiree', 'has_non_worker', 'adult', 'hispanic', 'relate',
    'pstudent', 'pemploy', 'ptype', 'p_hispanic', 'age_16_p', 'age_16_to_19',
    'person_age', 'MAR', 'hours', 'hispanic.1', 'student',
    'worker', 'stop_frequency', 'composition', 'tdd', 'number_of_participants', 'tour_type_count',
    'tour_type_num', 'tour_num', 'tour_count', 'outbound', 'trip_num', 'num_under16_not_at_school',
    'family', 'non_family', 'num_young_adults', 'num_college_age', 'num_children_16_to_17',
    'num_children_5_to_15', 'num_young_children',
]
sf_stacked = sf_stacked.drop(columns=first_drop_batch)

In [28]:
# Second batch of columns removed from the stacked frame.
# 'is_gradeschool' used to appear twice in this list; each label is now listed once,
# which drops exactly the same set of columns.
second_drop_batch = [
    'vehicleIds_estimate', 'parent_tour_id', 'atwork_subtour_frequency', 'household_id_y',
    'male', 'female', 'has_driving_kid', 'has_full_time', 'has_part_time',
    'has_university', 'student_is_employed', 'nonstudent_to_school', 'is_student',
    'is_gradeschool', 'is_highschool', 'is_university', 'school_segment', 'is_worker',
    'distance_to_school', 'roundtrip_auto_time_to_school',
    'distance_to_work', 'workplace_in_cbd', 'work_taz_area_type', 'hh_race_of_head',
    'roundtrip_auto_time_to_work', 'work_auto_savings_ratio', 'cdap_activity', 'travel_active',
    'under16_not_at_school', 'has_preschool_kid_at_home', 'gt2', 'hispanic_head',
    'has_school_kid_at_home', 'work_and_school_and_worker', 'age_of_head', 'race_of_head',
    'work_and_school_and_student', 'VEHICL', 'hh_children', 'hh_age_of_head', 'num_workers',
    'gt55', 'seniors', 'recent_mover', 'hh_workers', 'hispanic_status_of_head', 'hh_seniors',
    'hh_type', 'HHT', 'sample_rate', 'chunk_id', 'income_segment', 'num_non_workers',
    'num_drivers', 'num_adults', 'num_children', 'hh_work_auto_savings_ratio', 'num_travel_active',
    'num_travel_active_adults', 'num_travel_active_preschoolers', 'num_travel_active_children',
    'num_travel_active_non_preschoolers', 'participates_in_jtf_model', 'joint_tour_frequency',
    'num_hh_joint_tours',
]
sf_stacked = sf_stacked.drop(columns=second_drop_batch)

In [29]:
# Remove the serialno column as well
sf_stacked = sf_stacked.drop(columns=['serialno'])

#### Stacked File

In [30]:
#stacked_tr_fr_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Stacked/"
#sf_stacked_tr_fr = pd.read_csv(stacked_tr_fr_path + 'sf_2018_stacked_tr_fr.csv')

In [31]:
# Order rows by person/trip and then by fleet-size lever position, and renumber the index from 0
sort_keys = ['IDMerged', 'tripIndex', 'lever_position_fltsz']
sf_stacked = sf_stacked.sort_values(by=sort_keys)
sf_stacked = sf_stacked.reset_index(drop=True)

In [32]:
# shift column 'person' to first position
#first_column = sf_stacked.pop('scenario')
#second_column = sf_stacked.pop('lever')
#third_column = sf_stacked.pop('lever_position')
#fourth_column = sf_stacked.pop('logsum_trip_mode_AS_trips')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(2, 'scenario', first_column)
#sf_stacked.insert(3, 'lever', second_column)
#sf_stacked.insert(4, 'lever_position', third_column)
#sf_stacked.insert(5, 'logsum_trip_mode_AS_trips', fourth_column)

In [36]:
# Adding Price lever position
#conditions  = [(sf_stacked['lever_position'] == 0.125),
#               (sf_stacked['lever_position'] == 0.25),
#               (sf_stacked['lever_position'] == 0.5),
#               (sf_stacked['lever_position'] == 1),
#               (sf_stacked['lever_position'] == 2),
#               (sf_stacked['lever_position'] == 4),
#               (sf_stacked['lever_position'] == 10),
#               (sf_stacked['lever_position'] == 27.5)]
#
#choices = [0.125, 0.25, 0.5, 1, 2, 4, 10, 27.5]

In [37]:
#sf_stacked['lever_position_fleetsize'] = np.select(conditions, choices, default=np.nan)

In [38]:
#sf_stacked['lever_position_price'] = 1

In [39]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['logsum_trip_mode_AS_trips'].apply(lambda x: x.diff())

In [40]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr['logsum_trip_mode_AS_trips'] - sf_stacked_tr_fr.loc[sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['lever_position'].transform('idxmin'), 'logsum_trip_mode_AS_trips'].values

In [37]:
%%time
# Rows of the baseline run: fleet-size position 1 with 2 fleets (the values assigned to sfbase above)
is_baseline_row = sf_stacked['lever_position_fltsz'].eq(1.0) & (sf_stacked['lever_n_fleets'] == 2)
# Keep the logsum only on baseline rows, then spread the first non-missing value
# of each (IDMerged, tripIndex) group to every row of that group
baseline_logsum = (
    sf_stacked['logsum_trip_Potential_INEXUS']
    .where(is_baseline_row)
    .groupby([sf_stacked['IDMerged'], sf_stacked['tripIndex']])
    .transform('first')
    .convert_dtypes()
)
sf_stacked['logsum_relative_to_baseline'] = sf_stacked['logsum_trip_Potential_INEXUS'] - baseline_logsum

CPU times: total: 3.72 s
Wall time: 3.5 s


In [38]:
# Rows of the baseline run: fleet-size position 1 with 2 fleets (the values assigned to sfbase above)
is_baseline_row = sf_stacked['lever_position_fltsz'].eq(1.0) & (sf_stacked['lever_n_fleets'] == 2)
# Keep the duration only on baseline rows, then spread the first non-missing value
# of each (IDMerged, tripIndex) group to every row of that group
baseline_duration = (
    sf_stacked['duration_door_to_door']
    .where(is_baseline_row)
    .groupby([sf_stacked['IDMerged'], sf_stacked['tripIndex']])
    .transform('first')
    .convert_dtypes()
)
sf_stacked['door_to_door_time_relative_to_baseline'] = sf_stacked['duration_door_to_door'] - baseline_duration

In [39]:
# Drop unused columns
unused_columns = [
    'sf_detached', 'household_id_x', 'destination_logsum_y', 'logsum_tours_mode_AS_tours',
    'primary_purpose_y', 'PNUM', 'work_auto_savings', 'mandatory_tour_frequency',
]
sf_stacked = sf_stacked.drop(columns=unused_columns)

In [43]:
# Shape of the subset of rows whose replanning_status is positive
sf_stacked.loc[sf_stacked['replanning_status'].gt(0)].shape

(119291, 171)

In [44]:
# Add the mandatory category column
mandatory = ['work' , 'univ', 'school']
# Both the ending and the starting activity are in the mandatory list -> 'from_M_to_M'; otherwise None
ends_mandatory = sf_stacked['actEndType'].isin(mandatory)
starts_mandatory = sf_stacked['actStartType'].isin(mandatory)
sf_stacked['mandatoryCat'] = np.where(ends_mandatory & starts_mandatory, 'from_M_to_M', None)

In [45]:
# Ending activity is 'Home' and the starting activity is mandatory -> 'from_H_to_M'; other rows keep their value
ends_at_home = sf_stacked['actEndType'].eq('Home')
starts_mandatory = sf_stacked['actStartType'].isin(mandatory)
sf_stacked['mandatoryCat'] = np.where(ends_at_home & starts_mandatory, 'from_H_to_M', sf_stacked['mandatoryCat'])

In [46]:
# Ending activity is mandatory and the starting activity is "Home" -> 'from_M_to_H'; other rows keep their value
ends_mandatory = sf_stacked['actEndType'].isin(mandatory)
starts_at_home = sf_stacked['actStartType'].eq("Home")
sf_stacked['mandatoryCat'] = np.where(ends_mandatory & starts_at_home, 'from_M_to_H', sf_stacked['mandatoryCat'])

In [47]:
non_mandatory = ['othmaint' , 'othdiscr', 'escort', 'eatout', 'social', 'shopping', 'atwork']
# Both the ending and the starting activity are non-mandatory -> 'from_N_to_N'; other rows keep their value
ends_non_mandatory = sf_stacked['actEndType'].isin(non_mandatory)
starts_non_mandatory = sf_stacked['actStartType'].isin(non_mandatory)
sf_stacked['mandatoryCat'] = np.where(ends_non_mandatory & starts_non_mandatory, 'from_N_to_N', sf_stacked['mandatoryCat'])

In [48]:
# Drop unused columns
tour_count_columns = [
    'num_work_tours', 'num_joint_tours', 'non_mandatory_tour_frequency',
    'num_non_mand', 'num_escort_tours', 'num_eatout_tours', 'num_shop_tours',
    'num_maint_tours', 'num_discr_tours', 'num_social_tours', 'num_non_escort_tours',
]
sf_stacked = sf_stacked.drop(columns=tour_count_columns)

In [53]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].head(10000).to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

In [54]:
#%%time
#sf_stacked_tr_fr.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_tr_fr_v3.csv', index=False)

In [53]:
# Shape of the subset of rows with no SHARED3FREE value
sf_stacked.loc[sf_stacked['SHARED3FREE'].isna()].shape

(0, 161)

In [51]:
# Keep only the rows that have a person_sex value
has_person_sex = sf_stacked['person_sex'].notna()
sf_stacked = sf_stacked.loc[has_person_sex]

In [67]:
# Mapping BEAM and ASIM modes
# Builds the `conditions` / `choices` lists consumed by np.select in the next cell.
# Each rule is written as one (condition, choice) pair so that the two lists cannot drift
# out of alignment; np.select uses the first matching condition, so rule order matters
# and is kept exactly as before.
beam_mode = sf_stacked['mode_choice_actual_BEAM']
asim_mode = sf_stacked['trip_mode_AS_trips']

# Column groups used for the fallback rules (row-wise maximum over the group)
auto_columns = ['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY',
                'DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']
walk_columns = ['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']
ridehail_columns = ['TAXI','TNC_SINGLE','TNC_SHARED']
walk_and_transit_columns = ['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM',
                            'DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']
bike_and_transit_columns = ['BIKE','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM',
                            'DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']

# Each maximum is computed once and shared by every rule that needs it
best_auto = sf_stacked[auto_columns].max(axis=1)
best_walk = sf_stacked[walk_columns].max(axis=1)
best_ridehail = sf_stacked[ridehail_columns].max(axis=1)
best_walk_or_transit = sf_stacked[walk_and_transit_columns].max(axis=1)
best_bike_or_transit = sf_stacked[bike_and_transit_columns].max(axis=1)

# (BEAM mode, ASIM mode) pairs that agree: the value is taken from the column named after the ASIM mode
exact_matches = [
    ('walk', 'WALK'),
    ('bike', 'BIKE'),
    ('hov3_teleportation', 'SHARED3FREE'),
    ('hov3_teleportation', 'SHARED3PAY'),
    ('car_hov3', 'SHARED3FREE'),
    ('car_hov3', 'SHARED3PAY'),
    ('hov2_teleportation', 'SHARED2FREE'),
    ('hov2_teleportation', 'SHARED2PAY'),
    ('car_hov2', 'SHARED2PAY'),
    ('car_hov2', 'SHARED2FREE'),
    ('car', 'DRIVEALONEFREE'),
    ('car', 'DRIVEALONEPAY'),
    ('walk_transit', 'WALK_LRF'),
    ('walk_transit', 'WALK_LOC'),
    ('walk_transit', 'WALK_EXP'),
    ('walk_transit', 'WALK_HVY'),
    ('walk_transit', 'WALK_COM'),
    ('ride_hail', 'TNC_SINGLE'),
    ('ride_hail', 'TNC_SHARED'),
    ('ride_hail', 'TAXI'),
    ('drive_transit', 'DRIVE_COM'),
    ('drive_transit', 'DRIVE_LRF'),
    ('drive_transit', 'DRIVE_LOC'),
    ('drive_transit', 'DRIVE_EXP'),
    ('drive_transit', 'DRIVE_HVY'),
    ('ride_hail_pooled', 'TNC_SINGLE'),
    ('ride_hail_pooled', 'TNC_SHARED'),
    ('ride_hail_pooled', 'TAXI'),
]
mode_rules = [
    ((beam_mode == beam_value) & (asim_mode == asim_value), sf_stacked[asim_value])
    for beam_value, asim_value in exact_matches
]

# Pairs that disagree: fall back to the maximum over a related group of columns
mode_rules += [
    (beam_mode.eq('hov2_teleportation')
     & asim_mode.isin(['SHARED3FREE', 'SHARED3PAY', 'DRIVEALONEFREE', 'DRIVEALONEPAY']), best_auto),
    (beam_mode.eq('car_hov2')
     & asim_mode.isin(['SHARED3PAY', 'SHARED3FREE', 'DRIVEALONEFREE', 'DRIVEALONEPAY']), best_auto),
    (beam_mode.eq('car_hov3')
     & asim_mode.isin(['SHARED2PAY', 'SHARED2FREE', 'DRIVEALONEFREE', 'DRIVEALONEPAY']), best_auto),
    (beam_mode.eq('hov3_teleportation')
     & asim_mode.isin(['DRIVEALONEPAY', 'DRIVEALONEFREE', 'SHARED2PAY', 'SHARED2FREE']), best_auto),
    (beam_mode.eq('car')
     & asim_mode.isin(['SHARED3FREE', 'SHARED2FREE', 'SHARED3PAY', 'SHARED2PAY', 'DRIVE_LOC',
                       'DRIVE_LRF', 'DRIVE_EXP', 'DRIVE_HVY', 'DRIVE_COM']), best_auto),
    (beam_mode.eq('walk')
     & asim_mode.isin(['WALK_LRF', 'WALK_LOC', 'WALK_EXP', 'WALK_HVY', 'WALK_COM']), best_walk),
    (beam_mode.eq('walk_transit') & asim_mode.eq('WALK'), best_walk),
    # ride-hail in BEAM but any non-ride-hail (or missing) ASIM mode
    (beam_mode.isin(['ride_hail_pooled', 'ride_hail'])
     & ~asim_mode.isin(['TAXI', 'TNC_SHARED', 'TNC_SINGLE']), best_ridehail),
    # walk_transit in BEAM but any non-walk (or missing) ASIM mode
    (beam_mode.eq('walk_transit')
     & ~asim_mode.isin(['WALK_LRF', 'WALK_LOC', 'WALK_EXP', 'WALK_HVY', 'WALK_COM', 'WALK']),
     best_walk_or_transit),
    (beam_mode.eq('bike_transit'), best_bike_or_transit),
    (beam_mode.eq('car') & asim_mode.isin(['TNC_SHARED', 'TNC_SINGLE', 'TAXI']), best_auto),
]

conditions = [rule_condition for rule_condition, _ in mode_rules]
choices = [rule_choice for _, rule_choice in mode_rules]

In [68]:
# For every row take the choice of the first condition that holds; rows matching no condition get NaN
sf_stacked['Realized_INEXUS'] = np.select(
    condlist=conditions,
    choicelist=choices,
    default=np.nan,
)

In [None]:
#sf_stacked = sf_stacked.drop(['Realized_INEXUS'], axis=1)

In [69]:
# Rows still missing Realized_INEXUS whose BEAM mode is one of the car / drive modes below
# get the row-wise maximum over the auto and drive-transit columns; other rows are unchanged
auto_like_modes = ['car_hov2', 'car_hov3', 'hov2_teleportation', 'hov3_teleportation', 'car', 'drive_transit']
needs_auto_fill = sf_stacked['Realized_INEXUS'].isna() & sf_stacked['mode_choice_actual_BEAM'].isin(auto_like_modes)
auto_fill_value = sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY',
                              'DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1)
sf_stacked['Realized_INEXUS'] = np.where(needs_auto_fill, auto_fill_value, sf_stacked['Realized_INEXUS'])

In [70]:
# Rows still missing Realized_INEXUS whose BEAM mode is walk or walk_transit get the
# row-wise maximum over the walk and transit columns; other rows are unchanged
needs_walk_fill = sf_stacked['Realized_INEXUS'].isna() & sf_stacked['mode_choice_actual_BEAM'].isin(['walk', 'walk_transit'])
walk_fill_value = sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM',
                              'DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1)
sf_stacked['Realized_INEXUS'] = np.where(needs_walk_fill, walk_fill_value, sf_stacked['Realized_INEXUS'])

In [71]:
# Rows still missing Realized_INEXUS whose BEAM mode is bike get the row-wise maximum
# over the bike and walk columns; other rows are unchanged
needs_bike_fill = sf_stacked['Realized_INEXUS'].isna() & sf_stacked['mode_choice_actual_BEAM'].eq('bike')
bike_fill_value = sf_stacked[['BIKE','WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1)
sf_stacked['Realized_INEXUS'] = np.where(needs_bike_fill, bike_fill_value, sf_stacked['Realized_INEXUS'])

In [72]:
# Shape of the subset of rows that still have no Realized_INEXUS
sf_stacked.loc[sf_stacked['Realized_INEXUS'].isna()].shape

(0, 162)

In [74]:
# Shape of the subset of rows whose Realized_INEXUS is below -1000
sf_stacked.loc[sf_stacked['Realized_INEXUS'].lt(-1000)].shape

(8658, 162)

In [None]:
#sf_stacked_tr_fr['Realized_INEXUS'] = np.where((sf_stacked_tr_fr['Realized_INEXUS'].isna())&
                                               #(sf_stacked_tr_fr['mode_choice_actual_BEAM'] == 'walk_transit')&
                                               #(sf_stacked_tr_fr['vehicleIds'].str.contains('(caltrain)')), 
                                               #sf_stacked_tr_fr['WALK_COM'], sf_stacked_tr_fr['Realized_INEXUS'])

In [75]:
# Keep only the rows that have a Realized_INEXUS value
has_realized_inexus = sf_stacked['Realized_INEXUS'].notna()
sf_stacked = sf_stacked.loc[has_realized_inexus]

In [76]:
%%time
# Rows of the baseline run: fleet-size position 1 with 2 fleets (the values assigned to sfbase above)
is_baseline_row = sf_stacked['lever_position_fltsz'].eq(1.0) & (sf_stacked['lever_n_fleets'] == 2)
# Keep Realized_INEXUS only on baseline rows, then spread the first non-missing value
# of each (IDMerged, tripIndex) group to every row of that group
baseline_realized = (
    sf_stacked['Realized_INEXUS']
    .where(is_baseline_row)
    .groupby([sf_stacked['IDMerged'], sf_stacked['tripIndex']])
    .transform('first')
    .convert_dtypes()
)
sf_stacked['realized_relative_to_baseline'] = sf_stacked['Realized_INEXUS'] - baseline_realized

CPU times: total: 1min 29s
Wall time: 1min 30s


In [None]:
#sf_stacked_tr_fr[sf_stacked_tr_fr['Realized_INEXUS'].isna()].to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

#### Social INEXUS

In [None]:
# add a lower and upper range for the bins in pd.cut
#quartiles = [0] + quartiles + [float('inf')]

In [None]:
# add a quartiles column to groups, using pd.cut
#sf_stacked['income_quartiles'] = pd.cut(sf_stacked['income'], bins=quartiles, labels=['1stQ', '2ndQ', '3rdQ', '4thQ'])

In [79]:
# alpha is the divisor applied to the INEXUS columns in the cells below;
# with the constant value 1 those divisions leave the values unchanged
sf_stacked['alpha'] = 1

In [80]:
# Scale marginal emissions by a constant factor -- presumably a carbon price; units are not
# stated in this notebook (TODO confirm). The original note lists 51 or 102 as other values.
carbon_cost_factor = 185  # 51 or 102
sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal'].mul(carbon_cost_factor)

In [81]:
# Social INEXUS = Realized INEXUS divided by alpha, minus the carbon cost column
realized_scaled = sf_stacked['Realized_INEXUS'].div(sf_stacked['alpha'])
sf_stacked['Social_INEXUS'] = realized_scaled.sub(sf_stacked['socialCarbonCost'])

In [82]:
# Potential INEXUS (trip logsum) divided by alpha
sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS'].div(sf_stacked['alpha'])

In [83]:
# Realized INEXUS divided by alpha
sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS'].div(sf_stacked['alpha'])

#### The following chunks are useful but should be re-checked in the next runs (they produced exactly the same realized and potential INEXUS values in dollars)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Social_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(6, 'Social_INEXUS', first_column)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Potential_INEXUS_in_dollar')
#second_column = sf_stacked.pop('Realized_INEXUS_in_dollar')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(7, 'Potential_INEXUS_in_dollar', first_column)
#sf_stacked.insert(8, 'Realized_INEXUS_in_dollar', first_column)

In [103]:
# Remove the dollar-scaled INEXUS columns and alpha created above, plus further
# household, person and trip attribute columns, before the export
columns_to_remove = [
    'Realized_INEXUS_in_dollar', 'Potential_INEXUS_in_dollar', 'alpha', 'hh_value_of_time', 'lcm_county_id',
    'tenure_mover', 'hh_income', 'median_value_of_time', 'hhsize', 'hh_size',
    'tenure', 'num_mand', 'free_parking_at_work',
    'workplace_location_logsum', 'workplace_taz', 'school_taz', 'TAZ_x',
    'work_at_home', 'sex', 'race_id', 'edu', 'earning', 'duration',
    'tour_type', 'depart', 'destination_x', 'origin_x', 'purpose',
    'trip_count', 'primary_purpose_x',
]
sf_stacked = sf_stacked.drop(columns=columns_to_remove)

In [106]:
# Remove the per-mode columns that were used above to derive Realized_INEXUS
mode_columns = [
    'DRIVEALONEPAY', 'DRIVEALONEFREE',
    'SHARED2FREE', 'SHARED2PAY',
    'SHARED3FREE', 'SHARED3PAY', 'WALK', 'BIKE',
    'WALK_LOC', 'WALK_LRF', 'WALK_EXP', 'WALK_HVY', 'WALK_COM', 'DRIVE_LOC',
    'DRIVE_LRF', 'DRIVE_EXP', 'DRIVE_HVY', 'DRIVE_COM', 'TAXI',
    'TNC_SINGLE', 'TNC_SHARED',
]
sf_stacked = sf_stacked.drop(columns=mode_columns)

In [107]:
%%time
# Write the stacked table to S3 as CSV, without the index column
stacked_output_uri = 's3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2020_stacked_rh_5fleet_price_2_9_23.csv'
sf_stacked.to_csv(stacked_output_uri, index=False)

CPU times: total: 16min 53s
Wall time: 21min 49s


In [88]:
%%time
# Object key of the BEAM events file of the fleet-size-164 run -- the path should be updated
key = "pilates-outputs/sfbay_5fleets_100price_164fleet_20230209/beam/year-2020-iteration-3/ITERS/it.0/0.events.csv.gz"
obj = s3.get_object(Key=key, Bucket="beam-outputs")
sf_rh_5flt_1p64_events = pd.read_csv(
    obj['Body'],
    compression='gzip',
)



CPU times: total: 10min 26s
Wall time: 13min 46s


In [99]:
# Distinct values of the events' mode column
pd.unique(sf_rh_5flt_1p64_events['mode'])

array([nan, 'bus', 'tram', 'subway', 'rail', 'car', 'hov3_teleportation',
       'car_hov2', 'hov2_teleportation', 'walk', 'car_hov3', 'bike',
       'walk_transit', 'ride_hail', 'ride_hail_pooled', 'drive_transit',
       'cable_car', 'bike_transit', 'ferry'], dtype=object)

In [101]:
# First few events whose mode is ride_hail
sf_rh_5flt_1p64_events.loc[sf_rh_5flt_1p64_events['mode'].eq('ride_hail')].head()

Unnamed: 0,person,link,legMode,tripId,time,type,parkingTaz,chargingPointType,pricingModel,parkingType,locationY,locationX,cost,driver,vehicle,primaryFuelLevel,secondaryFuelLevel,price,mode,currentTourMode,expectedMaximumUtility,availableAlternatives,location,personalVehicleAvailable,length,tourIndex,legModes,legVehicleIds,currentActivity,nextActivity,vehicleType,shiftStatus,parkingZoneId,fuel,duration,actType,links,numPassengers,primaryFuel,riders,toStopIndex,fromStopIndex,seatingCapacity,tollPaid,endY,endX,startY,startX,capacity,arrivalTime,departureTime,linkTravelTime,secondaryFuel,secondaryFuelType,primaryFuelType,facility,incentive,tollCost,netCost,departTime,requireWheelchair,reason,score
895282,3510755.0,,,1151527865.0,18000.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,141206.0,False,5609.112,1.0,"WALK,CAR,WALK","body-3510755,rideHailVehicle-3247829@Lyft,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896435,6028935.0,,,1977490905.0,18090.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,55718.0,False,3755.244,1.0,"WALK,CAR,WALK","body-6028935,rideHailVehicle-6037643@Uber,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896436,4794778.0,,,1572687409.0,18054.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,121066.0,False,1567.269,1.0,"WALK,CAR,WALK","body-4794778,rideHailVehicle-4780935@Lyft,body...",Home,othmaint,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896492,1118143.0,,,366750977.0,18072.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,107216.0,False,3261.645,1.0,"WALK,CAR,WALK","body-1118143,rideHailVehicle-1127566@Uber,body...",Home,escort,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
896522,4424703.0,,,1451302633.0,18093.0,ModeChoice,,,,,,,,,,,,,ride_hail,ride_hail,,RIDE_HAIL,63520.0,False,1416.933,1.0,"WALK,CAR,WALK","body-4424703,rideHailVehicle-4339463@Cruise,bo...",Home,eatout,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
