In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
# Plot styling: "ticks" style with enlarged fonts.
# A single set_theme() call is enough: every call resets style, context and
# palette, so an earlier set_theme(style="darkgrid") would simply be overridden.
sns.set_theme(style="ticks", font_scale=1.35)
import statsmodels.api as sm
from scipy import stats
import math
from math import pi
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from scipy.spatial.distance import cdist
from scipy.cluster.vq import kmeans2,vq, whiten

In [2]:
# Lift pandas' display truncation so every column and every row is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#### Baseline

In [3]:
%%time
# Read the baseline CSV from the S3 "Baseline" folder into sfbase.
baseline2018 = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Baseline/"
baseline_csv = 'sf_2018_base_8_22_22.csv'
sfbase = pd.read_csv(baseline2018 + baseline_csv)



CPU times: total: 1min 9s
Wall time: 3min 52s


In [4]:
# Render floats in plain fixed-point notation (no scientific notation) in displayed frames.
pd.options.display.float_format = '{:f}'.format

In [5]:
#describe = sfbase.describe()

In [6]:
#describe.to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/describe.csv')

#### Ridehail Price Scenarios

In [7]:
%%time
# Read the five ride-hail price scenario CSVs (file suffixes 0, 0p25, 0p5, 1p5, 2)
# from the S3 "Ridehail_Price" folder, in the same order as the target names below.
rh_price_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Ridehail_Price/"
rh_price_files = (
    'sf_2018_rh_price_0.csv',
    'sf_2018_rh_price_0p25.csv',
    'sf_2018_rh_price_0p5.csv',
    'sf_2018_rh_price_1p5.csv',
    'sf_2018_rh_price_2.csv',
)
(sf_rh_price_0,
 sf_rh_price_0p25,
 sf_rh_price_0p5,
 sf_rh_price_1p5,
 sf_rh_price_2) = [pd.read_csv(rh_price_path + csv_name) for csv_name in rh_price_files]



CPU times: total: 6min 25s
Wall time: 19min 48s


In [13]:
# (rows, columns) of the baseline frame.
sfbase.shape

(2468267, 257)

In [14]:
# (rows, columns) of the 0p25 price scenario frame, for comparison with the baseline.
sf_rh_price_0p25.shape

(2469287, 257)

In [15]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[245:625], axis=1)

In [16]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[235:245], axis=1)

In [12]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[233:235], axis=1)

In [9]:
# Stack the five price scenario frames and the baseline row-wise into one frame.
# The original row labels are kept here (no ignore_index).
frames = [
    sf_rh_price_0,
    sf_rh_price_0p25,
    sf_rh_price_0p5,
    sfbase,
    sf_rh_price_1p5,
    sf_rh_price_2,
]
sf_stacked = pd.concat(frames, axis=0)

In [10]:
# First batch of columns removed from the stacked frame.
drop_batch_1 = [
    'BlockGroupEnd', 'BlockGroupStart', 'has_school_kid', 'has_preschool_kid',
    'has_retiree', 'has_non_worker', 'adult', 'hispanic', 'relate',
    'pstudent', 'pemploy', 'ptype', 'p_hispanic', 'age_16_p', 'age_16_to_19',
    'work_zone_id', 'race_id', 'school_zone_id', 'person_age', 'MAR', 'hours',
    'hispanic.1', 'student', 'worker', 'stop_frequency', 'composition', 'tdd',
    'number_of_participants', 'tour_type_count', 'tour_type_num', 'tour_num',
    'tour_count', 'outbound', 'trip_num', 'num_under16_not_at_school',
    'family', 'non_family', 'num_young_adults', 'num_college_age',
    'num_children_16_to_17', 'num_children_5_to_15', 'num_young_children',
]
sf_stacked = sf_stacked.drop(columns=drop_batch_1)

In [11]:
# Second batch of columns removed from the stacked frame.
# Each label appears exactly once.
# NOTE(review): 'is_gradeschool' was previously listed twice in this batch; if the
# second occurrence was meant to be a different column, add it here.
drop_batch_2 = [
    'vehicleIds_estimate', 'destination_x', 'origin_x', 'depart',
    'origin_y', 'destination_y', 'parent_tour_id', 'atwork_subtour_frequency',
    'household_id_y', 'TAZ_x', 'home_x', 'home_y', 'male', 'female',
    'has_driving_kid', 'has_full_time', 'has_part_time', 'has_university',
    'student_is_employed', 'nonstudent_to_school', 'is_student',
    'is_gradeschool', 'is_highschool', 'is_university', 'school_segment',
    'is_worker', 'home_taz', 'school_taz', 'distance_to_school',
    'roundtrip_auto_time_to_school', 'workplace_taz', 'distance_to_work',
    'workplace_in_cbd', 'work_taz_area_type', 'hh_race_of_head',
    'roundtrip_auto_time_to_work', 'work_auto_savings_ratio', 'cdap_activity',
    'travel_active', 'under16_not_at_school', 'has_preschool_kid_at_home',
    'block_id', 'gt2', 'hispanic_head', 'has_school_kid_at_home',
    'work_and_school_and_worker', 'age_of_head', 'race_of_head',
    'work_and_school_and_student', 'VEHICL', 'hh_children', 'hh_age_of_head',
    'num_workers', 'gt55', 'seniors', 'recent_mover', 'hh_workers',
    'hispanic_status_of_head', 'hh_seniors', 'hh_type', 'TAZ_y', 'HHT',
    'sample_rate', 'chunk_id', 'income_segment', 'num_non_workers',
    'num_drivers', 'num_adults', 'num_children', 'hh_work_auto_savings_ratio',
    'num_travel_active', 'num_travel_active_adults',
    'num_travel_active_preschoolers', 'num_travel_active_children',
    'num_travel_active_non_preschoolers', 'participates_in_jtf_model',
    'joint_tour_frequency', 'num_hh_joint_tours',
]
sf_stacked = sf_stacked.drop(columns=drop_batch_2)

In [13]:
# Remove the 'serialno' column.
sf_stacked = sf_stacked.drop(columns='serialno')

#### Stacked File

In [None]:
#stacked_tr_fr_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Stacked/"
#sf_stacked_tr_fr = pd.read_csv(stacked_tr_fr_path + 'sf_2018_stacked_tr_fr.csv')

In [14]:
# Order rows by IDMerged, then tripIndex, then lever_position, and renumber the index 0..n-1.
sf_stacked = sf_stacked.sort_values(
    by=['IDMerged', 'tripIndex', 'lever_position'],
    ignore_index=True,
)

In [15]:
# Move the scenario/lever columns and the trip logsum to column positions 2-5.
# All four columns are removed first and only then re-inserted, so the
# positions are counted against the frame without them.
leading_cols = ['scenario', 'lever', 'lever_position', 'logsum_trip_mode_AS_trips']
detached = [(col_name, sf_stacked.pop(col_name)) for col_name in leading_cols]
for position, (col_name, col_values) in enumerate(detached, start=2):
    sf_stacked.insert(position, col_name, col_values)

In [16]:
# Every row in this stack gets the same fleet-size lever position: 1.
sf_stacked['lever_position_fleetsize'] = 1

In [17]:
# Adding Price lever position
# One equality mask per recognised lever_position value; each mask is paired
# with the value it tests for, so matching rows keep their lever_position.
choices = [0, 0.25, 0.5, 1, 1.5, 2]
conditions = [sf_stacked['lever_position'] == lever_value for lever_value in choices]

In [18]:
# Rows whose lever_position matches none of the conditions get NaN.
sf_stacked['lever_position_price'] = np.select(
    condlist=conditions,
    choicelist=choices,
    default=np.nan,
)

In [None]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['logsum_trip_mode_AS_trips'].apply(lambda x: x.diff())

In [None]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr['logsum_trip_mode_AS_trips'] - sf_stacked_tr_fr.loc[sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['lever_position'].transform('idxmin'), 'logsum_trip_mode_AS_trips'].values

In [19]:
%%time
# Difference between each row's trip logsum and the logsum of the same trip
# (same IDMerged and tripIndex) in the row where lever_position == 1.0.
is_baseline_row = sf_stacked['lever_position'].eq(1.0)
trip_keys = [sf_stacked['IDMerged'], sf_stacked['tripIndex']]
baseline_logsum = (
    sf_stacked['logsum_trip_mode_AS_trips']
    .where(is_baseline_row)   # blank out every non-baseline row
    .groupby(trip_keys)
    .transform('first')       # first non-null value per trip, broadcast to all its rows
    .convert_dtypes()         # switches the series to a pandas NA-aware dtype
)
sf_stacked['logsum_relative_to_baseline'] = sf_stacked['logsum_trip_mode_AS_trips'] - baseline_logsum

CPU times: total: 3.91 s
Wall time: 3.8 s


In [20]:
# Difference between each row's duration_door_to_door and the value of the same
# trip (same IDMerged and tripIndex) in the row where lever_position == 1.0.
is_baseline_row = sf_stacked['lever_position'].eq(1.0)
trip_keys = [sf_stacked['IDMerged'], sf_stacked['tripIndex']]
baseline_duration = (
    sf_stacked['duration_door_to_door']
    .where(is_baseline_row)   # blank out every non-baseline row
    .groupby(trip_keys)
    .transform('first')       # first non-null value per trip, broadcast to all its rows
    .convert_dtypes()         # switches the series to a pandas NA-aware dtype
)
sf_stacked['door_to_door_time_relative_to_baseline'] = sf_stacked['duration_door_to_door'] - baseline_duration

In [None]:
# Move the two relative-to-baseline columns plus income and actPurpose to
# column positions 5-8. All four are removed before any is re-inserted.
front_cols = ['logsum_relative_to_baseline', 'door_to_door_time_relative_to_baseline',
              'income', 'actPurpose']
detached = [(col_name, sf_stacked.pop(col_name)) for col_name in front_cols]
for position, (col_name, col_values) in enumerate(detached, start=5):
    sf_stacked.insert(position, col_name, col_values)

In [21]:
# Drop unused columns
drop_batch_3 = [
    'sf_detached', 'destination_logsum_x', 'household_id_x',
    'destination_logsum_y', 'logsum_tours_mode_AS_tours', 'primary_purpose_y',
    'PNUM', 'workplace_location_logsum', 'work_auto_savings',
    'mandatory_tour_frequency',
]
sf_stacked = sf_stacked.drop(columns=drop_batch_3)

In [22]:
# Rename the trip logsum column to its "potential INEXUS" name (in place).
sf_stacked.rename(
    {"logsum_trip_mode_AS_trips": "logsum_trip_potential_INEXUS"},
    axis="columns",
    inplace=True,
)

In [None]:
# Move mode_choice_actual_BEAM to column position 10.
# pop() is evaluated as an argument, so the column is removed before insert() runs.
sf_stacked.insert(10, 'mode_choice_actual_BEAM', sf_stacked.pop('mode_choice_actual_BEAM'))

In [None]:
# (rows, columns) of the subset whose mode_choice_actual_6 is 'ride_hail_transit'.
sf_stacked[sf_stacked['mode_choice_actual_6'] == 'ride_hail_transit'].shape

In [None]:
# (rows, columns) of the subset with replanning_status greater than 0.
sf_stacked[sf_stacked['replanning_status'] > 0].shape

In [23]:
# Add the mandatory category column
# Rows whose actEndType and actStartType are both mandatory activities are
# tagged 'from_M_to_M'; every other row starts out as None.
mandatory = ['work', 'univ', 'school']
end_is_mandatory = sf_stacked['actEndType'].isin(mandatory)
start_is_mandatory = sf_stacked['actStartType'].isin(mandatory)
sf_stacked['mandatoryCat'] = np.where(end_is_mandatory & start_is_mandatory, 'from_M_to_M', None)

In [24]:
# Tag rows with actEndType 'Home' and a mandatory actStartType; other rows keep their current tag.
home_to_mandatory = sf_stacked['actEndType'].eq('Home') & sf_stacked['actStartType'].isin(mandatory)
sf_stacked['mandatoryCat'] = np.where(home_to_mandatory, 'from_H_to_M', sf_stacked['mandatoryCat'])

In [25]:
# Tag rows with a mandatory actEndType and actStartType "Home"; other rows keep their current tag.
mandatory_to_home = sf_stacked['actEndType'].isin(mandatory) & sf_stacked['actStartType'].eq("Home")
sf_stacked['mandatoryCat'] = np.where(mandatory_to_home, 'from_M_to_H', sf_stacked['mandatoryCat'])

In [26]:
# Tag rows whose actEndType and actStartType are both non-mandatory activities;
# other rows keep their current tag.
non_mandatory = ['othmaint', 'othdiscr', 'escort', 'eatout', 'social', 'shopping', 'atwork']
end_is_non_mandatory = sf_stacked['actEndType'].isin(non_mandatory)
start_is_non_mandatory = sf_stacked['actStartType'].isin(non_mandatory)
sf_stacked['mandatoryCat'] = np.where(
    end_is_non_mandatory & start_is_non_mandatory,
    'from_N_to_N',
    sf_stacked['mandatoryCat'],
)

In [None]:
# Move mandatoryCat to column position 11.
# pop() is evaluated as an argument, so the column is removed before insert() runs.
sf_stacked.insert(11, 'mandatoryCat', sf_stacked.pop('mandatoryCat'))

In [27]:
# Drop unused columns
drop_batch_4 = [
    'num_work_tours', 'num_joint_tours', 'non_mandatory_tour_frequency',
    'num_non_mand', 'num_escort_tours', 'num_eatout_tours', 'num_shop_tours',
    'num_maint_tours', 'num_discr_tours', 'num_social_tours',
    'num_non_escort_tours',
]
sf_stacked = sf_stacked.drop(columns=drop_batch_4)

In [None]:
# Distinct values present in mode_choice_actual_BEAM.
sf_stacked.mode_choice_actual_BEAM.unique()

In [None]:
# Distinct values present in trip_mode_AS_trips.
sf_stacked.trip_mode_AS_trips.unique()

In [28]:
# Rename to Potential INEXUS (only the capitalisation of "Potential" changes; done in place).
sf_stacked.rename(
    {"logsum_trip_potential_INEXUS": "logsum_trip_Potential_INEXUS"},
    axis="columns",
    inplace=True,
)

In [50]:
# # Mapping BEAM and ASIM modes
# conditions  = [(sf_stacked['mode_choice_actual_BEAM'] == 'walk'), 
#                (sf_stacked['mode_choice_actual_BEAM'] == 'bike'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
#                (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')]

# choices = [sf_stacked['WALK'], sf_stacked['BIKE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
#            sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'], 
#            sf_stacked['SHARED2PAY'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED2FREE'], 
#            sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['WALK_LRF'], 
#            sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'], sf_stacked['WALK_HVY'], sf_stacked['TNC_SINGLE'], 
#            sf_stacked['TNC_SHARED'], sf_stacked['TNC_SINGLE'], sf_stacked['TAXI'], sf_stacked['WALK_COM'], 
#            sf_stacked['DRIVE_COM'], sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'],
#            sf_stacked['DRIVE_HVY']]

In [51]:
#sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Realized_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(5, 'Realized_INEXUS', first_column)

In [57]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

In [None]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].head(10000).to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

In [None]:
#%%time
#sf_stacked_tr_fr.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_tr_fr_v3.csv', index=False)

In [58]:
# Keep only rows that have a trip_mode_AS_trips value; row labels are left as they are.
sf_stacked = sf_stacked.dropna(subset=['trip_mode_AS_trips'])

In [67]:
# Remove any Realized_INEXUS column left over from a previous run so it can be rebuilt.
# errors='ignore' keeps this cell working on a fresh kernel (Restart & Run All),
# where no earlier active cell has created the column yet.
sf_stacked = sf_stacked.drop(columns=['Realized_INEXUS'], errors='ignore')

In [68]:
# Mapping BEAM and ASIM modes
#
# Builds `conditions` (boolean masks) and `choices` (columns of sf_stacked) for
# np.select. Each rule says: when mode_choice_actual_BEAM / trip_mode_AS_trips
# (and, for some auto rules, cost_BEAM) match, take the value from the column
# named after an ActivitySim mode (presumably that mode's utility - TODO confirm).
#
# Rules are kept as (mask, column name) pairs so that a condition and its choice
# cannot drift out of alignment. No two rules can match the same row, so their
# order does not affect the result.
beam_mode = sf_stacked['mode_choice_actual_BEAM']
asim_mode = sf_stacked['trip_mode_AS_trips']
beam_cost = sf_stacked['cost_BEAM']

# cost_BEAM above this value selects the <prefix>PAY column, otherwise <prefix>FREE.
# Rows with a missing cost_BEAM match neither side and stay unmatched.
# NOTE(review): the unit and origin of 2.5 are not visible in this notebook - confirm.
PAY_COST_THRESHOLD = 2.5

# BEAM auto mode -> prefix of the column pair (<prefix>FREE / <prefix>PAY) used for it.
BEAM_AUTO_PREFIX = {
    'car': 'DRIVEALONE',
    'car_hov2': 'SHARED2',
    'car_hov3': 'SHARED3',
    'hov2_teleportation': 'SHARED2',
    'hov3_teleportation': 'SHARED3',
}
ASIM_AUTO_PREFIXES = ('DRIVEALONE', 'SHARED2', 'SHARED3')

WALK_TRANSIT_MODES = ['WALK_LRF', 'WALK_LOC', 'WALK_EXP', 'WALK_HVY', 'WALK_COM']
DRIVE_TRANSIT_MODES = ['DRIVE_COM', 'DRIVE_LRF', 'DRIVE_LOC', 'DRIVE_EXP', 'DRIVE_HVY']
RIDE_HAIL_MODES = ['TNC_SINGLE', 'TNC_SHARED', 'TAXI']
# Every ActivitySim mode that is not one of the six auto modes.
NON_AUTO_ASIM_MODES = WALK_TRANSIT_MODES + DRIVE_TRANSIT_MODES + ['WALK', 'BIKE'] + RIDE_HAIL_MODES

# BEAM modes whose rule simply takes the column named like the ActivitySim mode.
SAME_NAME_ASIM_MODES = {
    'walk_transit': WALK_TRANSIT_MODES,
    'drive_transit': DRIVE_TRANSIT_MODES,
    'ride_hail': RIDE_HAIL_MODES,
    'ride_hail_transit': DRIVE_TRANSIT_MODES + WALK_TRANSIT_MODES,
}

mode_rules = []  # list of (boolean mask, column name)

# BEAM walk / bike: matched on the BEAM mode alone.
mode_rules.append((beam_mode == 'walk', 'WALK'))
mode_rules.append((beam_mode == 'bike', 'BIKE'))

asim_is_non_auto = asim_mode.isin(NON_AUTO_ASIM_MODES)
for beam_auto, column_prefix in BEAM_AUTO_PREFIX.items():
    is_beam_auto = beam_mode == beam_auto

    # ActivitySim auto mode: keep its FREE/PAY suffix, but use the column
    # family that belongs to the BEAM mode.
    for asim_prefix in ASIM_AUTO_PREFIXES:
        for toll_suffix in ('FREE', 'PAY'):
            mode_rules.append((is_beam_auto & (asim_mode == asim_prefix + toll_suffix),
                               column_prefix + toll_suffix))

    if beam_auto.endswith('_teleportation'):
        # Teleported HOV trips with ActivitySim mode WALK use the FREE column.
        mode_rules.append((is_beam_auto & (asim_mode == 'WALK'), column_prefix + 'FREE'))
    else:
        # car / car_hov2 / car_hov3 with a non-auto ActivitySim mode: cost_BEAM decides PAY vs FREE.
        mode_rules.append((is_beam_auto & asim_is_non_auto & (beam_cost > PAY_COST_THRESHOLD),
                           column_prefix + 'PAY'))
        mode_rules.append((is_beam_auto & asim_is_non_auto & (beam_cost <= PAY_COST_THRESHOLD),
                           column_prefix + 'FREE'))

for beam_name, asim_names in SAME_NAME_ASIM_MODES.items():
    is_beam_name = beam_mode == beam_name
    for asim_name in asim_names:
        mode_rules.append((is_beam_name & (asim_mode == asim_name), asim_name))

# Every ride_hail_pooled trip maps to TNC_SHARED. A separate rule restricted to
# rows with ride_hail_pooled > 1 would select the same column, so it is not needed.
mode_rules.append((beam_mode == 'ride_hail_pooled', 'TNC_SHARED'))

conditions = [rule_mask for rule_mask, _rule_column in mode_rules]
choices = [sf_stacked[rule_column] for _rule_mask, rule_column in mode_rules]

In [69]:
# For each row take the value of the first matching rule; rows that match no rule get NaN.
sf_stacked['Realized_INEXUS'] = np.select(
    condlist=conditions,
    choicelist=choices,
    default=np.nan,
)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Realized_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(5, 'Realized_INEXUS', first_column)

In [70]:
# Ride-hail trips that still have no realized value, and whose passenger-count
# string is one of the listed patterns, take the TNC_SINGLE value; every other
# row keeps whatever it already has.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].isna()
    & (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')
    & sf_stacked['numPassengers'].isin([
        '1.0, 1.0, 1.0',
        '1.0, 1.0, 1.0, 1.0',
        '1.0, 1.0, 1.0, 1.0, 1.0',
        '1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0',
        '1.0, 8.0, 8.0, 7.0, 1.0, 1.0, 1.0, 1.0',
        '1.0, 16.0, 1.0, 1.0, 1.0, 1.0',
        '1.0, 10.0, 11.0, 1.0, 1.0, 1.0, 1.0',
    ]),
    sf_stacked['TNC_SINGLE'],
    sf_stacked['Realized_INEXUS'],
)

In [71]:
# Rows that already have a value, or that are not hov2_teleportation trips,
# are left untouched; the remaining rows take the larger of the two SHARED2 values.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'hov2_teleportation'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked[["SHARED2FREE", "SHARED2PAY"]].max(axis=1),
)

In [72]:
# Rows that already have a value, or that are not hov3_teleportation trips,
# are left untouched; the remaining rows take the larger of the two SHARED3 values.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'hov3_teleportation'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked[["SHARED3FREE", "SHARED3PAY"]].max(axis=1),
)

In [73]:
# Rows that already have a value, or that are not walk_transit trips, are left
# untouched; the remaining rows take the row-wise maximum of the WALK_* columns.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'walk_transit'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked[["WALK_COM", "WALK_LRF", "WALK_LOC", "WALK_EXP", "WALK_HVY"]].max(axis=1),
)

In [74]:
# Rows that already have a value, or that are not drive_transit trips, are left
# untouched; the remaining rows take the row-wise maximum of the DRIVE_* columns.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'drive_transit'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked[["DRIVE_COM", "DRIVE_LRF", "DRIVE_LOC", "DRIVE_EXP", "DRIVE_HVY"]].max(axis=1),
)

In [75]:
# Only rows that are still NaN, were realized as bike_transit, and whose
# trip_mode_AS_trips is BIKE receive the BIKE value; all others keep theirs.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'bike_transit')
    | (sf_stacked['trip_mode_AS_trips'] != 'BIKE'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked['BIKE'],
)

In [76]:
# NOTE(review): this statement is identical to the one in the previous cell
# (same mask, same BIKE source column). Run after it, no row can still satisfy
# the mask, so it changes nothing. Confirm whether a different mode or column
# was intended here.
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'bike_transit')&
                                               (sf_stacked['trip_mode_AS_trips'] == 'BIKE'), 
                                                sf_stacked['BIKE'], sf_stacked['Realized_INEXUS']) 

In [77]:
# Catch-all for ride_hail: any ride_hail row that is still NaN takes the
# TNC_SINGLE value; every other row keeps its current value.
sf_stacked['Realized_INEXUS'] = np.where(
    sf_stacked['Realized_INEXUS'].notna()
    | (sf_stacked['mode_choice_actual_BEAM'] != 'ride_hail'),
    sf_stacked['Realized_INEXUS'],
    sf_stacked['TNC_SINGLE'],
)

In [78]:
# (rows, columns) of the trips whose Realized_INEXUS is still NaN after all fallback cells.
sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

(3303, 121)

In [None]:
#sf_stacked_tr_fr['Realized_INEXUS'] = np.where((sf_stacked_tr_fr['Realized_INEXUS'].isna())&
                                               #(sf_stacked_tr_fr['mode_choice_actual_BEAM'] == 'walk_transit')&
                                               #(sf_stacked_tr_fr['vehicleIds'].str.contains('(caltrain)')), 
                                               #sf_stacked_tr_fr['WALK_COM'], sf_stacked_tr_fr['Realized_INEXUS'])

In [79]:
# Keep only the trips that received a Realized_INEXUS value.
# The explicit .copy() makes the filtered result an independent frame: later
# cells add columns to `sf_stacked`, and without the copy pandas treats those
# writes as assignments on a slice of the unfiltered frame and emits
# SettingWithCopyWarning.
sf_stacked = sf_stacked[sf_stacked['Realized_INEXUS'].notna()].copy()

In [None]:
#sf_stacked_tr_fr[sf_stacked_tr_fr['Realized_INEXUS'].isna()].to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

#### Social INEXUS

In [80]:
# Income at the minimum, the three quartile cut points and the maximum,
# computed on the rows that remain after dropping trips without Realized_INEXUS.
sf_stacked.income.quantile([0, 0.25,0.5,0.75, 1.00])

0.000000    -14600.000000
0.250000     50000.000000
0.500000     94000.000000
0.750000    155000.000000
1.000000   1354000.000000
Name: income, dtype: float64

In [81]:
# Income values at the 0th, 25th, 50th, 75th and 100th percentiles as a plain
# Python list: [min, Q1, median, Q3, max]. These are the quartile bin edges.
quantiles = sf_stacked['income'].quantile([0,.25, .5, .75,1]).tolist()

In [82]:
# Display the five income cut points computed in the previous cell.
quantiles

[-14600.0, 50000.0, 94000.0, 155000.0, 1354000.0]

In [None]:
# add a lower and upper range for the bins in pd.cut
#quartiles = [0] + quartiles + [float('inf')]

In [None]:
# add a quartiles column to groups, using pd.cut
#sf_stacked['income_quartiles'] = pd.cut(sf_stacked['income'], bins=quartiles, labels=['1stQ', '2ndQ', '3rdQ', '4thQ'])

In [83]:
# Initialise the quartile label: incomes in [min, Q1) are '1stQ'; every other
# row (including any row whose income is NaN, since both comparisons are then
# False) starts as '4thQ' and is relabelled by the following two cells.
# The bin edges are read from `quantiles` (computed above) instead of being
# typed in by hand, so they stay correct when the input data changes.
sf_stacked['income_quartiles'] = np.where(
    (sf_stacked['income'] >= quantiles[0]) & (sf_stacked['income'] < quantiles[1]),
    '1stQ',
    '4thQ',
)

In [84]:
# Relabel incomes in [Q1, median) as '2ndQ'; other rows keep their current label.
# The bin edges are read from `quantiles` (computed above) instead of being
# typed in by hand, so they stay correct when the input data changes.
sf_stacked['income_quartiles'] = np.where(
    (sf_stacked['income'] >= quantiles[1]) & (sf_stacked['income'] < quantiles[2]),
    '2ndQ',
    sf_stacked['income_quartiles'],
)

In [85]:
# Relabel incomes in [median, Q3) as '3rdQ'; other rows keep their current label.
# The bin edges are read from `quantiles` (computed above) instead of being
# typed in by hand, so they stay correct when the input data changes.
sf_stacked['income_quartiles'] = np.where(
    (sf_stacked['income'] >= quantiles[2]) & (sf_stacked['income'] < quantiles[3]),
    '3rdQ',
    sf_stacked['income_quartiles'],
)

In [86]:
# Median income within each income-quartile label.
m = sf_stacked['income'].groupby(sf_stacked['income_quartiles']).median()

In [87]:
# Display the per-quartile median income computed in the previous cell.
m

income_quartiles
1stQ    28900.000000
2ndQ    70000.000000
3rdQ   120000.000000
4thQ   217200.000000
Name: income, dtype: float64

In [88]:
# Map each income-quartile label to the median income of that quartile.
conditions  = [(sf_stacked['income_quartiles'] == '1stQ'), 
               (sf_stacked['income_quartiles'] == '2ndQ'),
               (sf_stacked['income_quartiles'] == '3rdQ'),
               (sf_stacked['income_quartiles'] == '4thQ')]

# Take the medians from `m` (computed from the data above) rather than typing
# them in: the previous literals (29300, 70000, 120000, 219000) did not match
# the medians this dataset actually produces (28900, 70000, 120000, 217200).
# reindex() keeps the order aligned with `conditions` and yields NaN for a
# label that has no rows instead of raising.
choices = m.reindex(['1stQ', '2ndQ', '3rdQ', '4thQ']).tolist()

In [89]:
# Attach to every row the value from `choices` that corresponds to its
# income_quartiles label; rows matching none of the labels get NaN.
sf_stacked['MedianQuartiles'] = np.select(conditions, choices, default=np.nan)

In [90]:
# Constant stored on every row; the following cells divide INEXUS values by it
# to build the Social_INEXUS and "*_in_dollar" columns.
# NOTE(review): 0.1 is not derived anywhere in this notebook — confirm its source.
sf_stacked['alpha'] = 0.1

In [91]:
# Per-row carbon cost: emission_marginal multiplied by a unit cost of 185.
# NOTE(review): the trailing comment lists 51 and 102, presumably alternative
# unit costs that were tried; the units of emission_marginal are not visible
# here — confirm they match the unit the 185 figure is expressed in.
sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal']*185 # 51 or 102

In [92]:
# Social INEXUS = realized INEXUS scaled by 1/alpha, minus the per-row carbon cost.
sf_stacked['Social_INEXUS'] = (
    sf_stacked['Realized_INEXUS']
    .div(sf_stacked['alpha'])
    .sub(sf_stacked['socialCarbonCost'])
)

In [93]:
# Potential INEXUS (the trip logsum) scaled by 1/alpha.
sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS'].div(sf_stacked['alpha'])

In [94]:
# Realized INEXUS scaled by 1/alpha.
sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS'].div(sf_stacked['alpha'])

#### The following chunks are useful but should be checked in the next runs (they resulted in exactly the same realized and potential INEXUS in dollars)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Social_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(6, 'Social_INEXUS', first_column)

In [None]:
# Move the two "*_in_dollar" columns to positions 7 and 8 (currently disabled).
#first_column = sf_stacked.pop('Potential_INEXUS_in_dollar')
#second_column = sf_stacked.pop('Realized_INEXUS_in_dollar')
# insert each popped column back using insert(position, column_name, column)
#sf_stacked.insert(7, 'Potential_INEXUS_in_dollar', first_column)
# NOTE(review): this line previously re-inserted first_column (the Potential
# values) under the Realized name, which makes the two dollar columns
# identical — likely the cause of the issue mentioned in the heading above.
#sf_stacked.insert(8, 'Realized_INEXUS_in_dollar', second_column)

In [95]:
%%time
# Write the final stacked frame to S3 as CSV, omitting the DataFrame index.
# NOTE(review): pandas relies on an fsspec S3 backend (e.g. s3fs) and valid AWS
# credentials for s3:// paths — confirm both are available in the run environment.
sf_stacked.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_rh_price_10_8_22.csv', index=False)

CPU times: total: 20min 31s
Wall time: 27min 39s
