In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
sns.set_theme(style="darkgrid", color_codes=True)
sns.set(font_scale=1.35, style="ticks") #set styling preferences
import statsmodels.api as sm
from scipy import stats
import math
from math import pi
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from scipy.spatial.distance import cdist
from scipy.cluster.vq import kmeans2,vq, whiten

In [2]:
# Show all columns and rows
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### Baseline

In [4]:
%%time
baseline2018 = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Baseline/"
sfbase = pd.read_csv(baseline2018 + 'sf_2018_base_8_22_22.csv')



CPU times: total: 1min 7s
Wall time: 3min 19s


In [5]:
# Showing the entire number in dataframe
pd.set_option('float_format', '{:f}'.format)

In [11]:
#describe = sfbase.describe()

In [12]:
#describe.to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/describe.csv')

#### Ridehail Scenarios

In [13]:
%%time
rh_flsz_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/RideHail_FleetSize/"
sf_rh_flsz_0125 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_0p125.csv')
sf_rh_flsz_025 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_0p25.csv')
sf_rh_flsz_05 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_0p5.csv')
sf_rh_flsz_1p75 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_1p75.csv')
sf_rh_flsz_2p25 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_2p25.csv')
sf_rh_flsz_3 = pd.read_csv(rh_flsz_path + 'sf_2018_rh_flsz_3.csv')



CPU times: total: 8min 19s
Wall time: 21min 52s


In [14]:
sfbase.shape

(2468267, 257)

In [15]:
sf_rh_flsz_025.shape

(2468275, 257)

In [17]:
sf_rh_flsz_1p75.shape

(2468273, 257)

In [17]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[245:625], axis=1)

In [20]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[235:245], axis=1)

In [23]:
#sf_rh_flsz_175 = sf_rh_flsz_175.drop(sf_rh_flsz_175.columns[233:235], axis=1)

In [18]:
frames = [sf_rh_flsz_0125, sf_rh_flsz_025, sf_rh_flsz_05, sfbase, sf_rh_flsz_1p75, sf_rh_flsz_2p25, sf_rh_flsz_3]
sf_stacked = pd.concat(frames)

In [19]:
sf_stacked = sf_stacked.drop(['BlockGroupEnd','BlockGroupStart','has_school_kid','has_preschool_kid','has_retiree','has_non_worker','adult','hispanic','relate',
                             'pstudent','pemploy','ptype','p_hispanic','age_16_p','age_16_to_19','work_zone_id',
                             'race_id','school_zone_id','person_age','MAR','hours','hispanic.1','student',
                             'worker','stop_frequency','composition','tdd','number_of_participants','tour_type_count',
                             'tour_type_num','tour_num','tour_count', 'outbound', 'trip_num','num_under16_not_at_school',
                             'family','non_family','num_young_adults','num_college_age','num_children_16_to_17',
                              'num_children_5_to_15','num_young_children'], axis=1)

In [21]:
sf_stacked = sf_stacked.drop(['vehicleIds_estimate','destination_x','origin_x','depart',
                             'origin_y', 'destination_y','parent_tour_id','atwork_subtour_frequency','household_id_y',
                              'TAZ_x','home_x','home_y','male','female', 'has_driving_kid','has_full_time','has_part_time',
                              'has_university','student_is_employed', 'nonstudent_to_school', 'is_student','is_gradeschool',
                              'is_gradeschool','is_highschool','is_university','school_segment','is_worker','home_taz',  
                             'school_taz','distance_to_school','roundtrip_auto_time_to_school','workplace_taz',
                              'distance_to_work','workplace_in_cbd', 'work_taz_area_type', 'hh_race_of_head',
                             'roundtrip_auto_time_to_work','work_auto_savings_ratio','cdap_activity', 'travel_active', 
                              'under16_not_at_school','has_preschool_kid_at_home', 'block_id','gt2','hispanic_head',
                             'has_school_kid_at_home','work_and_school_and_worker', 'age_of_head', 'race_of_head',
                              'work_and_school_and_student', 'VEHICL','hh_children','hh_age_of_head','num_workers',
                              'gt55','seniors', 'recent_mover', 'hh_workers', 'hispanic_status_of_head', 'hh_seniors', 
                             'hh_type', 'TAZ_y', 'HHT', 'sample_rate', 'chunk_id', 'income_segment', 'num_non_workers', 
                             'num_drivers', 'num_adults', 'num_children', 'hh_work_auto_savings_ratio', 'num_travel_active', 
                             'num_travel_active_adults','num_travel_active_preschoolers', 'num_travel_active_children', 
                              'num_travel_active_non_preschoolers', 'participates_in_jtf_model', 'joint_tour_frequency',
                              'num_hh_joint_tours'], axis=1)

In [22]:
sf_stacked = sf_stacked.drop(['serialno'], axis=1)

#### Stacked File

In [25]:
#stacked_tr_fr_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Stacked/"
#sf_stacked_tr_fr = pd.read_csv(stacked_tr_fr_path + 'sf_2018_stacked_tr_fr.csv')

In [27]:
sf_stacked = sf_stacked.sort_values(by=['IDMerged', 'tripIndex', 'lever_position']).reset_index(drop=True)

In [28]:
# shift column 'person' to first position
first_column = sf_stacked.pop('scenario')
second_column = sf_stacked.pop('lever')
third_column = sf_stacked.pop('lever_position')
fourth_column = sf_stacked.pop('logsum_trip_mode_AS_trips')
# insert column using insert(position,column_name,first_column) function
sf_stacked.insert(2, 'scenario', first_column)
sf_stacked.insert(3, 'lever', second_column)
sf_stacked.insert(4, 'lever_position', third_column)
sf_stacked.insert(5, 'logsum_trip_mode_AS_trips', fourth_column)

In [29]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['logsum_trip_mode_AS_trips'].apply(lambda x: x.diff())

In [30]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr['logsum_trip_mode_AS_trips'] - sf_stacked_tr_fr.loc[sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['lever_position'].transform('idxmin'), 'logsum_trip_mode_AS_trips'].values

In [31]:
%%time
sf_stacked['logsum_relative_to_baseline'] = sf_stacked['logsum_trip_mode_AS_trips'] - (sf_stacked['logsum_trip_mode_AS_trips'].where(sf_stacked['lever_position'].eq(1.0))
                                                                                                  .groupby([sf_stacked['IDMerged'],sf_stacked['tripIndex']])
                                                                                                   .transform('first')
                                                                                                   .convert_dtypes())

CPU times: total: 4.66 s
Wall time: 4.44 s


In [34]:
sf_stacked['door_to_door_time_relative_to_baseline'] = sf_stacked['duration_door_to_door'] - (sf_stacked['duration_door_to_door'].where(sf_stacked['lever_position'].eq(1.0))
                                                                                                  .groupby([sf_stacked['IDMerged'],sf_stacked['tripIndex']])
                                                                                                   .transform('first')
                                                                                                   .convert_dtypes())

In [35]:
# shift columns to first position
first_column = sf_stacked.pop('logsum_relative_to_baseline')
second_column = sf_stacked.pop('door_to_door_time_relative_to_baseline')
third_column = sf_stacked.pop('income')
fourth_column = sf_stacked.pop('actPurpose')
# insert column using insert(position,column_name,first_column) function
sf_stacked.insert(5, 'logsum_relative_to_baseline', first_column)
sf_stacked.insert(6, 'door_to_door_time_relative_to_baseline', second_column)
sf_stacked.insert(7, 'income', third_column)
sf_stacked.insert(8, 'actPurpose', fourth_column)

In [38]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['sf_detached','destination_logsum_x',
                                          'household_id_x','destination_logsum_y','logsum_tours_mode_AS_tours','primary_purpose_y',
                                          'PNUM','workplace_location_logsum','work_auto_savings','mandatory_tour_frequency'], axis=1)

In [39]:
# Rename to potential INEXUS
sf_stacked.rename(columns={"logsum_trip_mode_AS_trips":"logsum_trip_potential_INEXUS"}, inplace=True) 

In [40]:
# shift columns to first position
#first_column = sf_stacked_tr_fr.pop('mode_choice_actual_6')
second_column = sf_stacked.pop('mode_choice_actual_BEAM')
#third_column = sf_stacked_tr_fr.pop('income')
#fourth_column = sf_stacked_tr_fr.pop('actPurpose')
# insert column using insert(position,column_name,first_column) function
#sf_stacked_tr_fr.insert(9, 'mode_choice_actual_6', first_column)
sf_stacked.insert(10, 'mode_choice_actual_BEAM', second_column)
#sf_stacked_tr_fr.insert(11, 'income', third_column)
#sf_stacked_tr_fr.insert(12, 'actPurpose', fourth_column)

In [41]:
sf_stacked[sf_stacked['mode_choice_actual_6'] == 'ride_hail_transit'].shape

(3405, 128)

In [42]:
sf_stacked[sf_stacked['replanning_status'] > 0].shape

(74806, 128)

In [43]:
# Add the mandatory category column
mandatory = ['work' , 'univ', 'school']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType.isin(mandatory)), 'from_M_to_M' , None)

In [44]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType == 'Home') & (sf_stacked.actStartType.isin(mandatory)), 'from_H_to_M' , sf_stacked['mandatoryCat'])

In [45]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType == "Home"), 'from_M_to_H' , sf_stacked['mandatoryCat'])

In [46]:
non_mandatory = ['othmaint' , 'othdiscr', 'escort', 'eatout', 'social', 'shopping', 'atwork']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(non_mandatory)) & (sf_stacked.actStartType.isin(non_mandatory)), 'from_N_to_N' , sf_stacked['mandatoryCat'])

In [47]:
# shift columns to first position
first_column = sf_stacked.pop('mandatoryCat')
# insert column using insert(position,column_name,first_column) function
sf_stacked.insert(11, 'mandatoryCat', first_column)

In [48]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['num_work_tours','num_joint_tours', 'non_mandatory_tour_frequency',
                                          'num_non_mand','num_escort_tours','num_eatout_tours','num_shop_tours',
                                          'num_maint_tours','num_discr_tours','num_social_tours','num_non_escort_tours'], axis=1)

In [49]:
sf_stacked.head()

Unnamed: 0,IDMerged,tripIndex,scenario,lever,lever_position,logsum_relative_to_baseline,door_to_door_time_relative_to_baseline,income,actPurpose,logsum_trip_potential_INEXUS,mode_choice_actual_BEAM,mandatoryCat,actEndTime,actEndType,actStartTime,actStartType,cost_BEAM,distance_bike,distance_mode_choice,distance_privateCar,distance_ridehail,distance_transit,distance_travelling,distance_walking,duration_in_privateCar,duration_in_ridehail,duration_in_transit,duration_on_bike,duration_travelling,duration_walking,emissionBiodiesel,emissionDiesel,emissionElectricity,emissionFood,emissionGasoline,emission_marginal,fuelBiodiesel,fuelDiesel,fuelElectricity,fuelFood,fuelGasoline,fuel_marginal,fuel_not_Food,mode_choice_planned_BEAM,numPassengers,parkingType,reason,replanning_status,ride_hail_pooled,transit_bus,transit_cable_car,transit_rail,transit_subway,transit_tram,vehicleIds,year,duration_door_to_door,waitTime,mode_choice_actual_5,mode_choice_actual_6,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_count,purpose,trip_mode_AS_trips,tour_type,tour_category,start,end,duration,tour_mode_AS_tours,earning,person_sex,sex,edu,race,work_at_home,age,value_of_time,free_parking_at_work,num_mand,lcm_county_id,tenure_mover,hh_size,tenure,hh_cars,hh_income,hhsize,income_in_thousands,median_value_of_time,hh_value_of_time,home_is_urban,home_is_rural,auto_ownership,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED
0,1,577.0,ridehail,fleetsize,0.125,0.497253,-26.0,240000.0,Home_to_othmaint,-1.880066,hov2_teleportation,,30009.0,Home,30178.0,othmaint,0.0,0.0,4310.558,4310.558,0.0,0.0,4310.558,0.0,169.0,0.0,0.0,0.0,169.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,169.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.125436,-999.125825,-2.05797,-2.058193,-999.124084,-999.12424,-11.61792,-1000.983135,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.968636,-7.847364,-8.318563
1,1,577.0,ridehail,fleetsize,0.25,0.53109,60.0,240000.0,Home_to_othmaint,-1.846229,hov2_teleportation,,29160.0,Home,29415.0,othmaint,0.0,0.0,5762.683,5762.683,0.0,0.0,5762.683,0.0,255.0,0.0,0.0,0.0,255.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,255.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.091716,-999.092017,-2.02425,-2.024422,-999.090364,-999.090484,-15.676167,-1001.38896,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.821755,-7.822435,-8.256225
2,1,577.0,ridehail,fleetsize,0.5,0.301336,-16.0,240000.0,Home_to_othmaint,-2.075982,hov2_teleportation,,29065.0,Home,29244.0,othmaint,0.0,0.0,4664.36,4664.36,0.0,0.0,4664.36,0.0,179.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,179.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.321161,-999.321484,-2.253695,-2.253879,-999.319809,-999.319938,-11.28493,-1000.949836,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.214544,-8.124605,-8.678848
3,1,577.0,baseline,default,1.0,0.0,0.0,240000.0,Home_to_othmaint,-2.377319,hov2_teleportation,,31052.0,Home,31247.0,othmaint,0.0,0.0,4940.606,4940.606,0.0,0.0,4940.606,0.0,195.0,0.0,0.0,0.0,195.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,195.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.621919,-999.622345,-2.554453,-2.554696,-999.620566,-999.620737,-38.227666,-1007.088933,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.625908,-8.611274,-9.231232
4,1,577.0,ridehail,fleetsize,1.75,0.481759,-7.0,240000.0,Home_to_othmaint,-1.89556,hov2_teleportation,,30819.0,Home,31007.0,othmaint,0.0,0.0,4722.849,4722.849,0.0,0.0,4722.849,0.0,188.0,0.0,0.0,0.0,188.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,188.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.140845,-999.141285,-2.073379,-2.073631,-999.139493,-999.139669,-13.530149,-1001.174358,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.028935,-7.838608,-8.360657


In [50]:
sf_stacked.mode_choice_actual_BEAM.unique()

array(['hov2_teleportation', 'car', 'hov3_teleportation', 'walk',
       'car_hov2', 'walk_transit', 'drive_transit', 'ride_hail_pooled',
       'car_hov3', 'ride_hail', 'bike', 'ride_hail_transit',
       'bike_transit'], dtype=object)

In [51]:
sf_stacked.trip_mode_AS_trips.unique()

array(['SHARED2FREE', 'DRIVEALONEFREE', 'DRIVEALONEPAY', 'SHARED2PAY',
       'SHARED3PAY', 'SHARED3FREE', 'WALK', 'WALK_LOC', 'TAXI',
       'DRIVE_HVY', 'TNC_SHARED', 'TNC_SINGLE', 'WALK_LRF', 'WALK_HVY',
       'BIKE', 'WALK_COM', 'DRIVE_LRF', 'DRIVE_LOC', 'DRIVE_COM'],
      dtype=object)

In [52]:
# Rename to Potential INEXUS
sf_stacked.rename(columns={"logsum_trip_potential_INEXUS":"logsum_trip_Potential_INEXUS"}, inplace=True) 

In [53]:
# Mapping BEAM and ASIM modes
conditions  = [(sf_stacked['mode_choice_actual_BEAM'] == 'walk'), 
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')]

choices = [sf_stacked['WALK'], sf_stacked['BIKE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['SHARED2PAY'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['WALK_LRF'], 
           sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'], sf_stacked['WALK_HVY'], sf_stacked['TNC_SINGLE'], 
           sf_stacked['TNC_SHARED'], sf_stacked['TNC_SINGLE'], sf_stacked['TAXI'], sf_stacked['WALK_COM'], 
           sf_stacked['DRIVE_COM'], sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'],
           sf_stacked['DRIVE_HVY']]

In [54]:
sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

In [55]:
# shift columns to first position
first_column = sf_stacked.pop('Realized_INEXUS')
# insert column using insert(position,column_name,first_column) function
sf_stacked.insert(5, 'Realized_INEXUS', first_column)

In [56]:
sf_stacked.head()

Unnamed: 0,IDMerged,tripIndex,scenario,lever,lever_position,Realized_INEXUS,logsum_relative_to_baseline,door_to_door_time_relative_to_baseline,income,actPurpose,logsum_trip_Potential_INEXUS,mode_choice_actual_BEAM,mandatoryCat,actEndTime,actEndType,actStartTime,actStartType,cost_BEAM,distance_bike,distance_mode_choice,distance_privateCar,distance_ridehail,distance_transit,distance_travelling,distance_walking,duration_in_privateCar,duration_in_ridehail,duration_in_transit,duration_on_bike,duration_travelling,duration_walking,emissionBiodiesel,emissionDiesel,emissionElectricity,emissionFood,emissionGasoline,emission_marginal,fuelBiodiesel,fuelDiesel,fuelElectricity,fuelFood,fuelGasoline,fuel_marginal,fuel_not_Food,mode_choice_planned_BEAM,numPassengers,parkingType,reason,replanning_status,ride_hail_pooled,transit_bus,transit_cable_car,transit_rail,transit_subway,transit_tram,vehicleIds,year,duration_door_to_door,waitTime,mode_choice_actual_5,mode_choice_actual_6,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_count,purpose,trip_mode_AS_trips,tour_type,tour_category,start,end,duration,tour_mode_AS_tours,earning,person_sex,sex,edu,race,work_at_home,age,value_of_time,free_parking_at_work,num_mand,lcm_county_id,tenure_mover,hh_size,tenure,hh_cars,hh_income,hhsize,income_in_thousands,median_value_of_time,hh_value_of_time,home_is_urban,home_is_rural,auto_ownership,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED
0,1,577.0,ridehail,fleetsize,0.125,-2.05797,0.497253,-26.0,240000.0,Home_to_othmaint,-1.880066,hov2_teleportation,,30009.0,Home,30178.0,othmaint,0.0,0.0,4310.558,4310.558,0.0,0.0,4310.558,0.0,169.0,0.0,0.0,0.0,169.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,169.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.125436,-999.125825,-2.05797,-2.058193,-999.124084,-999.12424,-11.61792,-1000.983135,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.968636,-7.847364,-8.318563
1,1,577.0,ridehail,fleetsize,0.25,-2.02425,0.53109,60.0,240000.0,Home_to_othmaint,-1.846229,hov2_teleportation,,29160.0,Home,29415.0,othmaint,0.0,0.0,5762.683,5762.683,0.0,0.0,5762.683,0.0,255.0,0.0,0.0,0.0,255.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,255.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.091716,-999.092017,-2.02425,-2.024422,-999.090364,-999.090484,-15.676167,-1001.38896,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.821755,-7.822435,-8.256225
2,1,577.0,ridehail,fleetsize,0.5,-2.253695,0.301336,-16.0,240000.0,Home_to_othmaint,-2.075982,hov2_teleportation,,29065.0,Home,29244.0,othmaint,0.0,0.0,4664.36,4664.36,0.0,0.0,4664.36,0.0,179.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,179.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.321161,-999.321484,-2.253695,-2.253879,-999.319809,-999.319938,-11.28493,-1000.949836,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.214544,-8.124605,-8.678848
3,1,577.0,baseline,default,1.0,-2.554453,0.0,0.0,240000.0,Home_to_othmaint,-2.377319,hov2_teleportation,,31052.0,Home,31247.0,othmaint,0.0,0.0,4940.606,4940.606,0.0,0.0,4940.606,0.0,195.0,0.0,0.0,0.0,195.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,195.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.621919,-999.622345,-2.554453,-2.554696,-999.620566,-999.620737,-38.227666,-1007.088933,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.625908,-8.611274,-9.231232
4,1,577.0,ridehail,fleetsize,1.75,-2.073379,0.481759,-7.0,240000.0,Home_to_othmaint,-1.89556,hov2_teleportation,,30819.0,Home,31007.0,othmaint,0.0,0.0,4722.849,4722.849,0.0,0.0,4722.849,0.0,188.0,0.0,0.0,0.0,188.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,188.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.140845,-999.141285,-2.073379,-2.073631,-999.139493,-999.139669,-13.530149,-1001.174358,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.028935,-7.838608,-8.360657


In [58]:
sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

(3216090, 119)

In [27]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].head(10000).to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

In [59]:
#%%time
#sf_stacked_tr_fr.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_tr_fr_v3.csv', index=False)

In [60]:
sf_stacked = sf_stacked[sf_stacked['trip_mode_AS_trips'].notna()]

In [61]:
sf_stacked = sf_stacked.drop(['Realized_INEXUS'], axis=1)

In [62]:
# Mapping BEAM and ASIM modes
conditions  = [(sf_stacked['mode_choice_actual_BEAM'] == 'walk'), 
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['ride_hail_pooled'] > 1),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] > 2.5),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] > 2.5),
              (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] <= 2.5),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] <= 2.5),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] <= 2.5),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
                    (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')| 
                    (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK')|
                    (sf_stacked['trip_mode_AS_trips'] == 'BIKE')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|
                    (sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))
                    & (sf_stacked['cost_BEAM'] > 2.5),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')]
        
choices = [sf_stacked['WALK'], sf_stacked['BIKE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['SHARED2PAY'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['WALK_LRF'], 
           sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'], sf_stacked['WALK_HVY'], sf_stacked['TNC_SINGLE'], 
           sf_stacked['TNC_SHARED'], sf_stacked['TAXI'], sf_stacked['WALK_COM'], 
           sf_stacked['DRIVE_COM'], sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'],
           sf_stacked['DRIVE_HVY'], sf_stacked['SHARED3FREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['DRIVEALONEPAY'],
           sf_stacked['SHARED2FREE'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED3FREE'],  sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED2FREE'], sf_stacked['SHARED2PAY'], sf_stacked['DRIVEALONEFREE'], sf_stacked['SHARED2PAY'], 
           sf_stacked['SHARED2FREE'], sf_stacked['SHARED2FREE'], sf_stacked['SHARED2PAY'], sf_stacked['DRIVEALONEFREE'], 
           sf_stacked['SHARED3PAY'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED3PAY'], sf_stacked['TNC_SHARED'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED2FREE'],
           sf_stacked['SHARED2PAY'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'],sf_stacked['SHARED3FREE'],
           sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['TNC_SHARED'], sf_stacked['DRIVE_COM'],
           sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'], sf_stacked['DRIVE_HVY'],
           sf_stacked['WALK_COM'], sf_stacked['WALK_LRF'], sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'],
           sf_stacked['WALK_HVY']]

In [63]:
sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

In [64]:
# shift columns to first position
first_column = sf_stacked.pop('Realized_INEXUS')
# insert column using insert(position,column_name,first_column) function
sf_stacked.insert(5, 'Realized_INEXUS', first_column)

In [65]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&
                                               ((sf_stacked['numPassengers'] == '1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 1.0, 1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 8.0, 8.0, 7.0, 1.0, 1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 16.0, 1.0, 1.0, 1.0, 1.0')|
                                                (sf_stacked['numPassengers'] =='1.0, 10.0, 11.0, 1.0, 1.0, 1.0, 1.0')), 
                                                sf_stacked['TNC_SINGLE'], sf_stacked['Realized_INEXUS']) 

In [66]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation'), 
                                                sf_stacked.loc[:, ["SHARED2FREE", "SHARED2PAY"]].max(1), sf_stacked['Realized_INEXUS']) 

In [67]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation'), 
                                                sf_stacked.loc[:, ["SHARED3FREE", "SHARED3PAY"]].max(1), sf_stacked['Realized_INEXUS']) 

In [68]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit'), 
                                                sf_stacked.loc[:, ["WALK_COM", "WALK_LRF", "WALK_LOC", "WALK_EXP",
                                                      "WALK_HVY"]].max(1), sf_stacked['Realized_INEXUS']) 

In [69]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit'), 
                                                sf_stacked.loc[:, ["DRIVE_COM", "DRIVE_LRF", "DRIVE_LOC", "DRIVE_EXP",
                                                      "DRIVE_HVY"]].max(1), sf_stacked['Realized_INEXUS']) 

In [71]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'bike_transit')&
                                               (sf_stacked['trip_mode_AS_trips'] == 'BIKE'), 
                                                sf_stacked['BIKE'], sf_stacked['Realized_INEXUS']) 

In [72]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'bike_transit')&
                                               (sf_stacked['trip_mode_AS_trips'] == 'BIKE'), 
                                                sf_stacked['BIKE'], sf_stacked['Realized_INEXUS']) 

In [73]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&
                                               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail'), 
                                                sf_stacked['TNC_SINGLE'], sf_stacked['Realized_INEXUS'])

In [74]:
sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

(1241, 119)

In [None]:
#sf_stacked_tr_fr['Realized_INEXUS'] = np.where((sf_stacked_tr_fr['Realized_INEXUS'].isna())&
                                               #(sf_stacked_tr_fr['mode_choice_actual_BEAM'] == 'walk_transit')&
                                               #(sf_stacked_tr_fr['vehicleIds'].str.contains('(caltrain)')), 
                                               #sf_stacked_tr_fr['WALK_COM'], sf_stacked_tr_fr['Realized_INEXUS'])

In [75]:
sf_stacked = sf_stacked[sf_stacked['Realized_INEXUS'].notna()]

In [19]:
#sf_stacked_tr_fr[sf_stacked_tr_fr['Realized_INEXUS'].isna()].to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

#### Social INEXUS

In [76]:
sf_stacked.income.quantile([0, 0.25,0.5,0.75, 1.00])

0.000000    -14600.000000
0.250000     50000.000000
0.500000     94000.000000
0.750000    155000.000000
1.000000   1354000.000000
Name: income, dtype: float64

In [77]:
# determine the quartile values to use with pd.cut
quartiles = sf_stacked['income'].quantile([.25, .5, .75]).tolist()

In [78]:
# add a lower and upper range for the bins in pd.cut
quartiles = [0] + quartiles + [float('inf')]

In [79]:
# add a quartiles column to groups, using pd.cut
sf_stacked['income_quartiles'] = pd.cut(sf_stacked['income'], bins=quartiles, labels=['1stQ', '2ndQ', '3rdQ', '4thQ'])

In [80]:
sf_stacked.head()

Unnamed: 0,IDMerged,tripIndex,scenario,lever,lever_position,Realized_INEXUS,logsum_relative_to_baseline,door_to_door_time_relative_to_baseline,income,actPurpose,logsum_trip_Potential_INEXUS,mode_choice_actual_BEAM,mandatoryCat,actEndTime,actEndType,actStartTime,actStartType,cost_BEAM,distance_bike,distance_mode_choice,distance_privateCar,distance_ridehail,distance_transit,distance_travelling,distance_walking,duration_in_privateCar,duration_in_ridehail,duration_in_transit,duration_on_bike,duration_travelling,duration_walking,emissionBiodiesel,emissionDiesel,emissionElectricity,emissionFood,emissionGasoline,emission_marginal,fuelBiodiesel,fuelDiesel,fuelElectricity,fuelFood,fuelGasoline,fuel_marginal,fuel_not_Food,mode_choice_planned_BEAM,numPassengers,parkingType,reason,replanning_status,ride_hail_pooled,transit_bus,transit_cable_car,transit_rail,transit_subway,transit_tram,vehicleIds,year,duration_door_to_door,waitTime,mode_choice_actual_5,mode_choice_actual_6,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_count,purpose,trip_mode_AS_trips,tour_type,tour_category,start,end,duration,tour_mode_AS_tours,earning,person_sex,sex,edu,race,work_at_home,age,value_of_time,free_parking_at_work,num_mand,lcm_county_id,tenure_mover,hh_size,tenure,hh_cars,hh_income,hhsize,income_in_thousands,median_value_of_time,hh_value_of_time,home_is_urban,home_is_rural,auto_ownership,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED,income_quartiles
0,1,577.0,ridehail,fleetsize,0.125,-2.05797,0.497253,-26.0,240000.0,Home_to_othmaint,-1.880066,hov2_teleportation,,30009.0,Home,30178.0,othmaint,0.0,0.0,4310.558,4310.558,0.0,0.0,4310.558,0.0,169.0,0.0,0.0,0.0,169.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,169.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.125436,-999.125825,-2.05797,-2.058193,-999.124084,-999.12424,-11.61792,-1000.983135,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.968636,-7.847364,-8.318563,4thQ
1,1,577.0,ridehail,fleetsize,0.25,-2.02425,0.53109,60.0,240000.0,Home_to_othmaint,-1.846229,hov2_teleportation,,29160.0,Home,29415.0,othmaint,0.0,0.0,5762.683,5762.683,0.0,0.0,5762.683,0.0,255.0,0.0,0.0,0.0,255.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,255.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.091716,-999.092017,-2.02425,-2.024422,-999.090364,-999.090484,-15.676167,-1001.38896,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-7.821755,-7.822435,-8.256225,4thQ
2,1,577.0,ridehail,fleetsize,0.5,-2.253695,0.301336,-16.0,240000.0,Home_to_othmaint,-2.075982,hov2_teleportation,,29065.0,Home,29244.0,othmaint,0.0,0.0,4664.36,4664.36,0.0,0.0,4664.36,0.0,179.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,179.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.321161,-999.321484,-2.253695,-2.253879,-999.319809,-999.319938,-11.28493,-1000.949836,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.214544,-8.124605,-8.678848,4thQ
3,1,577.0,baseline,default,1.0,-2.554453,0.0,0.0,240000.0,Home_to_othmaint,-2.377319,hov2_teleportation,,31052.0,Home,31247.0,othmaint,0.0,0.0,4940.606,4940.606,0.0,0.0,4940.606,0.0,195.0,0.0,0.0,0.0,195.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,195.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.621919,-999.622345,-2.554453,-2.554696,-999.620566,-999.620737,-38.227666,-1007.088933,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.625908,-8.611274,-9.231232,4thQ
4,1,577.0,ridehail,fleetsize,1.75,-2.073379,0.481759,-7.0,240000.0,Home_to_othmaint,-1.89556,hov2_teleportation,,30819.0,Home,31007.0,othmaint,0.0,0.0,4722.849,4722.849,0.0,0.0,4722.849,0.0,188.0,0.0,0.0,0.0,188.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,hov2_teleportation,,,,0,0,0,0,0,0,0,,2018,188.0,0.0,car,car,577,1,1735309,72,school,2,othmaint,SHARED2FREE,school,mandatory,8.0,15.0,7.0,SHARED2PAY,0.0,male,1,1.0,asian,0,3,19.798008,False,1,6085,own not recent,four or more,1,two or more,gt150,4,240.0,12.86,29.682171,False,False,2,-999.140845,-999.141285,-2.073379,-2.073631,-999.139493,-999.139669,-13.530149,-1001.174358,-1000.459856,-1998.770467,-1998.770467,-1998.770467,-1998.770467,-1999.307149,-2997.409495,-2997.409495,-2997.409495,-2997.409495,-8.028935,-7.838608,-8.360657,4thQ


In [81]:
m = sf_stacked.groupby(['income_quartiles'])['income'].median()

In [82]:
m

income_quartiles
1stQ    30000.000000
2ndQ    70600.000000
3rdQ   120000.000000
4thQ   219000.000000
Name: income, dtype: float64

In [83]:
# Mapping BEAM and ASIM modes
conditions  = [(sf_stacked['income_quartiles'] == '1stQ'), 
               (sf_stacked['income_quartiles'] == '2ndQ'),
               (sf_stacked['income_quartiles'] == '3rdQ'),
               (sf_stacked['income_quartiles'] == '4thQ')]

choices = [29300, 70000, 120000, 219000]

In [84]:
sf_stacked['MedianQuartiles'] = np.select(conditions, choices, default=np.nan)

In [85]:
sf_stacked['alpha'] = 0.1

In [87]:
sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal']*51 # or 102

In [88]:
sf_stacked['Social_INEXUS'] = (sf_stacked['Realized_INEXUS']/sf_stacked['alpha']) - sf_stacked['socialCarbonCost']

In [93]:
sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS']/sf_stacked['alpha']

In [120]:
sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS']/sf_stacked['alpha']

#### The following chunks are useful but should be checked in the next runs (they resulted in exact same realized and potential inexus in dollar

In [122]:
# shift columns to first position
#first_column = sf_stacked.pop('Social_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(6, 'Social_INEXUS', first_column)

In [117]:
# shift columns to first position
#first_column = sf_stacked.pop('Potential_INEXUS_in_dollar')
#second_column = sf_stacked.pop('Realized_INEXUS_in_dollar')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(7, 'Potential_INEXUS_in_dollar', first_column)
#sf_stacked.insert(8, 'Realized_INEXUS_in_dollar', first_column)

In [124]:
%%time
sf_stacked.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_rh_flsz.csv', index=False)

CPU times: total: 25min 31s
Wall time: 33min 30s
