In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline
import seaborn as sns
sns.set_theme(style="darkgrid", color_codes=True)
sns.set(font_scale=1.35, style="ticks") #set styling preferences
import statsmodels.api as sm
from scipy import stats
import math
from math import pi
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
from scipy.spatial.distance import cdist
from scipy.cluster.vq import kmeans2,vq, whiten
import geopandas as gpd
import h5py
import boto.s3
import glob
import boto3
from zipfile import ZipFile
import shutil

In [2]:
# Show all columns and rows
pd.options.display.max_columns = None
pd.options.display.max_rows = None

#### Baseline

In [3]:
%%time
s3 = boto3.client("s3")
key = "pilates-outputs/sfbay-baseline-2022124/inexus/sfbay_baseline_default-1.0_2020__20221224.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sfbase = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 1min 28s
Wall time: 1min 29s


In [4]:
# Adding scenario info
sfbase['scenario'] = "baseline"
sfbase['scenario'] = sfbase['scenario'].astype("category")
sfbase['lever'] = "default"
sfbase['lever'] = sfbase['lever'].astype("category")
sfbase['year'] = 2020
sfbase['lever_position'] = 1

In [5]:
sfbase[sfbase['person_sex'].isna()].shape

(19191, 273)

In [6]:
#baseline2018 = "https://beam-core-act.s3.amazonaws.com/beam-outputs/pilates-outputs/sfbay-baseline-20221220/inexus"
#sfbase = pd.read_csv(baseline2018 + 'sfbay_baseline_default-1.0_2012__20221219.csv.gz')

In [7]:
# Showing the entire number in dataframe
pd.set_option('float_format', '{:f}'.format)

In [8]:
#describe = sfbase.describe()

In [9]:
#describe.to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/describe.csv')

#### Ridehail Fleet Size Scenarios

In [6]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet0_125/inexus/sfbay_rh_fleet_0_125_rh_fleet-0.125_2020__20230113.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_0p125 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 1min 39s
Wall time: 1min 39s


In [7]:
sf_rh_fltsz_0p125[sf_rh_fltsz_0p125['trip_mode_AS_trips'].isna()].shape

(19474, 269)

In [8]:
# Adding scenario info
sf_rh_fltsz_0p125['scenario'] = "ridehail"
sf_rh_fltsz_0p125['scenario'] = sf_rh_fltsz_0p125['scenario'].astype("category")
sf_rh_fltsz_0p125['lever'] = "fleet_size"
sf_rh_fltsz_0p125['lever'] = sf_rh_fltsz_0p125['lever'].astype("category")
sf_rh_fltsz_0p125['year'] = 2020
sf_rh_fltsz_0p125['lever_position'] = 0.125

In [9]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet0_25/inexus/sfbay_rh_fleet_0_25_rh_fleet-0.25_2020__20230113.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_0p25 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 1min 52s
Wall time: 1min 52s


In [10]:
# Adding scenario info
sf_rh_fltsz_0p25['scenario'] = "ridehail"
sf_rh_fltsz_0p25['scenario'] = sf_rh_fltsz_0p25['scenario'].astype("category")
sf_rh_fltsz_0p25['lever'] = "fleet_size"
sf_rh_fltsz_0p25['lever'] = sf_rh_fltsz_0p25['lever'].astype("category")
sf_rh_fltsz_0p25['year'] = 2020
sf_rh_fltsz_0p25['lever_position'] = 0.25

In [11]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet0_50/inexus/sfbay_rh_fleet_0_50_rh_fleet-0.5_2020__20230115.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_0p5 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 1s
Wall time: 2min 1s


In [12]:
# Adding scenario info
sf_rh_fltsz_0p5['scenario'] = "ridehail"
sf_rh_fltsz_0p5['scenario'] = sf_rh_fltsz_0p5['scenario'].astype("category")
sf_rh_fltsz_0p5['lever'] = "fleet_size"
sf_rh_fltsz_0p5['lever'] = sf_rh_fltsz_0p5['lever'].astype("category")
sf_rh_fltsz_0p5['year'] = 2020
sf_rh_fltsz_0p5['lever_position'] = 0.5

In [13]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet2_00/inexus/sfbay_rh_fleet_2_0_rh_fleet-2.0_2020__20230115.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_2 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 7s
Wall time: 2min 8s


In [16]:
# Adding scenario info
sf_rh_fltsz_2['scenario'] = "ridehail"
sf_rh_fltsz_2['scenario'] = sf_rh_fltsz_2['scenario'].astype("category")
sf_rh_fltsz_2['lever'] = "fleet_size"
sf_rh_fltsz_2['lever'] = sf_rh_fltsz_2['lever'].astype("category")
sf_rh_fltsz_2['year'] = 2020
sf_rh_fltsz_2['lever_position'] = 2

In [15]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet4_00/inexus/sfbay_rh_fleet_4_00_rh_fleet-4.0_2020__20230115.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_4 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 14s
Wall time: 2min 15s


In [17]:
# Adding scenario info
sf_rh_fltsz_4['scenario'] = "ridehail"
sf_rh_fltsz_4['scenario'] = sf_rh_fltsz_4['scenario'].astype("category")
sf_rh_fltsz_4['lever'] = "fleet_size"
sf_rh_fltsz_4['lever'] = sf_rh_fltsz_4['lever'].astype("category")
sf_rh_fltsz_4['year'] = 2020
sf_rh_fltsz_4['lever_position'] = 4

In [18]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet10_00/inexus/sfbay_rh_fleet_10_00_rh_fleet-10.0_2020__20230115.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_10 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 27s
Wall time: 2min 27s


In [19]:
# Adding scenario info
sf_rh_fltsz_10['scenario'] = "ridehail"
sf_rh_fltsz_10['scenario'] = sf_rh_fltsz_10['scenario'].astype("category")
sf_rh_fltsz_10['lever'] = "fleet_size"
sf_rh_fltsz_10['lever'] = sf_rh_fltsz_10['lever'].astype("category")
sf_rh_fltsz_10['year'] = 2020
sf_rh_fltsz_10['lever_position'] = 10

In [20]:
%%time
key = "pilates-outputs/sfbay-baseline-rhfleet27_50/inexus/sfbay_rh_fleet_27_500_rh_fleet-27.5_2020__20230115.csv.gz"  #the path should be updated
obj = s3.get_object(Bucket="beam-outputs", Key=key)
sf_rh_fltsz_27p5 = pd.read_csv(obj['Body'], compression = 'gzip',index_col='Unnamed: 0')



CPU times: total: 2min 33s
Wall time: 2min 33s


In [21]:
# Adding scenario info
sf_rh_fltsz_27p5['scenario'] = "ridehail"
sf_rh_fltsz_27p5['scenario'] = sf_rh_fltsz_27p5['scenario'].astype("category")
sf_rh_fltsz_27p5['lever'] = "fleet_size"
sf_rh_fltsz_27p5['lever'] = sf_rh_fltsz_27p5['lever'].astype("category")
sf_rh_fltsz_27p5['year'] = 2020
sf_rh_fltsz_27p5['lever_position'] = 27.5

In [None]:
frames = [sf_rh_fltsz_0p125, sf_rh_fltsz_0p25, sf_rh_fltsz_0p5, sfbase,sf_rh_fltsz_2, sf_rh_fltsz_4, sf_rh_fltsz_10,
          sf_rh_fltsz_27p5]
sf_stacked = pd.concat(frames)

In [None]:
sf_stacked = sf_stacked.drop(['has_school_kid','has_preschool_kid','has_retiree','has_non_worker','adult','hispanic','relate',
                             'pstudent','pemploy','ptype','p_hispanic','age_16_p','age_16_to_19',
                             'person_age','MAR','hours','hispanic.1','student',
                             'worker','stop_frequency','composition','tdd','number_of_participants','tour_type_count',
                             'tour_type_num','tour_num','tour_count', 'outbound', 'trip_num','num_under16_not_at_school',
                             'family','non_family','num_young_adults','num_college_age','num_children_16_to_17',
                              'num_children_5_to_15','num_young_children'], axis=1)

In [None]:
sf_stacked = sf_stacked.drop(['vehicleIds_estimate','parent_tour_id','atwork_subtour_frequency','household_id_y',
                              'male','female', 'has_driving_kid','has_full_time','has_part_time',
                              'has_university','student_is_employed', 'nonstudent_to_school', 'is_student','is_gradeschool',
                              'is_gradeschool','is_highschool','is_university','school_segment','is_worker',  
                              'distance_to_school','roundtrip_auto_time_to_school',
                              'distance_to_work','workplace_in_cbd', 'work_taz_area_type', 'hh_race_of_head',
                             'roundtrip_auto_time_to_work','work_auto_savings_ratio','cdap_activity', 'travel_active', 
                              'under16_not_at_school','has_preschool_kid_at_home', 'gt2','hispanic_head',
                             'has_school_kid_at_home','work_and_school_and_worker', 'age_of_head', 'race_of_head',
                              'work_and_school_and_student', 'VEHICL','hh_children','hh_age_of_head','num_workers',
                              'gt55','seniors', 'recent_mover', 'hh_workers', 'hispanic_status_of_head', 'hh_seniors', 
                             'hh_type', 'HHT', 'sample_rate', 'chunk_id', 'income_segment', 'num_non_workers', 
                             'num_drivers', 'num_adults', 'num_children', 'hh_work_auto_savings_ratio', 'num_travel_active', 
                             'num_travel_active_adults','num_travel_active_preschoolers', 'num_travel_active_children', 
                              'num_travel_active_non_preschoolers', 'participates_in_jtf_model', 'joint_tour_frequency',
                              'num_hh_joint_tours'], axis=1)

In [None]:
sf_stacked = sf_stacked.drop(['serialno'], axis=1)

#### Stacked File

In [33]:
#stacked_tr_fr_path = "https://beam-core-act.s3.amazonaws.com/deepDive/CleanData/SanFrancisco/Stacked/"
#sf_stacked_tr_fr = pd.read_csv(stacked_tr_fr_path + 'sf_2018_stacked_tr_fr.csv')

In [34]:
sf_stacked = sf_stacked.sort_values(by=['IDMerged', 'tripIndex', 'lever_position']).reset_index(drop=True)

In [35]:
# shift column 'person' to first position
#first_column = sf_stacked.pop('scenario')
#second_column = sf_stacked.pop('lever')
#third_column = sf_stacked.pop('lever_position')
#fourth_column = sf_stacked.pop('logsum_trip_mode_AS_trips')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(2, 'scenario', first_column)
#sf_stacked.insert(3, 'lever', second_column)
#sf_stacked.insert(4, 'lever_position', third_column)
#sf_stacked.insert(5, 'logsum_trip_mode_AS_trips', fourth_column)

In [36]:
# Adding Price lever position
conditions  = [(sf_stacked['lever_position'] == 0.125),
               (sf_stacked['lever_position'] == 0.25),
               (sf_stacked['lever_position'] == 0.5),
               (sf_stacked['lever_position'] == 1),
               (sf_stacked['lever_position'] == 2),
               (sf_stacked['lever_position'] == 4),
               (sf_stacked['lever_position'] == 10),
               (sf_stacked['lever_position'] == 27.5)]

choices = [0.125, 0.25, 0.5, 1, 2, 4, 10, 27.5]

In [37]:
sf_stacked['lever_position_fleetsize'] = np.select(conditions, choices, default=np.nan)

In [38]:
sf_stacked['lever_position_price'] = 1

In [39]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['logsum_trip_mode_AS_trips'].apply(lambda x: x.diff())

In [40]:
#sf_stacked_tr_fr['logsum_relative_to_baseline'] = sf_stacked_tr_fr['logsum_trip_mode_AS_trips'] - sf_stacked_tr_fr.loc[sf_stacked_tr_fr.groupby(['IDMerged','tripIndex'])['lever_position'].transform('idxmin'), 'logsum_trip_mode_AS_trips'].values

In [41]:
%%time
sf_stacked['logsum_relative_to_baseline'] = sf_stacked['logsum_trip_Potential_INEXUS'] - (sf_stacked['logsum_trip_Potential_INEXUS'].where(sf_stacked['lever_position'].eq(1.0))
                                                                                                  .groupby([sf_stacked['IDMerged'],sf_stacked['tripIndex']])
                                                                                                   .transform('first')
                                                                                                   .convert_dtypes())

CPU times: total: 5.56 s
Wall time: 5.4 s


In [42]:
sf_stacked['door_to_door_time_relative_to_baseline'] = sf_stacked['duration_door_to_door'] - (sf_stacked['duration_door_to_door'].where(sf_stacked['lever_position'].eq(1.0))
                                                                                                  .groupby([sf_stacked['IDMerged'],sf_stacked['tripIndex']])
                                                                                                   .transform('first')
                                                                                                   .convert_dtypes())

In [56]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['sf_detached', 'household_id_x','destination_logsum_y','logsum_tours_mode_AS_tours',
                              'primary_purpose_y','PNUM','work_auto_savings','mandatory_tour_frequency'], axis=1)

KeyError: "['sf_detached', 'household_id_x', 'destination_logsum_y', 'logsum_tours_mode_AS_tours', 'primary_purpose_y', 'PNUM', 'work_auto_savings', 'mandatory_tour_frequency'] not found in axis"

In [None]:
sf_stacked.head()

In [46]:
sf_stacked[sf_stacked['mode_choice_actual_6'] == 'ride_hail_transit'].shape

(0, 166)

In [57]:
sf_stacked[sf_stacked['replanning_status'] > 0].shape

(137553, 156)

In [58]:
# Add the mandatory category column
mandatory = ['work' , 'univ', 'school']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType.isin(mandatory)), 'from_M_to_M' , None)

In [59]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType == 'Home') & (sf_stacked.actStartType.isin(mandatory)), 'from_H_to_M' , sf_stacked['mandatoryCat'])

In [60]:
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(mandatory)) & (sf_stacked.actStartType == "Home"), 'from_M_to_H' , sf_stacked['mandatoryCat'])

In [61]:
non_mandatory = ['othmaint' , 'othdiscr', 'escort', 'eatout', 'social', 'shopping', 'atwork']
sf_stacked['mandatoryCat'] = np.where((sf_stacked.actEndType.isin(non_mandatory)) & (sf_stacked.actStartType.isin(non_mandatory)), 'from_N_to_N' , sf_stacked['mandatoryCat'])

In [62]:
# Drop unused columns
sf_stacked = sf_stacked.drop(['num_work_tours','num_joint_tours', 'non_mandatory_tour_frequency',
                                          'num_non_mand','num_escort_tours','num_eatout_tours','num_shop_tours',
                                          'num_maint_tours','num_discr_tours','num_social_tours','num_non_escort_tours'], axis=1)

KeyError: "['num_work_tours', 'num_joint_tours', 'non_mandatory_tour_frequency', 'num_non_mand', 'num_escort_tours', 'num_eatout_tours', 'num_shop_tours', 'num_maint_tours', 'num_discr_tours', 'num_social_tours', 'num_non_escort_tours'] not found in axis"

In [53]:
#sf_stacked[sf_stacked['Realized_INEXUS'].isna()].head(10000).to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

In [54]:
#%%time
#sf_stacked_tr_fr.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2018_stacked_tr_fr_v3.csv', index=False)

In [63]:
sf_stacked[sf_stacked['SHARED3FREE'].isna()].shape

(155180, 156)

In [64]:
sf_stacked = sf_stacked[sf_stacked['WALK'].notna()]

In [65]:
# Mapping BEAM and ASIM modes
conditions  = [(sf_stacked['mode_choice_actual_BEAM'] == 'walk')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike')&(sf_stacked['trip_mode_AS_trips'] == 'BIKE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')&(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED'),
               (sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')&(sf_stacked['trip_mode_AS_trips'] == 'TAXI'),
       
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')),   
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')&((sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEPAY')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVEALONEFREE')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'SHARED3FREE')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2FREE')|
               (sf_stacked['trip_mode_AS_trips'] == 'SHARED3PAY')|(sf_stacked['trip_mode_AS_trips'] == 'SHARED2PAY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LOC')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_LRF')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'DRIVE_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'DRIVE_COM')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk')&((sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(sf_stacked['trip_mode_AS_trips'] == 'WALK'),
               
               ((sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail_pooled')|(sf_stacked['mode_choice_actual_BEAM'] == 'ride_hail'))
               &(~((sf_stacked['trip_mode_AS_trips'] == 'TAXI')|(sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|
                (sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE'))),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')&(~((sf_stacked['trip_mode_AS_trips'] == 'WALK_LRF')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_LOC')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_EXP')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK_HVY')|(sf_stacked['trip_mode_AS_trips'] == 'WALK_COM')|
               (sf_stacked['trip_mode_AS_trips'] == 'WALK'))),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'bike_transit'),
               
               (sf_stacked['mode_choice_actual_BEAM'] == 'car')&((sf_stacked['trip_mode_AS_trips'] == 'TNC_SHARED')|
               (sf_stacked['trip_mode_AS_trips'] == 'TNC_SINGLE')|(sf_stacked['trip_mode_AS_trips'] == 'TAXI'))]
 
                                                                      
                                                                                                                 
                                                                      
choices = [sf_stacked['WALK'], sf_stacked['BIKE'], sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'],
           sf_stacked['SHARED3FREE'], sf_stacked['SHARED3PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['SHARED2PAY'], sf_stacked['SHARED2PAY'], sf_stacked['SHARED2FREE'], 
           sf_stacked['DRIVEALONEFREE'], sf_stacked['DRIVEALONEPAY'], sf_stacked['WALK_LRF'], 
           sf_stacked['WALK_LOC'], sf_stacked['WALK_EXP'], sf_stacked['WALK_HVY'], sf_stacked['WALK_COM'],
           sf_stacked['TNC_SINGLE'], 
           sf_stacked['TNC_SHARED'], sf_stacked['TAXI'],
           sf_stacked['DRIVE_COM'], sf_stacked['DRIVE_LRF'], sf_stacked['DRIVE_LOC'], sf_stacked['DRIVE_EXP'],
           sf_stacked['DRIVE_HVY'], sf_stacked['TNC_SINGLE'], sf_stacked['TNC_SHARED'], sf_stacked['TAXI'],
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),
          sf_stacked[['TAXI','TNC_SINGLE','TNC_SHARED']].max(axis=1),
          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['BIKE','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),
          sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1)]

In [66]:
sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Realized_INEXUS'] = np.select(conditions, choices, default=np.nan)


In [67]:
#sf_stacked = sf_stacked.drop(['Realized_INEXUS'], axis=1)

In [68]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'car_hov3')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'hov2_teleportation')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'hov3_teleportation')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'car')|
                                          (sf_stacked['mode_choice_actual_BEAM'] == 'drive_transit')), 
                                           sf_stacked[['DRIVEALONEFREE','DRIVEALONEPAY','SHARED2FREE','SHARED3FREE','SHARED2PAY','SHARED3PAY','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1), sf_stacked['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'car_hov2')|


In [70]:
sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'walk')|
                                         (sf_stacked['mode_choice_actual_BEAM'] == 'walk_transit')), 
                                          sf_stacked[['WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM','DRIVE_LOC','DRIVE_LRF','DRIVE_EXP','DRIVE_HVY','DRIVE_COM']].max(axis=1),sf_stacked['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Realized_INEXUS'] = np.where((sf_stacked['Realized_INEXUS'].isna())&((sf_stacked['mode_choice_actual_BEAM'] == 'walk')|


In [71]:
sf_stacked['Realized_INEXUS'] = np.where(sf_stacked['Realized_INEXUS'].isna()&(sf_stacked['mode_choice_actual_BEAM'] == 'bike'), 
                                         sf_stacked[['BIKE','WALK','WALK_LOC','WALK_LRF','WALK_EXP','WALK_HVY','WALK_COM']].max(axis=1),sf_stacked['Realized_INEXUS'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Realized_INEXUS'] = np.where(sf_stacked['Realized_INEXUS'].isna()&(sf_stacked['mode_choice_actual_BEAM'] == 'bike'),


In [107]:
sf_stacked[sf_stacked['Realized_INEXUS'].isna()].shape

(0, 162)

In [108]:
sf_stacked[sf_stacked['Realized_INEXUS']<-1000].shape

(0, 162)

In [103]:
sf_stacked['Realized_INEXUS'] = np.where(sf_stacked['Realized_INEXUS']<-1000, sf_stacked['Realized_INEXUS']+990, sf_stacked['Realized_INEXUS'])

In [None]:
#sf_stacked_tr_fr['Realized_INEXUS'] = np.where((sf_stacked_tr_fr['Realized_INEXUS'].isna())&
                                               #(sf_stacked_tr_fr['mode_choice_actual_BEAM'] == 'walk_transit')&
                                               #(sf_stacked_tr_fr['vehicleIds'].str.contains('(caltrain)')), 
                                               #sf_stacked_tr_fr['WALK_COM'], sf_stacked_tr_fr['Realized_INEXUS'])

In [None]:
sf_stacked = sf_stacked[sf_stacked['Realized_INEXUS'].notna()]

In [110]:
sf_stacked.head()

Unnamed: 0,IDMerged,tripIndex,actStartTime,actEndTime,duration_travelling,cost_BEAM,actStartType,actEndType,duration_walking,duration_in_privateCar,duration_on_bike,duration_in_ridehail,distance_travelling,duration_in_transit,distance_walking,distance_bike,distance_ridehail,distance_privateCar,distance_transit,mode_choice_planned_BEAM,mode_choice_actual_BEAM,vehicleIds,numPassengers,distance_mode_choice,replanning_status,reason,parkingType,transit_bus,transit_subway,transit_tram,transit_cable_car,ride_hail_pooled,transit_rail,fuelFood,fuelElectricity,fuelBiodiesel,fuelDiesel,fuel_not_Food,fuelGasoline,fuel_marginal,BlockGroupStart,startX,startY,bgid_start,tractid_start,juris_name_start,county_name_start,mpo_start,BlockGroupEnd,endX,endY,bgid_end,tractid_end,juris_name_end,county_name_end,mpo_end,emissionFood,emissionElectricity,emissionDiesel,emissionGasoline,emissionBiodiesel,emission_marginal,duration_door_to_door,waitTime_no_replanning,waitTime_replanning,actPurpose,mode_choice_actual_5,mode_choice_actual_6,mode_choice_actual_4,trip_id,person_id,household_id,tour_id,primary_purpose_x,trip_count,purpose,destination_x,origin_x,destination_logsum_x,depart,trip_mode_AS_trips,logsum_trip_Potential_INEXUS,tour_type,tour_category,destination_y,origin_y,start,end,duration,tour_mode_AS_tours,age,earning,edu,race_id,sex,work_at_home,person_sex,race,TAZ_x,home_x,home_y,home_taz,value_of_time,school_taz,workplace_taz,workplace_location_logsum,free_parking_at_work,num_mand,income,tenure,hh_size,hh_cars,hh_income,tenure_mover,block_id,lcm_county_id,hhsize,TAZ_y,income_in_thousands,median_value_of_time,hh_value_of_time,home_is_urban,home_is_rural,auto_ownership,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED,income_quartiles,income_deciles,scenario,lever,year,lever_position,lever_position_price,lever_position_fleetsize,logsum_relative_to_baseline,door_to_door_time_relative_to_baseline,mandatoryCat,Realized_INEXUS,alpha,socialCarbonCost,Social_INEXUS,Potential_INEXUS_in_dollar,Realized_INEXUS_in_dollar
0,4,1513,55528.0,54961.0,567.0,1.604735,othdiscr,Home,0.0,567.0,0.0,0.0,14631.142,0.0,0.0,0.0,0.0,14631.142,0.0,car,car,"358611, body-4","1.0, 0.0, 0.0, 1.0",14631.142,0,,"Residential, Public",0,0,0,0,0,0,0.0,0.0,0.0,0.0,22701321.975947,22701321.975947,22701321.975947,60014001001.0,-122.227937,37.854097,60014001001.0,6001400100.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014096004.0,-122.17196,37.750722,60014096004.0,6001409600.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.001678,0.0,0.0,567.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,othdiscr,891.0,1005.0,,15.0,DRIVEALONEFREE,-3.359118,othdiscr,non_mandatory,891.0,1005.0,15.0,16.0,1.0,DRIVEALONEFREE,68.0,0.0,22.0,2.0,2.0,0.0,female,black,1005.0,-122.227937,37.854097,1005.0,2.81873,-1.0,-1.0,,False,0.0,70550.0,1.0,two,two or more,gt60-lt100,own not recent,60014001001061.0,6001.0,2.0,1005.0,70.55,10.44,2.81873,False,False,1.0,-3.981448,-4.052351,-1002.981448,-1003.021964,-1002.981448,-1003.009809,-44.478549,-1009.44054,-1998.380369,-1998.380369,-1998.380369,-2002.05629,-1998.380369,-1001.227631,-1998.427241,-1998.427241,-2000.036363,-1998.427241,-29.154868,-4.326001,-5.991636,2ndQ,4thD,ridehail,price,2020,0.0,0.0,1,-2.571284,384.0,,-3.981448,0.1,0.0,-39.814482,-33.591179,-39.814482
1,4,1513,57775.0,57520.0,255.0,0.590549,othdiscr,Home,0.0,255.0,0.0,0.0,5384.318,0.0,0.0,0.0,0.0,5384.318,0.0,car,car,"358622, body-4","1.0, 0.0, 0.0, 1.0",5384.318,0,,"Residential, Public",0,0,0,0,0,0,0.0,0.0,0.0,0.0,8644696.241503,8644696.241503,8644696.241503,60014001001.0,-122.227937,37.854097,60014001001.0,6001400100.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014045024.0,-122.20481,37.827999,60014045024.0,6001404502.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.000639,0.0,0.0,255.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,othdiscr,913.0,1005.0,,15.0,DRIVEALONEFREE,-2.524608,othdiscr,non_mandatory,913.0,1005.0,15.0,16.0,1.0,DRIVEALONEFREE,68.0,0.0,22.0,2.0,2.0,0.0,female,black,1005.0,-122.227937,37.854097,1005.0,2.81873,-1.0,-1.0,,False,0.0,70550.0,1.0,two,two or more,gt60-lt100,own not recent,60014001001061.0,6001.0,2.0,1005.0,70.55,10.44,2.81873,False,False,1.0,-2.731231,-2.737002,-1001.731231,-1001.734529,-1001.731231,-1001.73354,-18.867786,-1001.310309,-1001.383874,-1998.378026,-1998.378026,-1998.378026,-1998.378026,-1000.241913,-1998.424898,-1998.424898,-1998.424898,-1998.424898,-20.386394,-6.182327,-6.118676,2ndQ,4thD,ridehail,price,2020,0.125,0.125,1,-1.736774,72.0,,-2.731231,0.1,0.0,-27.312315,-25.246083,-27.312315
2,4,1513,55962.0,55350.0,612.0,0.677538,othdiscr,Home,248.0,364.0,0.0,0.0,6891.688,0.0,321.777,0.0,0.0,6569.911,0.0,car,car,"body-4, 358548","1.0, 0.0, 0.0, 1.0",6177.442,0,,"Residential, Residential, Public",0,0,0,0,0,0,17054.181,0.0,0.0,0.0,10465691.253266,10465691.253266,10482745.434266,60014001001.0,-122.227937,37.854097,60014001001.0,6001400100.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014038001.0,-122.239304,37.815996,60014038001.0,6001403800.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.000774,0.0,0.0,612.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,othdiscr,917.0,1005.0,,15.0,DRIVEALONEFREE,-0.648215,othdiscr,non_mandatory,917.0,1005.0,15.0,16.0,1.0,DRIVEALONEFREE,68.0,0.0,22.0,2.0,2.0,0.0,female,black,1005.0,-122.227937,37.854097,1005.0,2.81873,-1.0,-1.0,,False,0.0,70550.0,1.0,two,two or more,gt60-lt100,own not recent,60014001001061.0,6001.0,2.0,1005.0,70.55,10.44,2.81873,False,False,1.0,-0.944112,-0.947232,-999.944112,-999.945895,-999.944112,-999.94536,-14.091297,-1000.828669,-1000.521735,-1998.373591,-1998.373591,-1998.373591,-1998.373591,-999.802942,-1998.420463,-1998.420463,-1998.420463,-1998.420463,-8.505649,-2.981777,-3.14933,2ndQ,4thD,ridehail,price,2020,0.25,0.25,1,0.139619,429.0,,-0.944112,0.1,0.0,-9.441121,-6.482153,-9.441121
3,4,1513,57048.0,56854.0,194.0,0.509574,othdiscr,Home,0.0,194.0,0.0,0.0,4646.029,0.0,0.0,0.0,0.0,4646.029,0.0,car,car,"body-4, 358654","1.0, 0.0, 0.0, 1.0",4646.029,0,,"Residential, Public",0,0,0,0,0,0,0.0,0.0,0.0,0.0,6831391.080106,6831391.080106,6831391.080106,60014001001.0,-122.227937,37.854097,60014001001.0,6001400100.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014261001.0,-122.235298,37.830552,60014261001.0,6001426100.0,Piedmont,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.000505,0.0,0.0,194.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,othdiscr,916.0,1005.0,,15.0,DRIVEALONEFREE,-1.012956,othdiscr,non_mandatory,916.0,1005.0,15.0,16.0,1.0,DRIVEALONEFREE,68.0,0.0,22.0,2.0,2.0,0.0,female,black,1005.0,-122.227937,37.854097,1005.0,2.81873,-1.0,-1.0,,False,0.0,70550.0,1.0,two,two or more,gt60-lt100,own not recent,60014001001061.0,6001.0,2.0,1005.0,70.55,10.44,2.81873,False,False,1.0,-1.195758,-1.199882,-1000.195758,-1000.198114,-1000.195758,-1000.197407,-20.921898,-1001.517729,-1001.358499,-1998.380258,-1998.380258,-1998.380258,-1998.380258,-1999.111132,-1998.42713,-1998.42713,-1998.42713,-1998.42713,-12.054587,-7.910639,-5.605723,2ndQ,4thD,ridehail,price,2020,0.5,0.5,1,-0.225122,11.0,,-1.195758,0.1,0.0,-11.957577,-10.129557,-11.957577
4,4,1513,56512.0,56329.0,183.0,0.409358,othdiscr,Home,0.0,183.0,0.0,0.0,3732.315,0.0,0.0,0.0,0.0,3732.315,0.0,car,car,"body-4, 358648","1.0, 0.0, 0.0, 1.0",3732.315,0,,"Residential, Public",0,0,0,0,0,0,0.0,0.0,0.0,0.0,6208647.575095,6208647.575095,6208647.575095,60014001001.0,-122.227937,37.854097,60014001001.0,6001400100.0,Oakland,Alameda County,San Francisco Bay Area (MTC),60014045024.0,-122.209569,37.826726,60014045024.0,6001404502.0,Oakland,Alameda County,San Francisco Bay Area (MTC),0.0,0.0,0.0,0.000459,0.0,0.0,183.0,0.0,0.0,Home_to_othdiscr,car,car,car,1513.0,4.0,865.0,189.0,othdiscr,1.0,othdiscr,913.0,1005.0,,15.0,DRIVEALONEFREE,-0.787834,othdiscr,non_mandatory,913.0,1005.0,15.0,16.0,1.0,DRIVEALONEFREE,68.0,0.0,22.0,2.0,2.0,0.0,female,black,1005.0,-122.227937,37.854097,1005.0,2.81873,-1.0,-1.0,,False,0.0,70550.0,1.0,two,two or more,gt60-lt100,own not recent,60014001001061.0,6001.0,2.0,1005.0,70.55,10.44,2.81873,False,False,1.0,-0.962008,-0.965891,-999.962008,-999.964227,-999.962008,-999.963561,-30.824786,-1002.612487,-1001.553761,-1998.378026,-1998.378026,-1998.378026,-1998.378026,-1000.421875,-1998.424898,-1998.424898,-1998.424898,-1998.424898,-11.348414,-11.636467,-7.335184,2ndQ,4thD,baseline,default,2020,1.0,1.0,1,0.0,0.0,,-0.962008,0.1,0.0,-9.620076,-7.87834,-9.620076


In [None]:
#sf_stacked_tr_fr[sf_stacked_tr_fr['Realized_INEXUS'].isna()].to_csv('C:/Shared-Work/Data/CleanData/ASIM_BEAM_Merged/realizedIN_empty.csv', index = False)

#### Social INEXUS

In [None]:
# add a lower and upper range for the bins in pd.cut
#quartiles = [0] + quartiles + [float('inf')]

In [None]:
# add a quartiles column to groups, using pd.cut
#sf_stacked['income_quartiles'] = pd.cut(sf_stacked['income'], bins=quartiles, labels=['1stQ', '2ndQ', '3rdQ', '4thQ'])

In [72]:
sf_stacked['alpha'] = 0.1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['alpha'] = 0.1


In [73]:
sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal']*185 # 51 or 102

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['socialCarbonCost'] = sf_stacked['emission_marginal']*185 # 51 or 102


In [74]:
sf_stacked['Social_INEXUS'] = (sf_stacked['Realized_INEXUS']/sf_stacked['alpha']) - sf_stacked['socialCarbonCost']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Social_INEXUS'] = (sf_stacked['Realized_INEXUS']/sf_stacked['alpha']) - sf_stacked['socialCarbonCost']


In [75]:
sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS']/sf_stacked['alpha']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Potential_INEXUS_in_dollar'] = sf_stacked['logsum_trip_Potential_INEXUS']/sf_stacked['alpha']


In [76]:
sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS']/sf_stacked['alpha']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sf_stacked['Realized_INEXUS_in_dollar'] = sf_stacked['Realized_INEXUS']/sf_stacked['alpha']


#### The following chunks are useful but should be checked in the next runs (they resulted in exact same realized and potential inexus in dollar

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Social_INEXUS')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(6, 'Social_INEXUS', first_column)

In [None]:
# shift columns to first position
#first_column = sf_stacked.pop('Potential_INEXUS_in_dollar')
#second_column = sf_stacked.pop('Realized_INEXUS_in_dollar')
# insert column using insert(position,column_name,first_column) function
#sf_stacked.insert(7, 'Potential_INEXUS_in_dollar', first_column)
#sf_stacked.insert(8, 'Realized_INEXUS_in_dollar', first_column)

In [77]:
%%time
sf_stacked.to_csv('s3://beam-core-act/deepDive/CleanData/SanFrancisco/Stacked/sf_2020_stacked_rh_fleetsz_1_20_23.csv', index=False)

CPU times: total: 35min 56s
Wall time: 50min 22s
