In [24]:
import os
import pandas as pd


# input files
TAZDATA_FILE  = os.path.join('../landuse', 'tazData.csv')
TOUR_FILE     = os.path.join('E:/projects/ccta/31000190/Jawad/2015_BaseY_BCM2015/main','indivTourData_1.csv')
WSLOC_FILE    = os.path.join('E:/projects/ccta/31000190/Jawad/2015_BaseY_BCM2015/main','wsLocResults_1.csv')
# todo: add to input if run during model
TELERATE_FILE = os.path.join('../INPUT','landuse','telecommute_max_rate_county.csv')

PARAMS_FILENAME = os.path.join('../INPUT','params.properties')

# input and output; input {}=previous calib iter, output {}=CALIB_ITER
TELECOMMUTE_CONSTANTS_FILE = os.path.join('../main','telecommute_constants.csv')

TARGET_AUTO_SHARE  = 0.40
# if max_telecommute_rate +/- this, then leave it alone
TELECOMMUTE_RATE_THRESHHOLD = 0.005

# todo: make this more intelligent
CONSTANT_INCREMENT = 0.05
CONSTANT_DECREMENT = 0.05

# see EN7 Telecommuting.xlsx (https://mtcdrive.box.com/s/uw3n8wyervle6r2cgoz1j6k4i5lmv253)
# for 2015 and before
P_notworking_if_noworktour_FT = 0.560554289
P_notworking_if_noworktour_PT = 0.553307383
# future
P_notworking_FT = 0.107904288
P_notworking_PT = 0.205942146

In [25]:
TAZDATA_COLS = ['ZONE','DISTRICT','SD','COUNTY','RETEMPN','FPSEMPN','HEREMPN','OTHEMPN','AGREMPN','MWTEMPN','TOTEMP']
tazdata_df = pd.read_csv(TAZDATA_FILE, index_col=False, sep=',', usecols=TAZDATA_COLS)
telecommute_rate_df = pd.read_csv(TELERATE_FILE, sep=',')
telecommute_rate_df = pd.merge(left=tazdata_df, right=telecommute_rate_df, how='left', on='COUNTY')
telecommute_rate_df['max_telecommuters'] = \
    (telecommute_rate_df['AGREMPN']*telecommute_rate_df['AGREMPN_tele']) + \
    (telecommute_rate_df['FPSEMPN']*telecommute_rate_df['FPSEMPN_tele']) + \
    (telecommute_rate_df['HEREMPN']*telecommute_rate_df['HEREMPN_tele']) + \
    (telecommute_rate_df['MWTEMPN']*telecommute_rate_df['MWTEMPN_tele']) + \
    (telecommute_rate_df['RETEMPN']*telecommute_rate_df['RETEMPN_tele']) + \
    (telecommute_rate_df['OTHEMPN']*telecommute_rate_df['OTHEMPN_tele'])
# aggregate back to county
telecommute_rate_df = telecommute_rate_df.groupby('COUNTY').agg({'max_telecommuters':'sum', 'TOTEMP':'sum'}).reset_index()
telecommute_rate_df['max_telecommute_rate'] = telecommute_rate_df['max_telecommuters']/telecommute_rate_df['TOTEMP']

telecommute_df = tazdata_df[['ZONE','SD','COUNTY']].copy()
telecommute_df

Unnamed: 0,ZONE,SD,COUNTY
0,1,2,1
1,2,3,1
2,3,3,1
3,4,1,1
4,5,2,1
...,...,...,...
6588,6589,35,10
6589,6590,35,10
6590,6591,35,10
6591,6592,35,10


In [26]:
TOUR_COLS = ['hh_id','tour_id','person_id','person_num','person_type',
                     'tour_purpose','tour_mode','dest_taz','sampleRate']
tours_df = pd.read_csv(TOUR_FILE, usecols=TOUR_COLS)

In [27]:
tours_df = tours_df.loc[ tours_df['tour_purpose'].str.slice(stop=4)=='work' ]
print('  Filtered to {} rows of work tours_df; head:\n{}'.format(len(tours_df), tours_df.head()))
num_work_tours_df = len(tours_df)
tours_df.drop_duplicates(subset=['hh_id','person_id','person_num'], keep='first', inplace=True)


  Filtered to 30140 rows of work tours_df; head:
     hh_id  person_id  person_num  person_type  tour_id    tour_purpose  \
0  1296553    3541255           1            3        0        work_med   
1  1296553    3541256           2            1        0        work_med   
2  1296419    3540933           1            1        0       work_high   
7  1297387    3542627           3            1        0  work_very high   
9  1297387    3542632           8            2        0  work_very high   

   dest_taz  tour_mode  sampleRate  
0       288         10        0.01  
1        31         10        0.01  
2       353         10        0.01  
7       555         12        0.01  
9       297         12        0.01  


In [78]:
wslocs_df = pd.read_csv(WSLOC_FILE, usecols=['HHID','PersonID','PersonNum','PersonType','WorkLocation'])
# make columns consistent
wslocs_df.rename(columns={'HHID':'hh_id',
                        'PersonID':'person_id',
                        'PersonNum':'person_num',
                        'PersonType':'person_type_str'}, inplace=True)
wslocs_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation
0,1296553,3541255,1,University student,288
1,1296553,3541256,2,Full-time worker,31
2,1296419,3540933,1,Full-time worker,353
3,1296408,3540889,1,Non-worker,0
4,1296408,3540890,2,Non-worker,0


In [85]:
all_zones=list(range(1,6595,1))
work_locations = wslocs_df.WorkLocation.unique()
count=0
for elem in all_zones:
    if elem not in work_locations:
        count=count+1
print(count)

2796


In [86]:
wslocs_df = wslocs_df.loc[ wslocs_df['WorkLocation']>0 ]
work_tours_df = pd.merge(left=wslocs_df, right=tours_df,
                                  on =['hh_id','person_id','person_num'],
                                  how='outer',
                                  indicator=True)
work_tours_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge
0,1296553,3541255,1,University student,288,3.0,0.0,work_med,288.0,10.0,0.01,both
1,1296553,3541256,2,Full-time worker,31,1.0,0.0,work_med,31.0,10.0,0.01,both
2,1296419,3540933,1,Full-time worker,353,1.0,0.0,work_high,353.0,10.0,0.01,both
3,1297387,3542627,3,Full-time worker,555,1.0,0.0,work_very high,555.0,12.0,0.01,both
4,1297387,3542628,4,Full-time worker,397,,,,,,,left_only


In [77]:
wslocs_df[wslocs_df['WorkLocation'].isin([6076, 6146, 822])]

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation


In [87]:
work_tours_df_with_dest = work_tours_df.loc[pd.notnull(work_tours_df.dest_taz),]
work_tours_df.loc[ pd.isnull(work_tours_df.tour_mode), 'tour_mode'] = 0
work_tours_df = work_tours_df.astype({'tour_mode': int})
work_tours_df.loc[ pd.isnull(work_tours_df.sampleRate), 'sampleRate'] = float(0.1)

work_tours_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge
0,1296553,3541255,1,University student,288,3.0,0.0,work_med,288.0,10,0.01,both
1,1296553,3541256,2,Full-time worker,31,1.0,0.0,work_med,31.0,10,0.01,both
2,1296419,3540933,1,Full-time worker,353,1.0,0.0,work_high,353.0,10,0.01,both
3,1297387,3542627,3,Full-time worker,555,1.0,0.0,work_very high,555.0,12,0.01,both
4,1297387,3542628,4,Full-time worker,397,,,,,0,0.1,left_only


In [88]:
work_tours_df['simple_mode'] = 'unset'
work_tours_df.loc[ work_tours_df.tour_mode==0,                                'simple_mode'] = 'no tour'
work_tours_df.loc[(work_tours_df.tour_mode>  0)&(work_tours_df.tour_mode<=6), 'simple_mode'] = 'auto'
work_tours_df.loc[(work_tours_df.tour_mode>=19),                              'simple_mode'] = 'auto' # tnc
work_tours_df.loc[(work_tours_df.tour_mode>= 7)&(work_tours_df.tour_mode<=18),'simple_mode'] = 'non-auto'
work_tours_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge,simple_mode
0,1296553,3541255,1,University student,288,3.0,0.0,work_med,288.0,10,0.01,both,non-auto
1,1296553,3541256,2,Full-time worker,31,1.0,0.0,work_med,31.0,10,0.01,both,non-auto
2,1296419,3540933,1,Full-time worker,353,1.0,0.0,work_high,353.0,10,0.01,both,non-auto
3,1297387,3542627,3,Full-time worker,555,1.0,0.0,work_very high,555.0,12,0.01,both,non-auto
4,1297387,3542628,4,Full-time worker,397,,,,,0,0.1,left_only,no tour


In [89]:
work_tours_df = pd.merge(left    =work_tours_df,
                                     right   =tazdata_df[['ZONE','SD','COUNTY']],
                                     left_on ='WorkLocation',
                                     right_on='ZONE',
                                     how     ='left')

work_tours_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge,simple_mode,ZONE,SD,COUNTY
0,1296553,3541255,1,University student,288,3.0,0.0,work_med,288.0,10,0.01,both,non-auto,288,1,1
1,1296553,3541256,2,Full-time worker,31,1.0,0.0,work_med,31.0,10,0.01,both,non-auto,31,1,1
2,1296419,3540933,1,Full-time worker,353,1.0,0.0,work_high,353.0,10,0.01,both,non-auto,353,1,1
3,1297387,3542627,3,Full-time worker,555,1.0,0.0,work_very high,555.0,12,0.01,both,non-auto,555,1,1
4,1297387,3542628,4,Full-time worker,397,,,,,0,0.1,left_only,no tour,397,1,1


In [72]:
work_tours_df[work_tours_df['COUNTY']==2].SD.unique()

array([7, 6, 5, 3], dtype=int64)

In [90]:
work_tours_df['num_workers'] = 1.0/work_tours_df['sampleRate']
work_tours_df = work_tours_df.loc[(work_tours_df['person_type_str']=='Full-time worker')|
                                          (work_tours_df['person_type_str']=='Part-time worker'), ]
work_tours_df.head()

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge,simple_mode,ZONE,SD,COUNTY,num_workers
1,1296553,3541256,2,Full-time worker,31,1.0,0.0,work_med,31.0,10,0.01,both,non-auto,31,1,1,100.0
2,1296419,3540933,1,Full-time worker,353,1.0,0.0,work_high,353.0,10,0.01,both,non-auto,353,1,1,100.0
3,1297387,3542627,3,Full-time worker,555,1.0,0.0,work_very high,555.0,12,0.01,both,non-auto,555,1,1,100.0
4,1297387,3542628,4,Full-time worker,397,,,,,0,0.1,left_only,no tour,397,1,1,10.0
6,1297387,3542632,8,Part-time worker,297,2.0,0.0,work_very high,297.0,12,0.01,both,non-auto,297,3,1,100.0


In [91]:
work_mode_SD_df = work_tours_df.groupby(['person_type_str','COUNTY','SD','simple_mode']).agg({'num_workers':'sum'}).reset_index()
work_mode_SD_df = pd.pivot_table(work_mode_SD_df, index=['COUNTY','SD'], 
                                             columns=['person_type_str','simple_mode'], values='num_workers').reset_index()
work_mode_SD_df.columns = [' '.join(col).strip() for col in work_mode_SD_df.columns.values]
work_mode_SD_df['Full-time worker not-working'] = P_notworking_if_noworktour_FT*work_mode_SD_df['Full-time worker no tour']
work_mode_SD_df['Part-time worker not-working'] = P_notworking_if_noworktour_FT*work_mode_SD_df['Part-time worker no tour']
# they cannot exceed no tour
work_mode_SD_df['Full-time worker not-working'] = work_mode_SD_df[['Full-time worker not-working','Full-time worker no tour']].min(axis=1) # min across columns
work_mode_SD_df['Part-time worker not-working'] = work_mode_SD_df[['Part-time worker not-working','Part-time worker no tour']].min(axis=1) # min across columns
# remaining is telecommute
work_mode_SD_df['Full-time worker telecommute'] = work_mode_SD_df['Full-time worker no tour'] - work_mode_SD_df['Full-time worker not-working']
work_mode_SD_df['Part-time worker telecommute'] = work_mode_SD_df['Part-time worker no tour'] - work_mode_SD_df['Part-time worker not-working']
# create total workers not taking time off
work_mode_SD_df['Full-time worker working'] = work_mode_SD_df['Full-time worker auto'] + work_mode_SD_df['Full-time worker non-auto'] + work_mode_SD_df['Full-time worker telecommute']
work_mode_SD_df['Part-time worker working'] = work_mode_SD_df['Part-time worker auto'] + work_mode_SD_df['Part-time worker non-auto'] + work_mode_SD_df['Part-time worker telecommute']

# generic
work_mode_SD_df['working'    ] = work_mode_SD_df['Full-time worker working'    ] + work_mode_SD_df['Part-time worker working'    ]
work_mode_SD_df['telecommute'] = work_mode_SD_df['Full-time worker telecommute'] + work_mode_SD_df['Part-time worker telecommute']
work_mode_SD_df['auto'       ] = work_mode_SD_df['Full-time worker auto'       ] + work_mode_SD_df['Part-time worker auto'       ]

# "mode shares" are now a function of people working
work_mode_SD_df['telecommute_rate'] = work_mode_SD_df['telecommute'] / work_mode_SD_df['working']
work_mode_SD_df['auto_share']       = work_mode_SD_df['auto']        / work_mode_SD_df['working']
work_mode_SD_df.head()

Unnamed: 0,COUNTY,SD,Full-time worker auto,Full-time worker no tour,Full-time worker non-auto,Part-time worker auto,Part-time worker no tour,Part-time worker non-auto,Full-time worker not-working,Part-time worker not-working,Full-time worker telecommute,Part-time worker telecommute,Full-time worker working,Part-time worker working,working,telecommute,auto,telecommute_rate,auto_share
0,1,1,36400.0,6220.0,163500.0,6700.0,2000.0,36200.0,3486.647678,1121.108578,2733.352322,878.891422,202633.352322,43778.891422,246412.243744,3612.243744,43100.0,0.014659,0.17491
1,1,2,22900.0,1560.0,30000.0,5800.0,570.0,5700.0,874.464691,319.515945,685.535309,250.484055,53585.535309,11750.484055,65336.019364,936.019364,28700.0,0.014326,0.439268
2,1,3,41200.0,2010.0,29900.0,6100.0,860.0,8800.0,1126.714121,482.076689,883.285879,377.923311,71983.285879,15277.923311,87261.209191,1261.209191,47300.0,0.014453,0.542051
3,1,4,4900.0,340.0,4900.0,1400.0,120.0,900.0,190.588458,67.266515,149.411542,52.733485,9949.411542,2352.733485,12302.145027,202.145027,6300.0,0.016432,0.512106
4,2,3,1100.0,50.0,,200.0,20.0,100.0,28.027714,11.211086,21.972286,8.788914,,308.788914,,30.7612,1300.0,,


In [66]:
work_tours_df[work_tours_df['COUNTY']==2]

Unnamed: 0,hh_id,person_id,person_num,person_type_str,WorkLocation,person_type,tour_id,tour_purpose,dest_taz,tour_mode,sampleRate,_merge,simple_mode,ZONE,SD,COUNTY,num_workers
39,1298896,3545384,1,Full-time worker,647,,,,,0,0.10,left_only,no tour,647,7,2,10.0
71,1299972,3547406,1,Full-time worker,991,1.0,0.0,work_very high,991.0,1,0.01,both,auto,991,7,2,100.0
89,1300179,3547804,11,Full-time worker,860,,,,,0,0.10,left_only,no tour,860,6,2,10.0
99,1300602,3548679,2,Full-time worker,876,1.0,0.0,work_high,876.0,1,0.01,both,auto,876,5,2,100.0
120,1302215,3551978,2,Full-time worker,695,,,,,0,0.10,left_only,no tour,695,6,2,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36696,974743,2808895,2,Full-time worker,651,1.0,0.0,work_high,651.0,1,0.01,both,auto,651,5,2,100.0
36739,980252,2822517,2,Part-time worker,982,2.0,0.0,work_high,982.0,1,0.01,both,auto,982,5,2,100.0
36761,1043423,2979154,1,Full-time worker,663,1.0,0.0,work_very high,663.0,1,0.01,both,auto,663,5,2,100.0
39466,1623263,4539414,1,Full-time worker,991,1.0,0.0,work_very high,991.0,1,0.01,both,auto,991,7,2,100.0


In [64]:
work_mode_SD_df[work_mode_SD_df['COUNTY']==2]

Unnamed: 0,person_type_str,COUNTY,SD,simple_mode,num_workers
12,Full-time worker,2,3,auto,1100.0
13,Full-time worker,2,3,no tour,50.0
14,Full-time worker,2,5,auto,52200.0
15,Full-time worker,2,5,no tour,1780.0
16,Full-time worker,2,5,non-auto,11600.0
17,Full-time worker,2,6,auto,64600.0
18,Full-time worker,2,6,no tour,2210.0
19,Full-time worker,2,6,non-auto,11600.0
20,Full-time worker,2,7,auto,44400.0
21,Full-time worker,2,7,no tour,1700.0


In [92]:
work_mode_SD_df = pd.merge(left  =work_mode_SD_df, 
                                       right =telecommute_rate_df[['COUNTY','max_telecommute_rate']],
                                       how   ='left',
                                       on    ='COUNTY')

work_mode_SD_df.head()

Unnamed: 0,COUNTY,SD,Full-time worker auto,Full-time worker no tour,Full-time worker non-auto,Part-time worker auto,Part-time worker no tour,Part-time worker non-auto,Full-time worker not-working,Part-time worker not-working,Full-time worker telecommute,Part-time worker telecommute,Full-time worker working,Part-time worker working,working,telecommute,auto,telecommute_rate,auto_share,max_telecommute_rate
0,1,1,36400.0,6220.0,163500.0,6700.0,2000.0,36200.0,3486.647678,1121.108578,2733.352322,878.891422,202633.352322,43778.891422,246412.243744,3612.243744,43100.0,0.014659,0.17491,0.266437
1,1,2,22900.0,1560.0,30000.0,5800.0,570.0,5700.0,874.464691,319.515945,685.535309,250.484055,53585.535309,11750.484055,65336.019364,936.019364,28700.0,0.014326,0.439268,0.266437
2,1,3,41200.0,2010.0,29900.0,6100.0,860.0,8800.0,1126.714121,482.076689,883.285879,377.923311,71983.285879,15277.923311,87261.209191,1261.209191,47300.0,0.014453,0.542051,0.266437
3,1,4,4900.0,340.0,4900.0,1400.0,120.0,900.0,190.588458,67.266515,149.411542,52.733485,9949.411542,2352.733485,12302.145027,202.145027,6300.0,0.016432,0.512106,0.266437
4,2,3,1100.0,50.0,,200.0,20.0,100.0,28.027714,11.211086,21.972286,8.788914,,308.788914,,30.7612,1300.0,,,0.221435


In [100]:
telecommute_df = pd.read_csv(TELECOMMUTE_CONSTANTS_FILE)

telecommute_df = telecommute_df[['ZONE','SD','COUNTY','telecommuteConstant']]
telecommute_df.rename(columns={'telecommuteConstant':'telecommuteConstant_prev'}, inplace=True)
telecommute_df = pd.merge(left=telecommute_df, right=work_mode_SD_df,how='left', indicator=True)
telecommute_df[telecommute_df['_merge']!='both']

Unnamed: 0,ZONE,SD,COUNTY,telecommuteConstant_prev,Full-time worker auto,Full-time worker no tour,Full-time worker non-auto,Part-time worker auto,Part-time worker no tour,Part-time worker non-auto,...,Part-time worker telecommute,Full-time worker working,Part-time worker working,working,telecommute,auto,telecommute_rate,auto_share,max_telecommute_rate,_merge
821,822,4,2,0,,,,,,,...,,,,,,,,,,left_only
6075,6076,1,9,0,,,,,,,...,,,,,,,,,,left_only
6145,6146,30,9,0,,,,,,,...,,,,,,,,,,left_only


In [101]:
for column in ['auto_share','telecommute_rate','max_telecommute_rate']:

            telecommute_df[column]=telecommute_df[column].fillna(0)
telecommute_df[telecommute_df['_merge']!='both']

Unnamed: 0,ZONE,SD,COUNTY,telecommuteConstant_prev,Full-time worker auto,Full-time worker no tour,Full-time worker non-auto,Part-time worker auto,Part-time worker no tour,Part-time worker non-auto,...,Part-time worker telecommute,Full-time worker working,Part-time worker working,working,telecommute,auto,telecommute_rate,auto_share,max_telecommute_rate,_merge
821,822,4,2,0,,,,,,,...,,,,,,,0.0,0.0,0.0,left_only
6075,6076,1,9,0,,,,,,,...,,,,,,,0.0,0.0,0.0,left_only
6145,6146,30,9,0,,,,,,,...,,,,,,,0.0,0.0,0.0,left_only


In [103]:
telecommute_df.sort_values(by=['ZONE'], ascending=True, inplace=True)
telecommute_df

Unnamed: 0,ZONE,SD,COUNTY,telecommuteConstant_prev,Full-time worker auto,Full-time worker no tour,Full-time worker non-auto,Part-time worker auto,Part-time worker no tour,Part-time worker non-auto,...,Part-time worker telecommute,Full-time worker working,Part-time worker working,working,telecommute,auto,telecommute_rate,auto_share,max_telecommute_rate,_merge
0,1,2,1,0,22900.0,1560.0,30000.0,5800.0,570.0,5700.0,...,250.484055,53585.535309,11750.484055,65336.019364,936.019364,28700.0,0.014326,0.439268,0.266437,both
1,2,3,1,0,41200.0,2010.0,29900.0,6100.0,860.0,8800.0,...,377.923311,71983.285879,15277.923311,87261.209191,1261.209191,47300.0,0.014453,0.542051,0.266437,both
2,3,3,1,0,41200.0,2010.0,29900.0,6100.0,860.0,8800.0,...,377.923311,71983.285879,15277.923311,87261.209191,1261.209191,47300.0,0.014453,0.542051,0.266437,both
3,4,1,1,0,36400.0,6220.0,163500.0,6700.0,2000.0,36200.0,...,878.891422,202633.352322,43778.891422,246412.243744,3612.243744,43100.0,0.014659,0.174910,0.266437,both
4,5,2,1,0,22900.0,1560.0,30000.0,5800.0,570.0,5700.0,...,250.484055,53585.535309,11750.484055,65336.019364,936.019364,28700.0,0.014326,0.439268,0.266437,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6588,6589,35,10,0,117300.0,6040.0,31100.0,18200.0,2140.0,6000.0,...,940.413822,151054.252094,25140.413822,176194.665916,3594.665916,135500.0,0.020402,0.769036,0.186124,both
6589,6590,35,10,0,117300.0,6040.0,31100.0,18200.0,2140.0,6000.0,...,940.413822,151054.252094,25140.413822,176194.665916,3594.665916,135500.0,0.020402,0.769036,0.186124,both
6590,6591,35,10,0,117300.0,6040.0,31100.0,18200.0,2140.0,6000.0,...,940.413822,151054.252094,25140.413822,176194.665916,3594.665916,135500.0,0.020402,0.769036,0.186124,both
6591,6592,35,10,0,117300.0,6040.0,31100.0,18200.0,2140.0,6000.0,...,940.413822,151054.252094,25140.413822,176194.665916,3594.665916,135500.0,0.020402,0.769036,0.186124,both
