In [5]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
import openmatrix as omx
import datetime

pd.set_option("display.max_columns", 100)
start_time = datetime.datetime.now()

In [6]:
### Run Time Function
def print_runtime(t1, t2):
    tot_sec = t2.timestamp()-t1.timestamp()
    hours = tot_sec//3600
    minutes = (tot_sec-hours*3600)//60
    seconds = tot_sec-hours*3600-minutes*60

    print("Run Time:", hours, 'hrs', minutes, 'mins', round(seconds), "sec")
    
    return

In [7]:
# Define Data Paths
### 2016 survey data
raw_2016_loc = r"C:\abm_runs\rohans\calibration\workplace_location\data\hts\sandag_2016_survey\data"

### 2022 survey data
raw_2022_loc = r"C:\abm_runs\rohans\calibration\workplace_location\data\hts\sandag_2022_survey\sandag_hts"

### Processed 2016 & 2022 data
survey_loc= r'C:\abm_runs\rohans\calibration\workplace_location\data\hts\survey_data'

### landuse file
landuse_loc = r"C:\abm_runs\rohans\input_2022\land_use.csv"

### zone shape files
maz_loc = r"C:\abm_runs\rohans\calibration\shp\mgra15\mgra15.shp"
taz_loc = r"C:\abm_runs\rohans\calibration\shp\taz15\taz15.shp"

### traffic skims file
skims_loc = r"C:\abm_runs\rohans\input_2022\traffic_skims_MD.omx" 

### Location to save outputs
output_loc = r"C:\abm_runs\rohans\calibration\workplace_location\output"

In [8]:
### Read files
landuse_df = pd.read_csv(landuse_loc)
taz_gdf = gpd.read_file(taz_loc)
maz_gdf = gpd.read_file(maz_loc)
skims = omx.open_file(skims_loc)
dist_mtx = np.array(skims['SOV_NT_M_DIST__MD']) 

### Change MAZ-TAZ crs to global coordinate system
# CA zone 6 to lat long
taz_gdf = taz_gdf.to_crs(epsg=4326)
maz_gdf = maz_gdf.to_crs(epsg=4326)

In [9]:
### Create crosswalks
maz_taz_xwalk = pd.Series(landuse_df.TAZ.values, index=landuse_df.MAZ).to_dict()
maz_pmsa_xwalk = pd.Series(landuse_df.pseudomsa.values, index=landuse_df.MAZ).to_dict()

In [10]:
# Read and prepare HTS survey data
### 2016 Raw Data
raw_hh_16 = pd.read_csv(os.path.join(raw_2016_loc, 'SDRTS_Household_Data_20170731.csv'))
raw_person_16 = pd.read_csv(os.path.join(raw_2016_loc, 'SDRTS_Person_Data_20170731.csv'))

### 2022 Raw Data
raw_hh_22 = pd.read_csv(os.path.join(raw_2022_loc, 'hh.csv'))
raw_person_22 = pd.read_csv(os.path.join(raw_2022_loc, 'person.csv'))

### Processed and Combined Data
persons = pd.read_csv(os.path.join(survey_loc, 'combined_persons.csv'))
households = pd.read_csv(os.path.join(survey_loc, 'combined_households.csv'))

In [11]:
### Get person weights
per_weight_16_df = raw_person_16[['hhid', 'pernum', 'hh_final_weight_456x']].rename(columns={'hhid': 'HH_ID', 'pernum': 'PER_ID', 'hh_final_weight_456x': 'PER_WEIGHT'})
per_weight_22_df = raw_person_22[['hh_id', 'person_num', 'person_weight']].rename(columns={'hh_id': 'HH_ID', 'person_num': 'PER_ID', 'person_weight': 'PER_WEIGHT'})
per_weight_df = pd.concat([per_weight_16_df, per_weight_22_df])
per_weight_df

Unnamed: 0,HH_ID,PER_ID,PER_WEIGHT
0,161000385,1,67.171900
1,161000385,2,67.171900
2,161000385,3,67.171900
3,161000385,4,67.171900
4,161000451,1,19.608600
...,...,...,...
5612,22118058,1,52.993912
5613,22118058,2,52.993912
5614,22118059,1,37.228734
5615,22118063,1,82.211000


### Create Worker Flows

In [12]:
### Add home_zone_id households
hh_home_zone = households[['HH_ID', 'home_zone_id']].value_counts().reset_index(name='count').drop(['count'], axis=1)
persons = pd.merge(persons, hh_home_zone, how='left', on='HH_ID', suffixes=('', '_x'))

### Add person weights
persons = pd.merge(persons, per_weight_df, how='left', on=['HH_ID', 'PER_ID'], suffixes=('', '_x'))

persons

Unnamed: 0,person_id,HH_ID,PER_ID,day,survey_year,ptype,pstudent,is_student,pemploy,AGE_CAT,age,PNUM,school_zone_id,workplace_zone_id,work_from_home,telecommute_frequency,sex,free_parking_at_work,transit_pass_subsidy,transit_pass_ownership,industry,relationship,household_id,external_worker_identification,external_workplace_zone_id,is_worker,educ,home_zone_id,PER_WEIGHT
0,1,161000385,1,1,2016,2,3,False,2,7.0,46,1,-1.0,5476.0,False,No_Telecommute,2,True,0,0,healthcare,0,1,1,-1.0,True,13,13119,67.171900
1,2,161000385,2,1,2016,4,3,False,3,8.0,51,2,-1.0,-1.0,False,No_Telecommute,1,False,0,0,,1,1,1,-1.0,False,13,13119,67.171900
2,3,161000385,3,1,2016,7,1,True,4,2.0,14,3,7056.0,-1.0,False,No_Telecommute,2,False,0,0,,2,1,1,-1.0,False,0,13119,67.171900
3,4,161000385,4,1,2016,7,1,True,4,2.0,14,4,13413.0,-1.0,False,No_Telecommute,2,False,0,0,,2,1,1,-1.0,False,0,13119,67.171900
4,5,161000451,1,1,2016,4,3,False,3,10.0,62,1,-1.0,-1.0,False,No_Telecommute,2,False,0,0,,0,2,1,-1.0,False,13,19303,19.608600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96876,96877,22118046,2,4,2022,1,3,False,1,7.0,49,2,-1.0,13672.0,False,No_Telecommute,1,False,0,0,construction,1,49758,1,-1.0,True,13,5406,365.660509
96877,96878,22118047,1,4,2022,1,3,False,1,5.0,32,1,-1.0,7462.0,False,No_Telecommute,2,True,0,0,other,0,49759,1,-1.0,True,13,11721,36.207345
96878,96879,22118053,1,4,2022,2,3,False,2,8.0,61,1,-1.0,857.0,False,2_3_days_week,2,True,0,0,retail,0,49760,1,-1.0,True,13,7270,37.234579
96879,96880,22118054,1,4,2022,2,3,False,2,5.0,30,1,-1.0,3759.0,False,2_3_days_week,2,False,0,0,business_srv,0,49761,1,-1.0,True,13,2490,781.833294


In [13]:
### Get home TAZs and PMSAs
persons['home_taz'] = persons['home_zone_id'].map(maz_taz_xwalk)
persons['home_pmsa'] = persons['home_zone_id'].map(maz_pmsa_xwalk)

### Get work TAZs and PMSAs
persons['work_taz'] = persons['workplace_zone_id'].map(maz_taz_xwalk)
persons['work_pmsa'] = persons['workplace_zone_id'].map(maz_pmsa_xwalk)

persons

Unnamed: 0,person_id,HH_ID,PER_ID,day,survey_year,ptype,pstudent,is_student,pemploy,AGE_CAT,age,PNUM,school_zone_id,workplace_zone_id,work_from_home,telecommute_frequency,sex,free_parking_at_work,transit_pass_subsidy,transit_pass_ownership,industry,relationship,household_id,external_worker_identification,external_workplace_zone_id,is_worker,educ,home_zone_id,PER_WEIGHT,home_taz,home_pmsa,work_taz,work_pmsa
0,1,161000385,1,1,2016,2,3,False,2,7.0,46,1,-1.0,5476.0,False,No_Telecommute,2,True,0,0,healthcare,0,1,1,-1.0,True,13,13119,67.171900,2439,3,3731.0,3.0
1,2,161000385,2,1,2016,4,3,False,3,8.0,51,2,-1.0,-1.0,False,No_Telecommute,1,False,0,0,,1,1,1,-1.0,False,13,13119,67.171900,2439,3,,
2,3,161000385,3,1,2016,7,1,True,4,2.0,14,3,7056.0,-1.0,False,No_Telecommute,2,False,0,0,,2,1,1,-1.0,False,0,13119,67.171900,2439,3,,
3,4,161000385,4,1,2016,7,1,True,4,2.0,14,4,13413.0,-1.0,False,No_Telecommute,2,False,0,0,,2,1,1,-1.0,False,0,13119,67.171900,2439,3,,
4,5,161000451,1,1,2016,4,3,False,3,10.0,62,1,-1.0,-1.0,False,No_Telecommute,2,False,0,0,,0,2,1,-1.0,False,13,19303,19.608600,1260,7,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96876,96877,22118046,2,4,2022,1,3,False,1,7.0,49,2,-1.0,13672.0,False,No_Telecommute,1,False,0,0,construction,1,49758,1,-1.0,True,13,5406,365.660509,3245,4,1523.0,3.0
96877,96878,22118047,1,4,2022,1,3,False,1,5.0,32,1,-1.0,7462.0,False,No_Telecommute,2,True,0,0,other,0,49759,1,-1.0,True,13,11721,36.207345,3705,2,3559.0,4.0
96878,96879,22118053,1,4,2022,2,3,False,2,8.0,61,1,-1.0,857.0,False,2_3_days_week,2,True,0,0,retail,0,49760,1,-1.0,True,13,7270,37.234579,3089,4,3057.0,4.0
96879,96880,22118054,1,4,2022,2,3,False,2,5.0,30,1,-1.0,3759.0,False,2_3_days_week,2,False,0,0,business_srv,0,49761,1,-1.0,True,13,2490,781.833294,2309,2,2412.0,2.0


In [14]:
persons.columns

Index(['person_id', 'HH_ID', 'PER_ID', 'day', 'survey_year', 'ptype',
       'pstudent', 'is_student', 'pemploy', 'AGE_CAT', 'age', 'PNUM',
       'school_zone_id', 'workplace_zone_id', 'work_from_home',
       'telecommute_frequency', 'sex', 'free_parking_at_work',
       'transit_pass_subsidy', 'transit_pass_ownership', 'industry',
       'relationship', 'household_id', 'external_worker_identification',
       'external_workplace_zone_id', 'is_worker', 'educ', 'home_zone_id',
       'PER_WEIGHT', 'home_taz', 'home_pmsa', 'work_taz', 'work_pmsa'],
      dtype='object')

In [15]:
persons.external_worker_identification.value_counts()

1    96160
0      721
Name: external_worker_identification, dtype: int64

In [16]:
persons.work_from_home.value_counts()

False    92023
True      4858
Name: work_from_home, dtype: int64

In [17]:
### Get workers from all persons
workers = persons[persons['is_worker']==1]

### Get persons working within modeling region
home_zone_workers = workers[workers['work_taz']>12]
home_zone_workers

Unnamed: 0,person_id,HH_ID,PER_ID,day,survey_year,ptype,pstudent,is_student,pemploy,AGE_CAT,age,PNUM,school_zone_id,workplace_zone_id,work_from_home,telecommute_frequency,sex,free_parking_at_work,transit_pass_subsidy,transit_pass_ownership,industry,relationship,household_id,external_worker_identification,external_workplace_zone_id,is_worker,educ,home_zone_id,PER_WEIGHT,home_taz,home_pmsa,work_taz,work_pmsa
0,1,161000385,1,1,2016,2,3,False,2,7.0,46,1,-1.0,5476.0,False,No_Telecommute,2,True,0,0,healthcare,0,1,1,-1.0,True,13,13119,67.171900,2439,3,3731.0,3.0
8,9,161000914,2,1,2016,1,3,False,1,9.0,59,2,-1.0,4650.0,False,No_Telecommute,2,False,0,0,healthcare,1,4,1,-1.0,True,13,2699,65.056800,1699,3,2016.0,3.0
13,14,161001191,1,1,2016,1,3,False,1,5.0,30,1,7883.0,14005.0,False,No_Telecommute,2,False,0,0,mgmt_srv,0,7,1,-1.0,True,13,10907,0.000000,1368,3,1328.0,3.0
14,15,161001191,2,1,2016,3,2,True,2,5.0,30,2,10937.0,10937.0,False,No_Telecommute,1,False,0,0,construction,1,7,1,-1.0,True,13,10907,0.000000,1368,3,952.0,3.0
17,18,161001780,2,1,2016,1,3,False,1,7.0,49,2,-1.0,10264.0,False,No_Telecommute,2,True,0,0,business_srv,1,9,1,-1.0,True,13,13431,353.357900,3133,3,3502.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96876,96877,22118046,2,4,2022,1,3,False,1,7.0,49,2,-1.0,13672.0,False,No_Telecommute,1,False,0,0,construction,1,49758,1,-1.0,True,13,5406,365.660509,3245,4,1523.0,3.0
96877,96878,22118047,1,4,2022,1,3,False,1,5.0,32,1,-1.0,7462.0,False,No_Telecommute,2,True,0,0,other,0,49759,1,-1.0,True,13,11721,36.207345,3705,2,3559.0,4.0
96878,96879,22118053,1,4,2022,2,3,False,2,8.0,61,1,-1.0,857.0,False,2_3_days_week,2,True,0,0,retail,0,49760,1,-1.0,True,13,7270,37.234579,3089,4,3057.0,4.0
96879,96880,22118054,1,4,2022,2,3,False,2,5.0,30,1,-1.0,3759.0,False,2_3_days_week,2,False,0,0,business_srv,0,49761,1,-1.0,True,13,2490,781.833294,2309,2,2412.0,2.0


In [18]:
### Get unique records
cols_to_keep = ['HH_ID', 'PER_ID', 'survey_year', 'home_taz', 'home_pmsa', 'work_taz', 'work_pmsa', 'PER_WEIGHT']
unique_home_zone_workers = home_zone_workers[cols_to_keep].value_counts().reset_index(name='count')
unique_home_zone_workers

Unnamed: 0,HH_ID,PER_ID,survey_year,home_taz,home_pmsa,work_taz,work_pmsa,PER_WEIGHT,count
0,171024982,1,2016,4207,4,1278.0,2.0,361.439600,7
1,171083888,1,2016,203,6,168.0,6.0,65.334600,7
2,171083880,1,2016,2271,2,2064.0,2.0,240.363700,7
3,161240740,2,2016,2150,2,1307.0,3.0,19.657000,7
4,161240740,1,2016,2150,2,3102.0,3.0,19.657000,7
...,...,...,...,...,...,...,...,...,...
7323,171122322,1,2016,1511,3,1511.0,3.0,53.776600,1
7324,22102018,1,2022,2907,2,3578.0,3.0,36.856854,1
7325,22102023,1,2022,3034,2,2838.0,3.0,37.273061,1
7326,22102023,2,2022,3034,2,3034.0,2.0,37.273061,1


In [20]:
### Worker Flows (weighted)
unique_workers_2016 = unique_home_zone_workers[unique_home_zone_workers['survey_year']==2016]
unique_workers_2022 = unique_home_zone_workers[unique_home_zone_workers['survey_year']==2022]

workers_flows_2016 = pd.crosstab(index=unique_workers_2016['home_pmsa'], 
                                columns=unique_workers_2016['work_pmsa'], 
                                values=unique_workers_2016['PER_WEIGHT'], 
                                aggfunc=np.sum, 
                                margins=False, dropna=False).round().fillna(0).astype(int)

workers_flows_2022 = pd.crosstab(index=unique_workers_2022['home_pmsa'], 
                                columns=unique_workers_2022['work_pmsa'], 
                                values=unique_workers_2022['PER_WEIGHT'], 
                                aggfunc=np.sum, 
                                margins=False, dropna=False).round().fillna(0).astype(int)

worker_flows = round((workers_flows_2016 + workers_flows_2022) * 0.5)

### Calculate total for 2016 and 2022 combined
workers_flows_2016.loc['All'] = workers_flows_2016.sum(axis=0)
workers_flows_2016['All'] = workers_flows_2016.sum(axis=1)

workers_flows_2016.to_csv(os.path.join(output_loc, r'workerflows_2016_hts.csv'))

### Calculate total for 2016 and 2022 combined
workers_flows_2022.loc['All'] = workers_flows_2022.sum(axis=0)
workers_flows_2022['All'] = workers_flows_2022.sum(axis=1)

workers_flows_2022.to_csv(os.path.join(output_loc, r'workerflows_2022_hts.csv'))

### Calculate total for 2016 and 2022 combined
worker_flows.loc['All'] = worker_flows.sum(axis=0)
worker_flows['All'] = worker_flows.sum(axis=1)

worker_flows.to_csv(os.path.join(output_loc, r'workerflows_hts.csv'))
worker_flows

work_pmsa,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,All
home_pmsa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,5687.0,4182.0,3642.0,392.0,268.0,338.0,10.0,0.0,14519.0
2,32416.0,79204.0,97438.0,16888.0,12796.0,3200.0,1822.0,66.0,243830.0
3,15024.0,47232.0,262883.0,8367.0,17966.0,12840.0,9636.0,0.0,373948.0
4,15196.0,27493.0,27166.0,46590.0,5475.0,2032.0,0.0,36.0,123988.0
5,7807.0,24496.0,53882.0,9410.0,61894.0,3528.0,1186.0,402.0,162605.0
6,2443.0,3116.0,34322.0,1510.0,122.0,82106.0,26443.0,0.0,150062.0
7,2740.0,1744.0,23230.0,702.0,1555.0,28990.0,57562.0,0.0,116523.0
8,0.0,264.0,0.0,646.0,666.0,0.0,0.0,0.0,1576.0
All,81313.0,187731.0,502563.0,84505.0,100742.0,133034.0,96659.0,504.0,1187051.0


In [21]:
### Function to get distance from skims
def get_distance(origin, destination, mtx=dist_mtx):
    dist = mtx[origin-1, destination-1]
    return dist

### Ceil distance to nearest 10 multiple
def ceil(x, step=1):
    return step * np.ceil(x/step)

### Calculate distance between home and work locations
home_zone_workers[['home_taz', 'work_taz']] = home_zone_workers[['home_taz', 'work_taz']].astype(int)
home_zone_workers['work_distance'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
home_zone_workers['work_distance_stepped'] = home_zone_workers['work_distance'].apply(ceil, step=10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers[['home_taz', 'work_taz']] = home_zone_workers[['home_taz', 'work_taz']].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_distance'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ve

In [22]:
### Create work length frequency table
freq_pivot = pd.pivot_table(home_zone_workers, index='work_distance_stepped', values='PER_WEIGHT', aggfunc=['count', np.sum])
freq_pivot.columns = freq_pivot.columns.get_level_values(0)
freq_pivot

Unnamed: 0_level_0,count,sum
work_distance_stepped,Unnamed: 1_level_1,Unnamed: 2_level_1
10.0,19918,5444354.0
20.0,12508,3423539.0
30.0,3751,1007969.0
40.0,1045,252917.7
50.0,273,98889.06
60.0,79,24920.9
70.0,15,976.0876
100.0,7,3234.294


In [23]:
### Reformat work length frequency table
data = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80-90', '90-100']
index_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

work_dist_freq = pd.DataFrame(data, columns=['Distance'], index=index_list)
work_dist_freq['Fequency'] = freq_pivot['count'].round()
work_dist_freq['Fequency (Weighted)'] = freq_pivot['sum'].round()
work_dist_freq = work_dist_freq.fillna(0).reset_index(drop=True)

work_dist_freq.to_csv(os.path.join(output_loc, r'work_length_freq_hts.csv'), index=False)
work_dist_freq

Unnamed: 0,Distance,Fequency,Fequency (Weighted)
0,0-10,19918.0,5444354.0
1,10-20,12508.0,3423539.0
2,20-30,3751.0,1007969.0
3,30-40,1045.0,252918.0
4,40-50,273.0,98889.0
5,50-60,79.0,24921.0
6,60-70,15.0,976.0
7,70-80,0.0,0.0
8,80-90,0.0,0.0
9,90-100,7.0,3234.0


In [24]:
end_time = datetime.datetime.now()
print("Start Time:", start_time)
print("End Time:", end_time)
print_runtime(start_time, end_time)

Start Time: 2023-08-14 13:22:47.773625
End Time: 2023-08-14 13:22:57.603763
Run Time: 0.0 hrs 0.0 mins 10 sec
