In [None]:
import os
import numpy as np
import pandas as pd
import openmatrix as omx
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to every matplotlib figure drawn in this notebook
sns.set()
# Let pandas display up to 100 columns before truncating wide DataFrames
pd.set_option("display.max_columns", 100)
# Wall-clock start of the notebook run; reported by print_runtime() in the last cell
start_time = datetime.datetime.now()

In [None]:
### Run Time Function
def print_runtime(t1, t2):
    """Print the wall-clock time elapsed between two datetimes.

    Parameters
    ----------
    t1 : datetime.datetime
        Start of the interval.
    t2 : datetime.datetime
        End of the interval.

    Returns
    -------
    None
        The result is only printed, as ``Run Time: H hrs M mins S sec``
        with whole-number fields.
    """
    # Round to whole seconds BEFORE splitting into h/m/s. Rounding afterwards
    # (as the previous version did) could print "60 sec" for e.g. 59.6 s, and
    # floor-dividing floats printed values such as "1.0 hrs".
    tot_sec = int(round(t2.timestamp() - t1.timestamp()))
    hours, remainder = divmod(tot_sec, 3600)
    minutes, seconds = divmod(remainder, 60)

    print("Run Time:", hours, 'hrs', minutes, 'mins', seconds, "sec")

    return

In [None]:
# Define Data Paths
# NOTE(review): every value below is an absolute, machine-specific Windows path;
# this cell must be edited before the notebook can run in another environment.

### Activitysim output data
### (directory that final_persons.csv and final_households.csv are read from)
asim_output_loc = r"C:\abm_runs\rohans\output"

### 2022 survey data
### (directory that the raw hh.csv and person.csv are read from)
raw_2022_loc = r"C:\abm_runs\rohans\calibration\workplace_location\data\hts\sandag_2022_survey\sandag_hts"

### Processed survey data - 2016 & 2022
### (directory that combined_persons.csv and combined_households.csv are read from)
survey_loc= r"C:\abm_runs\rohans\calibration\workplace_location\data\hts\survey_data"

### landuse file
### (CSV whose MAZ and TAZ columns are used to build the MAZ -> TAZ crosswalk)
landuse_loc = r"C:\abm_runs\rohans\input_2022\land_use.csv"

### traffic skims file
### (OMX file; the 'SOV_NT_M_DIST__MD' matrix is read from it as the distance skim)
skims_loc = r"C:\abm_runs\rohans\input_2022\traffic_skims_MD.omx" 

### zone shape files
### (not referenced by any other cell visible in this notebook)
maz_loc = r"C:\abm_runs\rohans\calibration\shp\mgra15\mgra15.shp"
taz_loc = r"C:\abm_runs\rohans\calibration\shp\taz15\taz15.shp"

### Location to save outputs
### (only used by the commented-out to_csv call in the summary cell)
output_loc = r"C:\abm_runs\rohans\calibration\workplace_location\output"

In [None]:
### Read common files
landuse_df = pd.read_csv(landuse_loc)

# Copy the distance skim into memory and release the OMX (HDF5) file handle
# immediately. Previously the file was opened and never closed, keeping the
# skim file locked for the lifetime of the kernel.
with omx.open_file(skims_loc) as skims:
    dist_mtx = np.array(skims['SOV_NT_M_DIST__MD'])

### Create crosswalks
# MAZ -> TAZ lookup built from the land-use table
maz_taz_xwalk = pd.Series(landuse_df.TAZ.values, index=landuse_df.MAZ).to_dict()

# NOTE: final_persons.csv / final_households.csv used to be read here as well,
# but the "Process ActivitySim Data" cell directly below reads the same two
# files into the same variables before they are first used, so the duplicate
# (and slow) read was dropped from this cell.

### Process ActivitySim Data

In [None]:
### Read data
asim_per = pd.read_csv(os.path.join(asim_output_loc, 'final_persons.csv'))
asim_hh = pd.read_csv(os.path.join(asim_output_loc, 'final_households.csv'))

### Get weights
# Each person inherits the household sample rate; the expansion weight is its inverse.
asim_per = pd.merge(asim_per, asim_hh[['household_id', 'sample_rate']], how='left', on=['household_id'], suffixes=('', '_x'))
asim_per['weight'] = 1/asim_per['sample_rate']

### Get home and work TAZs
# Zone ids missing from the crosswalk map to NaN.
asim_per['home_taz'] = asim_per['home_zone_id'].map(maz_taz_xwalk)
asim_per['work_taz'] = asim_per['workplace_zone_id'].map(maz_taz_xwalk)

### Get workers from all persons
workers = asim_per[asim_per['is_worker']==1]

### Get persons working within modeling region
# NOTE(review): TAZ ids 1-12 are presumably external/gateway zones - confirm.
# Rows with a NaN work_taz fail the comparison and are dropped here as well.
# .copy() makes this an independent frame: later cells add columns to it
# ('work_distance', 'Work Distance (miles)', 'Source'), which otherwise raises
# SettingWithCopyWarning because the frame is a filtered slice of `workers`.
home_zone_workers = workers[workers['work_taz']>12].copy()

print('Total workers: {:,.0f} \nTotal workers in SANDAG Region: {:,.0f}'.format(workers.weight.sum(), home_zone_workers.weight.sum()))

In [None]:
### Function to get distance from skims
def get_distance(origin, destination, mtx=dist_mtx):
    """Look up the skim value for one origin/destination zone pair.

    Parameters
    ----------
    origin, destination : number
        Zone ids. Each is shifted down by one and truncated to ``int`` to form
        the row / column index into ``mtx``.
        NOTE(review): this assumes zone id ``k`` sits at matrix position
        ``k - 1`` - confirm against the skim's zone mapping.
    mtx : 2-D array, optional
        Matrix to read from. Defaults to the ``dist_mtx`` object that exists
        at the moment this function is defined.

    Returns
    -------
    The single matrix element at ``[origin - 1, destination - 1]``.
    """
    row_idx = int(origin - 1)
    col_idx = int(destination - 1)
    return mtx[row_idx, col_idx]

In [None]:
### Calculate distance between home and work locations
# Walk the two TAZ columns in lockstep instead of DataFrame.apply(axis=1):
# the same get_distance() lookup is made per worker, but without constructing
# a Series object for every row (much faster on a full synthetic population).
home_zone_workers['work_distance'] = [
    get_distance(home_taz, work_taz)
    for home_taz, work_taz in zip(home_zone_workers['home_taz'], home_zone_workers['work_taz'])
]

### Code work distance for workers
# Right-closed bins reproduce the original rules exactly:
#   <=2, (2, 5], (5, 10], (10, 20], (20, 30], >30
# astype(object) keeps plain string labels (NaN where the distance is missing),
# so grouping and plotting behave as they did with hand-written boolean masks.
dist_bin_edges = [-np.inf, 2, 5, 10, 20, 30, np.inf]
dist_bin_labels = ['1. 0-2', '2. 2-5', '3. 5-10', '4. 10-20', '5. 20-30', '6. >30']
home_zone_workers['Work Distance (miles)'] = pd.cut(
    home_zone_workers['work_distance'],
    bins=dist_bin_edges,
    labels=dist_bin_labels,
    right=True,
).astype(object)

### Create work length frequency table
# 'count' = number of worker records, 'sum' = weighted number of workers.
# The string 'sum' replaces np.sum, which pandas deprecates as an aggfunc
# (FutureWarning); the resulting column label is still 'sum'.
freq_pivot = pd.pivot_table(
    home_zone_workers,
    index='Work Distance (miles)',
    values='weight',
    aggfunc=['count', 'sum'],
).round().astype(int)
# Keep only the aggregation name from the (aggfunc, value) column MultiIndex
freq_pivot.columns = freq_pivot.columns.get_level_values(0)
freq_pivot = freq_pivot.rename(columns={'count': 'Asim Frequency', 'sum': 'Asim Frequency (Weighted)'})
freq_pivot.loc['Total'] = freq_pivot.sum(axis=0)
# Share of weighted workers per distance bin (the 'Total' row evaluates to 1)
freq_pivot['Asim Share'] = (
    freq_pivot['Asim Frequency (Weighted)'] / freq_pivot.loc['Total', 'Asim Frequency (Weighted)']
).round(6)
freq_pivot

### Process HTS Data

In [None]:
### Read and process HTS data
### 2022 raw data
raw_hh_22 = pd.read_csv(os.path.join(raw_2022_loc, 'hh.csv'))
raw_per_22 = pd.read_csv(os.path.join(raw_2022_loc, 'person.csv'))

### Processed and Combined HTS data
hts_per = pd.read_csv(os.path.join(survey_loc, 'combined_persons.csv'))
hts_hh = pd.read_csv(os.path.join(survey_loc, 'combined_households.csv'))

### Only keep 2022 HTS data
hts_per = hts_per.loc[hts_per['survey_year'].eq(2022)].reset_index(drop=True)
hts_hh = hts_hh.loc[hts_hh['survey_year'].eq(2022)].reset_index(drop=True)

### Add home_zone_id households
# value_counts() collapses the household table to its distinct, non-null
# (HH_ID, home_zone_id) pairs; the helper 'count' column is discarded again.
hts_hh_home_zone = (
    hts_hh[['HH_ID', 'home_zone_id']]
    .value_counts()
    .reset_index(name='count')
    .drop(columns=['count'])
)
hts_per = hts_per.merge(hts_hh_home_zone, how='left', on='HH_ID', suffixes=('', '_x'))

### Get home and work TAZs
# Zone ids missing from the crosswalk map to NaN.
hts_per['home_taz'] = hts_per['home_zone_id'].map(maz_taz_xwalk)
hts_per['work_taz'] = hts_per['workplace_zone_id'].map(maz_taz_xwalk)

### Get person weights
# Rename the raw 2022 person columns to the processed-file key names so the
# weight can be joined on (HH_ID, PER_ID).
hts_per_weight_df = (
    raw_per_22[['hh_id', 'person_num', 'person_weight']]
    .rename(columns={'hh_id': 'HH_ID', 'person_num': 'PER_ID', 'person_weight': 'weight'})
)
hts_per = hts_per.merge(hts_per_weight_df, how='left', on=['HH_ID', 'PER_ID'], suffixes=('', '_x'))

In [None]:
### Get workers from all persons
hts_workers = hts_per[hts_per['is_worker']==1]

### Get persons working within modeling region
# NOTE(review): TAZ ids 1-12 are presumably external/gateway zones - confirm.
hts_home_zone_workers = hts_workers[hts_workers['work_taz']>12]

### Get unique records
cols_to_keep = ['HH_ID', 'PER_ID', 'survey_year', 'home_taz', 'work_taz', 'weight']
# value_counts() collapses repeated person records into one row each.
# dropna=False keeps workers whose work_taz (or another key column) is missing,
# so the overall worker total below is not silently reduced.
unique_hts_workers = hts_workers[cols_to_keep].value_counts(dropna=False).reset_index(name='count')
unique_hts_home_zone_workers = hts_home_zone_workers[cols_to_keep].value_counts().reset_index(name='count')

# Both totals now come from the de-duplicated frames. The overall total used
# to be summed over `hts_workers`, which counts a person's weight once per
# repeated record and was inconsistent with the regional total on the next line.
print('Total HTS workers: {:,.0f}'.format(unique_hts_workers.weight.sum()))
print('Total HTS workers in SANDAG Region: {:,.0f}'.format(unique_hts_home_zone_workers.weight.sum()))

In [None]:
### Calculate distance between home and work locations
# Walk the two TAZ columns in lockstep instead of DataFrame.apply(axis=1):
# the same get_distance() lookup is made per worker, but without constructing
# a Series object for every row.
unique_hts_home_zone_workers['work_distance'] = [
    get_distance(home_taz, work_taz)
    for home_taz, work_taz in zip(unique_hts_home_zone_workers['home_taz'], unique_hts_home_zone_workers['work_taz'])
]

### Code work distance for workers
# Right-closed bins reproduce the original rules exactly:
#   <=2, (2, 5], (5, 10], (10, 20], (20, 30], >30
# astype(object) keeps plain string labels (NaN where the distance is missing),
# so grouping and plotting behave as they did with hand-written boolean masks.
dist_bin_edges = [-np.inf, 2, 5, 10, 20, 30, np.inf]
dist_bin_labels = ['1. 0-2', '2. 2-5', '3. 5-10', '4. 10-20', '5. 20-30', '6. >30']
unique_hts_home_zone_workers['Work Distance (miles)'] = pd.cut(
    unique_hts_home_zone_workers['work_distance'],
    bins=dist_bin_edges,
    labels=dist_bin_labels,
    right=True,
).astype(object)

### Create work length frequency table
# 'count' = number of survey worker records, 'sum' = weighted number of workers.
# The string 'sum' replaces np.sum, which pandas deprecates as an aggfunc
# (FutureWarning); the resulting column label is still 'sum'.
hts_freq_pivot = pd.pivot_table(
    unique_hts_home_zone_workers,
    index='Work Distance (miles)',
    values='weight',
    aggfunc=['count', 'sum'],
).round().astype(int)
# Keep only the aggregation name from the (aggfunc, value) column MultiIndex
hts_freq_pivot.columns = hts_freq_pivot.columns.get_level_values(0)
hts_freq_pivot = hts_freq_pivot.rename(columns={'count': 'HTS Frequency', 'sum': 'HTS Frequency (Weighted)'})
hts_freq_pivot.loc['Total'] = hts_freq_pivot.sum(axis=0)
# Share of weighted workers per distance bin (the 'Total' row evaluates to 1)
hts_freq_pivot['HTS Share'] = (
    hts_freq_pivot['HTS Frequency (Weighted)'] / hts_freq_pivot.loc['Total', 'HTS Frequency (Weighted)']
).round(6)
hts_freq_pivot

### Summarize ActivitySim and HTS

In [None]:
### Combine HTS and Asim frequency tables
# Place the survey and model tables side by side (aligned on the distance-bin
# index), then derive the comparison columns in order:
#   Diff Share           = model share - survey share
#   HTS/Asim             = survey share / model share
#   Calibration Constant = ln(HTS/Asim)
comb_freq_pivot = (
    pd.concat([hts_freq_pivot, freq_pivot], axis=1)
    .assign(**{
        'Diff Share': lambda tbl: tbl['Asim Share'] - tbl['HTS Share'],
        'HTS/Asim': lambda tbl: tbl['HTS Share'] / tbl['Asim Share'],
        'Calibration Constant': lambda tbl: np.log(tbl['HTS/Asim']),
    })
    # Drop the "N. " sort prefix from the bin labels for presentation
    .rename(index={
        '1. 0-2': '0-2',
        '2. 2-5': '2-5',
        '3. 5-10': '5-10',
        '4. 10-20': '10-20',
        '5. 20-30': '20-30',
        '6. >30': '>30',
    })
    .reset_index()
)

# comb_freq_pivot.to_csv(os.path.join(output_loc, r'work_length_freq_comparison.csv'), index=False)
comb_freq_pivot

In [None]:
### Process data for plot
# Tag each worker table with its origin so both can be stacked into one frame
unique_hts_home_zone_workers['Source'] = 'HTS'
home_zone_workers['Source'] = 'ActivitySim'

plot_cols = ['Work Distance (miles)', 'weight', 'Source']
stacked_workers = pd.concat(
    [unique_hts_home_zone_workers[plot_cols], home_zone_workers[plot_cols]],
    ignore_index=True,
)
# Sorting on the "N. "-prefixed labels puts the bins in ascending distance order
plot_df = stacked_workers.sort_values('Work Distance (miles)')

### Plot work length frequency distribution
### HTS vs ActivitySim
xticks_labels = ['0 - 2', '2 - 5', '5 - 10', '10 - 20', '20 - 30', '>30']

fig, ax = plt.subplots(figsize=(15, 10))

# Weighted percent of workers per distance bin; common_norm=False normalises
# each source separately, and the two sources are drawn as side-by-side bars.
sns.histplot(
    data=plot_df,
    x='Work Distance (miles)',
    weights='weight',
    hue='Source',
    hue_order=['HTS', 'ActivitySim'],
    common_norm=False,
    stat='percent',
    multiple='dodge',
    shrink=.9,
    ax=ax,
)

ax.set(xlabel='Work Distance (in miles)', ylabel='Percent of Workers')
# Replace the prefixed category labels with plain range labels
ax.set_xticks(np.arange(len(xticks_labels)), labels=xticks_labels)
plt.show()

In [None]:
# ### Plot work length frequency distribution [Another Format]
# ### HTS vs ActivitySim
# fig, ax = plt.subplots(figsize=(15, 10))

# df_target = unique_hts_home_zone_workers.sort_values('Work Distance (miles)')
# df_model = home_zone_workers.sort_values('Work Distance (miles)')

# xticks_labels = ['0 - 2', '2 - 5', '5 - 10', '10 - 20', '20 - 30', '>30']

# sns.histplot(df_target,
#              x='Work Distance (miles)',
#              weights='weight',
#              stat='percent',
#              label='HTS') 
# sns.histplot(df_model,
#              x='Work Distance (miles)',
#              weights='weight',
#              stat='percent',
#              label='ActivitySim') 

# ax.set_xlabel('Work Distance (in miles)')
# ax.set_ylabel('Percent of Workers')
# ax.set_xticks(np.arange(len(xticks_labels)), labels=xticks_labels)
# ax.legend()
# plt.show()

In [None]:
### Plot work length frequency distribution
### HTS vs ActivitySim
fig, ax = plt.subplots(figsize=(16, 8))

clipping_point = 50   # distances above this are lumped into the last bin
bin_size = 1          # bin width, in the same units as work_distance

# sns.distplot is deprecated and removed from current seaborn releases;
# sns.histplot (already used elsewhere in this notebook) is its replacement.
# Differences from the old call, all intentional:
#   * the KDE curve now honours the weights (distplot applied the weights to
#     the histogram bars only, so its curve described unweighted records);
#   * binrange pins both sources to the same bin edges, so bars are comparable.
for source_label, source_df in (('HTS', unique_hts_home_zone_workers),
                                ('ActivitySim', home_zone_workers)):
    sns.histplot(
        x=source_df['work_distance'].clip(upper=clipping_point),
        weights=source_df['weight'],
        bins=int(clipping_point/bin_size),
        binrange=(0, clipping_point),
        stat='density',
        kde=True,
        alpha=0.4,   # translucent bars so the overlaid sources stay readable
        label=source_label,
        ax=ax,
    )

ax.set_xlabel('Work Distance (in miles)')
ax.legend()
plt.show()

In [None]:
# Report when the notebook run started and finished, and how long it took
end_time = datetime.datetime.now()
for stamp_label, stamp in (("Start Time:", start_time), ("End Time:", end_time)):
    print(stamp_label, stamp)
print_runtime(start_time, end_time)