In [1]:
import os
import numpy as np
import pandas as pd
import openmatrix as omx
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
pd.set_option('display.max_columns', 100)

In [2]:
# Define data paths and parameters
### Directory of the simulation.py file
simpy_dir = r"C:\abm_runs\rohans"

### Location of Configs
settings = r"C:\abm_runs\rohans\configs\resident\settings_mp.yaml"
configs_resident = r"C:\abm_runs\rohans\configs\resident"
configs_common = r"C:\abm_runs\rohans\configs\common"

### Location of Input and Output data
data = r"C:\abm_runs\rohans\input_2022"
output_dir = r"C:\abm_runs\rohans\output"

### Location of calibration targets
target_loc = r"C:\abm_runs\rohans\calibration\workplace_location\targets\work_length_targets.csv"

### Landuse data
landuse_loc = os.path.join(data, "land_use.csv")

### Traffic skims file
skims_loc = os.path.join(data, "traffic_skims_MD.omx" )

### Asim output files
person_loc = os.path.join(output_dir, "final_persons.csv")
hh_loc = os.path.join(output_dir, "final_households.csv")

### Number of calibration iterations
iterations = 3

### Crosswalk b/w target index to coefficient name in configs
coef_xwalk = {
    '0-2': 'coef_distance_0_2miles',
    '2-5': 'coef_distance_2_5miles',
    '5-10': 'coef_distance_5_10miles',
    '10-20': 'coef_distance_10_20miles',
    '20-30': 'coef_distance_20_30miles',
    '>30': 'coef_distance_30plusmiles'
}

In [3]:
# Read data
### Input files
landuse_df = pd.read_csv(landuse_loc)
skims = omx.open_file(skims_loc)
dist_mtx = np.array(skims['SOV_NT_M_DIST__MD']) 

### WLC coefficients
workplace_coef = pd.read_csv(os.path.join(configs_resident, 'workplace_location_coefficients.csv'), index_col='coefficient_name')

### Targets
target_freq = pd.read_csv(target_loc).set_index('work_distance')
print('HTS Targets: \n', target_freq)

HTS Targets: 
                hts_freq  hts_freq_wt  hts_share
work_distance                                  
0-2                 293       151489   0.140469
2-5                 302       186977   0.173376
5-10                458       284815   0.264097
10-20               583       327491   0.303669
20-30               172        90401   0.083825
>30                  64        37275   0.034564


In [4]:
### Create crosswalk b/w MAZs and TAZs
maz_taz_xwalk = pd.Series(landuse_df.TAZ.values, index=landuse_df.MAZ).to_dict()

In [5]:
### Run Time Function
def print_runtime(t1, t2):
    tot_sec = t2.timestamp()-t1.timestamp()
    hours = tot_sec//3600
    minutes = (tot_sec-hours*3600)//60
    seconds = tot_sec-hours*3600-minutes*60

    print('Run Time:', hours, 'hrs', minutes, 'mins', round(seconds), 'sec')
    
    return

### Cold Start: 
Run ActivitySim up to workplace_location model by editing settings_mp.yaml

In [6]:
### Change directory to model setup
### i.e. the location of simulation.py script
os.chdir(simpy_dir)

### Command to run ActivitySim
asim_command ='python simulation.py -s ' + settings + ' -c ' + configs_resident + ' -c ' + configs_common + ' -d ' + data + ' -o ' + output_dir
print('ActivitySim Run Command: \n', asim_command, '\n')

#Run ActivitySim
start_time = datetime.datetime.now()
os.system(asim_command)

ActivitySim Run Command: 
 python simulation.py -s C:\abm_runs\rohans\configs\resident\settings_mp.yaml -c C:\abm_runs\rohans\configs\resident -c C:\abm_runs\rohans\configs\common -d C:\abm_runs\rohans\input_2022 -o C:\abm_runs\rohans\output 



0

In [7]:
end_time = datetime.datetime.now()
print_runtime(start_time, end_time)

Run Time: 0.0 hrs 59.0 mins 54 sec


In [9]:
# Functions needed to summarize data and calculate calibration coefficients
### Function to get distance from skims
def get_distance(origin, destination, mtx=dist_mtx):
    dist = mtx[int(origin-1), int(destination-1)]
    return dist

### Function to calculate calibration coefficients by reading asim outputs
def calculate_calib_coef(person_loc=person_loc, 
                         hh_loc=hh_loc, 
                         maz_taz_xwalk=maz_taz_xwalk, 
                         target_freq=target_freq, 
                         coef_xwalk=coef_xwalk):
    ### Read model output files
    per = pd.read_csv(person_loc, usecols=['person_id', 'household_id', 'is_worker', 'home_zone_id', 'work_from_home', 'workplace_zone_id'])
    hh = pd.read_csv(hh_loc, usecols=['household_id', 'sample_rate'])

    ### Get weights
    per = pd.merge(per, hh[['household_id', 'sample_rate']], how='left', on=['household_id'], suffixes=('', '_x'))
    per['weight'] = 1/per['sample_rate']

    ### Get home and work TAZs
    per['home_taz'] = per['home_zone_id'].map(maz_taz_xwalk)
    per['work_taz'] = per['workplace_zone_id'].map(maz_taz_xwalk)

    ### Get workers from all persons
    workers = per[per['is_worker']==1]

    ### Get persons working within modeling region
    home_zone_workers = workers[workers['work_taz']>12]

    ### Calculate distance between home and work locations
    home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)

    ### Create a new column with labels for work distances
    home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'
    home_zone_workers.loc[(home_zone_workers['work_dist']>2)&(home_zone_workers['work_dist']<=5), 'work_distance'] = '2-5'
    home_zone_workers.loc[(home_zone_workers['work_dist']>5)&(home_zone_workers['work_dist']<=10), 'work_distance'] = '5-10'
    home_zone_workers.loc[(home_zone_workers['work_dist']>10)&(home_zone_workers['work_dist']<=20), 'work_distance'] = '10-20'
    home_zone_workers.loc[(home_zone_workers['work_dist']>20)&(home_zone_workers['work_dist']<=30), 'work_distance'] = '20-30'
    home_zone_workers.loc[home_zone_workers['work_dist']>30, 'work_distance'] = '>30'

    ### Create work length frequency table
    asim_freq = pd.pivot_table(home_zone_workers, index='work_distance', values='weight', aggfunc=['count', np.sum]).round().astype(int)
    asim_freq.columns = asim_freq.columns.get_level_values(0)
    asim_freq.rename({'count': 'asim_freq', 'sum': 'asim_freq_wt'}, axis=1, inplace=True)
    asim_freq['asim_share'] = round(asim_freq['asim_freq_wt']/asim_freq['asim_freq_wt'].sum(), 6)

    ### Calculate calibration coefficients
    calib_df = pd.concat([target_freq, asim_freq], axis=1)
    calib_df['ratio'] = calib_df['hts_share']/calib_df['asim_share']
    calib_df['calib_coef'] = np.log(calib_df['ratio'])
    calib_df.rename(index=coef_xwalk, inplace=True)
    
    return calib_df

### Warm Start:
Use _resume_after_ feature in settings_mp.yaml to start the run from workplace_location

Do multiple calibration runs to achieve the set targets

In [11]:
for i in range(1, 5):
    calib_df = calculate_calib_coef()
    print(calib_df)

    ### Save previous coefficients
    workplace_coef.to_csv(os.path.join(configs_resident, 'workplace_location_coefficients_'+str(i-1)+'.csv'))

    ### Update and save coefficients
    workplace_coef.loc[coef_xwalk.values(), 'value'] += calib_df.loc[coef_xwalk.values(), 'calib_coef']
    workplace_coef.to_csv(os.path.join(configs_resident, 'workplace_location_coefficients.csv'))

    ### Run ActivitySim
    print('Starting Iteration -', i)
    start_time = datetime.datetime.now()
    os.system(asim_command)

    end_time = datetime.datetime.now()
    print_runtime(start_time, end_time)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'


                           hts_freq  hts_freq_wt  hts_share  asim_freq  \
work_distance                                                            
coef_distance_0_2miles          293       151489   0.140469      51183   
coef_distance_2_5miles          302       186977   0.173376      68544   
coef_distance_5_10miles         458       284815   0.264097      70057   
coef_distance_10_20miles        583       327491   0.303669      61832   
coef_distance_20_30miles        172        90401   0.083825      19531   
coef_distance_30plusmiles        64        37275   0.034564       7118   

                           asim_freq_wt  asim_share     ratio  calib_coef  
work_distance                                                              
coef_distance_0_2miles           217800    0.183936  0.763684   -0.269601  
coef_distance_2_5miles           291677    0.246327  0.703845   -0.351197  
coef_distance_5_10miles          298115    0.251764  1.048986    0.047824  
coef_distance_10_20miles   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'


                           hts_freq  hts_freq_wt  hts_share  asim_freq  \
work_distance                                                            
coef_distance_0_2miles          293       151489   0.140469      43686   
coef_distance_2_5miles          302       186977   0.173376      52476   
coef_distance_5_10miles         458       284815   0.264097      74044   
coef_distance_10_20miles        583       327491   0.303669      78470   
coef_distance_20_30miles        172        90401   0.083825      21251   
coef_distance_30plusmiles        64        37275   0.034564       8338   

                           asim_freq_wt  asim_share     ratio  calib_coef  
work_distance                                                              
coef_distance_0_2miles           185898    0.156994  0.894741   -0.111221  
coef_distance_2_5miles           223302    0.188583  0.919362   -0.084076  
coef_distance_5_10miles          315081    0.266092  0.992503   -0.007526  
coef_distance_10_20miles   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'


                           hts_freq  hts_freq_wt  hts_share  asim_freq  \
work_distance                                                            
coef_distance_0_2miles          293       151489   0.140469      40695   
coef_distance_2_5miles          302       186977   0.173376      49290   
coef_distance_5_10miles         458       284815   0.264097      74082   
coef_distance_10_20miles        583       327491   0.303669      82613   
coef_distance_20_30miles        172        90401   0.083825      22566   
coef_distance_30plusmiles        64        37275   0.034564       9019   

                           asim_freq_wt  asim_share     ratio  calib_coef  
work_distance                                                              
coef_distance_0_2miles           173170    0.146245  0.960505   -0.040296  
coef_distance_2_5miles           209745    0.177133  0.978790   -0.021438  
coef_distance_5_10miles          315243    0.266228  0.991996   -0.008037  
coef_distance_10_20miles   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'


                           hts_freq  hts_freq_wt  hts_share  asim_freq  \
work_distance                                                            
coef_distance_0_2miles          293       151489   0.140469      39645   
coef_distance_2_5miles          302       186977   0.173376      48479   
coef_distance_5_10miles         458       284815   0.264097      73757   
coef_distance_10_20miles        583       327491   0.303669      83917   
coef_distance_20_30miles        172        90401   0.083825      23021   
coef_distance_30plusmiles        64        37275   0.034564       9446   

                           asim_freq_wt  asim_share     ratio  calib_coef  
work_distance                                                              
coef_distance_0_2miles           168702    0.142472  0.985941   -0.014159  
coef_distance_2_5miles           206294    0.174219  0.995161   -0.004850  
coef_distance_5_10miles          313860    0.265060  0.996367   -0.003640  
coef_distance_10_20miles   

In [14]:
### Create the work distance frequency distribution summary after calibration
calculate_calib_coef()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers['work_dist'] = home_zone_workers.apply(lambda x: get_distance(x.home_taz, x.work_taz), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  home_zone_workers.loc[home_zone_workers['work_dist']<=2, 'work_distance'] = '0-2'


Unnamed: 0_level_0,hts_freq,hts_freq_wt,hts_share,asim_freq,asim_freq_wt,asim_share,ratio,calib_coef
work_distance,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
coef_distance_0_2miles,293,151489,0.140469,39156,166621,0.140714,0.998259,-0.001743
coef_distance_2_5miles,302,186977,0.173376,48272,205413,0.173475,0.999429,-0.000571
coef_distance_5_10miles,458,284815,0.264097,73511,312813,0.264176,0.999701,-0.000299
coef_distance_10_20miles,583,327491,0.303669,84430,359277,0.303416,1.000834,0.000833
coef_distance_20_30miles,172,90401,0.083825,23311,99196,0.083773,1.000621,0.000621
coef_distance_30plusmiles,64,37275,0.034564,9585,40787,0.034445,1.003455,0.003449
