In [1]:
import pandas as pd
import os
import openmatrix as omx
import numpy as np
from itertools import product


_join = os.path.join

In [2]:
# path to the model directory
model_dir = r"C:\_projects\ABM_VY\src\asim-cvm"

# path to the model output directory
model_output_dir = _join(model_dir, r"output CVM")

# path to the model data directory
model_data_dir = _join(model_dir, "data")

In [3]:
# mappings

industries = {
    "Agriculture/Mining" : 1,
    "Construction" : 6,
    "Education/Other public services" : 9,
    "Industrial/Utilities": 3,
    "Info/Finance/Insurance/Real Estate/Professional services": 8,
    "Leisure/Accommodations and Food": 11,
    "Manufacturing" : 2,
    "Medical/Health Services" : 10,
    "Retail" : 4,
    "Transportation" : 7, 
    "Wholesale" : 5,
    "Military" : 12,
}

industry_groups = {
    "Agriculture/Mining" : 1,
    "Construction" : 2,
    "Education/Other public services" : 4,
    "Industrial/Utilities": 2,
    "Info/Finance/Insurance/Real Estate/Professional services": 3,
    "Leisure/Accommodations and Food":3,
    "Manufacturing" : 1,
    "Medical/Health Services" : 4,
    "Retail" : 2,
    "Transportation" : 2,
    "Wholesale" : 1,
    "Military" : 5,
}

rte_gen_industry2group ={
    "AGM": 1,
    "MFG": 1,
    "IUT": 2,
    "RET": 2,
    "WHL": 1,
    "CON": 2,
    "TRN": 2,
    "IFR": 3,
    "EPO": 4,
    "MHS": 4,
    "LAF": 3,
    "MIL": 5
}

est_attractor_industry2group ={
    "AGM": 3,
    "MFG": 1,
    "IUT": 2,
    "RET": 1,
    "WHL": 1,
    "CON": 3,
    "TRN": 3,
    "IFR": 4,
    "EPO": 3,
    "MHS": 3,
    "LAF": 2,
    "MIL": 5
}

vpc_alt = {
    "Goods_Mixed_LCV":1,
    "Goods_Mixed_MUT":2,
    "Goods_Mixed_SUT":3,
    "Goods_NonRes_LCV":4,
    "Goods_NonRes_MUT":5,
    "Goods_NonRes_SUT":6,
    "Goods_Res_LCV":7,
    "Goods_Res_MUT":8,
    "Goods_Res_SUT":9,
    "Maintenance_LCV":10,
    "Maintenance_MUT":11,
    "Maintenance_SUT":12,
    "Service_Mixed_LCV":13,
    "Service_Mixed_MUT":14,
    "Service_Mixed_SUT":15,
    "Service_NonRes_LCV":16,
    "Service_NonRes_MUT":17,
    "Service_NonRes_SUT":18,
    "Service_Res_LCV":19,
    "Service_Res_MUT":20,
    "Service_Res_SUT":21,
}

# TOD dictionary
tod_crwk = {i: 'EA' if i <= 6 else 'AM' if i <= 12 else 'MD' if i <= 25 else 'PM' if i <= 32 else 'EV' for i in range(1, 49)}

# convert all MGRA to TAZ
landuse = pd.read_csv(_join(model_data_dir, r"mgra15_based_input2022.csv"))
mgra2taz = dict(zip(landuse['mgra'], landuse['taz']))

In [4]:
# Additional functions
def array2df(array, cols =['orig', 'dest', 'od']):
    """
    Convert a numpy array to a dataframe
    """
    df = pd.DataFrame(array)
    df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
    df['index'] = df['index'] + 1
    df['variable'] = df['variable'] + 1
    df.columns = cols
    
    return df

In [5]:
## household_attractor
## establishment_attractor
## cvm_accessibility
## route_generation
## route_generation_tnc
## route_purpose_and_vehicle
## route_start_time
## route_origination_type
## route_origination
## route_terminal_type
## route_terminal
## route_stops
## write_cvm_trip_matrices
## write_tables

## Household Attractor Model

In [6]:
# Read input data -- establishment file
households = pd.read_csv(os.path.join(model_output_dir, r"cvm_households.csv"))   
persons = pd.read_csv(os.path.join(model_data_dir, r"final_persons.csv"))

In [7]:
# Recode
households['Food'] = 0
households.loc[households['has_attraction_food'] == True, 'Food'] = 1
households['Package'] = 0
households.loc[households['has_attraction_package'] == True, 'Package'] = 1
households['Service'] = 0
households.loc[households['has_attraction_service'] == True, 'Service'] = 1

# Relabel the income categories as follows:
# Define the bins and labels for categorization
bins = [0, 50000, 100000, 200000,np.Inf] 
labels = [1, 2, 3, 4,]  

households['Income_Group'] = pd.cut(households['income'], bins=bins, labels=labels, right=False)

# Relabel the hh size categories as follows:

    # HHTS Category ---- Summary Category
    # 1   ----1
    # 2  ---- 2
    # 3  ---- 3 
    # 4  ---- 4
    # 5  ---- 5
    # 6  ---- 6+
    # 7  ---- 6+
    # 8  ---- 6+
    # 9  ---- 6+
    # 10  ---- 6+
    # 11  ---- 6+
    # 12  ---- 6+
    # 13  ---- 6+
    # 995 ---- 0

hhsize_category_mapping = {
    1: '1',
    2: '2',
    3: '3',
    4: '4',
    5: '5',
    6: '6+',
    7: '6+',
    8: '6+',
    9: '6+',
    10: '6+',
    11: '6+',
    12: '6+',
    13: '6+',
    995: '0',
}

households['HHsize_Group'] = households['hhsize'].map(hhsize_category_mapping)

# Relabel the vehicle ownership categories as follows:

    # HHTS Category ---- Summary Category
    # 1   ----1
    # 2  ---- 2
    # 3  ---- 3 
    # 4  ---- 4
    # 5  ---- 5+
    # 6  ---- 5+
    # 7  ---- 5+
    # 8  ---- 5+

veh_category_mapping = {
    1: '1',
    2: '2',
    3: '3',
    4: '4',
    5: '5+',
    6: '5+',
    7: '5+',
    8: '5+',
}

households['Auto_Ownership_Group'] = households['auto_ownership'].map(veh_category_mapping)

# Relabel the age to some binary categories as follows:
persons['Age_Group_1'] = 0
persons.loc[(persons.age >= 55) & (persons.age <75), 'Age_Group_1'] = 1
persons['Age_Group_2'] = 0
persons.loc[(persons.age >= 75) & (persons.age <150), 'Age_Group_2'] = 1 # added the max limit of 150, obviously age of more than 150 is not reasonable at least now!!
persons['Age_Group_3'] = 0
persons.loc[(persons.age < 55) & (persons.age >= 0), 'Age_Group_3'] = 1


In [8]:
# group persons data to hh level and create binary age group variables
persons_hh = persons.groupby(['household_id'], as_index=False). agg({
    'Age_Group_1' : 'max',
    'Age_Group_2' : 'max',
    'Age_Group_3' : 'max'
})

# merge the persons data to the households data
households = households.merge(persons_hh, on = 'household_id', how='left')


# Attractor Data
# Wide to long format

cols_to_melt = ['household_id', 'Food', 'Service', 'Package']
households_long = pd.melt(households[cols_to_melt], id_vars=['household_id'], var_name='Attractor', value_name='Value')

# adding additional attributes from the Households file
cols_hh = ['home_zone_id', 'household_id', 'income', 'hhsize', 'auto_ownership', 'Income_Group', 'HHsize_Group', 'Auto_Ownership_Group', 'Age_Group_1', 'Age_Group_2', 'Age_Group_3']

hh = households_long.merge(households[cols_hh], how='left', on='household_id')

In [9]:
# CVM Summary Results
mdl_hh_group = hh.groupby(['Attractor']).agg({'Value': 'mean'}).reset_index()

# Segment by Age Group 1
mdl_hh_Age_1 = hh.groupby(['Attractor', 'Age_Group_1']).agg({'Value': 'mean'})
mdl_hh_Age_1 = mdl_hh_Age_1.reset_index().pivot(columns='Attractor', index='Age_Group_1', values='Value').reset_index()

# Segment by Age Group 2
mdl_hh_Age_2 = hh.groupby(['Attractor', 'Age_Group_2']).agg({'Value': 'mean'})
mdl_hh_Age_2 = mdl_hh_Age_2.reset_index().pivot(columns='Attractor', index='Age_Group_2', values='Value').reset_index()

# Segment by Age Group 3
mdl_hh_Age_3 = hh.groupby(['Attractor', 'Age_Group_3']).agg({'Value': 'mean'})
mdl_hh_Age_3 = mdl_hh_Age_3.reset_index().pivot(columns='Attractor', index='Age_Group_3', values='Value').reset_index()

# Segment by Income
mdl_hh_Income = hh.groupby(['Attractor', 'Income_Group']).agg({'Value': 'mean'})
mdl_hh_Income = mdl_hh_Income.reset_index().pivot(columns='Attractor', index='Income_Group', values='Value').reset_index()


## Establishment Attractor Model

In [10]:
# read the cvm establishment output file
mdl_cvm_est = pd.read_csv(os.path.join(model_output_dir, "cvm_establishments.csv"))
mdl_cvm_est['industry_group2'] = mdl_cvm_est['industry_name'].map(est_attractor_industry2group)

temp1 = mdl_cvm_est.groupby(['industry_group2']).size().reset_index(name='count')
temp2 = mdl_cvm_est.groupby(['industry_group2'])['has_attraction'].sum().reset_index(name='attraction_count')
mdl_est_att_ind_grp = temp1.merge(temp2, on = 'industry_group2', how='left')

est = mdl_cvm_est[mdl_cvm_est['attractions']>0]
temp1 = est.groupby("industry_number")["attractions"].sum().reset_index(name='sum_attractions')
temp2 = est.groupby("industry_number")["attractions"].mean().reset_index(name='mean_attractions')
mdl_est_att_ind_num = temp1.merge(temp2, on=['industry_number'], how = 'outer')

  mdl_cvm_est = pd.read_csv(os.path.join(model_output_dir, "cvm_establishments.csv"))


## Route Generation

In [11]:
# read the cvm establishment output file
mdl_cvm_est = pd.read_csv(os.path.join(model_output_dir, "cvm_establishments.csv"))

mdl_cvm_est["industry_group_num"] = mdl_cvm_est['industry_name'].map(rte_gen_industry2group)
mdl_tot_rte_indgrp = mdl_cvm_est.groupby(['industry_group_num', 'has_generation'])['n_routes'].count().reset_index().rename(
    columns={'n_routes': 'total_routes'})

mdl_tot_rte_indnum = mdl_cvm_est.groupby(['industry_number'])['n_routes'].sum().reset_index().rename(
    columns={'n_routes': 'total_routes'})

  mdl_cvm_est = pd.read_csv(os.path.join(model_output_dir, "cvm_establishments.csv"))


In [12]:
# read the cvm routes file
mdl_cvm_routes = pd.read_csv(_join(model_output_dir, "cvm_routes.csv"))

# filter the TNC routes
mdl_cvm_tnc = mdl_cvm_routes.loc[mdl_cvm_routes['is_tnc'] == True]

# TNC route generation by business type
mdl_cvm_tnc_rte_gen = mdl_cvm_tnc.groupby(['business_type'])['route_id'].count().reset_index()

# Extract the non-TNC routes
mdl_cvm_routes_non_tnc = mdl_cvm_routes.loc[mdl_cvm_routes['is_tnc'] == False]

In [13]:
mdl_cvm_routes_non_tnc['group'] = 0

# Assign the group based on the route purpose, customer type and vehicle type
mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Goods_Mixed_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Goods_Mixed_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Goods_Mixed_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Goods_NonRes_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Goods_NonRes_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Goods_NonRes_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Goods_Res_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Goods_Res_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Goods_Res_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'maintenance') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Maintenance_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'maintenance') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Maintenance_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'maintenance') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Maintenance_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Service_Mixed_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Service_Mixed_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'mixed') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Service_Mixed_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Service_NonRes_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Service_NonRes_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'nonresidential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Service_NonRes_SUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'LCV'), 'group'] = 'Service_Res_LCV'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'MUT'), 'group'] = 'Service_Res_MUT'

mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'service') & 
                 (mdl_cvm_routes_non_tnc['customer_type'] == 'residential') & 
                 (mdl_cvm_routes_non_tnc['vehicle_type'] == 'SUT'), 'group'] = 'Service_Res_SUT'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdl_cvm_routes_non_tnc['group'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdl_cvm_routes_non_tnc.loc[(mdl_cvm_routes_non_tnc['route_purpose'] == 'goods') &


In [14]:
# Map the group to the vpc_alt
mdl_cvm_routes_non_tnc['vpc_alt'] = mdl_cvm_routes_non_tnc['group'].map(vpc_alt)

# route generation by industry group
route_purp_veh_smry = mdl_cvm_routes_non_tnc.groupby(['vpc_alt', 'group'])['route_id'].count().reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mdl_cvm_routes_non_tnc['vpc_alt'] = mdl_cvm_routes_non_tnc['group'].map(vpc_alt)


## Route Start Time

In [15]:
mdl_cvm_routes = pd.read_csv(_join(model_output_dir, "cvm_routes.csv")) #includes both tnc and non-tnc routes
mdl_rte_start_time = mdl_cvm_routes['start_time'].value_counts().reset_index().sort_values(by='start_time')

mdl_cvm_routes_non_tnc['start_time_period'] = 0
mdl_cvm_routes_non_tnc['start_time_period'] = mdl_cvm_routes_non_tnc['start_time'].map(tod_crwk)
mdl_rte_cvm_start_time_period = mdl_cvm_routes_non_tnc['start_time_period'].value_counts().reset_index()

In [None]:
mdl_cvm_routes_tnc['start_time_period'] = 0
mdl_cvm_routes_tnc['start_time_period'] = mdl_cvm_routes_tnc['start_time'].map(tod_crwk)
mdl_tnc_route_start_time_period = mdl_cvm_routes_tnc['start_time_period'].value_counts().reset_index()

## Route Origination Terminal Type


In [16]:
# get AM distances from skims for each mode for origin
skims_dist = omx.open_file(_join(model_data_dir, "traffic_skims_AM.omx"))
lcv_am_dist = array2df(np.array(skims_dist['TRK_L_DIST__AM']), cols=['orig', 'dest', 'am_dist'])
sut_am_dist = array2df(np.array(skims_dist['TRK_M_DIST__AM']), cols=['orig', 'dest', 'am_dist'])
mut_am_dist = array2df(np.array(skims_dist['TRK_H_DIST__AM']), cols=['orig', 'dest', 'am_dist'])
skims_dist.close()

# get MD distances from skims for each mode for terminal
skims_dist = omx.open_file(_join(model_data_dir, "traffic_skims_MD.omx"))
lcv_md_dist = array2df(np.array(skims_dist['TRK_L_DIST__MD']), cols=['orig', 'dest', 'md_dist'])
sut_md_dist = array2df(np.array(skims_dist['TRK_M_DIST__MD']), cols=['orig', 'dest', 'md_dist'])
mut_md_dist = array2df(np.array(skims_dist['TRK_H_DIST__MD']), cols=['orig', 'dest', 'md_dist'])
skims_dist.close()

In [17]:
# output files from model
mdl_cvm_routes = pd.read_csv(_join(model_output_dir, "cvm_routes.csv"))
mdl_establishments = pd.read_csv(_join(model_output_dir, "cvm_establishments.csv"))

mdl_establishments['estab_taz'] = mdl_establishments['zone_id'].map(mgra2taz)
mdl_cvm_routes['orig_taz'] = mdl_cvm_routes['origination_zone'].map(mgra2taz)
mdl_cvm_routes['dest_taz'] = mdl_cvm_routes['terminal_zone'].map(mgra2taz)

  mdl_establishments = pd.read_csv(_join(model_output_dir, "cvm_establishments.csv"))


In [18]:
mdl_cvm_routes = pd.merge(mdl_cvm_routes, 
                          mdl_establishments[['establishment_id', 'estab_taz']], 
                          on='establishment_id', 
                          how='left'
                          )

# merge the skim distance with routes for each mode - from establishment taz to origin taz
mdl_lcv_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'LCV'].copy()
mdl_lcv_routes = pd.merge(mdl_lcv_routes, lcv_am_dist, left_on=['estab_taz', 'orig_taz'], right_on=['orig', 'dest'], how='left')

mdl_sut_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'SUT'].copy()
mdl_sut_routes = pd.merge(mdl_sut_routes, sut_am_dist, left_on=['estab_taz', 'orig_taz'], right_on=['orig', 'dest'], how='left')

mdl_mut_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'MUT'].copy()
mdl_mut_routes = pd.merge(mdl_mut_routes, mut_am_dist, left_on=['estab_taz', 'orig_taz'], right_on=['orig', 'dest'], how='left')

mdl_cvm_routes = pd.concat([mdl_lcv_routes, mdl_sut_routes, mdl_mut_routes], axis=0)
mdl_cvm_routes = mdl_cvm_routes.rename(columns={'am_dist': 'dist_estab2orig'}) #distance from establishment to origin
mdl_cvm_routes = mdl_cvm_routes.reset_index(drop=True)

In [19]:
# merge skim distance with routes for each mode - from establishment taz to destination taz
print(len(mdl_cvm_routes))
mdl_lcv_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'LCV'].copy()
mdl_lcv_routes = pd.merge(mdl_lcv_routes, lcv_md_dist, left_on=['estab_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')

mdl_sut_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'SUT'].copy()
mdl_sut_routes = pd.merge(mdl_sut_routes, sut_md_dist, left_on=['estab_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')

mdl_mut_routes = mdl_cvm_routes[mdl_cvm_routes['vehicle_type'] == 'MUT'].copy()
mdl_mut_routes = pd.merge(mdl_mut_routes, mut_md_dist, left_on=['estab_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')

mdl_cvm_routes = pd.concat([mdl_lcv_routes, mdl_sut_routes, mdl_mut_routes], axis=0)
mdl_cvm_routes = mdl_cvm_routes.rename(columns={'md_dist': 'dist_estab2dest'}) #distance from establishment to destination
mdl_cvm_routes = mdl_cvm_routes.reset_index(drop=True)
print(len(mdl_cvm_routes))

227350
227350


In [20]:
# Summaries
mdl_cvm_routes_nontnc = mdl_cvm_routes.loc[mdl_cvm_routes['is_tnc'] == False] # excluding TNC routes
mdl_indg_orig_type = mdl_cvm_routes_nontnc.groupby(['origination_stop_type', 'business_type'])['route_id'].count().reset_index()
mdl_veh_orig_type = mdl_cvm_routes_nontnc.groupby(['origination_stop_type', 'vehicle_type'])['route_id'].count().reset_index()
mdl_veh_orig_type_mean_dist = mdl_cvm_routes_nontnc.groupby(['origination_stop_type'])['dist_estab2orig'].mean().reset_index()

mdl_indg_dest_type = mdl_cvm_routes_nontnc.groupby(['terminal_stop_type', 'business_type'])['route_id'].count().reset_index()
mdl_veh_dest_type = mdl_cvm_routes_nontnc.groupby(['terminal_stop_type', 'vehicle_type'])['route_id'].count().reset_index()
mdl_veh_dest_type_mean_dist = mdl_cvm_routes_nontnc.groupby(['terminal_stop_type'])['dist_estab2dest'].mean().reset_index()

## Next Stop location

In [21]:
# create empty dataframes for the summary tables
mdl_purposes = ['base', 'goods_delivery', 'goods_pickup', 'home', 'maintenance', 'originate', 'service', 'terminate']
mdl_purp_smry_temp = pd.DataFrame(list(product(mdl_purposes, repeat=2)), columns=['trip_origin_purpose', 'trip_destination_purpose'])

tod = ['AM', 'MD', 'PM', 'EV', 'EA']
mdl_tod_smry_temp = pd.DataFrame(list(product(mdl_purposes, tod)), columns=['trip_destination_purpose', 'trip_tod'])
mdl_dest_smry_temp = pd.DataFrame(list(mdl_purposes), columns=['trip_destination_purpose'])

vtype = ['LCV' , 'SUT', 'MUT']
mdl_vtype_smry_temp = pd.DataFrame(list(vtype), columns=['vehicle_type'])

mdl_dtype = ['commercial', 'base', 'residential', 'warehouse', 'intermodal']
mdl_dtype_smry_temp = pd.DataFrame(list(mdl_dtype), columns=['trip_destination_type'])

In [22]:
# get distances from skims for each mode
time_period = ['AM', 'MD', 'PM', 'EV', 'EA']

for tp in time_period:
    skims_dist = omx.open_file(_join(model_data_dir, "traffic_skims_" + tp + ".omx"))
    globals()['lcv_' + tp.lower() + '_dist'] = array2df(np.array(skims_dist['TRK_L_DIST__' + tp]), cols=['orig', 'dest', '_dist'])
    globals()['sut_' + tp.lower() + '_dist'] = array2df(np.array(skims_dist['TRK_M_DIST__' + tp]), cols=['orig', 'dest', '_dist'])
    globals()['mut_' + tp.lower() + '_dist'] = array2df(np.array(skims_dist['TRK_H_DIST__' + tp]), cols=['orig', 'dest', '_dist'])
    globals()['tnc_' + tp.lower() + '_dist'] = array2df(np.array(skims_dist['TRK_L_DIST__' + tp]), cols=['orig', 'dest', '_dist'])
    skims_dist.close()

In [23]:
# read trips and routes from model output
mdl_cvm_trips = pd.read_csv(_join(model_output_dir, "cvm_cv_trips.csv"))
mdl_cvm_routes = pd.read_csv(_join(model_output_dir, "cvm_routes.csv"))

mdl_cvm_trips['orig_taz'] = mdl_cvm_trips['trip_origin'].map(mgra2taz)
mdl_cvm_trips['dest_taz'] = mdl_cvm_trips['trip_destination'].map(mgra2taz)

# add TOD to model data
mdl_cvm_trips['trip_tod'] = mdl_cvm_trips['trip_start_time'].map(tod_crwk)

# merge with cvm routes
mdl_cvm_trips = pd.merge(mdl_cvm_trips, mdl_cvm_routes, on='route_id', how='left')

# filter CVM and TNC trips
mdl_cvm_trips_nontnc = mdl_cvm_trips[mdl_cvm_trips['is_tnc'] == False]
mdl_tnc_trips = mdl_cvm_trips[mdl_cvm_trips['is_tnc'] == True]

In [24]:
# add distance to trips
print(f"Before Merge {len(mdl_cvm_trips_nontnc)}")

time_period = ['AM', 'MD', 'PM', 'EV', 'EA']
mdl_all_cvm_trips = []
vehicles = ['lcv', 'sut', 'mut']
for tp in time_period:
    for vehicle in vehicles:
        # print(tp, vehicle)
        mdl_temp = mdl_cvm_trips_nontnc.loc[(mdl_cvm_trips['trip_tod'] == tp) & (mdl_cvm_trips['vehicle_type'] == vehicle.upper())]
        mdl_trips = mdl_temp.merge(globals()[vehicle + '_' + tp.lower() + '_dist'], left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        mdl_trips = mdl_trips.drop(['orig', 'dest'], axis=1)
        mdl_all_cvm_trips.append(mdl_trips)

mdl_cvm_trips_test = pd.concat(mdl_all_cvm_trips)
print(f"After Merge {len(mdl_cvm_trips_test)}")

mdl_cvm_trips_nontnc = mdl_cvm_trips_test.copy()

Before Merge 1552574
After Merge 1552574


In [25]:
# summaries
mdl_od_act_seg_name = mdl_cvm_trips_nontnc.groupby(['trip_origin_purpose', 'trip_destination_purpose'])['route_id'].count().reset_index()
mdl_od_act_seg_name = mdl_purp_smry_temp.merge(mdl_od_act_seg_name, on=['trip_origin_purpose', 'trip_destination_purpose'], how='left')
mdl_od_act_seg_name = mdl_od_act_seg_name.fillna(0)
mdl_od_act_seg_name = mdl_od_act_seg_name.pivot(index='trip_origin_purpose', columns='trip_destination_purpose', values='route_id').reset_index()

mdl_cvm_tod_smry = mdl_cvm_trips_nontnc.groupby(['trip_tod'])['route_id'].count().reset_index()
mdl_cvm_dest_type = mdl_cvm_trips_nontnc.groupby(['trip_destination_type'])['route_id'].count().reset_index()
mdl_cvm_mean_dist_vtype = mdl_cvm_trips_nontnc.groupby(['vehicle_type'])['_dist'].mean().reset_index()

In [26]:
# Empty dataframes for the summary tables - TNC
mdl_purposes = ['base', 'goods_delivery', 'goods_pickup', 'home', 'maintenance', 'originate', 'service', 'terminate']
mdl_purp_smry_temp = pd.DataFrame(list(product(mdl_purposes, repeat=2)), columns=['trip_origin_purpose', 'trip_destination_purpose'])

tod = ['AM', 'MD', 'PM', 'EV', 'EA']
mdl_tod_smry_temp = pd.DataFrame(list(product(mdl_purposes, tod)), columns=['trip_destination_purpose', 'trip_tod'])
mdl_dest_smry_temp = pd.DataFrame(list(mdl_purposes), columns=['trip_destination_purpose'])

vtype = ['LCV' , 'SUT', 'MUT']
mdl_vtype_smry_temp = pd.DataFrame(list(vtype), columns=['vehicle_type'])

mdl_dtype = ['commercial', 'base', 'residential', 'warehouse', 'intermodal']
mdl_dtype_smry_temp = pd.DataFrame(list(mdl_dtype), columns=['trip_destination_type'])

In [27]:
# add distance 
print(f"Before Merge {len(mdl_tnc_trips)}")
time_period = ['AM', 'MD', 'PM', 'EV', 'EA']
mdl_all_tnc_trips = []
vehicles = 'tnc'
for tp in time_period:
        mdl_temp = mdl_tnc_trips.loc[(mdl_tnc_trips['trip_tod'] == tp)]
        mdl_trips = mdl_temp.merge(globals()[vehicles + '_' + tp.lower() + '_dist'], left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        mdl_trips = mdl_trips.drop(['orig', 'dest'], axis=1)
        mdl_all_tnc_trips.append(mdl_trips)

mdl_tnc_trips_test = pd.concat(mdl_all_tnc_trips)
print(f"After Merge {len(mdl_tnc_trips_test)}")

mdl_tnc_trips = mdl_tnc_trips_test.copy()

Before Merge 59784
After Merge 59784


In [28]:
# summaries for TNC
mdl_tnc_od_act_seg_name = mdl_tnc_trips.groupby(['trip_origin_purpose', 'trip_destination_purpose'])['route_id'].count().reset_index()
mdl_tnc_od_act_seg_name = mdl_purp_smry_temp.merge(mdl_tnc_od_act_seg_name, on=['trip_origin_purpose', 'trip_destination_purpose'], how='left')
mdl_tnc_od_act_seg_name = mdl_tnc_od_act_seg_name.fillna(0)
mdl_tnc_od_act_seg_name = mdl_tnc_od_act_seg_name.pivot(index='trip_origin_purpose', columns='trip_destination_purpose', values='route_id').reset_index()

mdl_tnc_tod_smry = mdl_tnc_trips.groupby(['trip_tod'])['route_id'].count().reset_index()
mdl_dest_smry_temp2 = pd.DataFrame(list(mdl_purposes), columns=['trip_destination_type'])

mdl_tnc_trp_dest_type = mdl_tnc_trips.groupby(['trip_destination_type'])['route_id'].count().reset_index()
mdl_tnc_trp_dest_type = mdl_dtype_smry_temp.merge(mdl_tnc_trp_dest_type, on='trip_destination_type' , how = 'left')

mdl_tnc_mean_dist_vtype = mdl_tnc_trips.groupby(['vehicle_type'])['_dist'].mean().reset_index()
mdl_tnc_mean_dist_vtype = mdl_vtype_smry_temp.merge(mdl_tnc_mean_dist_vtype, on='vehicle_type' , how = 'left')

In [29]:
# TNC Trips TOD
cvm_trips = pd.read_csv(_join(model_output_dir, "cvm_cv_trips.csv"))
cvm_routes = pd.read_csv(_join(model_output_dir, "cvm_routes.csv"))

cvm_trips = pd.merge(cvm_trips, cvm_routes[['route_id', 'is_tnc']], on='route_id', how='left')
cvm_trips_tnc = cvm_trips.loc[cvm_trips['is_tnc'] == True]

cvm_trips_tnc['trip_start_time_period'] = 0
cvm_trips_tnc['trip_start_time_period'] = cvm_trips_tnc['trip_start_time'].map(tod_crwk)

mdl_cvm_trips_tnc_tod_smry = cvm_trips_tnc.groupby(['trip_start_time_period'])['route_id'].count().reset_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cvm_trips_tnc['trip_start_time_period'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cvm_trips_tnc['trip_start_time_period'] = cvm_trips_tnc['trip_start_time'].map(tod_crwk)


In [30]:
# Create an Excel writer object
writer = pd.ExcelWriter('Final_CVM_Model_Summary.xlsx')

# Household Attractor Model
mdl_hh_group.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=0, index=False)
mdl_hh_Income.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=5, index=False)
mdl_hh_Age_1.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=10, index=False)
mdl_hh_Age_2.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=15, index=False)
mdl_hh_Age_3.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=20, index=False)

# Establishment Attractor Model
mdl_est_att_ind_grp.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=25, index=False)
mdl_est_att_ind_num.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=30, index=False)

mdl_tot_rte_indgrp.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=35, index=False)
mdl_tot_rte_indnum.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=40, index=False)

# Route purpose and vehicle summary
route_purp_veh_smry.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=45, index=False)

# Route Start Time
mdl_rte_start_time.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=50, index=False)
mdl_rte_cvm_start_time_period.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=55, index=False)

# TNC Route Generation
mdl_cvm_tnc_rte_gen.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=60, index=False)

# Origination and Terminal type
mdl_indg_orig_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=65, index=False)
mdl_veh_orig_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=70, index=False)
mdl_veh_orig_type_mean_dist.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=75, index=False)
mdl_indg_dest_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=81, index=False)
mdl_veh_dest_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=86, index=False)
mdl_veh_dest_type_mean_dist.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=91, index=False)

# Next Stop Location
mdl_od_act_seg_name.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=96, index=False)
mdl_cvm_tod_smry.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=107, index=False)
mdl_cvm_dest_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=111, index=False)
mdl_cvm_mean_dist_vtype.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=115, index=False)

# Next Stop Location TNC
mdl_tnc_od_act_seg_name.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=120, index=False)
mdl_tnc_tod_smry.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=131, index=False)
mdl_tnc_trp_dest_type.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=135, index=False)
mdl_tnc_mean_dist_vtype.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=139, index=False)

# TNC Trips TOD
mdl_tnc_route_start_time_period.to_excel(writer, sheet_name='ModelData', startrow=2 , startcol=144, index=False)

# Close the Excel writer
writer.close()

In [31]:
print("Complete!")

Complete!
