### SANDAG Commercial Vehicle Model
#### Task 3 -- Data Exploration for Design Decisions

#### Establishment TNC Use

In [None]:
import numpy as np
import pandas as pd
import os
# from matplotlib import pyplot as plt
# from matplotlib import ticker

In [None]:
# Lift the row limit so displayed DataFrames are never truncated.
pd.set_option("display.max_rows", None)

In [None]:
# Import file paths from script
# NOTE(review): root_dir, raw_data_dir, proc_data_dir and lookup_dir are presumably
# defined by the local FilePaths module brought in by the star import -- confirm.
from FilePaths import *
# Echo the resolved locations so they are visible in the notebook output.
print("root_dir = \n", root_dir, "\n")
print("raw_data_dir = \n", raw_data_dir,"\n")
print("proc_data_dir = \n", proc_data_dir,"\n")
print("lookup_dir = \n", lookup_dir,"\n")

#### Read in establishment file and create expansion weights

In [None]:
# Read input data -- establishment file
# Loads the "Establishment Data" sheet of the CVS workbook into df_estab.
df_estab = pd.read_excel(os.path.join(root_dir,
                                      raw_data_dir,
                                      r"CVS\SANDAG 2022 CV DataBase & Dictionaires_03_03_2023.xlsx"),
                                     sheet_name=r"Establishment Data")

# NOTE(review): set_index returns a new frame and the result is not assigned,
# so df_estab keeps its original index and 'company_id' remains a regular column.
df_estab.set_index('company_id')
df_estab.head()

In [None]:
# Total employment per establishment = full-time count + part-time count.
# NOTE(review): the sum is NaN whenever either count is missing -- confirm both are populated.
df_estab.loc[:, 'emp_total'] = df_estab['employees_fulltime_count'] + df_estab['employees_parttime_count']

#### Read in file from CVS survey weighting report -- Most likely number of establishments by Industry

In [None]:
# Read the establishment expansion targets (sheet "CVS_Expansion_Totals").
df_expand = pd.read_excel(os.path.join(root_dir,
                                      raw_data_dir,
                                      r"CVS\CVS_EstabExpansion.xlsx"),
                                     sheet_name=r"CVS_Expansion_Totals")
df_expand.info()
# Rows without a SectorID get 0. The filled column is assigned back explicitly:
# an in-place fillna on the attribute-accessed column acts on an intermediate
# object and is not guaranteed to update df_expand itself.
df_expand['SectorID'] = df_expand['SectorID'].fillna(0)

In [None]:
# Display the expansion-targets table.
df_expand

In [None]:
# Expansion targets as nested lookups: size-class column -> {SectorID: target}
expandEstab = {
    size_class: dict(zip(df_expand['SectorID'], df_expand[size_class]))
    for size_class in ('Emp_0_9', 'Emp_10p')
}
expandEstab

In [None]:
# Add establishment population-level estimates (targets)
# Flag establishments with 9 or fewer total employees (1) versus more than 9 (0).
df_estab.loc[df_estab['emp_total']<=9, 'emp_lt10'] = 1
df_estab.loc[df_estab['emp_total']>9, 'emp_lt10'] = 0
# NOTE(review): a missing emp_total matches neither mask and would leave NaN here,
# which the integer cast below cannot convert -- confirm emp_total is always populated.
df_estab['emp_lt10'] = df_estab['emp_lt10'].astype(int)

# Attach the target for each establishment by mapping its industry group through
# the Emp_0_9 lookup (9 or fewer employees) or the Emp_10p lookup (more than 9).
df_estab.loc[df_estab['emp_total']<=9, \
             'region_estab_wght'] = df_estab['base_location_Industry Group'].map(expandEstab['Emp_0_9'])

df_estab.loc[df_estab['emp_total']>9, \
             'region_estab_wght'] = df_estab['base_location_Industry Group'].map(expandEstab['Emp_10p'])

# NOTE(review): an industry group absent from the lookup maps to NaN, which this
# cast cannot convert -- confirm every group has a target.
df_estab['region_estab_wght'] = df_estab['region_estab_wght'].astype('int64')
#df_estab[['emp_total','base_location_Industry Group','emp_lt10','region_estab_wght']].head()

In [None]:
# Calculate Establishment Weights
# Count sampled establishments per distinct target value, then divide the target
# by that count to get the expansion weight; the result is a {target: weight} dict.
# NOTE(review): grouping on the target value pools any industry/size cells that
# happen to share the same target -- confirm the targets are unique per cell.
estab_weights = df_estab.groupby(['region_estab_wght']).size().reset_index(name='num_estabs')
estab_weights['estab_exp_weight'] = estab_weights['region_estab_wght'] / estab_weights['num_estabs']
estab_weights = dict(zip(estab_weights['region_estab_wght'],estab_weights['estab_exp_weight']))

In [None]:
# Expansion weight = regional target / number of sampled establishments in the
# same industry-group x size-class cell. The denominator is counted within the
# cell itself rather than looked up by target value, so two cells that happen to
# share the same target are not pooled into one (too large) denominator.
cell_size = df_estab.groupby(['base_location_Industry Group', 'emp_lt10'])['region_estab_wght'].transform('size')
df_estab.loc[:,'estab_exp_weight'] = df_estab['region_estab_wght'] / cell_size
df_estab[['emp_total','base_location_Industry Group','emp_lt10','region_estab_wght','estab_exp_weight']].head(10)
# The target column is only needed to derive the weight.
df_estab = df_estab.drop(['region_estab_wght'], axis=1)

In [None]:
# Summary table: mean expansion weight per industry group and size class,
# rounded to three decimals
estab_wts = (
    df_estab
    .groupby(['base_location_Industry Group','emp_lt10'])['estab_exp_weight']
    .mean()
    .reset_index()
    .round(3)
)
print(estab_wts.to_string(index=False))

#### Find TNC Usage Rates by Establishment Industries

In [None]:
# Industry group code -> descriptive industry name (used to label summary tables).
indus_lookup = {
    1: "Agriculture/Mining",
    2: "Manufacturing",
    3: "Industrial/Utilities",
    4: "Retail",
    5: "Wholesale",
    6: "Construction",
    7: "Transportation",
    8: "Info/Finance/Insurance/Real Estate/Professional services",
    9: "Education/Public/Other services",
    10: "Medical/Health Services",
    11: "Leisure/Accommodations and Food",
    96: "Other/Non-Classified"
}


In [None]:
# Create short names for industries
# Industry group code -> three-letter abbreviation (entries are listed in
# alphabetical order of the abbreviation, except for 96 at the end).
ind_abrv = {
    1: "AGM",
    6: "CON",
    9: "EPO",
    3: "IUT",
    8: "IFR",
    11: "LAF",
    2: "MFG",
    10: "MHS",
    4: "RET",
    7: "TRN",
    5: "WHL",
    96: "ONC"
}

In [None]:
# Unweighted sum of 'no_of_from_deliveries_pcsuvpu' among TNC-using
# establishments, by industry group. The column is selected before summing so
# the other (possibly non-numeric) survey columns are never aggregated.
delivTNC = (
    df_estab[df_estab['is_use_tnc']==1]
    .groupby(['base_location_Industry Group'])['no_of_from_deliveries_pcsuvpu']
    .sum()
    .reset_index(name='delivTNC')
)
delivTNC

In [None]:
# TNC Usage Rates (raw)
# Unweighted counts by industry group: establishments using TNCs, their reported
# deliveries, and all establishments. The delivery column is selected before
# summing so unrelated (possibly non-numeric) columns are never aggregated.
tnc_users_by_ind = df_estab[df_estab['is_use_tnc']==1].groupby(['base_location_Industry Group'])
useTNC = tnc_users_by_ind.size().reset_index(name='useTNC')
delivTNC = tnc_users_by_ind['no_of_from_deliveries_pcsuvpu'].sum().reset_index(name='delivTNC').astype('int64')

allEstab = df_estab.groupby(['base_location_Industry Group']).size().reset_index(name='totEstab')
allEstab["IndustryGroup"] = allEstab['base_location_Industry Group'].map(indus_lookup)
# Left-join onto the all-establishment table so industries without TNC users are kept.
TNC_Industry = allEstab.set_index('base_location_Industry Group').join(useTNC.set_index('base_location_Industry Group'))
TNC_Industry = TNC_Industry.join(delivTNC.set_index('base_location_Industry Group'))
TNC_Industry.loc[:,'IndusAbbrv'] = TNC_Industry.index.map(ind_abrv)

# Reorder columns
TNC_Industry = TNC_Industry[['IndustryGroup', 'useTNC', 'delivTNC', 'totEstab']]

# Calculate rates
# Both rates are stored as formatted strings for printing.
TNC_Industry['pctUseTNC'] = TNC_Industry['useTNC'] / TNC_Industry['totEstab']
TNC_Industry['pctUseTNC'] = TNC_Industry['pctUseTNC'].map('{:.2%}'.format)
TNC_Industry['rateDelTNC'] = TNC_Industry['delivTNC'] / TNC_Industry['totEstab']
TNC_Industry['rateDelTNC'] = TNC_Industry['rateDelTNC'].map('{:5.3f}'.format)

print("Unweighted Establishment Observations")
print("\nNote: 'rateDelTNC' = delivery trips per day per establishment in industry\n")
print(TNC_Industry.to_string(index=False))

In [None]:
# TNC Usage Rates (weighted)
# Weighted deliveries are filled only for TNC-using establishments; weighted
# employment is filled for every establishment.
is_tnc_user = df_estab['is_use_tnc']==1
df_estab.loc[is_tnc_user, 'wghtd_from_deliveries_pcsuvpu'] = df_estab['estab_exp_weight'] * df_estab['no_of_from_deliveries_pcsuvpu']
df_estab['wghtd_employment'] = df_estab['estab_exp_weight'] * df_estab['emp_total']

# Each aggregate selects its column before summing, so unrelated (possibly
# non-numeric) survey columns are never passed through sum().
tnc_by_ind = df_estab[is_tnc_user].groupby(['base_location_Industry Group'])
all_by_ind = df_estab.groupby(['base_location_Industry Group'])

useTNC = tnc_by_ind['estab_exp_weight'].sum().reset_index(name='useTNC')
delivTNC = tnc_by_ind['wghtd_from_deliveries_pcsuvpu'].sum().reset_index(name='delivTNC').astype('int64')

empIfUseTNC = tnc_by_ind['wghtd_employment'].sum().reset_index(name='empIfUseTNC').astype('int64')
empAll = all_by_ind['wghtd_employment'].sum().reset_index(name='empAll').astype('int64')
#empAll["IndustryGroup"] = empAll['base_location_Industry Group'].map(indus_lookup)

allEstab = all_by_ind['estab_exp_weight'].sum().reset_index(name='totEstab')
allEstab["IndustryGroup"] = allEstab['base_location_Industry Group'].map(indus_lookup)
# Left-join everything onto the all-establishment table, keyed by industry group.
TNC_Industry = allEstab.set_index('base_location_Industry Group').join(useTNC.set_index('base_location_Industry Group'))
TNC_Industry = TNC_Industry.join(delivTNC.set_index('base_location_Industry Group'))
TNC_Industry = TNC_Industry.join(empIfUseTNC.set_index('base_location_Industry Group'))
TNC_Industry = TNC_Industry.join(empAll.set_index('base_location_Industry Group'))
TNC_Industry.loc[:,'IndusAbbrv'] = TNC_Industry.index.map(ind_abrv)

# Reorder columns
TNC_Industry = TNC_Industry[['IndusAbbrv', 'useTNC', 'delivTNC', 'empIfUseTNC', 'empAll', 'totEstab']]

# Rates per industry; the totals row is taken while the columns are still numeric.
TNC_Industry['pctUseTNC'] = TNC_Industry['useTNC'] / TNC_Industry['totEstab']
TNC_Totals = pd.DataFrame(TNC_Industry[['useTNC', 'delivTNC', 'empIfUseTNC', 'empAll', 'totEstab']].sum().map('{:.0f}'.format)).T
TNC_Industry['rateDelTNC'] = TNC_Industry['delivTNC'] / TNC_Industry['totEstab']
TNC_Industry['delivEmpIfTNC'] = TNC_Industry['delivTNC'] / TNC_Industry['empIfUseTNC']

# Pool every industry except groups 4 and 11 into a single row and
# recompute the same three rates on the pooled sums.
NonRestRetl = pd.DataFrame(TNC_Industry[(TNC_Industry.index!=4) & (TNC_Industry.index!=11)]\
                             [['useTNC','delivTNC','empIfUseTNC','empAll','totEstab']].copy().sum()).T
NonRestRetl['pctUseTNC'] = NonRestRetl['useTNC'] / NonRestRetl['totEstab']
NonRestRetl['rateDelTNC'] = NonRestRetl['delivTNC'] / NonRestRetl['totEstab']
NonRestRetl['delivEmpIfTNC'] = NonRestRetl['delivTNC'] / NonRestRetl['empIfUseTNC']

# Convert both tables to display strings: whole numbers for the counts,
# five decimals for the rates. This must come after all arithmetic above.
count_cols = ['useTNC', 'delivTNC', 'totEstab', 'empIfUseTNC', 'empAll']
rate_cols = ['pctUseTNC', 'rateDelTNC', 'delivEmpIfTNC']
for table in (TNC_Industry, NonRestRetl):
    for col in count_cols:
        table[col] = table[col].map('{:.0f}'.format)
    for col in rate_cols:
        table[col] = table[col].map('{:.5f}'.format)
NonRestRetl['type'] = 'NonRestRetl'

print("Weighted Establishment Observations")
print("\nNote: 'rateDelTNC' = delivery trips per day per establishment in industry\n")
print(TNC_Industry.to_string(index=False))
print("\nAll Establishment Totals:")
print(TNC_Totals.to_string(index=False))

In [None]:
# Label the weighted employment totals with the descriptive industry name, then display.
empAll["IndustryGroup"] = empAll['base_location_Industry Group'].map(indus_lookup)
empAll

In [None]:
# Pull the single-industry rows for Retail (group 4) and Restaurant (group 11)
# out of the weighted table, keeping only the rate columns and tagging each row
usecols = ['useTNC', 'delivTNC', 'empIfUseTNC', 'empAll', 'totEstab', 'pctUseTNC', 'rateDelTNC', 'delivEmpIfTNC']

Retail = (
    TNC_Industry.loc[TNC_Industry.index == 4, usecols]
    .reset_index(drop=True)
    .assign(type='Retail')
)

Restaurant = (
    TNC_Industry.loc[TNC_Industry.index == 11, usecols]
    .reset_index(drop=True)
    .assign(type='Restaurant')
)

In [None]:
# Stack the three group rows into one table indexed by group name
usecols = ['type', 'useTNC', 'delivTNC', 'empIfUseTNC','empAll','totEstab', 'pctUseTNC', 'rateDelTNC', 'delivEmpIfTNC'] #re-order
TNC_Industry3 = pd.concat([NonRestRetl, Restaurant, Retail], axis=0, sort=False).reset_index(drop=True)
TNC_Industry3 = TNC_Industry3.loc[:, usecols].set_index('type')

print("TNC Usage Rates by Three Primary Groups\n")
print(TNC_Industry3.to_string(index=True))

In [None]:
# Scratch arithmetic check.
# NOTE(review): the operands are hard-coded and unlabelled -- presumably copied
# from the tables printed above; confirm their source before relying on the result.
.44129 * 84497 * 0.01715

#### Read in TNC establishment-level records (driver summaries)

In [None]:
# Read input data -- TNC establishment file
# Loads the "Establishment Data" sheet of the TNC survey workbook.
df_tnc_driver = pd.read_excel(os.path.join(root_dir, raw_data_dir,
                                      r"TNC\TNC Travel Survey_Data Submittal_1-19-23.xlsx"),
                                     sheet_name=r"Establishment Data")

# NOTE(review): the set_index result is not assigned, so 'company_id' stays a
# regular column (it is selected by name further down).
df_tnc_driver.set_index('company_id')
# Transposed preview: one column per record, one row per field.
df_tnc_driver.head(3).T

In [None]:
# Keep only the id, name and trip-count fields; the name field becomes 'client_app'
df_tnc_driver = (
    df_tnc_driver[['company_id','company_name','number_of_trips']]
    .rename(columns={'company_name': 'client_app'})
)
df_tnc_driver.head()

In [None]:
# Read input data -- lookup table for TNC client categories
# Loads the "TNC Categories" sheet of the lookup workbook.
df_lookup = pd.read_excel(os.path.join(root_dir, lookup_dir,
                                      r"Lookups_v8.xlsx"),
                                     sheet_name=r"TNC Categories")
df_lookup.head()

In [None]:
# Normalised match key: trimmed, lower-cased, with all spaces removed.
df_lookup["company_name2"] = df_lookup["company_name_alt"].str.strip().str.lower().str.replace(" ","")

In [None]:
# Normalised client name -> TNC industry category
TNC_IndCat = {
    name_key: category
    for name_key, category in zip(df_lookup['company_name2'], df_lookup['TNC_IndCat3'])
}
TNC_IndCat

In [None]:
# Normalise the client name the same way as the lookup key, then attach the category
df_tnc_driver['client_app2'] = (
    df_tnc_driver['client_app']
    .str.strip()
    .str.lower()
    .str.replace(" ","")
)
df_tnc_driver['IndCat3'] = df_tnc_driver['client_app2'].map(TNC_IndCat)
df_tnc_driver.head()

In [None]:
# Read input data -- TNC trips
# Loads the "Trips" sheet of the TNC survey workbook.
df_tnc_trips = pd.read_excel(os.path.join(root_dir, raw_data_dir,
                                      r"TNC\TNC Travel Survey_Data Submittal_1-19-23.xlsx"),
                                     sheet_name=r"Trips")

# NOTE(review): the set_index result is not assigned, so 'company_id' stays a
# regular column (it is used as a group key further down).
df_tnc_trips.set_index('company_id')
df_tnc_trips.head()

In [None]:
# Number of distinct pickup locations (place name + address) per driver,
# taken from trip records with activity_type 6
pickup_keys = ['company_id', 'location_placename', 'location_address']
temp = (
    df_tnc_trips[df_tnc_trips['activity_type']==6]
    .groupby(pickup_keys)
    .size()
    .reset_index(name='clientPickups')
)
# One row of `temp` per distinct location, so counting rows counts locations.
clientPickups = temp.groupby(['company_id'])['clientPickups'].count().reset_index()
clientPickups.head()

In [None]:
# Per-driver count of trip records with activity_type 5 at placetype 11
homeDropoffs = (
    df_tnc_trips.loc[df_tnc_trips['activity_type'].eq(5) & df_tnc_trips['placetype'].eq(11)]
    .groupby(['company_id'])
    .size()
    .reset_index(name='homeDropoffs')
)
homeDropoffs.head()

In [None]:
# Per-driver count of trip records with activity_type 5 at any placetype other than 11
bizDropoffs = (
    df_tnc_trips.loc[df_tnc_trips['activity_type'].eq(5) & df_tnc_trips['placetype'].ne(11)]
    .groupby(['company_id'])
    .size()
    .reset_index(name='businessDropoffs')
)
bizDropoffs.head()

In [None]:
# Attach pickup counts; drivers without any pickup record keep NaN
df_tnc_driver = pd.merge(df_tnc_driver, clientPickups, how='left', on='company_id')
df_tnc_driver.head()

In [None]:
# Attach home drop-off counts; drivers without any keep NaN
df_tnc_driver = pd.merge(df_tnc_driver, homeDropoffs, how='left', on='company_id')
df_tnc_driver.head()

In [None]:
# Attach business drop-off counts; drivers without any keep NaN
df_tnc_driver = pd.merge(df_tnc_driver, bizDropoffs, how='left', on='company_id')
df_tnc_driver.head()

In [None]:
# Calculate TNC Driver Pickups by Industry Category
# Only the pickup column is summed; the text columns of df_tnc_driver are left
# out of the aggregation.
tnc_by_client = df_tnc_driver.groupby('IndCat3')['clientPickups'].sum().reset_index(name='clientPickups')
tnc_by_client.set_index('IndCat3')

In [None]:
# Calculate TNC Home Dropoffs by Industry Category
# Only the home drop-off column is summed; the text columns of df_tnc_driver are
# left out of the aggregation.
tnc_by_homedel = df_tnc_driver.groupby('IndCat3')['homeDropoffs'].sum().reset_index(name='homeDropoffs')
tnc_by_homedel.set_index('IndCat3')

In [None]:
# Calculate TNC Business Dropoffs by Industry Category
# Only the business drop-off column is summed; the text columns of df_tnc_driver
# are left out of the aggregation.
tnc_by_bizdel = df_tnc_driver.groupby('IndCat3')['businessDropoffs'].sum().reset_index(name='businessDropoffs')
tnc_by_bizdel.set_index('IndCat3')

In [None]:
# TNC expansion weight per category = weighted establishment deliveries
# divided by the observed client pickups of the surveyed drivers
tnc_weights = pd.merge(tnc_by_client, TNC_Industry3, left_on='IndCat3', right_on='type')

# delivTNC arrives as a formatted string, so strip separators and cast first.
tnc_weights['tnc_expwght'] = (
    tnc_weights['delivTNC'].str.replace(",","").astype(float)
    / tnc_weights['clientPickups'].astype(float)
).round(5)

# Format the pickup counts for printing only after the ratio has been taken.
tnc_weights['clientPickups'] = tnc_weights['clientPickups'].map('{:,.0f}'.format)

print(tnc_weights[['IndCat3','clientPickups','delivTNC','tnc_expwght']].to_string(index=False))

In [None]:
# Category -> expansion weight lookup, applied to every driver record
tnc_expwght = {
    category: weight
    for category, weight in zip(tnc_weights['IndCat3'], tnc_weights['tnc_expwght'])
}
df_tnc_driver['tnc_expwght'] = df_tnc_driver['IndCat3'].map(tnc_expwght)

In [None]:
# Display the category -> expansion weight dictionary.
tnc_expwght

In [None]:
# Compare raw and expansion-weighted trip totals across all drivers
df_tnc_driver['wghtd_trips'] = df_tnc_driver['tnc_expwght'] * df_tnc_driver['number_of_trips']

tnc_summary = df_tnc_driver[['number_of_trips','wghtd_trips']].sum().to_frame().T
for trip_col in ('number_of_trips', 'wghtd_trips'):
    tnc_summary[trip_col] = tnc_summary[trip_col].map('{:,.0f}'.format)
tnc_summary = tnc_summary.rename(columns={'number_of_trips': 'raw_trips'})

print("Impact of TNC weights expansion on Total TNC trips\n")
print(tnc_summary.to_string(index=False))

In [None]:
# Write the weighted driver table to TNC_weighted.csv under the raw-data TNC
# folder (the DataFrame index is written as the first column).
df_tnc_driver.to_csv(os.path.join(root_dir,
                                      raw_data_dir,
                                      r"TNC\TNC_weighted.csv"))

In [None]:
# Expansion-weighted pickup and drop-off counts per driver record
weighted_columns = (
    ('clientPickups', 'wghtd_client_picks'),
    ('homeDropoffs', 'wghtd_home_delivs'),
    ('businessDropoffs', 'wghtd_biz_delivs'),
)
for raw_col, wtd_col in weighted_columns:
    df_tnc_driver[wtd_col] = df_tnc_driver[raw_col] * df_tnc_driver['tnc_expwght']

In [None]:
# Client pickups by TNCs
# Weighted pickups per category; only the needed column is summed so the text
# columns of df_tnc_driver are left out of the aggregation.
clientPickups = df_tnc_driver.groupby(['IndCat3'])['wghtd_client_picks'].sum()
clientPickups = pd.DataFrame(clientPickups.map('{:,.0f}'.format))
print(clientPickups.T.to_string(index=False))

In [None]:
# Calculate deliveries to households by TNCs -- food (restaurant+groceries), non-food parcels
# Only the needed column is summed so the text columns of df_tnc_driver are left
# out of the aggregation.
homeDeliveries = df_tnc_driver.groupby(['IndCat3'])['wghtd_home_delivs'].sum()
# Food total is taken before the values are turned into display strings.
food = homeDeliveries['Restaurant']+homeDeliveries['Retail']
homeDeliveries = pd.DataFrame(homeDeliveries.map('{:,.0f}'.format))

print(homeDeliveries.T.to_string(index=False))
print("\nFood Deliveries = Restaurant + Retail")
print(f"\t{food:,.0f}")

In [None]:
# Calculate deliveries to businesses by TNCs
# Note: the result reuses the name clientPickups although it holds weighted
# business deliveries. Only the needed column is summed.
clientPickups = df_tnc_driver.groupby(['IndCat3'])['wghtd_biz_delivs'].sum()
clientPickups = pd.DataFrame(clientPickups.map('{:,.0f}'.format))
print(clientPickups.T.to_string(index=False))

In [None]:
# Clients per Driver Route
# Weighted client pickups divided by the summed driver weights, per category.
# Columns are selected before summing so the text columns of df_tnc_driver are
# left out of the aggregation.
by_category = df_tnc_driver.groupby(['IndCat3'])
clientPickups = by_category['wghtd_client_picks'].sum() / by_category['tnc_expwght'].sum()
clientPickups = pd.DataFrame(clientPickups.map('{:,.4f}'.format))
print("Clients per driver route:")
print(clientPickups.T.to_string(index=False))

# Driver routes
# Summed driver weights per category, with a grand total appended under 'Total'.
driverRoutes = df_tnc_driver.groupby(['IndCat3'])['tnc_expwght'].sum()
driverRoutes['Total'] = driverRoutes.sum()
driverRoutes = pd.DataFrame(driverRoutes.map('{:.0f}'.format))
print("\nDriver routes (weighted)")
print(driverRoutes.T.to_string(index=False))

#### Get TNC Route Generation (routes = drivers)

In [None]:
# Driver Routes Per Client Establishment using TNCs
# Summed driver weights divided by weighted client pickups, per category (the
# inverse of clients per route). Columns are selected before summing so the
# text columns of df_tnc_driver are left out of the aggregation.
by_category = df_tnc_driver.groupby(['IndCat3'])
driversPerClient = by_category['tnc_expwght'].sum() / by_category['wghtd_client_picks'].sum()
driversPerClient = pd.DataFrame(driversPerClient.map('{:.5f}'.format))
# The ratio series is unnamed, so its DataFrame column is 0 until renamed.
driversPerClient.rename(columns={0:'drivers_per_client'}, inplace=True)

print("Driver Routes per Client Establishment using TNCs:")
print(driversPerClient.T.to_string(index=False))

In [None]:
# Merge with establishment data
# Both tables hold formatted strings at this point, so each is cast back to
# float before the category-level join.
tncRouteGen = pd.merge(driversPerClient.astype('float').reset_index(),
                       TNC_Industry3.astype('float').reset_index(),
                       how='left', left_on='IndCat3', right_on='type')

# Keep only the category, the three usage rates and the drivers-per-client ratio.
tncRouteGen = tncRouteGen[['IndCat3','pctUseTNC','rateDelTNC', 'delivEmpIfTNC','drivers_per_client']]

In [None]:
# Display the merged route-generation inputs.
tncRouteGen

In [None]:
# Display the three-group usage-rate table.
TNC_Industry3

In [None]:
# Scratch arithmetic check.
# NOTE(review): the operands are hard-coded and unlabelled -- presumably copied
# from the tables displayed above; confirm their source before relying on the result.
27532 * 0.02088 * 0.61184

In [None]:
# Re-print the weighted driver-route counts for reference.
print("\nDriver routes (weighted)")
print(driverRoutes.T.to_string(index=False))

In [None]:
# Create employment based rates
# Join weighted driver routes to weighted employment per category. Both inputs
# are cast to float first; the inner join keeps only categories present in both
# tables.
routeGenRates = pd.merge(driverRoutes.astype('float').reset_index(),
                         TNC_Industry3[['empAll']].astype('float').reset_index(),
                         how='inner', left_on='IndCat3', right_on='type').drop(columns=['type'])

routeGenRates.rename(columns={'tnc_expwght': 'routes_wtd', 'empAll': 'employment_wtd'}, inplace=True)
# Append a column-sum row under index label 4, then overwrite its category
# with the label "Totals".
routeGenRates.loc[4] = routeGenRates.sum()
routeGenRates.loc[routeGenRates.index[-1], 'IndCat3'] = "Totals"
# Weighted routes per weighted employee, for each category and for the totals row.
routeGenRates['routes_per_emp'] = routeGenRates['routes_wtd'] / routeGenRates['employment_wtd']
routeGenRates

#### Calibrate Route Gen

In [None]:
# Read land use data
# Loads the MGRA-based land use input file (CSV) into df_MGRA.
df_MGRA = pd.read_csv(os.path.join(root_dir,
                                      raw_data_dir,
                                      r"Land_Use\mgra15_based_input_2022_02_cvm.csv"))
df_MGRA.head()

In [None]:
# Short codes for the model industry sectors, keyed by sector number (1-12)
indus_abrv = dict(enumerate(
    ["AGM", "MFG", "IUT", "RET", "WHL", "CON",
     "TRN", "IFR", "EPO", "MHS", "LAF", "MIL"],
    start=1,
))

# Inverse of indus_abrv: short code -> sector number
abrv_indus = {code: sector for sector, code in indus_abrv.items()}

# Model sector number -> MGRA employment columns that feed it
emp_mgra_to_model = {
    1: ["emp_ag_min"],
    2: ["emp_mnf"],
    3: ["emp_utl"],
    4: ["emp_ret"],
    5: ["emp_whl"],
    6: ["emp_con"],
    7: ["emp_trn_wrh"],
    8: ["emp_fin_res_mgm", "emp_bus_svcs"],
    9: ["emp_educ", "emp_gov", "emp_oth", "emp_ent", "emp_accm"],
    10: ["emp_hlth"],
    11: ["emp_food"],
    12: ["emp_mil"],
}

# Inverse of the crosswalk: MGRA employment column -> model sector number
emp_survey_to_mgra = {
    mgra_col: sector
    for sector, mgra_cols in emp_mgra_to_model.items()
    for mgra_col in mgra_cols
}
#emp_survey_to_mgra

In [None]:
# Bucket round function
def bucketRound(arr, thr=0.5):
    """Round values to integers while carrying fractional remainders forward.

    Each value is truncated to an integer and its fractional part is added to
    a running bucket. Whenever the bucket exceeds ``thr`` the current output
    is bumped by one and the bucket is reset to zero.

    Args:
        arr: list or numpy array of numeric values.
        thr: bucket level that must be exceeded to trigger a round-up.

    Returns:
        An int64 numpy array with one entry per input value, or ``None``
        (after printing the input and an error message) when ``arr`` is
        neither a list nor a numpy array.
    """
    if not isinstance(arr, (list, np.ndarray)):
        print(arr)
        print("Error: Function requires inputs as an array or list of values.")
        return None

    values = np.asarray(arr).astype('float')
    rounded = np.zeros(len(values)).astype('int64')
    carry = float(0)
    for pos, value in enumerate(values):
        rounded[pos] = int(value)
        carry += value % 1
        if carry > thr:
            rounded[pos] += 1
            carry = 0
    return rounded

In [None]:
# Select only relevant employment fields
# Every df_MGRA column whose name contains 'emp_' is kept.
use_columns = [col for col in df_MGRA.columns if 'emp_' in col]
#[use_columns.remove(x) for x in ['emp_non_ws_wfh','emp_non_ws_oth','emp_tot']]

# Create new dataframe and identify model sectors
# One row per employment column holding its region-wide sum; columns that are
# not in the crosswalk get no sector and are filled with 0 below.
df_MGRA_emp = pd.DataFrame(df_MGRA[use_columns].sum()).rename(columns={0:'mgra_jobs'})
df_MGRA_emp.loc[:, 'emp_group_no'] = df_MGRA_emp.index.map(emp_survey_to_mgra)
df_MGRA_emp.loc[:, 'emp_mod_sector'] = df_MGRA_emp.emp_group_no.map(indus_abrv)
df_MGRA_emp.fillna(0, inplace=True)

# Identify TNC3 sectors
# Default is 'NonRestRetl'; sector 11 -> 'Restaurant', sector 4 -> 'Retail',
# and unmapped columns (sector 0 after the fill) -> 'None'.
df_MGRA_emp['IndCat3'] = 'NonRestRetl'
df_MGRA_emp.loc[df_MGRA_emp['emp_group_no'] == 11, 'IndCat3'] = 'Restaurant'
df_MGRA_emp.loc[df_MGRA_emp['emp_group_no'] == 4, 'IndCat3'] = 'Retail'
df_MGRA_emp.loc[df_MGRA_emp['emp_group_no'] == 0, 'IndCat3'] = 'None'
df_MGRA_emp

In [None]:
# Collect jobs summaries by model sectors
# Sum MGRA jobs per TNC category, using only rows mapped to a model sector.
mgraEmpCat3 = pd.DataFrame(df_MGRA_emp[df_MGRA_emp['emp_group_no']>0].groupby(['IndCat3'])['mgra_jobs'].sum()).reset_index()
# NOTE(review): assigning a scalar to a new .loc row fills every column, so the
# 'Totals' row carries the jobs total in IndCat3 as well, not a text label --
# confirm this is intended.
mgraEmpCat3.loc['Totals'] = mgraEmpCat3.sum()['mgra_jobs']
mgraEmpCat3

In [None]:
# Re-base the per-employee route rates on MGRA employment, whose mix differs
# from the CVS x TNC survey employment
routeGenRates = pd.merge(routeGenRates, mgraEmpCat3, how='inner', on='IndCat3')
# Survey routes (rate x survey employment) spread over MGRA jobs.
routeGenRates['routes_per_emp_adj1'] = routeGenRates['routes_per_emp'] * routeGenRates['employment_wtd'] / routeGenRates['mgra_jobs']
routeGenRates['routes_calibrated'] = routeGenRates['routes_per_emp_adj1'] * routeGenRates['mgra_jobs']
routeGenRates

In [None]:
# Apply to MGRAs and adjust for rounding
# Aggregate employment to LUZ. The columns are selected with a list: tuple-style
# selection on a groupby is rejected by current pandas.
test_retail = pd.DataFrame(df_MGRA.groupby(['LUZ'])[['emp_ret','emp_non_ws_wfh','emp_non_ws_oth']].sum()).reset_index()
# First pass: apply the adjusted rate from row position 2 of routeGenRates to
# retail employment and round per LUZ.
# NOTE(review): row position 2 is presumably the Retail row -- this relies on
# the row order of routeGenRates; confirm.
test_retail.loc[:, 'tnc_routes'] = routeGenRates.iloc[2]["routes_per_emp_adj1"] * test_retail['emp_ret']
test_retail.loc[:, 'tnc_routes'] = np.round(test_retail['tnc_routes'].values)

# Scale the rate by target routes / rounded routes to compensate for rounding.
out_retail = test_retail['tnc_routes'].sum()
factor = routeGenRates.iloc[2]["routes_wtd"] / out_retail
# Created as float so the calibrated rates can be written in without a dtype change.
routeGenRates["routes_per_emp_calib"] = 0.0
routeGenRates.iloc[2, routeGenRates.columns.get_loc("routes_per_emp_calib")] = routeGenRates.iloc[2]["routes_per_emp_adj1"] * factor
# Second pass: re-apply the calibrated rate and round again.
test_retail.loc[:, 'tnc_routes'] = routeGenRates.iloc[2]["routes_per_emp_calib"] * test_retail['emp_ret']
test_retail.loc[:, 'tnc_routes'] = np.round(test_retail['tnc_routes'].values)

# Record the routes actually produced by the calibrated rate.
routeGenRates.iloc[2, routeGenRates.columns.get_loc("routes_calibrated")] = test_retail['tnc_routes'].sum()
test_retail['tnc_routes'].sum()
# test_retail[test_retail['tnc_routes']>0]

In [None]:
# Restaurant calibration, using row position 1 of routeGenRates.
# NOTE(review): row position 1 is presumably the Restaurant row -- this relies
# on the row order of routeGenRates; confirm.
# 'emp_laf' currently equals emp_food only (the other terms are commented out).
df_MGRA.loc[:, 'emp_laf'] = df_MGRA['emp_food']# + df_MGRA['emp_ent'] + df_MGRA['emp_accm']
# Aggregate to LUZ; columns are selected with a list because tuple-style
# selection on a groupby is rejected by current pandas.
test_restaurant = pd.DataFrame(df_MGRA.groupby(['LUZ'])[['emp_laf','emp_non_ws_wfh','emp_non_ws_oth']].sum()).reset_index()
# First pass: adjusted rate applied and rounded per LUZ.
test_restaurant.loc[:, 'tnc_routes'] = routeGenRates.iloc[1]["routes_per_emp_adj1"] * test_restaurant['emp_laf']
test_restaurant.loc[:, 'tnc_routes'] = np.round(test_restaurant['tnc_routes'].values)

# Scale the rate by target routes / rounded routes to compensate for rounding.
out_restaurant = test_restaurant['tnc_routes'].sum()
factor = routeGenRates.iloc[1]["routes_wtd"] / out_restaurant
routeGenRates.iloc[1, routeGenRates.columns.get_loc("routes_per_emp_calib")] = routeGenRates.iloc[1]["routes_per_emp_adj1"] * factor
# Second pass: re-apply the calibrated rate and round again.
test_restaurant.loc[:, 'tnc_routes'] = routeGenRates.iloc[1]["routes_per_emp_calib"] * test_restaurant['emp_laf']
test_restaurant.loc[:, 'tnc_routes'] = np.round(test_restaurant['tnc_routes'].values)

# Record the routes actually produced by the calibrated rate.
routeGenRates.iloc[1, routeGenRates.columns.get_loc("routes_calibrated")] = test_restaurant['tnc_routes'].sum()
test_restaurant['tnc_routes'].sum()
# test_restaurant[test_restaurant['tnc_routes']>0]

In [None]:
# Non-restaurant/non-retail calibration, using row position 0 of routeGenRates.
# NOTE(review): row position 0 is presumably the NonRestRetl row -- this relies
# on the row order of routeGenRates; confirm.
# Remaining employment = total minus emp_laf, retail and the two non-wage-salary columns.
df_MGRA.loc[:, 'emp_NonRR'] = df_MGRA['emp_tot'] - df_MGRA['emp_laf'] - df_MGRA['emp_ret'] - df_MGRA['emp_non_ws_wfh'] - df_MGRA['emp_non_ws_oth']
# Aggregate to LUZ; columns are selected with a list because tuple-style
# selection on a groupby is rejected by current pandas.
test_nonrr = pd.DataFrame(df_MGRA.groupby(['LUZ'])[['emp_NonRR','emp_non_ws_wfh','emp_non_ws_oth']].sum()).reset_index()
# First pass: adjusted rate applied and rounded per LUZ.
test_nonrr.loc[:, 'tnc_routes'] = routeGenRates.iloc[0]["routes_per_emp_adj1"] * test_nonrr['emp_NonRR']
test_nonrr.loc[:, 'tnc_routes'] = np.round(test_nonrr['tnc_routes'].values)

# Scale the rate by target routes / rounded routes to compensate for rounding.
out_nonrr = test_nonrr['tnc_routes'].sum()
factor = routeGenRates.iloc[0]["routes_wtd"] / out_nonrr
routeGenRates.iloc[0, routeGenRates.columns.get_loc("routes_per_emp_calib")] = routeGenRates.iloc[0]["routes_per_emp_adj1"] * factor
# Second pass: re-apply the calibrated rate and round again.
test_nonrr.loc[:, 'tnc_routes'] = routeGenRates.iloc[0]["routes_per_emp_calib"] * test_nonrr['emp_NonRR']
test_nonrr.loc[:, 'tnc_routes'] = np.round(test_nonrr['tnc_routes'].values)

# Record the routes actually produced by the calibrated rate.
routeGenRates.iloc[0, routeGenRates.columns.get_loc("routes_calibrated")] = test_nonrr['tnc_routes'].sum()
test_nonrr['tnc_routes'].sum()
# test_nonrr[test_nonrr['tnc_routes']>0]

In [None]:
# Send to CSV
# Output folder, relative to root_dir.
model_path = "Task05_Estimation_Calibration/Estimation/Route_Gen"
# Round the calibrated rates to 7 decimals, then write category, weighted
# routes and calibrated rate without the index column.
routeGenRates['routes_per_emp_calib'] = np.round(routeGenRates['routes_per_emp_calib'], 7)
routeGenRates[['IndCat3','routes_wtd','routes_per_emp_calib']].to_csv(os.path.join(root_dir, model_path, "TNC_RouteGenRates.csv"), index=False)