### SANDAG Commercial Vehicle Model
#### Task 3 -- Data Exploration for Design Decisions

#### Tabulate weighted establishment deliveries to residences/households

In [1]:
import numpy as np
import pandas as pd
import os
# from matplotlib import pyplot as plt
# from matplotlib import ticker

In [2]:
# Disable row truncation so every row of a displayed DataFrame is shown.
pd.set_option("display.max_rows", None)

In [3]:
# Import the project directory settings from the local FilePaths script.
# The names are imported explicitly (rather than with `*`) so it is clear
# which variables this notebook takes from that script.
from FilePaths import root_dir, raw_data_dir, proc_data_dir, lookup_dir

# Echo each path so the run records which directories were used.
for path_label, path_value in (("root_dir", root_dir),
                               ("raw_data_dir", raw_data_dir),
                               ("proc_data_dir", proc_data_dir),
                               ("lookup_dir", lookup_dir)):
    print(f"{path_label} = \n", path_value, "\n")

root_dir = 
 C:\Users\jgliebe\OneDrive - Cambridge Systematics\Documents - PROJ SANDAG Commercial Vehicle & Heavy Truck Model Update\_Shared_CSTeam 

raw_data_dir = 
 Task03_DataID_Review 

proc_data_dir = 
 Task04_DataProcessing\Data\proc data\v10 

lookup_dir = 
 Task04_DataProcessing\Data\Lookups 



In [4]:
# Read input data -- establishment file ("Establishment Data" sheet of the CVS workbook)
df_estab = pd.read_excel(
    os.path.join(
        root_dir,
        raw_data_dir,
        r"CVS\SANDAG 2022 CV DataBase & Dictionaires_03_03_2023.xlsx",
    ),
    sheet_name=r"Establishment Data",
)

# company_id intentionally stays a regular column (default RangeIndex):
# later cells select it by name when building the employment lookup.
df_estab.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2697 entries, 0 to 2696
Data columns (total 68 columns):
company_id                                                               2697 non-null int64
Industry Group_Size Code (Group from Column D; Size 1=0-9 emp, 2=10+)    2697 non-null object
company_name                                                             2697 non-null object
base_location_Industry Group                                             2697 non-null int64
company_location_address                                                 2697 non-null object
company_location_city                                                    2697 non-null object
company_location_state                                                   2697 non-null object
company_location_zipcode                                                 2697 non-null int64
company_location_latitude                                                2697 non-null float64
company_location_longitude                       

In [5]:
# Read input data -- trip records ("Trip Data" sheet of the same CVS workbook)
df_trips = pd.read_excel(
    os.path.join(
        root_dir,
        raw_data_dir,
        r"CVS\SANDAG 2022 CV DataBase & Dictionaires_03_03_2023.xlsx",
    ),
    sheet_name=r"Trip Data",
)

df_trips.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12261 entries, 0 to 12260
Data columns (total 32 columns):
company_id                            12261 non-null int64
vehicle_id                            12261 non-null int64
driver_id                             12261 non-null int64
trip_number                           12261 non-null int64
trip_load_status                      1582 non-null float64
trip_load_weight                      191 non-null float64
activity_type                         12261 non-null int64
activity_type_other                   700 non-null object
placetype                             12261 non-null int64
placetype_other                       1175 non-null object
location_placename                    12261 non-null object
location_address                      12261 non-null object
location_city                         12248 non-null object
location_state                        12261 non-null object
location_zip                          12261 non-null int64
lo

In [6]:
# Industry group code -> descriptive label; used to translate the
# establishment file's 'base_location_Industry Group' codes.
indus_lookup = {
    1: "Agriculture/Mining",
    2: "Manufacturing",
    3: "Industrial/Utilities",
    4: "Retail",
    5: "Wholesale",
    6: "Construction",
    7: "Transportation",
    8: "Info/Finance/Insurance/Real Estate/Professional services",
    9: "Education/Other public services",
    10: "Medical/Health Services",
    11: "Leisure/Accommodations and Food",
    96: "Other/Non-Classified",
}

In [7]:
# Build a per-establishment lookup, indexed by company_id, holding the
# industry group label and total employment (full-time + part-time).
df_empl = (
    df_estab[['company_id']]
    .assign(
        industry_group=df_estab['base_location_Industry Group'].map(indus_lookup),
        emp_total=df_estab['employees_fulltime_count'] + df_estab['employees_parttime_count'],
    )
    .set_index('company_id')
    [['industry_group', 'emp_total']]
)
df_empl.head()

Unnamed: 0_level_0,industry_group,emp_total
company_id,Unnamed: 1_level_1,Unnamed: 2_level_1
100002,Agriculture/Mining,50
100003,Agriculture/Mining,2
100004,Agriculture/Mining,4
100005,Agriculture/Mining,34
100007,Agriculture/Mining,5


In [8]:
# Join establishment industry group and total employment to the trip records.
# Columns left over from an earlier run of this cell are dropped first, so
# re-running it does not create suffixed duplicates (emp_total_x / emp_total_y)
# that would break the lookup of 'emp_total' below.
# validate='many_to_one' raises if a company_id is repeated in df_empl, which
# would otherwise silently duplicate trip rows and inflate every tabulation.
df_trips = (
    df_trips
    .drop(columns=list(df_empl.columns), errors='ignore')
    .merge(df_empl, how='left', on='company_id', validate='many_to_one')
)
df_trips[['emp_total']].info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12261 entries, 0 to 12260
Data columns (total 1 columns):
emp_total    12261 non-null int64
dtypes: int64(1)
memory usage: 191.6 KB


In [9]:
# Preview the trip records after the establishment attributes were joined.
df_trips.head()

Unnamed: 0,company_id,vehicle_id,driver_id,trip_number,trip_load_status,trip_load_weight,activity_type,activity_type_other,placetype,placetype_other,...,arrival_time,departure_time,used_other_vehicle,participation_type,vehicle_id.1,Lower Estimate Weight Factor,Most Likely Estimate Weight Factor,Upper Estimate Weight Factor,industry_group,emp_total
0,100012,2062,2589,0,4.0,,1,,11,,...,,10:00:00,2,online,2062,109.693404,177.825038,198.177052,Education/Other public services,3
1,100012,2062,2589,1,,,11,,6,,...,10:20:00,11:10:00,2,online,2062,109.693404,177.825038,198.177052,Education/Other public services,3
2,100012,2062,2589,2,,,1,,11,,...,11:22:00,11:40:00,2,online,2062,109.693404,177.825038,198.177052,Education/Other public services,3
3,100012,2062,2589,3,,,10,,96,DOG PARK,...,12:00:00,14:00:00,2,online,2062,109.693404,177.825038,198.177052,Education/Other public services,3
4,100012,2062,2589,4,,,1,,11,,...,14:20:00,,2,online,2062,109.693404,177.825038,198.177052,Education/Other public services,3


#### National Business Employment Dynamics Data by Firm Size Class
* Size class 1 (1 to 4 employees)
* Size class 2 (5 to 9 employees)
* Size class 3 (10 to 19 employees)
* Size class 4 (20 to 49 employees)
* Size class 5 (50 to 99 employees)
* Size class 6 (100 to 249 employees)
* Size class 7 (250 to 499 employees)
* Size class 8 (500 to 999 employees)
* Size class 9 (1,000 or more employees)

In [10]:
# Map an employee count to its firm size class
def sizeClass(emp=0):
    """Return the firm size class (1-9) for an establishment's employee count.

    Classes follow the size-class list above: 1 = 1-4, 2 = 5-9, 3 = 10-19,
    4 = 20-49, 5 = 50-99, 6 = 100-249, 7 = 250-499, 8 = 500-999 and
    9 = 1,000 or more employees (no upper limit).

    Parameters
    ----------
    emp : int or float, default 0
        Total number of employees.

    Returns
    -------
    int
        Size class 1-9, or 0 when ``emp`` is below 1, NaN, or not numeric.
        A fractional count is classed by the highest lower bound it reaches.
    """
    # Lower employment bound of size classes 1 through 9.
    lower_bounds = (1, 5, 10, 20, 50, 100, 250, 500, 1000)
    try:
        # The class number equals how many lower bounds the count reaches.
        # Every comparison with NaN is False, so NaN falls through to 0.
        return int(sum(emp >= lower for lower in lower_bounds))
    except TypeError:
        # Non-numeric input (e.g. None) has no size class.
        return 0
    
# Tag every trip record with the size class of its establishment.
df_trips['emp_szclass'] = df_trips['emp_total'].map(sizeClass)

In [11]:
# Summarize raw and weighted trip counts by industry group and size class
group_keys = ['industry_group', 'emp_szclass']

# Unweighted number of trip records per group
dft1 = df_trips.groupby(group_keys).size().reset_index(name='n_trips')

# Weighted trips per group; astype('int64') truncates each group's total to
# a whole number before the pivot totals are computed.
dft2 = (
    df_trips.groupby(group_keys)['Most Likely Estimate Weight Factor']
    .sum()
    .astype('int64')
    .reset_index(name='wtd_trips')
)

# Merge on the explicit grouping keys rather than on whatever columns overlap.
df_sum = pd.merge(dft1, dft2, on=group_keys)


def _trip_pivot(value_col):
    """Return a styled industry-by-size-class pivot of ``value_col`` from df_sum.

    Rows are industry groups, columns are employment size classes, empty
    cells are filled with 0, and a 'Totals' margin is added. Values are
    formatted with thousands separators and no decimals.
    """
    return pd.pivot_table(data=df_sum,
                          values=[value_col],
                          index='industry_group',
                          columns='emp_szclass',
                          aggfunc='sum', fill_value=0,
                          margins=True, margins_name='Totals').style.format('{:,.0f}')


pivot1 = _trip_pivot('n_trips')
pivot2 = _trip_pivot('wtd_trips')

In [12]:
# Show the styled pivot of unweighted trip record counts.
print("Trips by Industry and Size Class -- Raw Count")
pivot1

Trips by Industry and Size Class -- Raw Count


Unnamed: 0_level_0,n_trips,n_trips,n_trips,n_trips,n_trips,n_trips,n_trips,n_trips
emp_szclass,1,2,3,4,5,6,7,Totals
industry_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Agriculture/Mining,35,48,164,51,0,0,0,298
Construction,218,543,384,410,82,0,0,1637
Education/Other public services,142,205,53,205,457,372,127,1561
Industrial/Utilities,25,0,33,59,526,105,0,748
Info/Finance/Insurance/Real Estate/Professional services,301,475,183,252,204,0,0,1415
Leisure/Accommodations and Food,98,168,38,71,4,0,3,382
Manufacturing,81,201,188,237,28,8,149,892
Medical/Health Services,25,817,57,153,310,96,10,1468
Retail,204,168,271,49,194,0,0,886
Transportation,240,187,428,474,526,0,0,1855


In [13]:
# Show the styled pivot of trips weighted by 'Most Likely Estimate Weight Factor'.
print("Trips by Industry and Size Class -- Weighted")
pivot2

Trips by Industry and Size Class -- Weighted


Unnamed: 0_level_0,wtd_trips,wtd_trips,wtd_trips,wtd_trips,wtd_trips,wtd_trips,wtd_trips,wtd_trips
emp_szclass,1,2,3,4,5,6,7,Totals
industry_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Agriculture/Mining,520,2764,8331,539,0,0,0,12154
Construction,37860,83509,29684,34012,15875,0,0,200940
Education/Other public services,25251,84412,12726,49223,164376,133217,69462,538667
Industrial/Utilities,364,0,481,860,12423,6993,0,21121
Info/Finance/Insurance/Real Estate/Professional services,49139,80555,30789,57204,44338,0,0,262025
Leisure/Accommodations and Food,9680,13164,5234,9779,550,0,413,38820
Manufacturing,4944,12194,27316,22522,2531,467,51000,120974
Medical/Health Services,8715,290112,5719,18373,58792,33794,1003,416508
Retail,29037,19163,25227,3053,17799,0,0,94279
Transportation,12180,10104,22272,23940,33565,0,0,102061


In [14]:
# Count stops at placetype 11 by industry group (unweighted).
# All activity types are included. A narrower filter that also required
# activity_type == 5 used to be computed here and was immediately overwritten
# by this one, so it never affected the result and has been removed.
# NOTE(review): presumably placetype 11 = residence and activity_type 5 =
# delivery -- confirm against the survey data dictionary.
df_homeDeliv = (
    df_trips[df_trips['placetype'] == 11]
    .groupby(['industry_group'])
    .size()
    .reset_index(name='n_stops')
)
df_homeDeliv

Unnamed: 0,industry_group,n_stops
0,Agriculture/Mining,71
1,Construction,439
2,Education/Other public services,451
3,Industrial/Utilities,216
4,Info/Finance/Insurance/Real Estate/Professiona...,327
5,Leisure/Accommodations and Food,55
6,Manufacturing,130
7,Medical/Health Services,788
8,Retail,169
9,Transportation,496


In [15]:
# Sum weighted stops at placetype 11 by industry group.
# All activity types are included. A narrower filter that also required
# activity_type == 5 used to be computed here and was immediately overwritten
# by this one, so it never affected the result and has been removed.
# NOTE(review): presumably placetype 11 = residence and activity_type 5 =
# delivery -- confirm against the survey data dictionary.
df_homeDeliv = (
    df_trips[df_trips['placetype'] == 11]
    .groupby(['industry_group'])['Most Likely Estimate Weight Factor']
    .sum()
    .astype('int64')
    .reset_index(name='wtd_stops')
)

# Combined total for the two industry groups of interest. Summing the filtered
# frame column by column yields two rows after reset_index: row 0 holds the
# concatenated industry names, row 1 holds the summed weighted stops (the
# value read by the next cell). This must run before the numbers are
# converted to text below.
food = df_homeDeliv[df_homeDeliv['industry_group'].isin(['Leisure/Accommodations and Food','Retail'])].sum().reset_index(name='wtd_stops')

# Format the weighted stops with thousands separators for printing.
df_homeDeliv['wtd_stops'] = df_homeDeliv['wtd_stops'].map('{:,d}'.format)
print(df_homeDeliv.to_string(index=False))

                                    industry_group wtd_stops
                                Agriculture/Mining     2,271
                                      Construction    49,452
                   Education/Other public services   168,815
                              Industrial/Utilities     4,163
 Info/Finance/Insurance/Real Estate/Professiona...    56,478
                   Leisure/Accommodations and Food     5,534
                                     Manufacturing    15,865
                           Medical/Health Services   233,709
                                            Retail    14,673
                                    Transportation    29,671
                                         Wholesale    12,737


In [16]:
# `food` is the column-wise sum of the Retail and Leisure/Accommodations and
# Food rows: row 0 holds the concatenated industry names, row 1 holds the
# summed weighted stops, hence the positional iloc[1] lookup.
print(f"Maximum number of residential stops for retail and food: {food.iloc[1]['wtd_stops']:,}")

Maximum number of residential stops for retail and food: 20,207
