### SANDAG Commercial Vehicle Model
#### Task 3 -- Data Exploration for Design Decisions

#### Household Deliveries of Food, Packages, and Service Stops 

In [1]:
import numpy as np
import pandas as pd
import os
# from matplotlib import pyplot as plt
# from matplotlib import ticker

In [2]:
pd.set_option("display.max_rows", None)

In [3]:
# Import file paths from script
from FilePaths import *
print("root_dir = \n", root_dir, "\n")
print("raw_data_dir = \n", raw_data_dir,"\n")
# print("proc_data_dir = \n", proc_data_dir,"\n")

root_dir = 
 C:\Users\jgliebe\OneDrive - Cambridge Systematics\Documents - PROJ SANDAG Commercial Vehicle & Heavy Truck Model Update\_Shared_CSTeam 

raw_data_dir = 
 Task03_DataID_Review 



In [4]:
# Read input data -- establishment file
df_days = pd.read_csv(os.path.join(root_dir, 
                                      raw_data_dir, 
                                      r"HHTS\export_day_weights.csv"))
df_days.set_index('day_id')
df_days.info()
df_days.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19009 entries, 0 to 19008
Data columns (total 49 columns):
day_id                  19009 non-null int64
num_trips               18109 non-null float64
travel_date             19009 non-null object
day_num                 19009 non-null int64
travel_dow              19009 non-null int64
travel_day              19009 non-null int64
hh_day_complete         19009 non-null int64
survey_complete         19009 non-null int64
begin_day               19009 non-null int64
end_day                 19009 non-null int64
telecommute_time        10113 non-null float64
delivery_2              19009 non-null int64
delivery_3              19009 non-null int64
delivery_5              19009 non-null int64
delivery_6              19009 non-null int64
delivery_7              19009 non-null int64
delivery_8              19009 non-null int64
delivery_996            19009 non-null int64
made_travel             19009 non-null int64
no_travel_1             19009 n

Unnamed: 0,day_id,num_trips,travel_date,day_num,travel_dow,travel_day,hh_day_complete,survey_complete,begin_day,end_day,...,attend_school_no_4,attend_school_no_5,attend_school_no_997,attend_school_no_998,attend_school_no_999,person_id,person_num,is_participant,hh_id,day_weight
0,220000770101,6.0,2022-05-17,1,2,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764
1,220000770201,6.0,2022-05-17,1,2,1,1,1,1,1,...,995,995,995,995,995,2200007702,2,1,22000077,58.290764
2,220000770102,5.0,2022-05-18,2,3,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764
3,220000770202,5.0,2022-05-18,2,3,1,1,1,1,1,...,995,995,995,995,995,2200007702,2,1,22000077,58.290764
4,220000770103,4.0,2022-05-19,3,4,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764


In [5]:
# Filter only eligible participants and weekdays (Mon-Thu)
df_days = df_days[(df_days['is_participant']>0) & (df_days['travel_dow'].isin([1,2,3,4]))]
df_days.head()

Unnamed: 0,day_id,num_trips,travel_date,day_num,travel_dow,travel_day,hh_day_complete,survey_complete,begin_day,end_day,...,attend_school_no_4,attend_school_no_5,attend_school_no_997,attend_school_no_998,attend_school_no_999,person_id,person_num,is_participant,hh_id,day_weight
0,220000770101,6.0,2022-05-17,1,2,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764
1,220000770201,6.0,2022-05-17,1,2,1,1,1,1,1,...,995,995,995,995,995,2200007702,2,1,22000077,58.290764
2,220000770102,5.0,2022-05-18,2,3,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764
3,220000770202,5.0,2022-05-18,2,3,1,1,1,1,1,...,995,995,995,995,995,2200007702,2,1,22000077,58.290764
4,220000770103,4.0,2022-05-19,3,4,1,1,1,1,1,...,995,995,995,995,995,2200007701,1,1,22000077,58.290764


#### Delivery Variable Definitions

- delivery_2:  Delivery on travel day: Food was delivered to home (e.g., take-out, groceries)
- delivery_3:  Delivery on travel day: Someone came to do work at home (e.g., landscaping, plumber, housecleaning)
- delivery_5:  Delivery on travel day: Received package AT HOME (e.g., USPS, FedEx, UPS)
- delivery_6:  Delivery on travel day: Received personal packages AT WORK
- delivery_7:  Delivery on travel day: Received packages at OFFSITE LOCKER (e.g., Amazon locker, package pick-up point)
- delivery_8:  Delivery on travel day: Other item delivered to home (e.g., appliance)
- delivery_996:  Delivery on travel day: None of the above

In [6]:
cols = [col for col in df_days.columns if "delivery_" in col]

# Recode non-responses '995' as zeros
for col in cols:
    df_days.loc[df_days[col]>1, col] = 0

cols = cols + ['day_weight']
df_deliv = df_days.groupby(['hh_id','day_num'])[cols].max().reset_index()
df_deliv.head()

Unnamed: 0,hh_id,day_num,delivery_2,delivery_3,delivery_5,delivery_6,delivery_7,delivery_8,delivery_996,day_weight
0,22000077,1,0,0,1,0,0,0,1,58.290764
1,22000077,2,0,0,0,0,0,0,1,58.290764
2,22000077,3,0,0,0,0,0,0,1,58.290764
3,22000077,7,0,0,0,0,0,0,1,58.290764
4,22000125,3,0,0,0,0,0,0,1,3.140051


In [7]:
for col in cols:
    if "delivery_" in col:
        print(f"{col}: {df_deliv[df_deliv[col]==1][col].sum()}")

delivery_2: 317
delivery_3: 365
delivery_5: 2333
delivery_6: 56
delivery_7: 46
delivery_8: 45
delivery_996: 4465


In [8]:
# Recode
df_deliv['Food'] = (df_deliv['delivery_2']==1).astype('int32')*df_deliv['day_weight']
df_deliv['Service'] = (df_deliv['delivery_3']==1).astype('int32')*df_deliv['day_weight']
df_deliv['Package'] = ((df_deliv['delivery_5']==1) | (df_deliv['delivery_8']==1)).astype('int32')*df_deliv['day_weight']
use_cols = ['Food', 'Service', 'Package']

In [11]:
df_deliv.head(5)

Unnamed: 0,hh_id,day_num,delivery_2,delivery_3,delivery_5,delivery_6,delivery_7,delivery_8,delivery_996,day_weight,Food,Service,Package
0,22000077,1,0,0,1,0,0,0,1,58.290764,0.0,0.0,58.290764
1,22000077,2,0,0,0,0,0,0,1,58.290764,0.0,0.0,0.0
2,22000077,3,0,0,0,0,0,0,1,58.290764,0.0,0.0,0.0
3,22000077,7,0,0,0,0,0,0,1,58.290764,0.0,0.0,0.0
4,22000125,3,0,0,0,0,0,0,1,3.140051,0.0,0.0,0.0


In [9]:
print("Household days observed food, service or package received at home")
print(df_deliv[use_cols].sum().map('{:,.0f}'.format).to_string())
print("\n")
print("Household percent of days food, service or package received at home")
print(round(df_deliv[use_cols].sum()/df_deliv['day_weight'].sum(),3))

Household days observed food, service or package received at home
Food        57,177
Service     81,266
Package    485,677


Household percent of days food, service or package received at home
Food       0.048
Service    0.068
Package    0.408
dtype: float64


In [10]:
# This should equal 1,189,195 households
print("This should equal 1,189,195 households:")
print(f"{df_deliv['day_weight'].sum():,.0f}")

This should equal 1,189,195 households:
1,189,195


In [1]:
38/57


0.6666666666666666