### SANDAG Commercial Vehicle Model
#### Task 3 -- Data Exploration for Design Decisions

#### Household Deliveries of Food, Packages, and Service Stops 

In [None]:
import numpy as np
import pandas as pd
import os
# from matplotlib import pyplot as plt
# from matplotlib import ticker

In [None]:
# Show every row when a DataFrame/Series is displayed (no truncation).
pd.options.display.max_rows = None

In [None]:
# Pull the project's directory settings into the notebook namespace.
# NOTE(review): the star import hides which names FilePaths provides; only
# root_dir and raw_data_dir are echoed here.
from FilePaths import *

# Echo the configured directories so the run records where data came from.
for label, path in (("root_dir = \n", root_dir),
                    ("raw_data_dir = \n", raw_data_dir)):
    print(label, path, "\n")
# print("proc_data_dir = \n", proc_data_dir,"\n")

In [None]:
# Read input data -- household travel survey (HHTS) day-level weights file.
# Path components are passed separately so os.path.join picks the separator
# for the current platform (the original hard-coded a Windows backslash).
day_weights_path = os.path.join(root_dir,
                                raw_data_dir,
                                "HHTS",
                                "export_day_weights.csv")
df_days = pd.read_csv(day_weights_path)

# set_index returns a new DataFrame; the result must be assigned back,
# otherwise the call has no effect on df_days.
df_days = df_days.set_index('day_id')
df_days.info()
df_days.head()

In [None]:
# Keep only participant records (is_participant > 0) whose travel day-of-week
# code is 1-4.
# NOTE(review): codes 1-4 are taken to mean Mon-Thu, as the original comment
# states -- confirm against the survey codebook.
is_participant = df_days['is_participant'] > 0
is_mon_to_thu = df_days['travel_dow'].isin([1, 2, 3, 4])

# .copy() makes the subset an independent DataFrame. Later cells assign into
# df_days in place; without the copy pandas emits SettingWithCopyWarning and
# the write may target a temporary object.
df_days = df_days[is_participant & is_mon_to_thu].copy()
df_days.head()

#### Delivery Variable Definitions

- delivery_2:  Delivery on travel day: Food was delivered to home (e.g., take-out, groceries)
- delivery_3:  Delivery on travel day: Someone came to do work at home (e.g., landscaping, plumber, housecleaning)
- delivery_5:  Delivery on travel day: Received package AT HOME (e.g., USPS, FedEx, UPS)
- delivery_6:  Delivery on travel day: Received personal packages AT WORK
- delivery_7:  Delivery on travel day: Received packages at OFFSITE LOCKER (e.g., Amazon locker, package pick-up point)
- delivery_8:  Delivery on travel day: Other item delivered to home (e.g., appliance)
- delivery_996:  Delivery on travel day: None of the above

In [None]:
# All survey columns that carry a delivery flag.
delivery_cols = [name for name in df_days.columns if "delivery_" in name]

# Any code above 1 (e.g. the '995' non-response code) is treated as
# "no delivery" and reset to zero.
for name in delivery_cols:
    is_non_response = df_days[name] > 1
    df_days.loc[is_non_response, name] = 0

# Collapse to one row per household and day number, keeping the maximum of
# every delivery flag and of the day weight within each group.
cols = delivery_cols + ['day_weight']
hh_day_groups = df_days.groupby(['hh_id', 'day_num'])
df_deliv = hh_day_groups[cols].max().reset_index()
df_deliv.head()

In [None]:
# Unweighted tally per delivery flag: sum the flag over the household-days
# where it equals 1 (the day_weight entry in cols is skipped).
flag_cols = [name for name in cols if "delivery_" in name]
for name in flag_cols:
    reported = df_deliv.loc[df_deliv[name] == 1, name]
    print(f"{name}: {reported.sum()}")

In [None]:
# Build weighted indicator columns: each household-day contributes its
# day_weight to a category when the matching delivery flag equals 1, and
# zero otherwise. 'Package' combines delivery_5 and delivery_8.
day_weights = df_deliv['day_weight']
category_flags = {
    'Food': df_deliv['delivery_2'] == 1,
    'Service': df_deliv['delivery_3'] == 1,
    'Package': (df_deliv['delivery_5'] == 1) | (df_deliv['delivery_8'] == 1),
}
for category, flag in category_flags.items():
    df_deliv[category] = flag.astype('int32') * day_weights

use_cols = list(category_flags)

In [None]:
# Preview the first five household-day rows, including the new weighted columns.
df_deliv.head()

In [None]:
# Weighted totals for each delivery category and the overall weight sum.
weighted_totals = df_deliv[use_cols].sum()
weight_sum = df_deliv['day_weight'].sum()

print("Household days observed food, service or package received at home")
print(weighted_totals.map(lambda value: f"{value:,.0f}").to_string())
print("\n")
print("Household percent of days food, service or package received at home")
# Printed as a fraction of total weight (0-1), rounded to three decimals.
print((weighted_totals / weight_sum).round(3))

In [None]:
# Sanity check on the weighted total: per the message printed below, the sum
# of day_weight is expected to equal 1,189,195 households.
total_day_weight = df_deliv['day_weight'].sum()
print("This should equal 1,189,195 households:")
print(format(total_day_weight, ",.0f"))

In [None]:
# Scratch calculation: the ratio 38/57 (about 0.667).
# NOTE(review): the notebook does not say what 38 and 57 represent --
# document the source of these numbers or remove this cell.
38/57
