In [1]:
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Curate NHTS 2022 data
https://nhts.ornl.gov/downloads

In [2]:
# Load the NHTS 2022 public trip file (path is relative to the notebook's working directory)
data_NHTS = pd.read_csv('travel_data/nhts2022/trippub.csv')

In [3]:
# List every attribute available in the trip table
data_NHTS.columns

Index(['HOUSEID', 'PERSONID', 'TRIPID', 'SEQ_TRIPID', 'VEHCASEID', 'FRSTHM',
       'PARK', 'HHMEMDRV', 'TDWKND', 'TRAVDAY', 'LOOP_TRIP', 'DWELTIME',
       'PUBTRANS', 'TRIPPURP', 'WHYFROM', 'WHYTRP1S', 'TRVLCMIN', 'STRTTIME',
       'ENDTIME', 'TRPHHVEH', 'VEHID', 'TRPTRANS', 'NUMONTRP', 'ONTD_P1',
       'ONTD_P2', 'ONTD_P3', 'ONTD_P4', 'ONTD_P5', 'ONTD_P6', 'ONTD_P7',
       'ONTD_P8', 'ONTD_P9', 'ONTD_P10', 'NONHHCNT', 'HHACCCNT', 'WHODROVE',
       'DRVR_FLG', 'PSGR_FLG', 'WHODROVE_IMP', 'PARK2_PAMOUNT',
       'PARK2_PAYTYPE', 'PARK2', 'WHYTO', 'WALK', 'TRPMILES', 'WTTRDFIN',
       'WTTRDFIN5D', 'WTTRDFIN2D', 'TDCASEID', 'VMT_MILE', 'GASPRICE',
       'WHYTRP90', 'NUMADLT', 'HOMEOWN', 'RAIL', 'CENSUS_D', 'CENSUS_R',
       'HH_HISP', 'DRVRCNT', 'CDIVMSAR', 'HHFAMINC', 'HH_RACE', 'HHSIZE',
       'HHVEHCNT', 'LIF_CYC', 'MSACAT', 'MSASIZE', 'URBAN', 'URBANSIZE',
       'URBRUR', 'TDAYDATE', 'WRKCOUNT', 'STRATUMID', 'R_AGE', 'R_SEX',
       'WORKER', 'DRIVER', 'R_RACE', 'R_HISP', 

## Mapping of trip attribute definitions:
- <font size="2">HOUSEID: Unique Identifier - Household
- PERSONID: Person ID within household
- TRIPID: Trip ID for each trip a person took
- SEQ_TRIPID: Renumbered sequential tripid
- VEHCASEID: Unique vehicle identifier
- FRSTHM: Started travel day at home
- PARK: Paid for parking at any time during travel day
- HHMEMDRV: Household member drove on trip
- TDWKND: Weekend trip
- TRAVDAY: Travel day - day of week
- LOOP_TRIP: Trip origin and destination at identical location
- DWELTIME: Time at Destination (minutes)
- PUBTRANS: Used public transit on trip
- TRIPPURP: General purpose of trip
- WHYFROM: Reason for previous trip
- WHYTRP1S: Trip purpose summary
- TRVLCMIN: Trip Duration in Minutes
- STRTTIME: 24 hour local start time of trip
- ENDTIME: 24 hour local end time of trip
- TRPHHVEH: Household vehicle used for trip
- VEHID: Vehicle ID of vehicle used from household roster
- TRPTRANS: Trip mode derived
- NUMONTRP: Number of people on trip
- ONTD_P1-10: Person 1-10 was on trip
- NONHHCNT: Number of non-household members on trip.
- HHACCCNT: Number of household members on trip.
- WHODROVE: Person who drove on trip.
- DRVR_FLG: Flag for driver on trip.
- PSGR_FLG: Flag for passenger on trip.
- WHODROVE_IMP: Imputed person who drove on trip.
- PARK2_PAMOUNT: Amount paid for parking.
- PARK2_PAYTYPE: Periodicity of parking payment.
- PARK2: Paid for parking on this trip.
- WHYTO: Reason for travel to destination.
- WALK: Minutes walked from parking to destination.
- TRPMILES: Calculated Trip distance converted into miles.
- WTTRDFIN: 7 day National trip weight.
- WTTRDFIN5D: 5 day National trip weight.
- WTTRDFIN2D: 2 day National trip weight.
- TDCASEID: Unique identifier for every trip record in the file.
- VMT_MILE: Calculated Trip distance (miles) for Driver Trips.
- GASPRICE: Weekly regional gasoline price in cents during the week of the household's travel day.
- WHYTRP90: Travel day trip purpose consistent with 1990 NPTS design.
- NUMADLT: Count of adult household members at least 18 years old.
- HOMEOWN: Whether home owned or rented.
- RAIL: MSA heavy rail status for household.
- CENSUS_D: Census division classification for home address.
- CENSUS_R: Census region classification for home address.
- HH_HISP: Hispanic status of household respondent.
- DRVRCNT: Number of drivers in the household.
- **CDIVMSAR: Grouping of household by combination of Census division MSA status and presence of rail.**
- **HHFAMINC: Household income.**
- HH_RACE: Race of household respondent.
- HHSIZE: Total number of people in household.
- HHVEHCNT: Total number of vehicles in household.
- LIF_CYC: Life Cycle classification for the household.
- MSACAT: MSA category for the HH home address.
- MSASIZE: Population size category of the MSA from the five-year ACS API.
- URBAN: Household urban area classification based on 2020 TIGER/Line Shapefile.
- URBANSIZE: Urban area size where home address is located.
- URBRUR: Household in urban/rural area.
- TDAYDATE: Date of travel day (YYYYMM).
- WRKCOUNT: Count of workers in household.
- STRATUMID: Household Stratum ID
- R_AGE: Respondent age
- R_SEX: Respondent sex
- WORKER: Employment status of respondent
- DRIVER: Driver status, derived
- R_RACE: Respondent race
- R_HISP: Person 5 or older - Hispanic or Latino
- PROXY: Survey completed by self or someone else
- EDUC: Respondent highest level of education
- PRMACT: Primary activity for those who did not work for pay last week
- R_SEX_IMP: Respondent sex (imputed)
- VEHTYPE: Vehicle type</font>

In [4]:
# Distinct within-household person IDs present in the trip table
data_NHTS['PERSONID'].unique()

array([1, 2, 4, 3, 5, 6, 7, 8, 9], dtype=int64)

## Remove unwanted trip information

In [5]:
# Keep only the attributes used in the rest of the analysis and drop exact duplicate rows.
# The selection and de-duplication are chained (no inplace=True) so pandas never mutates
# a slice of the raw frame, which avoids SettingWithCopyWarning.
relevant_columns = [
    'HOUSEID', 'PERSONID', 'TRIPID', 'FRSTHM', 'TRAVDAY', 'LOOP_TRIP', 'DWELTIME', 'WHYFROM',
    'WHYTRP1S', 'TRVLCMIN', 'STRTTIME', 'ENDTIME', 'TRPHHVEH', 'VEHID', 'TRPTRANS', 'WHYTO', 'TRPMILES',
    'VMT_MILE', 'CDIVMSAR', 'HHFAMINC', 'HHSIZE', 'HHVEHCNT', 'MSASIZE', 'URBRUR', 'WRKCOUNT', 'WORKER',
    'DRIVER', 'PROXY', 'VEHTYPE',
]

data_NHTS = data_NHTS[relevant_columns].drop_duplicates()

In [6]:
# Keep only trips driven in a household car / van / SUV / pickup with plausible, answered attributes.
# All conditions are combined into a single boolean mask so the frame is filtered once.
is_valid_trip = (
    # Mode must be car, van, SUV/crossover or pickup truck.
    # NOTE(review): the lower bound was added so negative (no-response) codes are not kept as if
    # they were car modes, mirroring the VEHTYPE filter below - confirm against the NHTS codebook.
    (data_NHTS.TRPTRANS > 0) & (data_NHTS.TRPTRANS <= 4)
    # Vehicle type must be car, van, SUV/crossover or pickup truck
    & (data_NHTS.VEHTYPE > 0) & (data_NHTS.VEHTYPE <= 4)
    # Only trips carried out as a driver, not as a passenger
    & (data_NHTS.DRIVER == 1)
    # Exclude all non-household vehicles
    & (data_NHTS.TRPHHVEH == 1)
    # Exclude all respondents that skipped the vehicle question
    & (data_NHTS.VEHID != -1)
    # Distances: exclude no clear response and trips of 1280 miles or more
    & (data_NHTS.TRPMILES > 0) & (data_NHTS.TRPMILES < 1280)
    & (data_NHTS.VMT_MILE > 0) & (data_NHTS.VMT_MILE < 1280)
    # Duration: exclude no clear response and trips of 16 hours (960 min) or more
    & (data_NHTS.TRVLCMIN > 0) & (data_NHTS.TRVLCMIN < 960)
    # Origin / destination purpose and start time: exclude no clear response
    & (data_NHTS.WHYFROM >= 1) & (data_NHTS.WHYFROM < 97)
    & (data_NHTS.WHYTO >= 1) & (data_NHTS.WHYTO < 97)
    & (data_NHTS.STRTTIME >= 0)
)

# .copy() makes the filtered frame independent, so adding AVGSPEED below is a safe assignment
data_NHTS = data_NHTS[is_valid_trip].copy()

# Average speed in mph: miles divided by duration converted from minutes to hours
data_NHTS['AVGSPEED'] = data_NHTS.TRPMILES / (data_NHTS.TRVLCMIN / 60)

# Exclude trips whose computed average speed is below 0.5 mph or at/above 120 mph
data_NHTS = data_NHTS[(data_NHTS.AVGSPEED >= 0.5) & (data_NHTS.AVGSPEED < 120)]

In [7]:
# Preview the filtered trip table (now including the derived AVGSPEED column)
data_NHTS

Unnamed: 0,HOUSEID,PERSONID,TRIPID,FRSTHM,TRAVDAY,LOOP_TRIP,DWELTIME,WHYFROM,WHYTRP1S,TRVLCMIN,...,HHSIZE,HHVEHCNT,MSASIZE,URBRUR,WRKCOUNT,WORKER,DRIVER,PROXY,VEHTYPE,AVGSPEED
0,9000013002,1,1,1,1,2,75,1,50,15,...,4,2,5,1,1,2,1,1,3,15.609695
1,9000013002,1,2,1,1,2,-9,15,1,10,...,4,2,5,1,1,2,1,1,3,23.414543
2,9000013002,2,1,1,1,2,10,1,80,30,...,4,2,5,1,1,1,1,2,3,34.154133
3,9000013002,2,2,1,1,2,10,12,80,10,...,4,2,5,1,1,1,1,2,3,28.489745
4,9000013002,2,3,1,1,2,-9,12,1,30,...,4,2,5,1,1,1,1,2,3,28.197638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31068,9000218013,1,1,1,5,2,5,1,40,15,...,2,1,1,1,0,2,1,1,3,5.001865
31069,9000218013,1,2,1,5,2,70,14,40,15,...,2,1,1,1,0,2,1,1,3,13.183344
31070,9000218013,1,3,1,5,2,-9,13,1,15,...,2,1,1,1,0,2,1,1,3,12.236172
31071,9000218040,1,1,1,5,2,6,1,40,4,...,1,1,6,2,1,1,1,1,4,1.538223


## Data exploration

In [8]:
# Histogram of the computed average trip speed (raw counts, plotly default binning)
fig = px.histogram(data_NHTS, x='AVGSPEED')
fig.show()

## Pairing *vehicle fuel* attribute from public vehicle table

In [9]:
# Load the NHTS 2022 public vehicle file (path is relative to the notebook's working directory)
veh_NHTS = pd.read_csv('travel_data/nhts2022/vehpub.csv')

In [10]:
# Keep the merge keys (household + vehicle ID) and the fuel attribute only
vehicle_columns = ['HOUSEID', 'VEHID', 'VEHFUEL']
veh_NHTS = veh_NHTS.loc[:, vehicle_columns]

veh_NHTS

Unnamed: 0,HOUSEID,VEHID,VEHFUEL
0,9000013002,1,1
1,9000013002,2,1
2,9000013016,1,1
3,9000013039,1,1
4,9000013041,1,1
...,...,...,...
14679,9000218013,1,1
14680,9000218021,1,1
14681,9000218034,1,1
14682,9000218034,2,1


In [11]:
# Attach each trip's vehicle fuel type. The left join keeps every trip; validate='many_to_one'
# makes pandas raise a MergeError if (HOUSEID, VEHID) is duplicated in the vehicle table,
# which would otherwise silently duplicate trip rows.
curated_NHTS = pd.merge(
    data_NHTS,
    veh_NHTS,
    on=['HOUSEID', 'VEHID'],
    how='left',
    validate='many_to_one',
)

In [12]:
# Count missing values per column after the left join (a non-zero VEHFUEL count would mean unmatched trips)
curated_NHTS.isna().sum()

HOUSEID      0
PERSONID     0
TRIPID       0
FRSTHM       0
TRAVDAY      0
LOOP_TRIP    0
DWELTIME     0
WHYFROM      0
WHYTRP1S     0
TRVLCMIN     0
STRTTIME     0
ENDTIME      0
TRPHHVEH     0
VEHID        0
TRPTRANS     0
WHYTO        0
TRPMILES     0
VMT_MILE     0
CDIVMSAR     0
HHFAMINC     0
HHSIZE       0
HHVEHCNT     0
MSASIZE      0
URBRUR       0
WRKCOUNT     0
WORKER       0
DRIVER       0
PROXY        0
VEHTYPE      0
AVGSPEED     0
VEHFUEL      0
dtype: int64

Only a small subset of trips has different TRPMILES and VMT_MILE values. In some of those cases VMT_MILE seems unreasonably high, so TRPMILES is used for the distance calculations below and VMT_MILE is ignored.

In [13]:
# Number of trips whose TRPMILES and VMT_MILE disagree
(curated_NHTS['TRPMILES'].to_numpy() != curated_NHTS['VMT_MILE'].to_numpy()).sum()

31

In [14]:
# Show the trips whose two distance columns disagree, with their identifying keys
distance_mismatch = curated_NHTS['TRPMILES'].to_numpy() != curated_NHTS['VMT_MILE'].to_numpy()
curated_NHTS.loc[distance_mismatch, ['HOUSEID', 'PERSONID', 'TRIPID', 'TRPMILES', 'VMT_MILE']]

Unnamed: 0,HOUSEID,PERSONID,TRIPID,TRPMILES,VMT_MILE
1712,9000046328,1,1,0.12379,75.024239
2075,9000047867,1,1,1.573102,204.400249
3013,9000051281,1,1,5.756584,116.700435
5390,9000073682,3,1,4.849461,41.123058
5391,9000073682,3,2,1.816006,247.746426
5392,9000073682,3,3,4.927349,329.568055
6100,9000075774,1,3,2.917708,32.057178
6101,9000075774,1,4,2.917708,20.791796
6721,9000077680,1,1,4.882269,129.172778
6722,9000077680,1,2,4.882269,131.49161


# Trip distributions

## Travel schedule by day of the week (*trip_by_time.csv*)

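Convert trip start times to datetime, derive each trip's midpoint and end time from its duration, and extract the hour of day of both (the hour is truncated, not rounded).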

In [15]:
def midtime(a, b):
    """Return the instant halfway through a trip.

    a: trip start (a timestamp or a datetime Series).
    b: trip duration in minutes (a number or a numeric Series).
    Works element-wise, so whole columns can be passed instead of using a row-wise apply.
    """
    return a + pd.to_timedelta(b / 2, unit='min')


def endtime(a, b):
    """Return the instant a trip ends: start `a` plus `b` minutes (scalars or Series)."""
    return a + pd.to_timedelta(b, unit='min')


def to_integer(dt_time):
    """Return the hour of day (0-23) of a single datetime-like value."""
    return dt_time.hour


# STRTTIME is an HHMM number; zero-pad it to four digits and parse it with an explicit format.
# All trips are placed on the same dummy date (2022-01-01) so only the time of day matters.
curated_NHTS['START_TIME'] = pd.to_datetime(
    '2022-01-01 ' + curated_NHTS.STRTTIME.astype(str).str.zfill(4),
    format='%Y-%m-%d %H%M',
)

curated_NHTS['MIDTIME'] = midtime(curated_NHTS['START_TIME'], curated_NHTS['TRVLCMIN'])

# NOTE: this overwrites the survey's original ENDTIME column with START_TIME + TRVLCMIN as a datetime
curated_NHTS['ENDTIME'] = endtime(curated_NHTS['START_TIME'], curated_NHTS['TRVLCMIN'])

# Hour of day (truncated) of the trip midpoint and of the trip end
curated_NHTS['INTTIME'] = curated_NHTS['MIDTIME'].dt.hour

curated_NHTS['INTTIME_END'] = curated_NHTS['ENDTIME'].dt.hour

## Group into weekday, Sat and Sun

In [16]:
# Distinct day-of-week codes present in the curated trips
curated_NHTS['TRAVDAY'].unique()

array([1, 4, 5, 6, 2, 7, 3], dtype=int64)

In [17]:
# Map the TRAVDAY code to a day type: 1 -> sunday, 2..6 -> weekday, 7 -> saturday
days = {
    1: 'sunday',
    **dict.fromkeys(range(2, 7), 'weekday'),
    7: 'saturday',
}

curated_NHTS['WEEKDAY'] = curated_NHTS['TRAVDAY'].map(days)

### Weekday trip time distribution (hour of trip midpoint)

In [18]:
# Percentage of weekday trips (TRAVDAY other than 1 and 7) per hour of the trip midpoint
is_weekday_trip = ~curated_NHTS['TRAVDAY'].isin([1, 7])
curated_NHTS.loc[is_weekday_trip, 'INTTIME'].value_counts(normalize=True).sort_index().mul(100)

INTTIME
0     0.130685
1     0.039205
2     0.084945
3     0.058808
4     0.392054
5     1.535546
6     3.221380
7     8.050183
8     6.841349
9     5.462624
10    6.253267
11    6.782541
12    6.730267
13    6.553842
14    7.416362
15    8.128594
16    8.318087
17    8.716675
18    6.057240
19    3.587297
20    2.476477
21    1.718505
22    0.888657
23    0.555410
Name: proportion, dtype: float64

### Saturday trip time distribution (hour of trip midpoint)

In [19]:
# Percentage of Saturday trips (TRAVDAY == 7) per hour of the trip midpoint
is_saturday_trip = curated_NHTS['TRAVDAY'] == 7
curated_NHTS.loc[is_saturday_trip, 'INTTIME'].value_counts(normalize=True).sort_index().mul(100)

INTTIME
0     0.747012
1     0.049801
2     0.149402
3     0.049801
4     0.099602
5     0.597610
6     1.095618
7     3.187251
8     4.233068
9     5.926295
10    6.872510
11    8.615538
12    8.167331
13    9.412351
14    8.416335
15    7.818725
16    7.669323
17    7.968127
18    6.374502
19    4.482072
20    3.137450
21    2.838645
22    1.145418
23    0.946215
Name: proportion, dtype: float64

### Sunday trip time distribution (hour of trip midpoint)

In [20]:
# Percentage of Sunday trips (TRAVDAY == 1) per hour of the trip midpoint
is_sunday_trip = curated_NHTS['TRAVDAY'] == 1
curated_NHTS.loc[is_sunday_trip, 'INTTIME'].value_counts(normalize=True).sort_index().mul(100)

INTTIME
0      0.199283
1      0.039857
2      0.119570
4      0.239139
5      0.358709
6      1.076126
7      2.032682
8      4.463930
9      7.413312
10     8.489438
11     9.366281
12    10.203268
13     9.844560
14     9.406138
15     8.130729
16     7.772021
17     6.974890
18     4.424073
19     3.666800
20     2.750100
21     1.873256
22     0.797130
23     0.358709
Name: proportion, dtype: float64

In [77]:
# Relative frequency of trips per hour of day, one facet per day type.
# The plotted column is INTTIME_END (hour in which the trip ends), so the axis label and
# title say "end time"; they previously said "start time", which did not match the data.
fig = px.histogram(curated_NHTS, x='INTTIME_END', facet_col='WEEKDAY',
                   nbins=24, text_auto='.1f',
                   histnorm='percent'
                   )

fig.update_yaxes(title='Relative freq. (%)')

fig.update_xaxes(title='End time (hour)',
                 dtick=2
                 )

fig.update_layout(width=1000,
                  height=500,
                  template='plotly_white',
                  bargap=0.05,
                  title={'text': "NHTS 2022 Trip end time distributions by day type",
                         'x': 0.5,
                         'y': 0.97},
                  font=dict(size=13)
                  )

fig.show()

# Avg. trip characteristics

## Avg. total daily driven distance by day of the week (*d_tot.csv*)

In [22]:
# Inspect all trips of one example household to sanity-check the per-person aggregation below
is_example_household = curated_NHTS['HOUSEID'] == 9000115729
curated_NHTS.loc[is_example_household, ['HOUSEID', 'PERSONID', 'TRIPID', 'VEHID', 'TRAVDAY', 'WEEKDAY', 'TRPMILES']]

Unnamed: 0,HOUSEID,PERSONID,TRIPID,VEHID,TRAVDAY,WEEKDAY,TRPMILES
10551,9000115729,1,1,1,7,saturday,22.665631
10552,9000115729,1,2,1,7,saturday,21.155998
10553,9000115729,1,3,1,7,saturday,34.883157
10554,9000115729,1,4,1,7,saturday,10.671846
10555,9000115729,1,5,1,7,saturday,43.419515
10556,9000115729,1,6,1,7,saturday,1.094469
10557,9000115729,1,7,1,7,saturday,1.094469
10558,9000115729,2,1,2,7,saturday,1.094469
10559,9000115729,7,1,2,7,saturday,14.976383
10560,9000115729,7,2,2,7,saturday,12.059664


In [23]:
# One row per (household, person): total miles driven on the travel day and that person's day type
per_person = curated_NHTS.groupby(['HOUSEID', 'PERSONID'], as_index=False)
d_tot = per_person.agg(
    TRPMILES=('TRPMILES', 'sum'),     # total distance over all of the person's trips
    WEEKDAY=('WEEKDAY', 'first'),     # day type taken from the person's first trip row
)

d_tot.loc[d_tot['HOUSEID'] == 9000115729]

Unnamed: 0,HOUSEID,PERSONID,TRPMILES,WEEKDAY
3781,9000115729,1,134.985084,saturday
3782,9000115729,2,1.094469,saturday
3783,9000115729,7,27.036047,saturday


### Avg. total daily driven distance - weekday

In [24]:
# Total miles per (household, person) on weekdays, then the mean converted to km (x 1.60934)
weekday_trips = curated_NHTS[curated_NHTS['WEEKDAY'] == 'weekday']
d_tot_weekday = weekday_trips.groupby(['HOUSEID', 'PERSONID'], as_index=False)['TRPMILES'].sum()

print(f"{round(d_tot_weekday['TRPMILES'].mean() * 1.60934, 2)} km")

47.39 km


### Avg. total daily driven distance - weekend

In [25]:
# Total miles per (household, person) on Saturdays and Sundays, then the mean converted to km (x 1.60934)
weekend_trips = curated_NHTS[curated_NHTS['WEEKDAY'].isin(['saturday', 'sunday'])]
d_tot_weekend = weekend_trips.groupby(['HOUSEID', 'PERSONID'], as_index=False)['TRPMILES'].sum()

print(f"{round(d_tot_weekend['TRPMILES'].mean() * 1.60934, 2)} km")

57.75 km


# Distance groups - TBD

# Occupation groups

## Avg. trip characteristics by trip purpose and day of the week

In [26]:
# Sorted list of trip-purpose summary codes present in the curated trips
np.sort(curated_NHTS['WHYTRP1S'].unique())

array([ 1, 10, 20, 30, 40, 50, 70, 80, 97], dtype=int64)

WHYTRP1S (trip purpose summary) = 1 (home) can include trips coming from any purpose (including work).

In [27]:
# Origin purposes (WHYFROM) found among trips whose purpose summary is 1 (home)
is_home_trip = curated_NHTS['WHYTRP1S'] == 1
np.sort(curated_NHTS.loc[is_home_trip, 'WHYFROM'].unique())

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19], dtype=int64)

In [28]:
# Destination purposes (WHYTO) found among trips whose purpose summary is 1 (home)
is_home_trip = curated_NHTS['WHYTRP1S'] == 1
np.sort(curated_NHTS.loc[is_home_trip, 'WHYTO'].unique())

array([1, 2], dtype=int64)

In [29]:
# Collapse the WHYTRP1S summary codes into two groups: codes 10 and 20 count as
# 'occupation', every other code seen in the data counts as 'personal'.
occupation_codes = {10, 20}
purpose = {
    code: ('occupation' if code in occupation_codes else 'personal')
    for code in (1, 10, 20, 30, 40, 50, 70, 80, 97)
}

curated_NHTS['PURPOSE'] = curated_NHTS['WHYTRP1S'].map(purpose)

### *d_min.csv* - occupation, weekday

In [30]:
# Mean trip distance for occupation trips on weekdays, converted from miles to km (x 1.60934)
miles_occupation_weekday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'weekday'), 'TRPMILES']
print(f"{round(miles_occupation_weekday.mean() * 1.60934, 8)} km")

20.22996344 km


### *d_min.csv* - occupation, saturday

In [31]:
# Mean trip distance for occupation trips on Saturdays, converted from miles to km (x 1.60934)
miles_occupation_saturday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'saturday'), 'TRPMILES']
print(f"{round(miles_occupation_saturday.mean() * 1.60934, 8)} km")

19.74173077 km


### *d_min.csv* - occupation, sunday

In [32]:
# Mean trip distance for occupation trips on Sundays, converted from miles to km (x 1.60934)
miles_occupation_sunday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'sunday'), 'TRPMILES']
print(f"{round(miles_occupation_sunday.mean() * 1.60934, 8)} km")

15.26908147 km


### *d_min.csv* - personal, weekday

In [33]:
# Mean trip distance for personal trips on weekdays, converted from miles to km (x 1.60934)
miles_personal_weekday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'weekday'), 'TRPMILES']
print(f"{round(miles_personal_weekday.mean() * 1.60934, 8)} km")

15.61215657 km


### *d_min.csv* - personal, saturday

In [34]:
# Mean trip distance for personal trips on Saturdays, converted from miles to km (x 1.60934)
miles_personal_saturday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'saturday'), 'TRPMILES']
print(f"{round(miles_personal_saturday.mean() * 1.60934, 8)} km")

21.58120648 km


### *d_min.csv* - personal, sunday

In [35]:
# Mean trip distance for personal trips on Sundays, converted from miles to km (x 1.60934)
miles_personal_sunday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'sunday'), 'TRPMILES']
print(f"{round(miles_personal_sunday.mean() * 1.60934, 8)} km")

22.87273854 km


### *t_func.csv* - occupation, weekday

In [36]:
# Mean trip duration (TRVLCMIN, minutes) for occupation trips on weekdays
minutes_occupation_weekday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'weekday'), 'TRVLCMIN']
print(f"{round(minutes_occupation_weekday.mean(), 8)} min")

24.57687075 min


### *t_func.csv* - occupation, saturday

In [37]:
# Mean trip duration (TRVLCMIN, minutes) for occupation trips on Saturdays
minutes_occupation_saturday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'saturday'), 'TRVLCMIN']
print(f"{round(minutes_occupation_saturday.mean(), 8)} min")

23.22285714 min


### *t_func.csv* - occupation, sunday

In [38]:
# Mean trip duration (TRVLCMIN, minutes) for occupation trips on Sundays
minutes_occupation_sunday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'occupation') & (curated_NHTS['WEEKDAY'] == 'sunday'), 'TRVLCMIN']
print(f"{round(minutes_occupation_sunday.mean(), 8)} min")

19.35951662 min


### *t_func.csv* - personal, weekday

In [39]:
# Mean trip duration (TRVLCMIN, minutes) for personal trips on weekdays
minutes_personal_weekday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'weekday'), 'TRVLCMIN']
print(f"{round(minutes_personal_weekday.mean(), 8)} min")

20.90682627 min


### *t_func.csv* - personal, saturday

In [40]:
# Mean trip duration (TRVLCMIN, minutes) for personal trips on Saturdays
minutes_personal_saturday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'saturday'), 'TRVLCMIN']
print(f"{round(minutes_personal_saturday.mean(), 8)} min")

24.67375887 min


### *t_func.csv* - personal, sunday

In [41]:
# Mean trip duration (TRVLCMIN, minutes) for personal trips on Sundays
minutes_personal_sunday = curated_NHTS.loc[
    (curated_NHTS['PURPOSE'] == 'personal') & (curated_NHTS['WEEKDAY'] == 'sunday'), 'TRVLCMIN']
print(f"{round(minutes_personal_sunday.mean(), 8)} min")

24.55509642 min


## Time windows

### Worker occupation - **including work from home**

In [42]:
# Trips that start from or end at a work-related purpose (WHYFROM / WHYTO codes 2 to 5)
work_codes = [2, 3, 4, 5]
worker_windows = curated_NHTS[
    curated_NHTS['WHYFROM'].isin(work_codes) | curated_NHTS['WHYTO'].isin(work_codes)
]

In [43]:
# Share of work-related trips by start time, with the two peak windows shaded in green
fig = px.histogram(
    worker_windows,
    x='START_TIME',
    text_auto='.1f',
    histnorm='percent',
)

# Shaded windows: 6:00 - 9:30 and 14:30 - 18:30
worker_peak_windows = [
    (datetime(2022, 1, 1, 6, 0), datetime(2022, 1, 1, 9, 30)),
    (datetime(2022, 1, 1, 14, 30), datetime(2022, 1, 1, 18, 30)),
]
for window_start, window_end in worker_peak_windows:
    fig.add_vrect(
        x0=window_start,
        x1=window_end,
        annotation_text='Main window: ~3%',
        annotation_position='top left',
        fillcolor="green", opacity=0.2, line_width=0,
    )

one_hour_ms = 3600*1000    # dtick is in milliseconds
fig.update_xaxes(
    title='Start time',
    range=[datetime(2022, 1, 1, 0, 0), datetime(2022, 1, 1, 23, 0)],
    showgrid=True,
    tickformat="%H:%M",
    dtick=one_hour_ms,
)

fig.update_layout(
    width=1200,
    height=500,
    template='simple_white',
    bargap=0.05,
    title={
        'text': "NHTS 2022 Trips to/from work (inc. work from home) - functioning windows",
        'x': 0.5,
        'y': 0.95,
    },
)

fig.show()

### Student occupation

In [44]:
# Trips that start from or end at purpose code 6 (used here as "school as a student")
school_code = 6
student_windows = curated_NHTS[
    (curated_NHTS[['WHYFROM', 'WHYTO']] == school_code).any(axis=1)
]

In [45]:
# (rows, columns) of the student trip subset - shows how small this sample is
student_windows.shape

(234, 37)

In [46]:
# Share of school-related trips by start time, with the two peak windows shaded in green
fig = px.histogram(
    student_windows,
    x='START_TIME',
    nbins=48,
    text_auto='.1f',
    histnorm='percent',
)

# Shaded windows: 6:30 - 9:30 and 14:00 - 16:00
student_peak_windows = [
    (datetime(2022, 1, 1, 6, 30), datetime(2022, 1, 1, 9, 30)),
    (datetime(2022, 1, 1, 14, 0), datetime(2022, 1, 1, 16, 0)),
]
for window_start, window_end in student_peak_windows:
    fig.add_vrect(
        x0=window_start,
        x1=window_end,
        annotation_text='Main window: ~3%',
        annotation_position='top left',
        fillcolor="green", opacity=0.2, line_width=0,
    )

one_hour_ms = 3600*1000    # dtick is in milliseconds
fig.update_xaxes(
    title='Start time',
    range=[datetime(2022, 1, 1, 0, 0), datetime(2022, 1, 1, 23, 0)],
    showgrid=True,
    tickformat="%H:%M",
    dtick=one_hour_ms,
)

fig.update_layout(
    width=1200,
    height=500,
    template='simple_white',
    bargap=0.05,
    title={
        'text': "NHTS 2022 Trips to/from school as a student - functioning windows",
        'x': 0.5,
        'y': 0.95,
    },
)

fig.show()

### No occupation (neither worker nor student)

In [47]:
# Trips whose origin AND destination purposes are both outside the work/school codes (2 to 6)
occupation_codes_from_to = [2, 3, 4, 5, 6]
touches_occupation = (
    curated_NHTS['WHYFROM'].isin(occupation_codes_from_to)
    | curated_NHTS['WHYTO'].isin(occupation_codes_from_to)
)
inactive_windows = curated_NHTS[~touches_occupation]

In [48]:
# Share of non-work, non-school trips by start time, with the main window shaded in green
fig = px.histogram(
    inactive_windows,
    x='START_TIME',
    nbins=48,
    text_auto='.1f',
    histnorm='percent',
)

# Shaded window: 7:30 - 19:30
fig.add_vrect(
    x0=datetime(2022, 1, 1, 7, 30),
    x1=datetime(2022, 1, 1, 19, 30),
    annotation_text='Main window: +2%',
    annotation_position='top left',
    fillcolor="green", opacity=0.2, line_width=0,
)

one_hour_ms = 3600*1000    # dtick is in milliseconds
fig.update_xaxes(
    title='Start time',
    range=[datetime(2022, 1, 1, 0, 0), datetime(2022, 1, 1, 23, 0)],
    showgrid=True,
    tickformat="%H:%M",
    dtick=one_hour_ms,
)

fig.update_layout(
    width=1200,
    height=500,
    template='simple_white',
    bargap=0.05,
    title={
        'text': "NHTS 2022 Trips to/from any activity except work or school - functioning windows",
        'x': 0.5,
        'y': 0.95,
    },
)

fig.show()

In [71]:
# Three stacked start-time histograms (workers, students, inactive) sharing one x axis.
# make_subplots is imported in the notebook's top import cell.
fig = make_subplots(rows=3, cols=1,
                    shared_xaxes=True,
                    vertical_spacing=0.03)

# One entry per subplot row: (trips, legend name, shaded windows as ((h, m), (h, m)), annotation position)
occupation_panels = [
    (worker_windows, 'Workers',
     [((6, 30), (9, 30)), ((14, 30), (18, 30))],    # 6:30 - 9:30 and 14:30 - 18:30
     'top left'),
    (student_windows, 'Students',
     [((6, 30), (9, 30)), ((14, 0), (16, 0))],      # 6:30 - 9:30 and 14:00 - 16:00
     'top left'),
    (inactive_windows, 'Inactive',
     [((9, 0), (18, 30))],                          # 9:00 - 18:30
     'top right'),
]

for row, (trips, label, windows, annotation_position) in enumerate(occupation_panels, start=1):
    fig.add_trace(
        go.Histogram(x=trips['START_TIME'],
                     nbinsx=48,
                     texttemplate='%{y:.1f}',
                     histnorm='percent',
                     name=label), col=1, row=row
    )

    # Shade this row's main activity window(s)
    for (start_hour, start_minute), (end_hour, end_minute) in windows:
        fig.add_vrect(x0=datetime(2022, 1, 1, start_hour, start_minute),
                      x1=datetime(2022, 1, 1, end_hour, end_minute),
                      row=row,
                      annotation_text='Main window: +3%',
                      annotation_position=annotation_position,
                      fillcolor="green", opacity=0.2, line_width=0)

fig.update_xaxes(title='Start time',
                 range=[datetime(2022, 1, 1, 0, 0), datetime(2022, 1, 1, 23, 0)],
                 showgrid=True,
                 tickformat="%H:%M",
                 dtick=3600*1000    # dtick is in milliseconds
                 )

fig.update_yaxes(title='Relative freq. (%)')

fig.update_layout(width=1100,
                  height=800,
                  template='simple_white',
                  bargap=0.05,
                  title={'text': "NHTS 2022 Trips to/from activity from occupation - functioning windows",
                         'x': 0.5,
                         'y': 0.95},
                  legend=dict(orientation='v',
                              y=1.1,
                              x=0.9),
                  font=dict(size=13)
                  )

fig.show()

## Share of occupations with EVs

In [50]:
# (rows, columns) of work-related trips made with a vehicle whose VEHFUEL code is 4 or 5
worker_windows[worker_windows['VEHFUEL'].isin([4, 5])].shape

(179, 37)

In [51]:
# (rows, columns) of school-related trips made with a vehicle whose VEHFUEL code is 4 or 5
student_windows[student_windows['VEHFUEL'].isin([4, 5])].shape

(0, 37)

In [52]:
# (rows, columns) of non-work, non-school trips made with a vehicle whose VEHFUEL code is 4 or 5
inactive_windows[inactive_windows['VEHFUEL'].isin([4, 5])].shape

(384, 37)

## Share of occupations

In [53]:
# (rows, columns) of the work-related trip subset
worker_windows.shape

(5870, 37)

In [54]:
# (rows, columns) of the school-related trip subset
student_windows.shape

(234, 37)

In [55]:
# (rows, columns) of the non-work, non-school trip subset
inactive_windows.shape

(13727, 37)

In [56]:
# Trips starting between 06:00 and 08:59 (both bounds inclusive)
starts_in_morning = curated_NHTS['START_TIME'].between(
    datetime(2022, 1, 1, 6, 0), datetime(2022, 1, 1, 8, 59)
)
curated_NHTS[starts_in_morning]

Unnamed: 0,HOUSEID,PERSONID,TRIPID,FRSTHM,TRAVDAY,LOOP_TRIP,DWELTIME,WHYFROM,WHYTRP1S,TRVLCMIN,...,PROXY,VEHTYPE,AVGSPEED,VEHFUEL,START_TIME,MIDTIME,INTTIME,INTTIME_END,WEEKDAY,PURPOSE
2,9000013002,2,1,1,1,2,10,1,80,30,...,2,3,34.154133,1,2022-01-01 07:00:00,2022-01-01 07:15:00,7,7,sunday,personal
3,9000013002,2,2,1,1,2,10,12,80,10,...,2,3,28.489745,1,2022-01-01 07:40:00,2022-01-01 07:45:00,7,7,sunday,personal
4,9000013002,2,3,1,1,2,-9,12,1,30,...,2,3,28.197638,1,2022-01-01 08:00:00,2022-01-01 08:15:00,8,8,sunday,personal
26,9000013099,1,1,1,4,2,4,1,80,8,...,1,1,27.781231,5,2022-01-01 08:30:00,2022-01-01 08:34:00,8,8,weekday,personal
27,9000013099,1,2,1,4,2,-9,12,1,8,...,1,1,22.597887,5,2022-01-01 08:42:00,2022-01-01 08:46:00,8,8,weekday,personal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19785,9000217878,1,1,1,6,2,0,1,70,8,...,1,3,21.633002,1,2022-01-01 07:20:00,2022-01-01 07:24:00,7,7,weekday,personal
19786,9000217878,1,2,1,6,2,455,10,1,7,...,1,3,24.547634,1,2022-01-01 07:28:00,2022-01-01 07:31:30,7,7,weekday,personal
19806,9000217982,1,2,1,2,2,420,10,1,15,...,1,3,32.174021,1,2022-01-01 06:45:00,2022-01-01 06:52:30,6,7,weekday,personal
19809,9000217992,1,1,1,4,2,520,1,10,30,...,1,3,29.767557,1,2022-01-01 07:15:00,2022-01-01 07:30:00,7,7,weekday,occupation


In [57]:
def charge_prob(SOC, k=15, per_SOC=0.53):
    """Probability of charging as a decreasing logistic function of the state of charge.

    Computes 1 - 1 / (1 + exp(-k * (SOC - per_SOC))), so the result is 0.5 at
    SOC == per_SOC, approaches 1 for lower SOC and approaches 0 for higher SOC.

    Parameters
    ----------
    SOC : float or numpy array
        State of charge; the notebook evaluates it on the range [0, 1).
    k : float, default 15
        Steepness of the logistic curve.
    per_SOC : float, default 0.53
        SOC at which the probability equals 0.5 (the curve's midpoint).

    Returns
    -------
    float or numpy array
        Probability with the same shape as SOC.
    """
    p = 1 - 1 / (1 + np.exp(-k * (SOC - per_SOC)))

    return p

# SOC grid used to plot the curve: 0.00, 0.01, ..., 0.99
x = np.arange(0, 1, 0.01)

In [78]:
# Plot the charging-probability curve over the SOC grid and mark the two SOC thresholds
fig = px.line(x=x, y=charge_prob(x))

# (SOC position, line colour, label) for each dashed vertical marker
soc_markers = [
    (0.25, 'red', 'Min SOC allowed'),
    (0.8, 'green', 'Charging ends at'),
]
for soc_value, marker_color, marker_label in soc_markers:
    fig.add_vline(
        x=soc_value,
        line_dash='dash',
        line_width=2,
        line_color=marker_color,
        annotation_position='top left',
        annotation_text=marker_label,
    )

fig.update_xaxes(title='SOC', showgrid=True)
fig.update_yaxes(title='p of charging', showgrid=True)

fig.update_layout(
    width=700,
    height=500,
    template='plotly_white',
    title={
        'text': "Logistic curve - models the prob. of charging based on the SOC of the EV",
        'x': 0.5,
        'y': 0.97,
    },
    font=dict(size=13),
)

fig.show()

In [59]:
# Standard deviation of the computed average trip speed (mph)
curated_NHTS['AVGSPEED'].std()


14.179519795728279