# SCOG Trip Generation

## Compare trip rates to NCHRP 365 and 716

michael.mccarthy@rsginc.com


In [2]:
# setup
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt


In [3]:
def freqPlot(df, var, query=None):
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))

    if query != None:
        data = df.query(query)
    else: 
        data = df
    
    counts = data[var].value_counts()
    axes.bar(counts.index, counts.values)
    axes.set_title(var)
    axes.set_xlabel(var)
    axes.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

# from Bishoy
def GetTripRatesUsingOneVars(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Calculate trip rates per cross-classified hhh
    
    Parameters:
    HTS_df: trip records joined to hh + persons
    x: x-class variable no. 1

    groups: trip purpose, i.e., hbs, hbw, ... etc.!
    cat: usually triprate (continous variable), but if trip, we will report the absolute number of trips (discrete)
    """
    HH_PivotTable = hh_df.groupby([x])['hh_weight'].sum().reset_index()
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups].copy()
    else:
        trips_df = trips_df.copy()

    # Finally, pivot the trips using the defined x and y by summing up the trip_weight
    Trip_PivotTable = trips_df.groupby([x])['trip_weight'].sum().reset_index()
    
    if cat == "trip":
        result_table=pd.merge(HH_PivotTable, Trip_PivotTable, left_on=x, right_on=x, how='inner')

    else:
        result_table=pd.merge(HH_PivotTable, Trip_PivotTable, left_on=x, right_on=x, how='inner')
        result_table['trips_rate'] = result_table['trip_weight']/result_table['hh_weight']
    result_table=result_table.fillna(0)
        
    return result_table

def GetTripRatesUsingTwoVars(hh_df, trips_df, x, y, groups, cat, *args, **kwargs):
    """
    Calculate trip rates per cross-classified hhh
    
    Parameters:
    HTS_df: trip records joined to hh + persons
    x: x-class variable no. 1
    y: x-class variable no. 2, if any!
    groups: trip purpose, i.e., hbs, hbw, ... etc.!
    cat: usually triprate (continous variable), but if trip, we will report the absolute number of trips (discrete)
    """
   
    HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
                        columns=[x], aggfunc=np.sum)     # population
    # Filter out the specific trip purpose: ['nhb', 'hbo', 'hbr', 'hbw', 'hbsc', 'hbc']
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups].copy()
    else:
        trips_df = trips_df.copy()

    # Finally, pivot the trips using the defined x and y by summing up the trip_weight
    dta = "trip_weight"
    Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],
                    columns=[x], aggfunc=np.sum)     # population

    if cat == "trip":
        result_table = Trip_PivotTable
    else:
        result_table = Trip_PivotTable/HH_PivotTable    # get the rates by dividing by the number of hh's in the respective market segment
    result_table=result_table.fillna(0)
    return result_table

# michael
def tripQA(trips_df, lookup_pairs, qa_queries):
    """
        Method for decoding survey codes (trip purpose and mode) and run queries (check reported trip duration/speed) and export sample for QA 

        Parameters:
        trips_df: trips dataframe
        decode: dict of column, lookup pairs, such as {"o_purpose_category": purpose_lookup_dict}

    """

    for dfcol, lookup in lookup_pairs.items():
        trips_df[dfcol+"_decode"] = trips_df[dfcol].map(lookup)

    for dfcol, query in qa_queries.items():
        trips_df[dfcol+"_QA"] = trips_df.eval(query) # return True/False column

    return trips_df

def htsRecordsbyTwoVars(trips_df, count_field, x, y, groups):
     # If counting HH records, pass count_field="hh_id" and groups="all"
     # Filter out the specific trip purpose: ['nhb', 'hbo', 'hbr', 'hbw', 'hbsc', 'hbc']
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups].copy()
    else:
        trips_df = trips_df.copy()

    Trip_PivotTable = pd.pivot_table(trips_df, values=count_field, index=[y],
                    columns=[x], aggfunc='count')
    
    result_table=Trip_PivotTable.fillna(0)
    return result_table

In [4]:
# read in survey tables: households and trips

#trips = pd.read_csv('data/trip.csv')
hh = pd.read_csv('data/hh.csv')
persons = pd.read_csv('data/person.csv')
days = pd.read_csv('data/day.csv')

trips = pd.read_csv('SCOG_HTS_trips.csv')

In [5]:
# And only hh with hh_id in the accounted for trips should be considered
hh = hh[hh['hh_id'].isin(trips['hh_id'].unique())]

In [6]:
#Get specific variables for HH characteristics
# NCHRP 365 1-5+ persons
# NCHRP 716 1-5+ persons, 0-3+ workers, 0-3+ autos
hh["hh_size"] = np.where(hh["num_people"]>=5, 5, hh["num_people"])
hh["w_size"] = np.where(hh["num_workers"]>=3, 3, hh["num_workers"])
hh["v_size"] = np.where(hh["num_vehicles"]>=3, 3, hh["num_vehicles"])


In [7]:
# TAZ income category: INC1 < 25k; INC2 25-50k; INC3 50-75k; INC4 >75k
# income_broad	1	Under $25,000
# income_broad	2	$25,000-$49,999
# income_broad	3	$50,000-$74,999
# income_broad	4	$75,000-$99,999
# income_broad	5	$100,000-$199,999
# income_broad	6	$200,000 or more
# income_broad	999	Prefer not to answer


income_cat = [
    (hh['income_broad'] == 999),
    (hh['income_broad'] == 1),
    (hh['income_broad'] == 2),
    (hh['income_broad'] == 3),
    (hh['income_broad'] >= 4)
]
income_cat_labels = ['NR','INC1','INC2','INC3','INC4'] # using NR for no response
hh["hh_inc"] = np.select(income_cat, income_cat_labels, default='NR')

hh["hh_inc"].unique()

array(['INC3', 'INC2', 'INC1', 'INC4', 'NR'], dtype=object)

## Trip Purposes

In [8]:
# merge into 3 purpose: HBW, HBO, NHB
trips['model_purpose'] = np.where((trips['model_purpose'] == 'NHBW') | (trips['model_purpose'] == 'NHBO'),'NHB',trips['model_purpose'])

In [9]:
trips['model_purpose'].value_counts()

model_purpose
HBO     3058
NHB     1997
HBW      527
Loop      12
Name: count, dtype: int64

In [10]:
# trips by market segment and day of week
markettab = trips.pivot_table(index='market_segment',columns='travel_dow',values='trip_weight', aggfunc='sum')
markettab

# markettab.to_clipboard()

travel_dow,2,3,4
market_segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
II,118149.313949,106209.892332,101222.416258


In [11]:
trips = trips[(trips['market_segment']=='II') & (trips['travel_dow'].isin([2, 3, 4]))]
len(trips)

5594

## Prepare Trip Rates by HH Category

In [12]:
# drop HH classifications from intial trips dataset and merge
trips.drop(['hh_size', 'w_size'], axis=1, inplace=True)

trip_join = trips.merge(hh, right_on="hh_id", left_on="hh_id", how="left")

In [13]:
# NCHRP 365 Total trip rate by HH Size
daily_trip_rates = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size" , groups = "all", cat ="triprate")
daily_trip_rates

Unnamed: 0,hh_size,hh_weight,trip_weight,trips_rate
0,1,9230.158689,45224.49139,4.899644
1,2,17002.982012,117888.752165,6.933416
2,3,4681.874091,41415.731686,8.845973
3,4,5088.573018,61186.0782,12.024211
4,5,3893.536452,59866.569099,15.375885


In [14]:
# NCHRP 365 Total trips by HH size and Purpose (for % by purpose)
daily_trip_purpose = pd.pivot_table(trip_join, values='trip_weight', index=['hh_size'],
                    columns=['model_purpose'], aggfunc=np.sum)
daily_trip_purpose

  daily_trip_purpose = pd.pivot_table(trip_join, values='trip_weight', index=['hh_size'],


model_purpose,HBO,HBW,Loop,NHB
hh_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,21027.263969,3618.498397,136.967221,20441.761803
2,57883.208894,12921.978172,50.958844,47032.606255
3,16059.131488,7414.917407,164.567448,17777.115344
4,33917.338329,5151.688676,341.204151,21775.847045
5,36558.443937,3701.448254,,19606.676908


In [15]:
# NCHRP 365 Total trips per HH
tot_triprate = trip_join['trip_weight'].sum() / hh['hh_weight'].sum() 
tot_triprate

np.float64(8.160528573476823)

In [16]:
# NCHRP 716 HBW by Workers and Autos
hbw2D_trip_rates_aw = GetTripRatesUsingTwoVars(hh_df = hh, trips_df=trip_join,
            x = "w_size" , y = "v_size", groups = "HBW", cat ="triprate")
hbw2D_trip_rates_aw

  HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
  Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],


w_size,0,1,2,3
v_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.0,2.0,4.666667,0.0
1,0.0,0.637502,0.124155,4.125521
2,0.0,0.78234,1.50312,2.101111
3,0.0,0.431104,1.568854,2.279537


In [17]:
# NCHRP 716 HBW by Workers
hbw1D_trip_rates_w = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "w_size", groups = "HBW", cat ="triprate")
hbw1D_trip_rates_w

Unnamed: 0,w_size,hh_weight,trip_weight,trips_rate
0,1,13090.527935,8590.358005,0.656227
1,2,12286.788283,17985.581819,1.463815
2,3,2200.86143,6232.591082,2.831887


In [18]:
# NCHRP 716 HBO by Workers and HH Size
hbo2D_trip_rates_wh = GetTripRatesUsingTwoVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size" , y = "w_size", groups = "HBO", cat ="triprate")
hbo2D_trip_rates_wh

  HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
  Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],


hh_size,1,2,3,4,5
w_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2.798838,4.564784,6.201858,9.608705,4.0
1,1.645952,3.394797,5.66187,9.155361,9.418676
2,0.0,2.253964,3.122636,5.493946,9.85513
3,0.0,0.0,1.975433,0.0,10.975729


In [19]:
# NCHRP 716 HBO by Workers
hbo1D_trip_rates_w = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "w_size", groups = "HBO", cat ="triprate")
hbo1D_trip_rates_w

Unnamed: 0,w_size,hh_weight,trip_weight,trips_rate
0,0,12318.946614,48750.081446,3.957325
1,1,13090.527935,63066.75978,4.81774
2,2,12286.788283,46980.630435,3.823671
3,3,2200.86143,6647.914955,3.020597


In [20]:
# NCHRP 716 HBO by HH Size
hbo1D_trip_rates_h = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size", groups = "HBO", cat ="triprate")
hbo1D_trip_rates_h

Unnamed: 0,hh_size,hh_weight,trip_weight,trips_rate
0,1,9230.158689,21027.263969,2.278104
1,2,17002.982012,57883.208894,3.404297
2,3,4681.874091,16059.131488,3.430065
3,4,5088.573018,33917.338329,6.665393
4,5,3893.536452,36558.443937,9.389521


In [21]:
# NCHRP 716 HBO by Vehicles and HH Size
hbo2D_trip_rates_vh = GetTripRatesUsingTwoVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size" , y = "v_size", groups = "HBO", cat ="triprate")
hbo2D_trip_rates_vh

  HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
  Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],


hh_size,1,2,3,4,5
v_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,1.480838,3.666667,0.333333,0.0,0.0
1,2.386815,4.780718,3.80103,3.533538,6.300972
2,2.104055,3.24801,4.204322,7.033041,7.807256
3,1.874758,3.079849,2.609421,6.876337,12.79524


In [22]:
# NCHRP 716 HBO by Vehicles
hbo1D_trip_rates_v = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "v_size", groups = "HBO", cat ="triprate")
hbo1D_trip_rates_v

Unnamed: 0,v_size,hh_weight,trip_weight,trips_rate
0,0,125.702128,192.56431,1.53191
1,1,10790.357526,34732.055935,3.218805
2,2,16031.87786,71536.076577,4.462115
3,3,12949.186748,58984.689794,4.555088


In [23]:
# NCHRP 716 NHB by Workers and HH Size
nhb2D_trip_rates_wh = GetTripRatesUsingTwoVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size" , y = "w_size", groups = "NHB", cat ="triprate")
nhb2D_trip_rates_wh

  HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
  Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],


hh_size,1,2,3,4,5
w_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2.470662,2.700375,2.982175,4.583387,2.0
1,1.903906,2.252453,3.224555,5.643823,5.411596
2,0.0,3.150189,2.84497,3.408436,5.659239
3,0.0,0.0,5.336044,4.20985,2.0


In [24]:
# NCHRP 716 NHB by Workers
nhb1D_trip_rates_w = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "w_size", groups = "NHB", cat ="triprate")
nhb1D_trip_rates_w

Unnamed: 0,w_size,hh_weight,trip_weight,trips_rate
0,0,12318.946614,32415.365248,2.631342
1,1,13090.527935,42113.831696,3.217122
2,2,12286.788283,41675.250733,3.391875
3,3,2200.86143,10429.559677,4.738853


In [25]:
# NCHRP 716 NHB by HH Size
nhb1D_trip_rates_h = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size", groups = "NHB", cat ="triprate")
nhb1D_trip_rates_h

Unnamed: 0,hh_size,hh_weight,trip_weight,trips_rate
0,1,9230.158689,20441.761803,2.214671
1,2,17002.982012,47032.606255,2.766139
2,3,4681.874091,17777.115344,3.797008
3,4,5088.573018,21775.847045,4.279362
4,5,3893.536452,19606.676908,5.035699


In [26]:
# NCHRP 716 NHB by Vehicles and HH Size
nhb2D_trip_rates_vh = GetTripRatesUsingTwoVars(hh_df = hh, trips_df=trip_join,
            x = "hh_size" , y = "v_size", groups = "NHB", cat ="triprate")
nhb2D_trip_rates_vh

  HH_PivotTable = pd.pivot_table(hh_df, values="hh_weight", index=[y],
  Trip_PivotTable = pd.pivot_table(trips_df, values=dta, index=[y],


hh_size,1,2,3,4,5
v_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,3.697666,1.666667,0.333333,0.0,0.0
1,2.3222,2.189347,4.15484,2.900218,2.0
2,2.165785,2.647305,2.573129,4.455878,4.274112
3,1.104875,3.110212,4.572429,4.355331,6.94914


In [27]:
# NCHRP 716 NHB by Vehicles
nhb1D_trip_rates_v = GetTripRatesUsingOneVars(hh_df = hh, trips_df=trip_join,
            x = "v_size", groups = "NHB", cat ="triprate")
nhb1D_trip_rates_v.to_clipboard()