# SCOG PA to OD Factors

## Get PA/AP directional factors by purpose and time of day

michael.mccarthy@rsginc.com


In [1]:
# setup
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import openmatrix as omx


In [2]:
def freqPlot(df, var, query=None):
    """
    Draw a bar chart of how often each value of one column occurs

    Parameters:
    df: dataframe holding the column to tabulate
    var: name of the column whose values are counted; also used as the plot title and x-axis label
    query: optional DataFrame.query expression used to filter df before counting;
        None (default) counts every row

    Shows the figure with plt.show() and returns nothing.
    """
    # identity test: None is a singleton, and `!=` can be overloaded by the operand
    data = df if query is None else df.query(query)
    counts = data[var].value_counts()

    # create the figure only after the data is ready, so a bad query or column
    # name does not leave an empty figure open
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
    axes.bar(counts.index, counts.values)
    axes.set_title(var)
    axes.set_xlabel(var)
    axes.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

# from Bishoy
def GetTripRatesUsingOneVars(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Calculate weighted trip rates per household class for one cross-classification variable

    Parameters:
    hh_df: household records; must contain column x and 'hh_weight'
    trips_df: trip records; must contain column x, 'trip_weight' and 'model_purpose'
    x: name of the cross-classification column, present in both dataframes
    groups: trip purpose to keep (compared with 'model_purpose'), or "all" to keep every purpose
    cat: "trip" reports the weighted totals only; any other value also adds a 'trips_rate' column
    *args, **kwargs: accepted and ignored

    Returns:
    dataframe with one row per value of x found in hh_df, holding x, 'hh_weight',
    'trip_weight' and (unless cat == "trip") 'trips_rate' = trip_weight / hh_weight
    """
    hh_totals = hh_df.groupby([x])['hh_weight'].sum().reset_index()

    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups]

    # sum the trip weights within each class of x
    trip_totals = trips_df.groupby([x])['trip_weight'].sum().reset_index()

    # left join: a household class with no trips of this purpose must stay in the
    # table with zero trips instead of silently disappearing (an inner join drops it)
    result_table = pd.merge(hh_totals, trip_totals, on=x, how='left')
    result_table['trip_weight'] = result_table['trip_weight'].fillna(0)

    if cat != "trip":
        result_table['trips_rate'] = result_table['trip_weight'] / result_table['hh_weight']

    # a 0/0 rate (class with zero household weight and zero trips) is reported as 0
    return result_table.fillna(0)

def GetTripRatesUsingTwoVars(hh_df, trips_df, x, y, groups, cat, *args, **kwargs):
    """
    Calculate weighted trip rates per household class for two cross-classification variables

    Parameters:
    hh_df: household records; must contain columns x, y and 'hh_weight'
    trips_df: trip records; must contain columns x, y, 'trip_weight' and 'model_purpose'
    x: cross-classification column laid out across the table columns
    y: cross-classification column laid out down the table rows
    groups: trip purpose to keep (compared with 'model_purpose'), or "all" to keep every purpose
    cat: "trip" returns the weighted trip totals; any other value returns trips per household
    *args, **kwargs: accepted and ignored

    Returns:
    dataframe indexed by y with one column per value of x; empty cells are filled with 0
    """
    # aggfunc is given by name: passing np.sum raises a FutureWarning in pandas 2.x
    hh_totals = pd.pivot_table(hh_df, values="hh_weight", index=[y],
                               columns=[x], aggfunc="sum")

    # keep one trip purpose, e.g. 'nhb', 'hbo', 'hbr', 'hbw', 'hbsc', 'hbc'
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups]

    trip_totals = pd.pivot_table(trips_df, values="trip_weight", index=[y],
                                 columns=[x], aggfunc="sum")

    if cat == "trip":
        result_table = trip_totals
    else:
        # divide by the household total of the matching cell; cells present in only
        # one of the two tables come out as NaN and are zeroed below
        result_table = trip_totals / hh_totals

    return result_table.fillna(0)

def GetTripRatesUsingOneVars_Unweighted(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Calculate unweighted (record-count) trip rates per household class for one variable

    Parameters:
    hh_df: household records; must contain column x and 'hh_id'
    trips_df: trip records; must contain column x, 'trip_id' and 'trip_purpose'
    x: name of the cross-classification column, present in both dataframes
    groups: trip purpose to keep (compared with 'trip_purpose'), or "all" to keep every purpose
    cat: "trip" reports the record counts only; any other value also adds a 'trips_rate' column
    *args, **kwargs: accepted and ignored

    Returns:
    dataframe with one row per value of x found in hh_df, holding x, 'hh_id' (household
    count), 'trip_id' (trip count) and (unless cat == "trip") 'trips_rate' = trip_id / hh_id

    NOTE(review): the purpose filter reads 'trip_purpose', whereas the weighted variants
    in this file read 'model_purpose' - confirm that the difference is intended.
    """
    hh_counts = hh_df.groupby([x])['hh_id'].count().reset_index()

    if groups != "all":
        trips_df = trips_df[trips_df["trip_purpose"] == groups]

    # count the trip records within each class of x
    trip_counts = trips_df.groupby([x])['trip_id'].count().reset_index()

    # left join: a household class with no trips of this purpose must stay in the
    # table with zero trips instead of silently disappearing (an inner join drops it)
    result_table = pd.merge(hh_counts, trip_counts, on=x, how='left')
    # the join introduces NaN (and a float dtype) for such classes; restore integer counts
    result_table['trip_id'] = result_table['trip_id'].fillna(0).astype('int64')

    if cat != "trip":
        result_table['trips_rate'] = result_table['trip_id'] / result_table['hh_id']

    return result_table.fillna(0)

def tripQA(trips_df, lookup_pairs, qa_queries):
    """
    Decode coded survey columns and add one QA column per check expression

    Parameters:
    trips_df: trips dataframe; the new columns are added to it in place
    lookup_pairs: dict of column -> lookup, such as {"o_purpose_category": purpose_lookup_dict};
        each column is mapped through its lookup into a new "<column>_decode" column
    qa_queries: dict of name -> expression string evaluated with DataFrame.eval
        (column names and operators in one string), such as
        "(mode_type == 1 & speed_mph > 4) | (mode_type == 2 & speed_mph > 20) | (speed_mph > 70)";
        each result is stored in a new "<name>_QA" column

    Returns:
    the same trips_df object that was passed in
    """
    # decoded copy of every coded column
    for coded_col, code_map in lookup_pairs.items():
        decoded = trips_df[coded_col].map(code_map)
        trips_df[coded_col + "_decode"] = decoded

    # one result column per QA expression
    for check_name, expression in qa_queries.items():
        outcome = trips_df.eval(expression)
        trips_df[check_name + "_QA"] = outcome

    return trips_df

def omxtoDataframe(thismatrix,indexmap,corestr):
    """
    Reshape a matrix into long format, one row per origin/destination pair

    Parameters:
    thismatrix: 2-D values; rows become origins and columns become destinations
    indexmap: mapping whose keys label both the rows and the columns of thismatrix
    corestr: name given to the value column

    Returns:
    dataframe with columns 'origin', 'destination' and corestr
    """
    zone_labels = indexmap.keys()
    wide = pd.DataFrame(thismatrix, index=zone_labels, columns=zone_labels)

    # reset_index exposes the row labels as an 'index' column; melt stacks the rest
    stacked = wide.reset_index().melt(id_vars='index')

    renamed = {'index':'origin', 'variable':'destination', 'value':corestr}
    return stacked.rename(columns=renamed)

In [3]:
# read in survey trips dataset
# The path is relative to the working directory. Later cells read these columns:
# model_purpose, model_direction, trip_weight, depart_hour, depart_minute, duration_minutes

survey_trips = pd.read_csv('SCOG_HTS_trips.csv')


In [4]:
# work on a copy so the raw survey table stays untouched
nhb_trips = survey_trips.copy()

# recode NHB cat: collapse NHBO and NHBW into a single NHB purpose
nhb_trips['model_purpose'] = nhb_trips['model_purpose'].mask(
    nhb_trips['model_purpose'].isin(['NHBO','NHBW']), 'NHB'
)

In [6]:
# code time of day
def codePeriods(data,todPeriods,todStart,todVar):
    """
    Assign each trip to a time-of-day period using the hour of its midpoint

    The midpoint is the departure time plus half the trip duration, floored to a whole hour.
    A trip gets the period with the latest start hour that is <= its midpoint hour.
    Trips matching no period (midpoint before the earliest start, or missing) get ''.

    Parameters:
    data: trips dataframe with 'depart_hour', 'depart_minute' and 'duration_minutes'; modified in place
    todPeriods: period names, parallel to todStart (a name may appear more than once)
    todStart: start hour of each period, in any order
    todVar: name of the column that receives the period name

    Adds or overwrites the columns 'mid_hour' and todVar on data; returns None.
    Raises ValueError when todPeriods and todStart differ in length.
    """
    if len(todPeriods) != len(todStart):
        raise ValueError("todPeriods and todStart must have the same length")

    # start HH:MM in hours, plus half of duration in hours
    depart = data['depart_hour'].astype(float) + (data['depart_minute'].astype(float) / 60)
    mid_hour = np.floor(depart + ((data['duration_minutes']/2)/60))
    data['mid_hour'] = mid_hour

    # apply the periods from earliest to latest start so that a later period
    # overrides an earlier one, whatever order the caller listed them in
    labels = np.full(len(data), '', dtype=object)
    for start, period in sorted(zip(todStart, todPeriods), key=lambda pair: pair[0]):
        labels = np.where(mid_hour >= start, period, labels)
    data[todVar] = labels

# todPeriods and todStart are parallel lists: todStart[i] is the first mid-trip hour
# that belongs to todPeriods[i], and a period lasts until the next start hour.
# With the values below: AM = hours 7-8, PM = hours 14-16, OP (presumably off-peak) otherwise.
todPeriods = ["OP","AM","OP","PM","OP"] # Names of TOD periods
todStart = [0,7,9,14,17]
# adds the 'mid_hour' and 'time_period' columns to nhb_trips in place
codePeriods(nhb_trips,todPeriods,todStart,'time_period')

In [11]:
# flag peak hour (4 PM - 1600)
# mid_hour = departure time plus half of the trip duration, floored to a whole hour
nhb_trips['mid_hour'] = np.floor(nhb_trips['depart_hour'].astype(float) + (nhb_trips['depart_minute'].astype(float) / 60) + ((nhb_trips['duration_minutes']/2)/60))
# NOTE(review): the flag tests depart_hour, not the mid_hour computed on the line above,
# whereas time_period is coded from mid_hour - confirm which hour the peak flag should use.
# Only matching rows are set to 1; other rows are not assigned here (NaN if the column is new).
nhb_trips.loc[nhb_trips['depart_hour'] == 16,'peak_hour'] = 1

In [12]:
# weighted trips by time period and purpose
trips_by_purp = (
    nhb_trips
    .groupby(['time_period','model_purpose'])['trip_weight']
    .sum()
    .reset_index()
)

# same totals, split further by model_direction
trips_by_dir = (
    nhb_trips
    .groupby(['time_period','model_purpose','model_direction'])['trip_weight']
    .sum()
    .reset_index()
)

# trips flagged as peak hour only, by purpose and model_direction
trips_PA_PM_Peak = (
    nhb_trips.loc[nhb_trips['peak_hour'].eq(1)]
    .groupby(['model_purpose','model_direction'])['trip_weight']
    .sum()
    .reset_index()
)

# export the three summary tables (the default row index is written as the first column)
trips_by_purp.to_csv('weighted_trips_TOD_Purpose.csv')
trips_by_dir.to_csv('weighted_trips_TOD_Purpose_PA_AP.csv')
trips_PA_PM_Peak.to_csv('weighted_trips_PMPeak_Purpose_PA_AP.csv')