# SCOG Trip Distribution

## Tag survey trips by district

michael.mccarthy@rsginc.com


In [25]:
# setup
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import openmatrix as omx
import os.path


In [26]:
def freqPlot(df, var, query=None):
    """
    Show a bar chart of how often each value of a column occurs.

    Parameters:
    df: dataframe holding the column to summarise
    var: name of the column whose value counts are plotted
    query: optional DataFrame.query expression; when given, only the matching
        rows are counted. None (the default) uses every row of df.

    Returns:
    None. The figure is displayed with plt.show().
    """
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))

    # Identity test against the None singleton (PEP 8) rather than `!=`.
    data = df.query(query) if query is not None else df

    counts = data[var].value_counts()
    ax.bar(counts.index, counts.values)
    ax.set_title(var)
    ax.set_xlabel(var)
    ax.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

# from Bishoy
def GetTripRatesUsingOneVars(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Weighted trip totals and trip rates by a single classification variable.

    Parameters:
    hh_df: household records; needs column x and 'hh_weight'
    trips_df: trip records; needs column x and 'trip_weight', plus
        'model_purpose' whenever groups is not "all"
    x: name of the classification column present in both tables
    groups: value of 'model_purpose' to keep, or "all" to keep every trip
    cat: "trip" returns only the summed weights; any other value also adds a
        'trips_rate' column (trip_weight divided by hh_weight)

    Returns:
    DataFrame with one row per value of x found in both tables (inner join),
    with NaN replaced by 0. Extra positional/keyword arguments are ignored.
    """
    # Weighted number of households in each class.
    hh_totals = hh_df.groupby([x])['hh_weight'].sum().reset_index()

    # Keep only the requested purpose; either way work on a copy.
    if groups == "all":
        selected_trips = trips_df.copy()
    else:
        selected_trips = trips_df[trips_df["model_purpose"] == groups].copy()

    # Weighted number of trips in each class.
    trip_totals = selected_trips.groupby([x])['trip_weight'].sum().reset_index()

    rates = hh_totals.merge(trip_totals, left_on=x, right_on=x, how='inner')
    if cat != "trip":
        rates['trips_rate'] = rates['trip_weight'] / rates['hh_weight']

    return rates.fillna(0)

def GetTripRatesUsingTwoVars(hh_df, trips_df, x, y, groups, cat, *args, **kwargs):
    """
    Weighted trip totals or trip rates cross-classified by two variables.

    Parameters:
    hh_df: household records; needs columns x, y and 'hh_weight'
    trips_df: trip records; needs columns x, y and 'trip_weight', plus
        'model_purpose' whenever groups is not "all"
    x: classification column used for the columns of the result
    y: classification column used for the rows of the result
    groups: value of 'model_purpose' to keep, or "all" to keep every trip
    cat: "trip" returns the summed trip weights; any other value returns the
        trip weights divided by the household weights of the same cell

    Returns:
    DataFrame indexed by y with one column per value of x; cells with no data
    are 0. Extra positional/keyword arguments are ignored.
    """
    # Weighted households per (y, x) cell. The string "sum" is used instead of
    # np.sum: same aggregation, but it avoids the pandas FutureWarning raised
    # when a NumPy callable is passed as aggfunc.
    hh_pivot = pd.pivot_table(hh_df, values="hh_weight", index=[y],
                              columns=[x], aggfunc="sum")

    # Keep only the requested trip purpose (trips_df itself is not modified).
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups]

    # Weighted trips per (y, x) cell.
    trip_pivot = pd.pivot_table(trips_df, values="trip_weight", index=[y],
                                columns=[x], aggfunc="sum")

    if cat == "trip":
        result_table = trip_pivot
    else:
        # Rate = trips divided by households in the same market segment.
        result_table = trip_pivot / hh_pivot

    return result_table.fillna(0)

def GetTripRatesUsingOneVars_Unweighted(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Unweighted record counts and trip rates by a single classification variable.

    Parameters:
    hh_df: household records; needs column x and 'hh_id'
    trips_df: trip records; needs column x and 'trip_id', plus 'trip_purpose'
        whenever groups is not "all"
    x: name of the classification column present in both tables
    groups: value of 'trip_purpose' to keep, or "all" to keep every trip
    cat: "trip" returns only the counts; any other value also adds a
        'trips_rate' column (trip_id count divided by hh_id count)

    Returns:
    DataFrame with one row per value of x found in both tables (inner join),
    with NaN replaced by 0. Extra positional/keyword arguments are ignored.
    """
    # Number of household records in each class.
    hh_counts = hh_df.groupby([x])['hh_id'].count().reset_index()

    # NOTE(review): this filters on 'trip_purpose' while the weighted variants
    # filter on 'model_purpose' -- confirm the difference is intended.
    if groups == "all":
        selected_trips = trips_df.copy()
    else:
        selected_trips = trips_df[trips_df["trip_purpose"] == groups].copy()

    # Number of trip records in each class.
    trip_counts = selected_trips.groupby([x])['trip_id'].count().reset_index()

    rates = hh_counts.merge(trip_counts, left_on=x, right_on=x, how='inner')
    if cat != "trip":
        rates['trips_rate'] = rates['trip_id'] / rates['hh_id']

    return rates.fillna(0)

def tripQA(trips_df, lookup_pairs, qa_queries):
    """
        Decode coded survey columns and flag rows with QA expressions.

        Parameters:
        trips_df: trips dataframe; new columns are added to it in place
        lookup_pairs: dict of {column name: lookup}, such as
            {"o_purpose_category": purpose_lookup_dict}. Each column is mapped
            through its lookup and stored as "<column>_decode".
        qa_queries: dict of {label: expression}. Each expression is a single
            string of column names and operators evaluated with
            DataFrame.eval, such as
            "(mode_type == 1 & speed_mph > 4) | (mode_type == 2 & speed_mph > 20) | (speed_mph > 70)",
            and its result is stored as "<label>_QA".

        Returns:
        The same trips_df object, with the added columns.
    """

    # Decoded copies of the coded columns.
    for coded_col, code_map in lookup_pairs.items():
        decoded_values = trips_df[coded_col].map(code_map)
        trips_df[coded_col + "_decode"] = decoded_values

    # One result column per QA expression.
    for label, expression in qa_queries.items():
        flags = trips_df.eval(expression)
        trips_df[label + "_QA"] = flags

    return trips_df

def omxtoDataframe(thismatrix,indexmap,corestr):
    """
    Reshape a square matrix into a long origin/destination table.

    Parameters:
    thismatrix: 2-D array-like whose rows and columns are both labelled by the
        keys of indexmap (presumably an OMX matrix core -- confirm with caller)
    indexmap: mapping whose keys, in order, label the rows and the columns
    corestr: name given to the value column of the result

    Returns:
    DataFrame with columns 'origin', 'destination' and corestr, one row per
    matrix cell.
    """
    labels = indexmap.keys()
    # Wide form: rows are origins, columns are destinations.
    wide = pd.DataFrame(thismatrix, columns=labels, index=labels)
    # Long form: one row per (origin, destination) cell.
    long_form = wide.reset_index().melt(id_vars='index')
    return long_form.rename(columns = {'index':'origin', 'variable':'destination', 'value':corestr})

In [27]:
# Load the survey trip records.
survey_trips = pd.read_csv('SCOG_HTS_trips_toNewZones_Autos.csv')

# Copy trip_weight into driver_trip_weight, but only on rows where
# mode_type is 8 and driver is 1; other rows are not assigned.
survey_trips.loc[
    survey_trips['mode_type'].eq(8) & survey_trips['driver'].eq(1),
    'driver_trip_weight',
] = survey_trips['trip_weight']

# Load the zone-to-district lookup, keeping the zone number ('NO') and the
# district code, which is renamed from 'zone_id' to 'District'.
projdir = r"C:\Users\michael.mccarthy\Resource Systems Group, Inc\Projects - 2024-Model & RTP Update-240130\3_Model\Data"
districts_path = os.path.join(projdir,'Passive Data 2019','TAZ2022_Districts.csv')
districts = (
    pd.read_csv(districts_path)[['NO','zone_id']]
    .rename(columns={'zone_id':'District'})
)


  survey_trips = pd.read_csv('SCOG_HTS_trips_toNewZones_Autos.csv')


In [28]:
# Attach a district to each trip end: first by origin zone (o_taz), then by
# destination zone (d_taz). Both merges use the default inner join, so trips
# whose zone is missing from the districts table are dropped. The duplicated
# lookup keys (NO_x, NO_y) are removed at the end.
survey_trips = (
    survey_trips
    .merge(districts, left_on='o_taz', right_on='NO')
    .rename(columns={'District':'o_district'})
    .merge(districts, left_on='d_taz', right_on='NO')
    .rename(columns={'District':'d_district'})
    .drop(columns=['NO_x','NO_y'])
)

In [29]:
# District-to-district matrix for all purposes: summed driver_trip_weight with
# origin districts as rows and destination districts as columns.
dist_mat = pd.pivot_table(
    survey_trips,
    values='driver_trip_weight',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_mat.to_csv('survey_district_mtx.csv')

In [30]:
# District-to-district matrix restricted to trips with model_purpose 'HBW'.
dist_HBW_mat = pd.pivot_table(
    survey_trips[survey_trips['model_purpose'] == 'HBW'],
    values='driver_trip_weight',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_HBW_mat.to_csv('survey_district_HBW_mtx.csv')

In [31]:
# District-to-district matrix restricted to trips with model_purpose 'HBO'.
dist_HBO_mat = pd.pivot_table(
    survey_trips[survey_trips['model_purpose'] == 'HBO'],
    values='driver_trip_weight',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_HBO_mat.to_csv('survey_district_HBO_mtx.csv')

In [32]:
# District-to-district matrix restricted to trips whose model_purpose is
# 'NHBO' or 'NHBW'.
dist_NHB_mat = pd.pivot_table(
    survey_trips[survey_trips['model_purpose'].isin(['NHBO','NHBW'])],
    values='driver_trip_weight',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_NHB_mat.to_csv('survey_district_NHB_mtx.csv')