# SCOG Trip Distribution

## Tag survey trips by district

michael.mccarthy@rsginc.com


In [7]:
# setup
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import openmatrix as omx
import os.path


In [8]:
def freqPlot(df, var, query=None):
    """
    Show a bar chart of how often each value of one column occurs.

    Parameters:
    df: dataframe holding the column to summarise
    var: name of the column whose values are counted; also used as the
        chart title and x-axis label
    query: optional expression passed to ``DataFrame.query``; when given,
        only the matching rows are counted

    Returns nothing; the figure is displayed with ``plt.show()``.
    """
    _, axes = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))

    # None is a singleton, so test identity rather than equality.
    data = df.query(query) if query is not None else df

    counts = data[var].value_counts()
    axes.bar(counts.index, counts.values)
    axes.set_title(var)
    axes.set_xlabel(var)
    axes.set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()

# from Bishoy
def GetTripRatesUsingOneVars(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Weighted trips (and trip rates) per class of a single variable.

    Parameters:
    hh_df: household records; must contain x and hh_weight
    trips_df: trip records; must contain x, trip_weight and model_purpose
    x: classification variable to group by
    groups: trip purpose to keep (compared with model_purpose), or "all"
        to keep every purpose
    cat: "trip" returns only the weighted totals; any other value also adds
        a trips_rate column (trip_weight divided by hh_weight)
    *args, **kwargs: accepted but not used

    Returns:
    dataframe with one row per class of x present in both inputs, holding
    x, hh_weight, trip_weight and, unless cat is "trip", trips_rate;
    missing values are replaced with 0
    """
    # Weighted number of households in each class of x.
    households = hh_df.groupby([x])['hh_weight'].sum().reset_index()

    # Keep a single purpose unless every purpose was requested.
    if groups == "all":
        selected = trips_df.copy()
    else:
        selected = trips_df[trips_df["model_purpose"] == groups].copy()

    # Weighted number of trips in each class of x.
    trips = selected.groupby([x])['trip_weight'].sum().reset_index()

    # Inner join: classes missing from either side are dropped.
    result_table = pd.merge(households, trips, left_on=x, right_on=x, how='inner')
    if cat != "trip":
        result_table['trips_rate'] = result_table['trip_weight'] / result_table['hh_weight']

    return result_table.fillna(0)

def GetTripRatesUsingTwoVars(hh_df, trips_df, x, y, groups, cat, *args, **kwargs):
    """
    Cross-classify weighted trips (or trip rates) by two variables.

    Parameters:
    hh_df: household records; must contain x, y and hh_weight
    trips_df: trip records; must contain x, y, trip_weight and model_purpose
    x: classification variable placed on the columns of the result
    y: classification variable placed on the rows of the result
    groups: trip purpose to keep (compared with model_purpose), or "all"
        to keep every purpose
    cat: "trip" returns the weighted trip totals; any other value returns
        the totals divided by the weighted households of the same cell
    *args, **kwargs: accepted but not used

    Returns:
    dataframe indexed by y with one column per value of x; cells with no
    data are filled with 0
    """
    # Keep a single purpose unless every purpose was requested.
    if groups != "all":
        trips_df = trips_df[trips_df["model_purpose"] == groups]

    # aggfunc is given as the string "sum": passing np.sum is deprecated in
    # recent pandas releases and emits a FutureWarning.
    trip_totals = pd.pivot_table(trips_df, values="trip_weight", index=[y],
                                 columns=[x], aggfunc="sum")

    if cat == "trip":
        result_table = trip_totals
    else:
        # Households are only needed for rates, so pivot them only here.
        hh_totals = pd.pivot_table(hh_df, values="hh_weight", index=[y],
                                   columns=[x], aggfunc="sum")
        # Rate = weighted trips / weighted households in the same market segment.
        result_table = trip_totals / hh_totals

    return result_table.fillna(0)

def GetTripRatesUsingOneVars_Unweighted(hh_df, trips_df, x, groups, cat, *args, **kwargs):
    """
    Unweighted trips (and trip rates) per class of a single variable.

    Records are counted rather than weighted: households through hh_id and
    trips through trip_id.

    Parameters:
    hh_df: household records; must contain x and hh_id
    trips_df: trip records; must contain x, trip_id and trip_purpose
    x: classification variable to group by
    groups: trip purpose to keep (compared with trip_purpose), or "all"
        to keep every purpose
    cat: "trip" returns only the counts; any other value also adds a
        trips_rate column (trip_id count divided by hh_id count)
    *args, **kwargs: accepted but not used

    Returns:
    dataframe with one row per class of x present in both inputs, holding
    x, hh_id (household count), trip_id (trip count) and, unless cat is
    "trip", trips_rate; missing values are replaced with 0

    NOTE(review): the weighted variants in this file filter on
    model_purpose while this one filters on trip_purpose - confirm that
    the difference is intended.
    """
    # Number of household records in each class of x.
    households = hh_df.groupby([x])['hh_id'].count().reset_index()

    # Keep a single purpose unless every purpose was requested.
    if groups == "all":
        selected = trips_df.copy()
    else:
        selected = trips_df[trips_df["trip_purpose"] == groups].copy()

    # Number of trip records in each class of x.
    trips = selected.groupby([x])['trip_id'].count().reset_index()

    # Inner join: classes missing from either side are dropped.
    result_table = pd.merge(households, trips, left_on=x, right_on=x, how='inner')
    if cat != "trip":
        result_table['trips_rate'] = result_table['trip_id'] / result_table['hh_id']

    return result_table.fillna(0)

def tripQA(trips_df, lookup_pairs, qa_queries):
    """
        Add decoded-label columns and QA-flag columns to a trips dataframe.

        Parameters:
        trips_df: trips dataframe; the new columns are added to it in place
        lookup_pairs: dict of column -> lookup, such as
            {"o_purpose_category": purpose_lookup_dict}; each column is passed
            through Series.map and stored as "<column>_decode"
        qa_queries: dict of name -> expression string evaluated with
            DataFrame.eval and stored as "<name>_QA"; column names and
            operators go in one string, such as
            "(mode_type == 1 & speed_mph > 4) | (mode_type == 2 & speed_mph > 20) | (speed_mph > 70)"

        Returns:
        the same trips_df object, with the added columns
    """
    decode_suffix = "_decode"
    qa_suffix = "_QA"

    # Translate survey codes into labels.
    for coded_col, code_map in lookup_pairs.items():
        decoded = trips_df[coded_col].map(code_map)
        trips_df[coded_col + decode_suffix] = decoded

    # Evaluate each QA expression; comparison expressions give a True/False column.
    for check_name, expression in qa_queries.items():
        flagged = trips_df.eval(expression)
        trips_df[check_name + qa_suffix] = flagged

    return trips_df

def omxtoDataframe(thismatrix,indexmap,corestr):
    """
    Reshape a square matrix into a long origin/destination table.

    Parameters:
    thismatrix: 2-D matrix values (rows become origins, columns destinations)
    indexmap: mapping whose keys label both the rows and the columns
    corestr: name given to the value column of the result

    Returns:
    dataframe with columns 'origin', 'destination' and corestr, one row per
    matrix cell
    """
    zone_labels = indexmap.keys()

    # Wide form: one row per origin, one column per destination.
    wide = pd.DataFrame(thismatrix, columns=zone_labels, index=zone_labels)

    # Move the origin labels into a column named 'index', then unpivot.
    long_form = wide.reset_index().melt(id_vars='index')

    column_names = {'index':'origin', 'variable':'destination', 'value':corestr}
    return long_form.rename(columns=column_names)

In [9]:
# read in model demand matrices
# Loads model_trips_pa.csv from <projdir>/6_Calibration into `demand`.
# Later cells read the Origin, Destination, HBW, HBO and NHB columns of this table.
# NOTE(review): projdir is an absolute, user-specific Windows path, so this cell
# only runs on a machine with that folder available.
projdir = r"C:\Users\michael.mccarthy\Resource Systems Group, Inc\Projects - 2024-Model & RTP Update-240130\3_Model\Tasks"
model_path = os.path.join(projdir,'6_Calibration','model_trips_pa.csv')
demand = pd.read_csv(model_path)

In [10]:
# Load the zone-to-district lookup: keep the NO and zone_id columns and
# expose zone_id under the name District.
# Note that projdir is re-pointed from the Tasks folder to the Data folder here.
projdir = r"C:\Users\michael.mccarthy\Resource Systems Group, Inc\Projects - 2024-Model & RTP Update-240130\3_Model\Data"
districts_path = os.path.join(projdir, 'Passive Data 2019', 'TAZ2022_Districts.csv')
districts = (
    pd.read_csv(districts_path)[['NO', 'zone_id']]
    .rename(columns={'zone_id': 'District'})
)

In [11]:
# join districts to TAZs
# Each trip end is matched to its district through the lookup's NO column;
# the default inner merge keeps only rows whose zone appears in the lookup.
for taz_col, district_col in (('Origin', 'o_district'), ('Destination', 'd_district')):
    demand = (
        demand.merge(districts, left_on=taz_col, right_on='NO')
        .rename(columns={'District': district_col})
        .drop(columns=['NO'])  # the lookup key duplicates taz_col after the merge
    )

In [12]:
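# Attempted workaround: create a district mapping inside the OMX file, because
# the export from Visum does not work properly.
# This approach did not work either, so the code below is left commented out.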
# demand = omx.open_file(model_path,'a')
# demand.create_mapping('district2',districts['District'].tolist())
# demand.close()

In [13]:
# demand = omx.open_file(model_path)
# # 11 = HBW, 12 = HBO, 17 = NHB
# tazs = demand.mapping('NO')
# hbw = demand['11']
# hbo = demand['12']
# nhb = demand['17']
# # total = hbw + hbo + nhb
# 
# dist_HBW_mat = omxtoDataframe(hbw,tazs,'HBW')
# dist_HBO_mat = omxtoDataframe(hbo,tazs,'HBO')
# dist_NHB_mat = omxtoDataframe(nhb,tazs,'NHB')
# dist_total_mat = omxtoDataframe(total,districts,'Total')


In [14]:
# get district-district matrix
# Sum the HBW column of the demand table by origin district (rows) and
# destination district (columns).
dist_HBW_mat = demand.pivot_table(
    values='HBW',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
# This matrix is copied to the clipboard; the CSV export below is disabled.
# NOTE(review): the HBO and NHB matrices are written to CSV instead - confirm
# the difference is intended.
dist_HBW_mat.to_clipboard()
#dist_HBW_mat.to_csv('model_district_HBW_mtx.csv')

In [9]:
# Sum the HBO column of the demand table by origin district (rows) and
# destination district (columns), then write the matrix to CSV in the
# current working directory.
dist_HBO_mat = demand.pivot_table(
    values='HBO',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_HBO_mat.to_csv('model_district_HBO_mtx.csv')

In [10]:
# Sum the NHB column of the demand table by origin district (rows) and
# destination district (columns), then write the matrix to CSV in the
# current working directory.
dist_NHB_mat = demand.pivot_table(
    values='NHB',
    index='o_district',
    columns='d_district',
    aggfunc='sum',
)
dist_NHB_mat.to_csv('model_district_NHB_mtx.csv')