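This code processes NHTS origin-destination (OD) data to calculate the internal traffic within counties and the inflow of external traffic. The cell below aggregates car trips (`mode_car`) between pairs of distinct counties, with both travel directions combined.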

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np

# Locations of the input data sets
input_files = dict(
    od_data='./Data/GDOT_2019_09.csv',  # NHTS Origin-Destination data file
    county_boundaries='./Georgia_county.shp',  # Georgia county boundary file from Census
)

# The two zone-ID columns are used together in every step below
zone_columns = ['origin_zone_id', 'destination_zone_id']

# Read the raw OD table
od_df = pd.read_csv(input_files['od_data'])

# Keep only rows whose origin AND destination IDs are exactly 12 characters
# long when rendered as strings
origin_has_12_chars = od_df['origin_zone_id'].astype(str).str.len().eq(12)
destination_has_12_chars = od_df['destination_zone_id'].astype(str).str.len().eq(12)
od_filtered_df = od_df[origin_has_12_chars & destination_has_12_chars]

# Total car trips ('mode_car') per directed origin/destination zone pair
od_grouped_df = od_filtered_df.groupby(zone_columns, as_index=False)['mode_car'].sum()

# Order each pair so the smaller ID comes first; A->B and B->A then share
# one key. The grouped frame has a fresh 0..n-1 index, which the new
# DataFrame's default index matches row for row.
sorted_zone_pairs = pd.DataFrame(np.sort(od_grouped_df[zone_columns], axis=1))
od_grouped_df[zone_columns] = sorted_zone_pairs

# Sum again over the now direction-independent zone pairs
od_final_df = od_grouped_df.groupby(zone_columns, as_index=False)['mode_car'].sum()

# Load the county boundary shapefile (reprojected to EPSG:4326; only its
# attribute columns 'GEOID' and 'NAMELSAD' are used below)
county_boundaries_gdf = gpd.read_file(input_files['county_boundaries']).to_crs(epsg=4326)

# Map from county GEOID to county name based on 'NAMELSAD'. Keys are forced to
# strings so they compare equal to the string form of the zone IDs.
zone_to_county = dict(
    zip(
        county_boundaries_gdf['GEOID'].astype(str),
        county_boundaries_gdf['NAMELSAD'],
    )
)

# Looking up the raw 12-character zone IDs directly never matches a county
# GEOID (different length, and IDs read from CSV are typically integers while
# GEOIDs are strings), which leaves the result empty. Instead, look up the
# leading characters of each zone ID, using as many characters as the GEOID
# keys have.
# NOTE(review): this assumes the zone IDs are Census GEOIDs whose leading
# characters are the county GEOID (state + county FIPS) -- confirm against
# the OD data dictionary.
county_geoid_length = max((len(geoid) for geoid in zone_to_county), default=0)

# Map 'origin_zone_id' and 'destination_zone_id' to county names
for zone_column, county_column in (
    ('origin_zone_id', 'origin_county'),
    ('destination_zone_id', 'destination_county'),
):
    od_final_df[county_column] = (
        od_final_df[zone_column]
        .astype(str)
        .str[:county_geoid_length]
        .map(zone_to_county)
    )

# Remove rows where mapping failed or origin and destination are the same.
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a slice of another frame.
od_final_df = od_final_df.dropna(subset=['origin_county', 'destination_county'])
od_final_df = od_final_df[
    od_final_df['origin_county'] != od_final_df['destination_county']
].copy()

# Put each county pair in sorted order so bi-directional traffic is combined.
# The sorted values are assigned as plain arrays (by position): after the
# filtering above the frame no longer has a 0..n-1 index, so wrapping them in
# a new DataFrame would realign rows by index label and scramble or blank out
# the county pairs.
county_pairs = np.sort(
    od_final_df[['origin_county', 'destination_county']].to_numpy(), axis=1
)
od_final_df['origin_county'] = county_pairs[:, 0]
od_final_df['destination_county'] = county_pairs[:, 1]

# Total car traffic per unordered county pair
county_traffic_df = od_final_df.groupby(
    ['origin_county', 'destination_county'], as_index=False
)['mode_car'].sum()

# Rename column for clarity
county_traffic_df = county_traffic_df.rename(columns={'mode_car': 'traffic'})

# Display the results
print(county_traffic_df)