This code processes NHTS OD data to calculate the internal traffic within counties and the inflow of external traffic.

In [None]:
import geopandas as gpd
import pandas as pd

# Input locations, keyed by the role each file plays in this analysis.
# NOTE(review): '.Data/...' has no slash after the dot, unlike the './' prefix
# used for the shapefile — possibly a typo for './Data/...'; confirm against the
# actual directory layout before changing it.
input_files = dict(
    od_data='.Data/GDOT_2019_09.csv',  # NHTS Origin-Destination data file
    county_boundaries='./Georgia_county.shp',  # Georgia county boundary file from Census
)

# Load OD data from CSV
od_df = pd.read_csv(input_files['od_data'])

# Keep only rows whose origin AND destination IDs are exactly 12 characters
# long once rendered as strings (missing values render as 'nan' and drop out).
origin_is_12 = od_df['origin_zone_id'].astype(str).str.len() == 12
destination_is_12 = od_df['destination_zone_id'].astype(str).str.len() == 12
od_filtered_df = od_df[origin_is_12 & destination_is_12]

# Directional totals: sum 'mode_car' for every (origin, destination) pair
od_grouped_df = (
    od_filtered_df
    .groupby(['origin_zone_id', 'destination_zone_id'])
    .agg({'mode_car': 'sum'})
    .reset_index()
)

# Fold reversed pairs (A->B and B->A) together by ordering each pair's two IDs
# as strings. This is done column-wise rather than with a row-wise apply(),
# which is slow on large tables and returns a DataFrame (not a Series) when
# the table is empty, breaking the assignments below.
origin_str = od_grouped_df['origin_zone_id'].astype(str)
destination_str = od_grouped_df['destination_zone_id'].astype(str)
needs_swap = origin_str > destination_str
zone_low = origin_str.where(~needs_swap, destination_str)
zone_high = destination_str.where(~needs_swap, origin_str)

# The tuple column is kept on od_grouped_df so anything that reads
# 'sorted_zone' from it later still finds it.
od_grouped_df['sorted_zone'] = list(zip(zone_low, zone_high))

# Sum 'mode_car' per unordered pair. The smaller ID is reported as
# 'origin_zone_id' and the larger as 'destination_zone_id' (both as strings);
# grouping on the two columns yields the same row order as grouping on the
# sorted tuple. Columns are re-ordered to mode_car, origin, destination.
od_final_df = (
    od_grouped_df
    .assign(origin_zone_id=zone_low, destination_zone_id=zone_high)
    .groupby(['origin_zone_id', 'destination_zone_id'])
    .agg({'mode_car': 'sum'})
    .reset_index()
)[['mode_car', 'origin_zone_id', 'destination_zone_id']]

# Read the county polygons, then reproject them to EPSG:4326
county_boundaries_gdf = gpd.read_file(input_files['county_boundaries'])
county_boundaries_gdf = county_boundaries_gdf.to_crs(epsg=4326)

# Accumulator for traffic totals, keyed by county pair
county_traffic = dict()

# Lookup table built from the boundary file: 'GEOID' value -> 'NAMELSAD' name
# (when a GEOID repeats, the last record wins)
zone_to_county = dict(
    zip(county_boundaries_gdf['GEOID'], county_boundaries_gdf['NAMELSAD'])
)

# Calculate traffic between counties
#
# NOTE(review): the OD filter keeps 12-character zone IDs, while a Census
# county boundary file carries 5-character GEOIDs (2-digit state FIPS +
# 3-digit county FIPS). Presumably the zone IDs are block-group GEOIDs whose
# first five characters identify the county — confirm against the OD data
# dictionary. Looking up the full 12-character ID alone never matches such a
# table, which leaves the result empty.
COUNTY_GEOID_LENGTH = 5


def _county_for_zone(zone_id):
    """Return the county name for a zone ID, or None when nothing matches.

    The full ID is tried first so a lookup table keyed by complete zone IDs
    keeps working; otherwise the county-level prefix of the ID is used.
    """
    zone_key = str(zone_id)
    return (
        zone_to_county.get(zone_key)
        or zone_to_county.get(zone_key[:COUNTY_GEOID_LENGTH])
    )


# Walk the three needed columns in lockstep instead of building a Series per
# row with iterrows().
od_columns = zip(
    od_final_df['origin_zone_id'],
    od_final_df['destination_zone_id'],
    od_final_df['mode_car'],
)
for origin_zone, destination_zone, traffic_count in od_columns:
    # Trips that start and end in the same zone are excluded
    if origin_zone == destination_zone:
        continue

    # Map zone IDs to county names; skip the pair if either end is unknown
    origin_county = _county_for_zone(origin_zone)
    destination_county = _county_for_zone(destination_zone)
    if not (origin_county and destination_county):
        continue

    # Sorted tuple so A->B and B->A accumulate under one key; two different
    # zones in the same county produce the key (county, county).
    county_pair = tuple(sorted([origin_county, destination_county]))
    county_traffic[county_pair] = county_traffic.get(county_pair, 0) + traffic_count

# Convert the results to a DataFrame for easier viewing. Building the three
# columns directly keeps this working when no pair was matched (splitting an
# empty 'county_pair' column into two columns raises a ValueError).
county_traffic_df = pd.DataFrame(
    [
        (traffic, first_county, second_county)
        for (first_county, second_county), traffic in county_traffic.items()
    ],
    columns=['traffic', 'origin_county', 'destination_county'],
)

# Display the results
print(county_traffic_df)