### Multi-Modal Demand Matrix Generation for Brooklyn

This notebook demonstrates advanced demand modeling with:
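- **4 travel modes**: Walk, Drive, Bike, and Transit (transit demand is generated synthetically, because grid2demand only models auto, walk, and bike)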
- **3 trip purposes**: HBW (Home-Based Work), HBO (Home-Based Other), NHB (Non-Home-Based)
- **Network-based distances**: Shortest paths instead of straight-line distances
- **Brooklyn-calibrated trip rates**: Based on CMS 2019 survey data and NYC CEQR

In [1]:
# Setup and Imports
import os

import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import osmnx as ox

import grid2demand as gd
import osm2gmns as og

import warnings
warnings.filterwarnings('ignore')

# Working directory: every file this notebook reads or writes lives under it.
output_dir = 'data'

# Create the directory up front; exist_ok makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

# Location of the raw OpenStreetMap extract inside the working directory.
osm_filepath = "/".join((output_dir, "map.osm"))

In [None]:
# RUN ONCE: Select, Download, and Save OSM Network Data for Grid2Demand Using osm2gmns Library
#
# The download is skipped when a non-empty extract already exists at
# `osm_filepath`, so "Restart Kernel -> Run All" does not hit the OSM servers
# again. Set `force_osm_download = True` to refresh the extract.

# OSM Area ids
# search at: nominatim.openstreetmap.org
brooklyn_id = 369518
antwerp_id = 53114
brussels_id = 2404020
ghent_id = 897671
lviv_id = 2032280

# Flip to True to re-download even though a cached extract is present.
force_osm_download = False

# A zero-byte file is treated as "not downloaded" so that an interrupted
# download does not get mistaken for a usable cache.
osm_extract_cached = os.path.exists(osm_filepath) and os.path.getsize(osm_filepath) > 0

if force_osm_download or not osm_extract_cached:
    # Download directly from OSM
    print("Downloading OSM network...")
    og.downloadOSMData(brooklyn_id, osm_filepath)
else:
    print(f"Using cached OSM extract: {osm_filepath}")

# Open OSM, convert to GMNS, and clean as desired
print("Loading and converting OSM network to GMNS...")
net = og.getNetFromFile(osm_filepath, mode_types=['auto', 'railway', 'walk', 'bike'], POI=True)
og.fillLinkAttributesWithDefaultValues(net)
# Optional clean-up step, disabled in this notebook:
#og.consolidateComplexIntersections(net, auto_identify=True)

# Save to CSV files in GMNS format
og.outputNetToCSV(net, output_dir)

print(f"  Nodes: {net.number_of_nodes:,}")
print(f"  Links: {net.number_of_links:,}")


In [None]:
# Multi-Modal Gravity Model Implementation: configuration
#
# Everything a reader might want to tune for the demand run is declared here:
# the modes handed to grid2demand, the trip purposes, the per-mode/per-purpose
# friction parameters, the trip-rate file, and the (initially empty) result store.

print("=" * 70)
print("BROOKLYN MULTI-MODAL DEMAND MODELING")
print("=" * 70)


def _friction(alpha, beta, gamma):
    """Bundle one (alpha, beta, gamma) friction-factor triple into a fresh dict."""
    return {'alpha': alpha, 'beta': beta, 'gamma': gamma}


# Modes run through grid2demand (it only supports these 3) and purpose codes.
modes = ['auto', 'walk', 'bike']
trip_purposes = [1, 2, 3]  # HBW, HBO, NHB
purpose_names = dict(zip(
    trip_purposes,
    ('HBW (Home-Based Work)', 'HBO (Home-Based Other)', 'NHB (Non-Home-Based)'),
))

# Friction factors per mode and purpose.
# They control distance decay in the gravity model:
#     F_ij = alpha * d^beta * exp(gamma * d)
# The values are calibration attempts against these Brooklyn mode-split targets:
#     Walk ~30%, Auto ~35%, Transit ~32%, Bike ~3%
# Each mode maps purpose code -> parameters, plus a 'description' label.
mode_params = {}

mode_params['auto'] = {
    1: _friction(3000, -0.70, -0.12),   # HBW - reduced to lower auto share
    2: _friction(3000, -0.70, -0.12),   # HBO
    3: _friction(1200, -0.60, -0.08),   # NHB - further reduced to prevent overflow
    'description': 'Auto/Vehicle - with congestion/parking penalties',
}

mode_params['walk'] = {
    1: _friction(80000, -1.30, -0.12),  # HBW - increased alpha, reduced beta/gamma
    2: _friction(80000, -1.30, -0.12),  # HBO
    3: _friction(40000, -1.15, -0.08),  # NHB - further reduced to prevent overflow
    'description': 'Walk - high scale, moderate distance sensitivity for short trips',
}

mode_params['bike'] = {
    1: _friction(1500, -1.90, -0.28),   # HBW - dramatically reduced to lower bike share
    2: _friction(1500, -1.90, -0.28),   # HBO
    3: _friction(600, -1.70, -0.20),    # NHB - further reduced to prevent overflow
    'description': 'Bike - low scale, high distance sensitivity (niche mode)',
}

mode_params['transit'] = {
    1: _friction(10000, -0.60, -0.10),  # HBW - competitive with auto for medium distances
    2: _friction(10000, -0.60, -0.10),  # HBO
    3: _friction(6000, -0.50, -0.08),   # NHB - reduced to prevent overflow
    'description': 'Transit/Railway - competitive for medium-long distances (synthetic)',
}

# Trip rate file with all 3 purposes properly formatted
trip_rate_file = 'settings/brooklyn_poi_trip_rate_all_purposes.csv'

# Result store, keyed '<mode>_p<purpose>'; emptied every time this cell runs.
all_demand_matrices = {}

# Loop through each mode supported by grid2demand.
#
# For every mode a GRID2DEMAND object is built from `output_dir`, zones are
# generated and mapped, and the gravity model is run once per trip purpose.
# grid2demand writes its results under fixed names ('demand.csv' and
# 'demand_od_matrix.csv'), so each result is renamed to include the mode and
# purpose before the next run overwrites it. The long-format table is also
# kept in `all_demand_matrices` under the key '<mode>_p<purpose>'.
import shutil


def _claim_output(produced_path, final_path):
    """Rename a grid2demand output file to its mode/purpose-specific name.

    Returns True when `produced_path` existed and was moved to `final_path`,
    False when there was nothing to move.
    """
    if not os.path.exists(produced_path):
        return False
    shutil.move(produced_path, final_path)
    return True


for mode in modes:
    print(f"\n{'='*70}")
    print(f"MODE: {mode.upper()}")
    print(f"  {mode_params[mode]['description']}")
    print(f"{'='*70}")

    # Create GRID2DEMAND object for this mode
    net = gd.GRID2DEMAND(
        input_dir=output_dir,
        mode_type=mode
    )

    # Load network files
    print(f"\n[{mode}] Loading network...")
    net.load_network()

    # Generate zones (only need to do this once, but doing per mode is fine)
    print(f"[{mode}] Generating zones (1250m x 1250m grid)...")
    net.net2grid(cell_width=1250, cell_height=1250, unit="meter")

    # Load zones
    net.taz2zone()

    # Map zones with nodes and POIs
    print(f"[{mode}] Mapping zones with nodes and POIs...")
    net.map_zone_node_poi()

    # Calculate zone-to-zone distance matrix
    print(f"[{mode}] Calculating zone-to-zone distances...")
    net.calc_zone_od_distance(pct=1.0)

    # Loop through each trip purpose
    for purpose in trip_purposes:
        # Get purpose-specific parameters
        params = mode_params[mode][purpose]
        print(f"\n  [{mode} - Purpose {purpose}] Running gravity model for {purpose_names[purpose]}...")
        print(f"    Parameters: α={params['alpha']}, β={params['beta']}, γ={params['gamma']}")

        try:
            # Run gravity model
            net.run_gravity_model(
                trip_rate_file=trip_rate_file,
                trip_purpose=purpose,
                alpha=params['alpha'],
                beta=params['beta'],
                gamma=params['gamma']
            )

            # Define output filenames
            demand_file_long = f'{output_dir}/demand_{mode}_purpose{purpose}.csv'
            demand_file_matrix = f'{output_dir}/demand_{mode}_purpose{purpose}_matrix.csv'

            # Save results - grid2demand creates both 'demand.csv' (long format) and 'demand_od_matrix.csv' (matrix format)
            net.save_results_to_csv(
                demand_od_matrix=True,
                overwrite_file=True
            )

            # Rename long-format file (demand.csv) - this has the 'volume' column
            long_file_claimed = _claim_output(f'{output_dir}/demand.csv', demand_file_long)

            # Rename matrix-format file (demand_od_matrix.csv)
            _claim_output(f'{output_dir}/demand_od_matrix.csv', demand_file_matrix)

            # Store the long-format table for later analysis, but only when it
            # was just moved into place. Checking the renamed path instead
            # would pick up a file left behind by an earlier run and report
            # stale numbers as if they came from this one.
            if long_file_claimed:
                all_demand_matrices[f'{mode}_p{purpose}'] = pd.read_csv(demand_file_long)
                print(f"    ✓ Saved to {demand_file_long}")
            else:
                print(f"    ⚠ Warning: Could not find output file")

        except OverflowError:
            print(f"    ✗ ERROR: Numeric overflow in gravity model calculation")
            print(f"      This usually means friction parameters are too extreme.")
            print(f"      Skipping {mode} - Purpose {purpose}")
        except Exception as e:
            # Deliberate best-effort: one failing mode/purpose combination is
            # reported and skipped so the remaining combinations still run.
            print(f"    ✗ ERROR: {type(e).__name__}: {str(e)}")
            print(f"      Skipping {mode} - Purpose {purpose}")

# Generate SYNTHETIC TRANSIT demand (workaround for grid2demand limitation)
#
# grid2demand is only run for auto/walk/bike here, so transit volumes are
# produced by applying a distance-decay friction factor to every OD pair of the
# zone-to-zone distance matrix and scaling the result to a fixed total.
import traceback

import numpy as np

# Total volume given to each synthetic transit matrix (one per purpose).
# NOTE(review): this is an arbitrary placeholder, not derived from trip rates
# or from the ~32% transit target. Because every purpose is normalised to this
# same total, `alpha` cancels out and only `beta`/`gamma` shape the result.
TRANSIT_TRIPS_PER_PURPOSE = 1e9

# Save combined zone and distance matrices (same across all modes).
# This must happen BEFORE the transit step: the transit step reads
# zone_od_dist_matrix.csv back from disk, and this is the call that requests it.
print(f"\n{'='*70}")
print("Saving zone and distance data...")
net.save_results_to_csv(
    zone_od_dist_matrix=True,
    overwrite_file=True
)
print("✓ Zone data saved")

print(f"\n{'='*70}")
print(f"MODE: TRANSIT (SYNTHETIC)")
print(f"  {mode_params['transit']['description']}")
print(f"{'='*70}")
print("\nNote: grid2demand only supports auto/bike/walk modes.")
print("Creating synthetic transit demand using custom gravity model...")

# Zone file and distance matrix written by grid2demand
zone_file = f'{output_dir}/zone.csv'
dist_matrix_file = f'{output_dir}/zone_od_dist_matrix.csv'

if os.path.exists(zone_file) and os.path.exists(dist_matrix_file):
    # Read distance matrix (in wide format) and convert to long format:
    # one row per (o_zone_id, d_zone_id) pair with its distance.
    dist_matrix_wide = pd.read_csv(dist_matrix_file)

    print("\n  Converting distance matrix from wide to long format...")
    dist_df = dist_matrix_wide.melt(id_vars=['o_zone_id'], var_name='d_zone_id', value_name='distance')
    # Column headers that are not zone ids become NaN and are dropped.
    dist_df['d_zone_id'] = pd.to_numeric(dist_df['d_zone_id'], errors='coerce')
    dist_df = dist_df.dropna(subset=['d_zone_id'])
    dist_df['d_zone_id'] = dist_df['d_zone_id'].astype(int)
    dist_df['distance'] = pd.to_numeric(dist_df['distance'], errors='coerce')

    print(f"  Distance matrix: {len(dist_df)} OD pairs")

    # Loop-invariant: distances and the mask of pairs that can carry demand.
    # Zero, negative and missing distances get zero friction; with the negative
    # beta values used here d^beta is not finite at d = 0.
    distances = dist_df['distance'].to_numpy(dtype=float)
    usable = np.isfinite(distances) & (distances > 0)

    for purpose in trip_purposes:
        print(f"\n  [transit - Purpose {purpose}] Generating synthetic demand for {purpose_names[purpose]}...")
        params = mode_params['transit'][purpose]
        print(f"    Parameters: α={params['alpha']}, β={params['beta']}, γ={params['gamma']}")

        try:
            alpha = params['alpha']
            beta = params['beta']
            gamma = params['gamma']

            # Friction factor: F_ij = alpha * d^beta * exp(gamma * d)
            friction = np.zeros(len(distances), dtype=float)
            with np.errstate(over='ignore', under='ignore', invalid='ignore'):
                friction[usable] = (
                    alpha
                    * np.power(distances[usable], beta)
                    * np.exp(gamma * distances[usable])
                )
            friction = np.nan_to_num(friction, nan=0.0, posinf=0.0, neginf=0.0)

            # Distribute the fixed total across OD pairs in proportion to friction.
            transit_demand = dist_df.copy()
            transit_demand['volume'] = 0.0
            total_friction = friction.sum()
            if total_friction > 0:
                transit_demand['volume'] = friction / total_friction * TRANSIT_TRIPS_PER_PURPOSE

            # Save transit demand
            demand_file_long = f'{output_dir}/demand_transit_purpose{purpose}.csv'
            transit_demand.to_csv(demand_file_long, index=False)
            all_demand_matrices[f'transit_p{purpose}'] = transit_demand
            print(f"    ✓ Saved to {demand_file_long}")

        except Exception as e:
            # Best-effort: report the failure with a traceback and move on to
            # the next purpose.
            print(f"    ✗ ERROR: {type(e).__name__}: {str(e)}")
            traceback.print_exc()
else:
    print(f"  ⚠ Warning: Zone or distance files not found. Cannot generate transit demand.")

# Run summary: counts of modes, purposes, matrices and zones, followed by the
# total volume of every demand matrix that was generated.
banner = '=' * 70

print(f"\n{banner}")
print("SUMMARY")
print(banner)
print(f"Modes processed: {len(modes) + 1} (auto, walk, bike, transit)")
print(f"Trip purposes: {len(trip_purposes)}")
print(f"Total demand matrices generated: {len(all_demand_matrices)}")

# `net` is the GRID2DEMAND object left over from the last mode processed.
zone_count = len(net.zone_dict)
print(f"Zones: {zone_count}" if zone_count > 0 else "Zones: (not available)")

print("\nDemand matrices:")
if not all_demand_matrices:
    print("  (none generated - check errors above)")
else:
    for key, matrix in sorted(all_demand_matrices.items(), key=lambda item: item[0]):
        # Keys look like '<mode>_p<purpose>', e.g. 'auto_p1'.
        mode, purpose = key.split('_')
        purpose_num = int(purpose[1])
        total_trips = matrix['volume'].sum()
        print(f"  {mode.upper()} - {purpose_names[purpose_num]}: {total_trips:,.0f} trips")

# Calculate and display mode splits
#
# Sums the 'volume' column of every available matrix per mode (the three
# grid2demand modes plus synthetic transit), prints each mode's share of the
# grand total, and compares those shares with the Brooklyn target split.
print(f"\n{'='*70}")
print("MODE SPLIT ANALYSIS")
print(f"{'='*70}")

if len(all_demand_matrices) > 0:
    all_modes = modes + ['transit']

    # Total trips per mode; purposes whose matrix is missing are skipped.
    mode_totals = {
        mode_name: sum(
            all_demand_matrices[f'{mode_name}_p{p}']['volume'].sum()
            for p in trip_purposes
            if f'{mode_name}_p{p}' in all_demand_matrices
        )
        for mode_name in all_modes
    }
    grand_total = sum(mode_totals.values())

    def _share_pct(trips):
        """Percentage of the grand total, or 0 when there are no trips at all."""
        return (trips / grand_total * 100) if grand_total > 0 else 0

    # Display mode splits
    print(f"\nTotal trips: {grand_total:,.0f}\n")

    # Map mode names to display names
    mode_display = {'auto': 'AUTO', 'walk': 'WALK', 'bike': 'BIKE', 'transit': 'TRANSIT'}

    for mode in all_modes:
        trips = mode_totals[mode]
        pct = _share_pct(trips)
        display_name = mode_display.get(mode, mode.upper())
        print(f"{display_name:10s}: {trips:12,.0f} trips ({pct:5.1f}%)")

    # Compare with Brooklyn targets
    print(f"\n{'='*70}")
    print("COMPARISON WITH BROOKLYN TARGETS")
    print(f"{'='*70}")

    targets = {'AUTO': 35, 'WALK': 30, 'TRANSIT': 32, 'BIKE': 3}

    print(f"\n{'Mode':<10s} {'Model':<10s} {'Target':<10s} {'Difference':<10s}")
    print("-" * 40)

    for mode in all_modes:
        display_name = mode_display.get(mode, mode.upper())
        if display_name in targets:
            model_pct = _share_pct(mode_totals[mode])
            target_pct = targets[display_name]
            diff = model_pct - target_pct
            # The '+' flag in the format spec already prints the sign for both
            # positive and negative values; no separate sign prefix is needed.
            print(f"{display_name:<10s} {model_pct:6.1f}%    {target_pct:6d}%     {diff:+6.1f}%")

print(f"\n{'='*70}")
print("Multi-modal demand generation complete!")
print(f"{'='*70}")

In [None]:
# Visualize Multi-Modal Results
#
# Draws a 2x2 figure from `all_demand_matrices`:
#   1. total trips by mode and purpose
#   2. mode share within each purpose
#   3. volume-weighted trip length distribution for purpose 1 (HBW)
#   4. volume-weighted average trip length by mode and purpose
# and then prints per-mode trip statistics. Every mode that has at least one
# matrix is included, so synthetic transit appears in all panels and the
# printed shares add up to 100%.

import matplotlib.pyplot as plt
import seaborn as sns  # imported by the original cell; not referenced below


def _split_demand_key(key):
    """Split a matrix key such as 'auto_p1' into ('auto', 1)."""
    mode_name, purpose_tag = key.rsplit('_', 1)
    return mode_name, int(purpose_tag[1:])


def _short_purpose_label(purpose_num):
    """Purpose name without the parenthesised expansion, e.g. 'HBW'."""
    return purpose_names[purpose_num].split('(')[0].strip()


def _load_od_distances(path):
    """Read the zone-to-zone distance file as a long table.

    The demand-generation cell treats this file as a wide matrix (an
    'o_zone_id' column plus one column per destination zone), so that layout
    is melted into (o_zone_id, d_zone_id, distance) rows. A file that already
    has a 'd_zone_id' column is taken to be long format and passed through.
    """
    raw = pd.read_csv(path)
    if 'd_zone_id' in raw.columns:
        wanted = [c for c in ('o_zone_id', 'd_zone_id', 'distance') if c in raw.columns]
        return raw[wanted]

    long_df = raw.melt(id_vars=['o_zone_id'], var_name='d_zone_id', value_name='distance')
    # Column headers that are not zone ids become NaN and are dropped.
    long_df['d_zone_id'] = pd.to_numeric(long_df['d_zone_id'], errors='coerce')
    long_df = long_df.dropna(subset=['d_zone_id'])
    long_df['d_zone_id'] = long_df['d_zone_id'].astype(int)
    return long_df


def _attach_distance(demand_df, od_distances):
    """Return the demand rows that have a known distance, or None.

    Matrices that already carry a 'distance' column (the synthetic transit
    ones) are used as-is; merging them again would rename the column to
    'distance_x'/'distance_y' and hide it. Rows without a distance are
    dropped so they cannot break the histogram or bias the weighted mean.
    """
    if 'distance' in demand_df.columns:
        with_distance = demand_df
    elif 'distance' in od_distances.columns:
        with_distance = demand_df.merge(od_distances, on=['o_zone_id', 'd_zone_id'], how='left')
    else:
        return None
    return with_distance.dropna(subset=['distance'])


# Check if we have demand matrices to visualize
if len(all_demand_matrices) == 0:
    print("⚠ No demand matrices available for visualization.")
    print("Please run Cell 3 first to generate demand matrices.")
else:
    # Modes with at least one matrix, in a stable order, transit included.
    plot_modes = [
        candidate for candidate in dict.fromkeys(list(modes) + ['transit'])
        if any(f'{candidate}_p{p}' in all_demand_matrices for p in trip_purposes)
    ]

    # Read the distance table once; both distance panels share it.
    dist_file = f'{output_dir}/zone_od_dist_matrix.csv'
    od_distances = _load_od_distances(dist_file) if os.path.exists(dist_file) else None

    # Create comprehensive visualization
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Brooklyn Multi-Modal Demand Analysis', fontsize=16, fontweight='bold')

    # 1. Total trips by mode and purpose
    ax1 = axes[0, 0]
    trip_data = []
    for key in sorted(all_demand_matrices.keys()):
        mode, purpose_num = _split_demand_key(key)
        trip_data.append({
            'Mode': mode.upper(),
            'Purpose': _short_purpose_label(purpose_num),
            'Trips': all_demand_matrices[key]['volume'].sum()
        })

    df_trips = pd.DataFrame(trip_data)
    pivot_trips = df_trips.pivot(index='Mode', columns='Purpose', values='Trips')
    pivot_trips.plot(kind='bar', ax=ax1, width=0.8)
    ax1.set_title('Total Trips by Mode and Purpose', fontweight='bold')
    ax1.set_ylabel('Total Trips')
    ax1.set_xlabel('Mode')
    ax1.legend(title='Trip Purpose', loc='upper right')
    ax1.grid(axis='y', alpha=0.3)
    ax1.tick_params(axis='x', rotation=0)

    # Format y-axis with commas
    ax1.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{int(x):,}'))

    # 2. Mode share by purpose
    ax2 = axes[0, 1]
    mode_shares = {}
    for purpose in trip_purposes:
        purpose_by_mode = {
            mode.upper(): all_demand_matrices[f'{mode}_p{purpose}']['volume'].sum()
            for mode in plot_modes
            if f'{mode}_p{purpose}' in all_demand_matrices
        }
        purpose_total = sum(purpose_by_mode.values())

        # Calculate percentages
        if purpose_total > 0:
            mode_shares[_short_purpose_label(purpose)] = {
                k: v / purpose_total * 100 for k, v in purpose_by_mode.items()
            }

    df_shares = pd.DataFrame(mode_shares)
    ax2.set_title('Mode Share by Trip Purpose', fontweight='bold')
    if not df_shares.empty:
        df_shares.plot(kind='bar', ax=ax2, width=0.8)
        ax2.set_ylabel('Mode Share (%)')
        ax2.set_xlabel('Mode')
        ax2.legend(title='Trip Purpose', loc='upper right')
        ax2.grid(axis='y', alpha=0.3)
        ax2.tick_params(axis='x', rotation=0)
    else:
        # All volumes are zero: pandas cannot plot an empty frame.
        ax2.text(0.5, 0.5, 'No trip data available',
                 ha='center', va='center', transform=ax2.transAxes)

    # 3. Trip length distribution by mode (for Purpose 1 - HBW)
    ax3 = axes[1, 0]

    if od_distances is not None:
        plotted_any = False
        for mode in plot_modes:
            key = f'{mode}_p1'
            if key not in all_demand_matrices:
                continue

            trips_with_dist = _attach_distance(all_demand_matrices[key], od_distances)
            if trips_with_dist is None:
                continue

            # Filter out zero-volume trips for cleaner visualization
            trips_with_dist = trips_with_dist[trips_with_dist['volume'] > 0]
            if len(trips_with_dist) == 0:
                continue

            # Weighted histogram: each OD pair counts in proportion to its volume
            weights = trips_with_dist['volume'] / trips_with_dist['volume'].sum()
            ax3.hist(trips_with_dist['distance'], weights=weights, bins=20, alpha=0.5,
                     label=mode.upper(), edgecolor='black')
            plotted_any = True

        ax3.set_title('Trip Length Distribution by Mode (HBW)', fontweight='bold')
        ax3.set_xlabel('Distance (km)')
        ax3.set_ylabel('Trip Frequency (normalized)')
        if plotted_any:
            ax3.legend()
        ax3.grid(axis='y', alpha=0.3)
    else:
        ax3.text(0.5, 0.5, 'Distance matrix not found',
                 ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Trip Length Distribution (data unavailable)', fontweight='bold')

    # 4. Average trip length by mode and purpose
    ax4 = axes[1, 1]

    if od_distances is not None:
        avg_lengths = []

        for key in sorted(all_demand_matrices.keys()):
            mode, purpose_num = _split_demand_key(key)
            trips_with_dist = _attach_distance(all_demand_matrices[key], od_distances)
            if trips_with_dist is None:
                continue

            total_volume = trips_with_dist['volume'].sum()
            if total_volume > 0:
                avg_dist = (trips_with_dist['distance'] * trips_with_dist['volume']).sum() / total_volume
                avg_lengths.append({
                    'Mode': mode.upper(),
                    'Purpose': _short_purpose_label(purpose_num),
                    'Avg Distance (km)': avg_dist
                })

        if len(avg_lengths) > 0:
            df_avg = pd.DataFrame(avg_lengths)
            pivot_avg = df_avg.pivot(index='Mode', columns='Purpose', values='Avg Distance (km)')
            pivot_avg.plot(kind='bar', ax=ax4, width=0.8)
            ax4.set_title('Average Trip Distance by Mode and Purpose', fontweight='bold')
            ax4.set_ylabel('Average Distance (km)')
            ax4.set_xlabel('Mode')
            ax4.legend(title='Trip Purpose', loc='upper right')
            ax4.grid(axis='y', alpha=0.3)
            ax4.tick_params(axis='x', rotation=0)
        else:
            ax4.text(0.5, 0.5, 'No trip data available',
                     ha='center', va='center', transform=ax4.transAxes)
    else:
        ax4.text(0.5, 0.5, 'Distance matrix not found',
                 ha='center', va='center', transform=ax4.transAxes)
        ax4.set_title('Average Trip Distance (data unavailable)', fontweight='bold')

    plt.tight_layout()
    plt.savefig(f'{output_dir}/multimodal_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("\n" + "="*70)
    print("VISUALIZATION SUMMARY")
    print("="*70)
    print(f"✓ Multi-modal analysis chart saved to {output_dir}/multimodal_analysis.png")
    print("\nKey Insights:")
    print(f"  • Total modes analyzed: {len(plot_modes)}")
    print(f"  • Total trip purposes: {len(trip_purposes)}")
    print(f"  • Total demand matrices: {len(all_demand_matrices)}")

    # Print summary statistics
    print("\n" + "="*70)
    print("TRIP STATISTICS")
    print("="*70)
    total_all_trips = sum(matrix['volume'].sum() for matrix in all_demand_matrices.values())
    print(f"Total trips across all modes and purposes: {total_all_trips:,.0f}\n")

    for mode in plot_modes:
        # Trips per purpose for this mode, in purpose order; missing matrices skipped.
        trips_by_purpose = {
            purpose: all_demand_matrices[f'{mode}_p{purpose}']['volume'].sum()
            for purpose in trip_purposes
            if f'{mode}_p{purpose}' in all_demand_matrices
        }
        if not trips_by_purpose:
            continue

        mode_total = sum(trips_by_purpose.values())
        mode_share = mode_total / total_all_trips * 100 if total_all_trips > 0 else 0
        print(f"{mode.upper()}: {mode_total:,.0f} trips ({mode_share:.1f}%)")

        for purpose, trips in trips_by_purpose.items():
            purpose_pct = trips / mode_total * 100 if mode_total > 0 else 0
            print(f"  - {purpose_names[purpose]}: {trips:,.0f} ({purpose_pct:.1f}%)")

    print("="*70)