# RECOIL Intermodal Data Explorer (Voilà)

This web app lets you explore intermodal freight nodes, mode-specific edges (Highway/Railway/Waterway), and freight demand. Data is fetched only when you request it, so no heavy datasets are pre-loaded.

**How to use:**
- Enter a city name to filter the node list (or leave blank for all nodes)
- Select a mode (Highway, Railway, or Waterway) for the edge sample
- Click Load Nodes, Load Edges, or Load Demand to fetch and display data

Data is fetched live from: https://recoil.ise.utk.edu/data/Parsed_Data/

In [None]:
# Imports and configuration

import os
import pickle
import requests
import pandas as pd
import geopandas as gpd
import shapely
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, HTML

# Base URL for remote data
DEFAULT_BASE_URL = 'https://recoil.ise.utk.edu/data/Parsed_Data/'
BASE_URL = os.getenv('BASE_URL', DEFAULT_BASE_URL)

# Helper to load pickles from URL
def load_pickle_from_url(url: str):
    """Download ``url`` and return the object unpickled from the response body.

    Args:
        url: Address of the pickle file to fetch.

    Returns:
        Whatever ``pickle.loads`` produces from the response content.

    Raises:
        RuntimeError: If the request, the HTTP status check, or the unpickling
            fails. The underlying exception is attached as ``__cause__``.
    """
    # SECURITY NOTE: unpickling can execute arbitrary code, so this must only
    # be pointed at a server you trust (BASE_URL can be overridden via the
    # environment).
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        return pickle.loads(resp.content)
    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise RuntimeError(f"Failed to load pickle from {url}: {e}") from e

# Helper to create GeoDataFrame from edges dict
def create_geoframe_from_edges(edges: dict, sample: int | None = None):
    """Build a GeoDataFrame with one LineString row per edge.

    Args:
        edges: Mapping keyed by ``(i, j)`` tuples. Each value is read for
            ``path``, ``i_lat``, ``i_lon``, ``j_lat``, ``j_lon``, ``mode``
            and ``distance``.
        sample: When not ``None``, only the first ``sample`` keys (in the
            mapping's iteration order) are converted.

    Returns:
        A GeoDataFrame (CRS ``EPSG:4326``) with columns ``i``, ``j``,
        ``i_lat``, ``i_lon``, ``j_lat``, ``j_lon``, ``mode``, ``distance``
        and ``geometry``.
    """
    from shapely import LineString

    column_names = ['i', 'j', 'i_lat', 'i_lon', 'j_lat', 'j_lon', 'mode', 'distance', 'geometry']
    scalar_fields = ('i_lat', 'i_lon', 'j_lat', 'j_lon', 'mode', 'distance')

    edge_keys = list(edges)
    if sample is not None:
        edge_keys = edge_keys[:sample]

    records = []
    for start, end in edge_keys:
        attrs = edges[(start, end)]
        # Path points are unpacked as (lat, lon) and swapped into (lon, lat)
        # order for the LineString coordinates.
        line = LineString([(lon, lat) for lat, lon in attrs['path']])
        records.append([start, end, *(attrs[name] for name in scalar_fields), line])

    return gpd.GeoDataFrame(records, columns=column_names, geometry='geometry', crs='EPSG:4326')

In [None]:
# Custom CSS for professional styling
from IPython.display import HTML, display

# Stylesheet for the panels below. It defines rules for headings inside
# .widget-html, for .widget-button, for the .loading-indicator / .status-* /
# .info-box / .warning-box classes emitted by the callbacks, and for tables.
# NOTE: the `table` selectors are not scoped to a class, so they apply to
# every table rendered on the page.
custom_css = HTML('''
<style>
    /* Main container styling */
    .widget-html h3 {
        color: #2c3e50;
        border-bottom: 3px solid #3498db;
        padding-bottom: 8px;
        margin-top: 20px;
        font-weight: 600;
    }
    
    .widget-html h4 {
        color: #34495e;
        margin-top: 15px;
        font-weight: 500;
    }
    
    /* Button styling - fix vertical alignment */
    .widget-button {
        font-weight: 500;
        border-radius: 4px;
        padding: 8px 16px !important;
        transition: all 0.3s ease;
        display: inline-flex !important;
        align-items: center !important;
        justify-content: center !important;
        vertical-align: middle !important;
        line-height: normal !important;
    }
    
    .widget-button .widget-button-content {
        display: flex;
        align-items: center;
        justify-content: center;
    }
    
    /* Loading indicator */
    .loading-indicator {
        display: inline-block;
        color: #3498db;
        font-weight: 500;
        animation: pulse 1.5s ease-in-out infinite;
    }
    
    @keyframes pulse {
        0%, 100% { opacity: 1; }
        50% { opacity: 0.5; }
    }
    
    /* Status messages */
    .status-success {
        color: #27ae60;
        font-weight: 500;
    }
    
    .status-warning {
        color: #f39c12;
        font-weight: 500;
    }
    
    .status-error {
        color: #e74c3c;
        font-weight: 500;
    }
    
    /* Table styling */
    table {
        border-collapse: collapse;
        width: 100%;
        margin: 15px 0;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    
    table th {
        background-color: #3498db;
        color: white;
        padding: 12px;
        text-align: left;
        font-weight: 500;
    }
    
    table td {
        padding: 10px 12px;
        border-bottom: 1px solid #ecf0f1;
    }
    
    table tr:hover {
        background-color: #f8f9fa;
    }
    
    /* Info box */
    .info-box {
        background-color: #e8f4f8;
        border-left: 4px solid #3498db;
        padding: 12px 16px;
        margin: 10px 0;
        border-radius: 4px;
    }
    
    .warning-box {
        background-color: #fff3cd;
        border-left: 4px solid #f39c12;
        padding: 12px 16px;
        margin: 10px 0;
        border-radius: 4px;
    }
</style>
''')
# Emit the <style> element into the cell output so the rules take effect.
display(custom_css)

In [None]:
# Example 1: Basic Node/Edge Exploration with loading indicators

# Input controls for the Example 1 panel.
city_input = widgets.Text(
    value='',
    description='City:',
    placeholder='City substring (leave blank for all)',
)
mode_dropdown = widgets.Dropdown(
    options=[('Highway', 'H'), ('Railway', 'R'), ('Waterway', 'W')],
    value='W',
    description='Mode:',
)

# One button per dataset; the callbacks below all render into `out`.
load_nodes_btn = widgets.Button(
    description='▶ Load Nodes',
    button_style='info',
    icon='database',
)
load_edges_btn = widgets.Button(
    description='▶ Load Edges (sample)',
    button_style='primary',
    icon='sitemap',
)
load_demand_btn = widgets.Button(
    description='▶ Load Demand (sample)',
    button_style='success',
    icon='chart-line',
)
out = widgets.Output()

def show_loading(message='Loading data...'):
    """Return an HTML snippet that shows ``message`` as a loading indicator.

    The hourglass icon is prepended here, so callers pass plain text.
    """
    markup = '<div class="loading-indicator">⏳ {}</div>'.format(message)
    return HTML(markup)

def show_warning(message):
    """Return an HTML snippet that shows ``message`` inside a warning box.

    The warning icon is prepended here, so callers pass plain text.
    """
    markup = '<div class="warning-box">⚠️ {}</div>'.format(message)
    return HTML(markup)

def on_load_nodes(_):
    """Button callback: fetch the node CSV and show up to 20 rows in ``out``.

    When the City box is non-empty, rows are kept only if their ``city``
    value contains that text (case-insensitive, literal substring match).
    Any failure is reported inside ``out`` instead of being raised.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    with out:
        out.clear_output()
        display(show_loading('Fetching nodes data from remote server...'))
        url_N = f"{BASE_URL}intermodal-217.csv"
        try:
            df_nodes = pd.read_csv(url_N)
            city_filter = city_input.value.strip()
            if city_filter:
                # regex=False: the box asks for a substring, so characters
                # such as "(" or "+" must not be parsed as a pattern.
                matches = df_nodes['city'].str.contains(
                    city_filter, case=False, na=False, regex=False
                )
                df_nodes = df_nodes[matches]
            out.clear_output()
            display(HTML('<h4>✅ Nodes Loaded Successfully</h4>'))
            display(HTML(f'<p>Showing up to 20 of {len(df_nodes)} total nodes</p>'))
            display(HTML(df_nodes.head(20).to_html(index=False, classes='styled-table')))
        except Exception as e:
            out.clear_output()
            # Escape the message: text like "<urlopen error ...>" would
            # otherwise be parsed as a tag and disappear from the page.
            display(HTML(f'<div class="status-error">❌ Error loading nodes: {escape(str(e))}</div>'))

def on_load_edges(_):
    """Button callback: fetch the selected mode's edges and show a sample.

    Loads ``<mode>-adj.pickle`` from ``BASE_URL``, converts the first edges
    to a GeoDataFrame, then displays a small table and a plot in ``out``.
    A plotting failure is reported as a warning and does not hide the table;
    any other failure is reported as an error inside ``out``.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    sample_size = 15
    mode_map = {'H': 'Highway', 'R': 'Railway', 'W': 'Waterway'}

    with out:
        out.clear_output()
        mode = mode_dropdown.value
        mode_label = mode_map[mode]
        display(show_loading(f'Fetching {mode_label} edges... This may take 10-15 seconds.'))
        # The three files share one naming scheme: H-adj, R-adj, W-adj.
        url = f"{BASE_URL}{mode}-adj.pickle"
        try:
            edges = load_pickle_from_url(url)
            gdf = create_geoframe_from_edges(edges, sample=sample_size)
            out.clear_output()
            display(HTML(f'<h4>✅ {mode_label} Edges Loaded Successfully</h4>'))
            # Report the real sample size; a network may hold fewer edges.
            display(HTML(f'<p>Total edges in network: {len(edges):,} | Showing sample of {len(gdf)}</p>'))
            display(show_warning('Large datasets may take a moment to load. Sample shown for performance.'))
            display(gdf[['i', 'j', 'mode', 'distance']].head(sample_size))
            try:
                ax = gdf.to_crs(3857).plot(figsize=(8, 5), linewidth=1.5, color='#3498db')
                ax.set_title(f'{mode_label} Network (sample)', fontsize=14, fontweight='bold')
                ax.set_axis_off()
                plt.tight_layout()
                plt.show()
            except Exception as plot_e:
                display(HTML(f'<div class="status-warning">⚠️ Plot skipped: {escape(str(plot_e))}</div>'))
        except Exception as e:
            out.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error loading edges: {escape(str(e))}</div>'))

def on_load_demand(_):
    """Button callback: fetch the demand pickle and show its first entries.

    Displays origin, destination and the 2025/2030/2050 tonnage values for
    the first OD pairs (in the mapping's iteration order). A missing tonnage
    key is shown as 0. Failures are reported inside ``out``.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local imports keep this cell self-contained
    from itertools import islice

    sample_size = 10

    with out:
        out.clear_output()
        display(show_loading('Fetching demand data... This may take a moment.'))
        url_D = f"{BASE_URL}demand.pickle"
        try:
            demand = load_pickle_from_url(url_D)
            # islice takes the leading entries without copying every key.
            rows = [
                [i, j, rec.get('tons_2025', 0), rec.get('tons_2030', 0), rec.get('tons_2050', 0)]
                for (i, j), rec in islice(demand.items(), sample_size)
            ]
            df = pd.DataFrame(rows, columns=['Origin', 'Dest', 'Tons 2025', 'Tons 2030', 'Tons 2050'])
            out.clear_output()
            display(HTML('<h4>✅ Demand Data Loaded Successfully</h4>'))
            display(HTML(f'<p>Total OD pairs: {len(demand):,} | Showing sample of {len(df)}</p>'))
            display(HTML(df.to_html(index=False, classes='styled-table')))
        except Exception as e:
            out.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error loading demand: {escape(str(e))}</div>'))

# Connect each button to its callback.
load_nodes_btn.on_click(on_load_nodes)
load_edges_btn.on_click(on_load_edges)
load_demand_btn.on_click(on_load_demand)

# Panel layout: heading, hint, filter row, button row, shared output area.
ui = widgets.VBox(children=[
    widgets.HTML(value='<h3>📊 Example 1: Explore Nodes, Edges & Demand</h3>'),
    widgets.HTML(value='<div class="info-box">Select filters below and click a button to load data on-demand. Data is fetched live from the remote server.</div>'),
    widgets.HBox(children=[city_input, mode_dropdown]),
    widgets.HBox(children=[load_nodes_btn, load_edges_btn, load_demand_btn]),
    out,
])
display(ui)

---

## Example 2: Find Intermodal Neighbors

Select a city and mode to see which other intermodal nodes connect to it.

In [None]:
# Helper: find neighbors by ID
def find_connection_by_id(edges_dict, node_id):
    """Return the sorted, de-duplicated ids that share an edge with ``node_id``.

    Args:
        edges_dict: Mapping whose keys are ``(i, j)`` pairs; values are ignored.
        node_id: The id to look for at either end of a pair.

    Returns:
        A sorted list holding the opposite endpoint of every matching pair.
    """
    linked = {
        (dst if src == node_id else src)
        for src, dst in edges_dict
        if src == node_id or dst == node_id
    }
    return sorted(linked)

# Pre-load the node table once; the panels below reuse df_nodes_global for
# city/type/id lookups and unique_cities for the dropdown options.
try:
    url_N = f"{BASE_URL}intermodal-217.csv"
    df_nodes_global = pd.read_csv(url_N)
    unique_cities = sorted(df_nodes_global['city'].dropna().unique().tolist())
except Exception as e:
    print(f"Warning: Could not load city list: {e}")
    # Keep df_nodes_global defined (empty, with the columns used below) so
    # later cells degrade to "no nodes found" instead of raising NameError.
    df_nodes_global = pd.DataFrame(columns=['id', 'city', 'type'])
    unique_cities = ['Houston', 'Seattle', 'New York', 'Los Angeles']  # fallback

# Neighbor finder panel with professional styling
# City selector. With an empty option list the value must be None: any other
# value is rejected by the Dropdown because it is not one of the options.
city_dropdown2 = widgets.Dropdown(
    options=unique_cities,
    value=unique_cities[0] if unique_cities else None,
    description='City:',
    style={'description_width': '60px'}
)
# Mode selector; Highway is listed last and labelled as slow.
mode_dropdown2 = widgets.Dropdown(
    options=[('Railway', 'R'), ('Waterway', 'W'), ('Highway (SLOW - 15s+)', 'H')],
    value='W',
    description='Mode:'
)
find_neighbors_btn = widgets.Button(
    description='Find Neighbors',
    button_style='warning',
    icon='project-diagram'
)
# Output area written to by on_find_neighbors.
out2 = widgets.Output()

def on_find_neighbors(_):
    """Button callback: list the nodes connected to the selected city's node.

    Picks the first row of ``df_nodes_global`` matching the selected city and
    mode, loads that mode's edge pickle, and shows up to 20 neighbouring
    nodes (id, city, type) in ``out2``. Neighbour ids that are absent from
    ``df_nodes_global`` are left out of the table. Missing nodes and other
    failures are reported inside ``out2`` instead of being raised.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    max_rows = 20
    mode_map = {'H': 'Highway', 'R': 'Railway', 'W': 'Waterway'}
    # show_loading() already prepends the hourglass icon, so these messages
    # must not repeat it.
    loading_messages = {
        'H': 'Loading Highway edges... This may take 15+ seconds or timeout...',
        'R': 'Loading Railway edges (~11 seconds)...',
    }

    with out2:
        out2.clear_output()
        mode = mode_dropdown2.value
        display(show_loading(loading_messages.get(mode, 'Loading Waterway edges...')))

        try:
            selected_city = city_dropdown2.value
            # City names come from the CSV; escape them before embedding in HTML.
            city_label = escape(str(selected_city))

            # Filter by city
            city_nodes = df_nodes_global[df_nodes_global['city'] == selected_city]
            if city_nodes.empty:
                out2.clear_output()
                display(HTML(f'<div class="status-error">❌ No nodes found for city: {city_label}</div>'))
                return

            # Get node ID for selected mode
            node_row = city_nodes[city_nodes['type'] == mode]
            if node_row.empty:
                out2.clear_output()
                display(HTML(f'<div class="status-warning">⚠️ No {mode_map[mode]} node found for {city_label}. Try a different mode.</div>'))
                return

            node_id = node_row.iloc[0]['id']

            # Load edges for that mode (files are named H-adj, R-adj, W-adj)
            edges = load_pickle_from_url(f"{BASE_URL}{mode}-adj.pickle")

            # Find neighbors
            neighbors = find_connection_by_id(edges, node_id)

            # Look up neighbor names for the rows that will be shown
            neighbor_info = []
            for nid in neighbors[:max_rows]:
                n_row = df_nodes_global[df_nodes_global['id'] == nid]
                if not n_row.empty:
                    neighbor_info.append({
                        'ID': nid,
                        'City': n_row.iloc[0]['city'],
                        'Type': n_row.iloc[0]['type']
                    })

            # Explicit columns keep the table header even when nothing matched.
            df_neighbors = pd.DataFrame(neighbor_info, columns=['ID', 'City', 'Type'])
            out2.clear_output()
            display(HTML(f'<h4>✅ Found {len(neighbors)} Connected Nodes</h4>'))
            display(HTML(f'<p><strong>{city_label}</strong> ({mode_map[mode]}, ID: {node_id}) connects to:</p>'))
            if len(neighbors) > max_rows:
                display(show_warning(f'Showing first {max_rows} of {len(neighbors)} total connections'))
            display(HTML(df_neighbors.to_html(index=False, classes='styled-table')))

        except Exception as e:
            out2.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error: {escape(str(e))}</div>'))

# Connect the button to its callback.
find_neighbors_btn.on_click(on_find_neighbors)

# Panel layout: heading, performance hint, selector row, button, output area.
# The "<" in "<1s" is written as &lt; because a raw "<" is not valid in HTML text.
ui2 = widgets.VBox([
    widgets.HTML('<h3>🔗 Example 2: Find Intermodal Neighbors</h3>'),
    widgets.HTML('<div class="info-box">⚠️ <strong>Performance Note:</strong> Waterway loads in &lt;1s, Railway in ~11s, Highway may take 15+ seconds or timeout. Select a city and mode to discover connections.</div>'),
    widgets.HBox([city_dropdown2, mode_dropdown2]),
    find_neighbors_btn,
    out2
])
display(ui2)

---

## Example 3: Demand Between Two Cities

Look up freight demand (tonnage projections) between an origin and destination.

In [None]:
# Demand lookup panel with professional styling
# Offer only cities that have a Highway-type node (type == 'H').
highway_cities = sorted(df_nodes_global[df_nodes_global['type'] == 'H']['city'].dropna().unique().tolist())

# With no Highway cities available the dropdown values fall back to None;
# indexing the empty list or using a value outside the options would raise.
default_origin = highway_cities[0] if highway_cities else None
default_dest = highway_cities[1] if len(highway_cities) > 1 else default_origin

origin_dropdown = widgets.Dropdown(
    options=highway_cities,
    value=default_origin,
    description='Origin:',
    style={'description_width': '70px'}
)
dest_dropdown = widgets.Dropdown(
    options=highway_cities,
    value=default_dest,
    description='Destination:'
)
lookup_demand_btn = widgets.Button(
    description='Lookup Demand',
    button_style='success',
    icon='chart-line'
)
# Output area written to by on_lookup_demand.
out3 = widgets.Output()

def on_lookup_demand(_):
    """Button callback: show tonnage projections for the selected OD pair.

    Resolves both cities to the id of their first Highway-type row in
    ``df_nodes_global``, loads the demand pickle, and looks up the directed
    key ``(origin_id, dest_id)``. The 2025-2050 values are shown in ``out3``
    (a missing year is shown as 0). Missing nodes, identical endpoints,
    missing demand and other failures are reported inside ``out3``.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    years = ('2025', '2030', '2035', '2040', '2045', '2050')

    with out3:
        out3.clear_output()
        display(show_loading('Loading demand data...'))
        try:
            origin_city = origin_dropdown.value
            dest_city = dest_dropdown.value
            # City names come from the CSV; escape them before embedding in HTML.
            origin_label = escape(str(origin_city))
            dest_label = escape(str(dest_city))

            # Get IDs (Highway nodes only)
            is_highway = df_nodes_global['type'] == 'H'
            origin_nodes = df_nodes_global[(df_nodes_global['city'] == origin_city) & is_highway]
            dest_nodes = df_nodes_global[(df_nodes_global['city'] == dest_city) & is_highway]

            if origin_nodes.empty or dest_nodes.empty:
                out3.clear_output()
                display(HTML('<div class="status-error">❌ One or both cities do not have Highway nodes.</div>'))
                return

            origin_id = origin_nodes.iloc[0]['id']
            dest_id = dest_nodes.iloc[0]['id']

            if origin_id == dest_id:
                out3.clear_output()
                display(show_warning('Origin and destination are the same. Please select different cities.'))
                return

            # Load demand data
            url_D = f"{BASE_URL}demand.pickle"
            demand = load_pickle_from_url(url_D)

            # Lookup demand (direction matters)
            key = (origin_id, dest_id)
            if key not in demand:
                out3.clear_output()
                display(HTML(f'<div class="status-warning">⚠️ No demand data found for {origin_label} → {dest_label}</div>'))
                display(HTML('<p>Note: Demand data is directional. Try reversing origin and destination.</p>'))
                return

            d = demand[key]
            out3.clear_output()
            display(HTML('<h4>✅ Demand Found</h4>'))
            display(HTML(f'<p><strong>{origin_label}</strong> (ID: {origin_id}) → <strong>{dest_label}</strong> (ID: {dest_id})</p>'))
            # One column per projection year, read from the 'tons_<year>' keys.
            demand_table = pd.DataFrame([
                {year: f"{d.get('tons_' + year, 0):,.0f}" for year in years}
            ])
            display(HTML('<p><strong>Projected Demand (tons per year):</strong></p>'))
            display(HTML(demand_table.to_html(index=False, classes='styled-table')))

        except Exception as e:
            out3.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error: {escape(str(e))}</div>'))

# Connect the button to its callback.
lookup_demand_btn.on_click(on_lookup_demand)

# Panel layout: heading, hint, origin/destination row, button, output area.
ui3 = widgets.VBox(children=[
    widgets.HTML(value='<h3>📊 Example 3: Demand Lookup Between Cities</h3>'),
    widgets.HTML(value='<div class="info-box">🚚 Lookup freight demand projections between two cities. <strong>Note:</strong> Only Highway nodes have demand data.</div>'),
    widgets.HBox(children=[origin_dropdown, dest_dropdown]),
    lookup_demand_btn,
    out3,
])
display(ui3)

---

## Example 4: Quick Dataset Statistics

Get a high-level overview of dataset sizes and network distances. Highway edges are excluded because that dataset is slow to load and often times out.

In [None]:
# Dataset statistics panel with professional styling
# Based on performance testing: Railway ~11s, Highway timeouts, Waterway ~0.3s, Demand ~1.7s
# Trigger button and output area for the partial-statistics panel.
stats_btn = widgets.Button(description='Show Statistics (Partial)', button_style='info', icon='chart-bar')
out4 = widgets.Output()

def on_show_stats(_):
    """Button callback: show record counts and summed edge distances.

    Loads the Railway and Waterway edge pickles plus the demand pickle, then
    displays two tables in ``out4``: record counts (including the rows of
    ``df_nodes_global``) and the summed ``distance`` per mode. Highway edges
    are deliberately not loaded. Failures are reported inside ``out4``.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    with out4:
        out4.clear_output()
        # show_loading()/show_warning() already prepend their icons, so the
        # messages passed to them must not repeat the icon.
        display(show_loading('Loading Railway & Waterway edges (~12 seconds)...'))
        try:
            # Load fast/medium data sources (skip Highway - it times out)
            railway = load_pickle_from_url(f"{BASE_URL}R-adj.pickle")
            waterway = load_pickle_from_url(f"{BASE_URL}W-adj.pickle")
            demand = load_pickle_from_url(f"{BASE_URL}demand.pickle")

            # Record counts (Railway & Waterway only)
            df_stats = pd.DataFrame({
                'Dataset': ['Nodes', 'Railway Edges', 'Waterway Edges', 'Demand Records'],
                'Count': [len(df_nodes_global), len(railway), len(waterway), len(demand)],
            })

            # Summed 'distance' field of every edge, per mode
            r_dist = sum(v['distance'] for v in railway.values())
            w_dist = sum(v['distance'] for v in waterway.values())
            df_distances = pd.DataFrame({
                'Mode': ['Railway', 'Waterway', 'Total (R+W)'],
                'Total Distance (km)': [
                    f"{r_dist:,.0f}",
                    f"{w_dist:,.0f}",
                    f"{r_dist + w_dist:,.0f}",
                ],
            })

            out4.clear_output()
            display(HTML('<h4>✅ Dataset Statistics Loaded</h4>'))
            display(show_warning('Highway edges excluded (dataset is too large and times out after 15+ seconds)'))
            display(HTML('<p><strong>Record Counts:</strong></p>'))
            display(HTML(df_stats.to_html(index=False, classes='styled-table')))
            display(HTML('<p><strong>Network Distances:</strong></p>'))
            display(HTML(df_distances.to_html(index=False, classes='styled-table')))

        except Exception as e:
            out4.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error: {escape(str(e))}</div>'))

# Connect the button to its callback.
stats_btn.on_click(on_show_stats)

# Panel layout: heading, hint, button, output area.
ui4 = widgets.VBox(children=[
    widgets.HTML(value='<h3>📈 Example 4: Dataset Statistics (Partial)</h3>'),
    widgets.HTML(value='<div class="info-box">📊 View statistics for Railway and Waterway datasets. Highway edges are excluded as they take 15+ seconds to load and often timeout.</div>'),
    stats_btn,
    out4,
])
display(ui4)

---

## Example 5: Compare Mode Distances

Compare edge counts and network distances for the Railway and Waterway modes. Highway is excluded because its dataset is too slow to load.

In [None]:
# Mode comparison panel with professional styling
# Based on performance testing: Railway ~11s, Waterway ~0.3s (Highway excluded - timeouts)
# Trigger button and output area for the mode-comparison panel.
compare_btn = widgets.Button(description='Compare Modes', button_style='danger', icon='balance-scale')
out5 = widgets.Output()

def on_compare_modes(_):
    """Button callback: compare Railway and Waterway edge statistics.

    Loads both edge pickles, computes count/total/mean/min/max of the
    ``distance`` field per mode, and shows a comparison table plus a grouped
    bar chart in ``out5``. Highway edges are deliberately not loaded.
    Failures are reported inside ``out5``.

    Args:
        _: The clicked button (unused).
    """
    from html import escape  # stdlib; local import keeps this cell self-contained

    with out5:
        out5.clear_output()
        # show_loading()/show_warning() already prepend their icons, so the
        # messages passed to them must not repeat the icon.
        display(show_loading('Loading Railway & Waterway edges (~12 seconds)...'))
        try:
            # Load Railway and Waterway only (Highway times out)
            railway = load_pickle_from_url(f"{BASE_URL}R-adj.pickle")
            waterway = load_pickle_from_url(f"{BASE_URL}W-adj.pickle")

            def calc_stats(edges):
                """Summarise the 'distance' values of ``edges`` (zeros if empty)."""
                distances = [v['distance'] for v in edges.values()]
                total = sum(distances)
                return {
                    'count': len(distances),
                    'total': total,
                    'mean': total / len(distances) if distances else 0,
                    'min': min(distances) if distances else 0,
                    'max': max(distances) if distances else 0
                }

            r_stats = calc_stats(railway)
            w_stats = calc_stats(waterway)

            # Create comparison table
            comparison_data = {
                'Mode': ['Railway', 'Waterway'],
                'Edge Count': [r_stats['count'], w_stats['count']],
                'Total Distance (km)': [f"{r_stats['total']:,.0f}", f"{w_stats['total']:,.0f}"],
                'Avg Distance (km)': [f"{r_stats['mean']:.1f}", f"{w_stats['mean']:.1f}"],
                'Min Distance (km)': [f"{r_stats['min']:.1f}", f"{w_stats['min']:.1f}"],
                'Max Distance (km)': [f"{r_stats['max']:.1f}", f"{w_stats['max']:.1f}"]
            }
            df_comparison = pd.DataFrame(comparison_data)

            # Create bar chart: two bars per mode, drawn on a shared axis
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(figsize=(10, 5))
            modes = ['Railway', 'Waterway']
            counts = [r_stats['count'], w_stats['count']]
            totals = [r_stats['total'], w_stats['total']]

            x = range(len(modes))
            width = 0.35

            ax.bar([i - width/2 for i in x], counts, width, label='Edge Count', color='#3498db')
            # Totals are divided by 1000 so both series fit on one scale.
            ax.bar([i + width/2 for i in x], [t/1000 for t in totals], width, label='Total Distance (1000 km)', color='#e74c3c')

            ax.set_xlabel('Transportation Mode', fontsize=12, fontweight='bold')
            ax.set_ylabel('Value', fontsize=12, fontweight='bold')
            ax.set_title('Mode Comparison: Railway vs Waterway', fontsize=14, fontweight='bold', pad=20)
            ax.set_xticks(x)
            ax.set_xticklabels(modes)
            ax.legend()
            ax.grid(axis='y', alpha=0.3)
            plt.tight_layout()

            out5.clear_output()
            display(HTML('<h4>✅ Mode Comparison Complete</h4>'))
            display(show_warning('Highway excluded (takes 15+ seconds and often times out)'))
            display(HTML('<p><strong>Railway vs Waterway Comparison:</strong></p>'))
            display(HTML(df_comparison.to_html(index=False, classes='styled-table')))
            display(HTML('<p><strong>Visual Comparison:</strong></p>'))
            plt.show()

        except Exception as e:
            out5.clear_output()
            # Escape the message so "<...>" fragments in exception text stay visible.
            display(HTML(f'<div class="status-error">❌ Error: {escape(str(e))}</div>'))

# Connect the button to its callback.
compare_btn.on_click(on_compare_modes)

# Panel layout: heading, hint, button, output area.
ui5 = widgets.VBox(children=[
    widgets.HTML(value='<h3>⚖️ Example 5: Mode Comparison (Railway vs Waterway)</h3>'),
    widgets.HTML(value='<div class="info-box">🔍 Compare Railway and Waterway transportation modes. Highway excluded as the dataset is too large (15+ seconds load time).</div>'),
    compare_btn,
    out5,
])
display(ui5)

---

## About the data

- **Nodes**: 217 intermodal locations (Highway: 301–413, Railway: 101–156, Waterway: 201–248)
- **Edges**: mode-specific adjacency with distance and path geometry
- **Demand**: directed OD pairs with tonnage projections (2025–2050)

For detailed exploration and the full notebook, see [`usage.ipynb`](usage.ipynb) in the repo.