# Interactive Tidal Data Point Query

Click on the map to query detailed tidal data at specific coordinates.

This notebook:
1. Shows grid coverage boundaries (aggregated for performance)
2. Shows the clicked coordinates in a popup with a copy-to-clipboard button (you paste them into Step 3)
3. Queries the nearest data point and displays summary information

In [None]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
from IPython.display import display

from query_tidal_manifest import TidalManifestQuery

# Configure matplotlib for better plots
plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline

## Configuration

In [None]:
# Configuration
# Root of the dataset tree; the manifest path is expressed relative to it so
# the shared prefix is written only once.
BASE_DATA_DIR = Path("/projects/hindcastra/Tidal/datasets/high_resolution_tidal_hindcast")
MANIFEST_PATH = BASE_DATA_DIR / "manifests" / "v0.3.0" / "manifest.json"

# For local development, override paths if needed:
# MANIFEST_PATH = Path("./manifests/v0.3.0/manifest.json")
# BASE_DATA_DIR = Path("./data")

<cell_type>markdown</cell_type>## Step 1: Load Manifest and Generate Location Boundaries

The manifest contains per-location geospatial bounds derived from the FVCOM mesh geometry.

In [None]:
# Open the manifest, then request the location-boundary GeoJSON that the map
# cell below adds as an overlay layer.
print("Loading manifest from:", MANIFEST_PATH)
query = TidalManifestQuery(MANIFEST_PATH)

print("\nGenerating location boundaries from manifest...")
geojson = query.generate_location_boundaries_geojson()

## Step 2: Display Interactive Map

**Instructions:** Click anywhere on the map to select a point for querying.

In [None]:
# Create map centered on data (midpoint of the manifest's overall bounding box)
bounds = query.manifest['spatial_bounds']
center_lat = (bounds['lat_min'] + bounds['lat_max']) / 2
center_lon = (bounds['lon_min'] + bounds['lon_max']) / 2

# Initialize map
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=5,
    tiles='OpenStreetMap'
)

# Add location boundaries as GeoJSON layer with tooltips
folium.GeoJson(
    geojson,
    name='Location Boundaries',
    style_function=lambda x: {
        'fillColor': '#3388ff',
        'color': '#3388ff',
        'weight': 2,
        'fillOpacity': 0.2
    },
    tooltip=folium.GeoJsonTooltip(
        fields=['label', 'location', 'point_count'],
        aliases=['Location:', 'ID:', 'Points:'],
        localize=True
    )
).add_to(m)

# Add custom click handler with copyable code snippet.
#
# The copy logic lives in a named function instead of an inline onclick body:
# the popup HTML is a JS template literal, and an escaped newline written
# there is turned into a real line break inside the quoted attribute string,
# which makes the inline handler fail to compile (the button then does nothing).
click_js = """
<script>
// Copy the two assignment lines for the clicked point and give visual feedback.
function copyQueryCoords(button, lat, lon) {
    var text = 'QUERY_LAT = ' + lat + '\\nQUERY_LON = ' + lon;

    function showCopied() {
        button.innerHTML = 'Copied!';
        button.style.background = '#4CAF50';
        setTimeout(function() {
            button.innerHTML = 'Copy to Clipboard';
            button.style.background = '#2196F3';
        }, 2000);
    }

    // Fallback for pages where the async Clipboard API is missing or denied
    // (for example non-secure origins or sandboxed iframes).
    function fallbackCopy() {
        var area = document.createElement('textarea');
        area.value = text;
        area.style.position = 'fixed';
        area.style.opacity = '0';
        document.body.appendChild(area);
        area.select();
        var ok = false;
        try {
            ok = document.execCommand('copy');
        } catch (err) {
            ok = false;
        }
        document.body.removeChild(area);
        if (ok) {
            showCopied();
        } else {
            button.innerHTML = 'Copy failed - select the text above';
        }
    }

    if (navigator.clipboard && navigator.clipboard.writeText) {
        navigator.clipboard.writeText(text).then(showCopied, fallbackCopy);
    } else {
        fallbackCopy();
    }
}

function onMapClick(e) {
    // wrap() keeps the longitude within -180..180 even after the map has been
    // panned across the antimeridian.
    var point = e.latlng.wrap();
    var lat = point.lat.toFixed(6);
    var lon = point.lng.toFixed(6);
    
    var popupContent = `
        <div style="min-width: 300px;">
            <b>Selected Coordinates</b><br><br>
            <div style="background: #f5f5f5; padding: 10px; border-radius: 4px; font-family: monospace; margin: 10px 0;">
                <div>QUERY_LAT = ${lat}</div>
                <div>QUERY_LON = ${lon}</div>
            </div>
            <button onclick="copyQueryCoords(this, '${lat}', '${lon}')"
            style="width: 100%; padding: 8px; background: #2196F3; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 14px;">
                Copy to Clipboard
            </button>
            <div style="margin-top: 10px; font-size: 12px; color: #666;">
                <b>Next steps:</b><br>
                1. Click "Copy to Clipboard"<br>
                2. Scroll to "Step 3" below<br>
                3. Paste into the query cell<br>
                4. Run the cell to see results
            </div>
        </div>
    `;
    
    // The popup is anchored at the clicked screen position; "this" is the map
    // because the handler is registered with map.on('click', ...).
    L.popup()
        .setLatLng(e.latlng)
        .setContent(popupContent)
        .openOn(this);
}
</script>
"""

# Add the JavaScript to the map
m.get_root().html.add_child(folium.Element(click_js))

# Bind click event. The map object is looked up through the global variable
# that shares its name with the id of the map's container element.
click_handler = """
<script>
document.addEventListener('DOMContentLoaded', function() {
    // Wait for map to be ready
    setTimeout(function() {
        var maps = document.querySelectorAll('.folium-map');
        if (maps.length > 0) {
            var mapId = maps[0].id;
            var map = window[mapId];
            if (map) {
                map.on('click', onMapClick);
            }
        }
    }, 1000);
});
</script>
"""
m.get_root().html.add_child(folium.Element(click_handler))

# Add instructions overlay
instructions_html = '''
<div style="position: fixed; 
    top: 10px; left: 50px; width: 350px; 
    background-color: white; border:2px solid #3388ff; 
    z-index:9999; font-size:14px; padding: 10px; border-radius: 5px;">
    <b>Interactive Tidal Data Query</b><br><br>
    <b>Instructions:</b><br>
    1. Hover over location boundaries to see details<br>
    2. Click anywhere on the map to select a point<br>
    3. Click "Copy to Clipboard" in the popup<br>
    4. Scroll to "Step 3" and paste coordinates<br>
    5. Run the query cell to see results
</div>
'''
m.get_root().html.add_child(folium.Element(instructions_html))

print("Map ready! Click on the map to select coordinates.")
m

## Step 3: Query Point Data

Copy the coordinates from the map click and paste them below, then run this cell.

In [None]:
# ==============================================================================
# PASTE CLICKED COORDINATES HERE
# ==============================================================================
QUERY_LAT = 49.94
QUERY_LON = -174.96

# ==============================================================================
# Query and Display Results
# ==============================================================================
EARTH_RADIUS_KM = 6371.0088  # mean Earth radius


def haversine_km(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are in decimal degrees. Unlike a Euclidean distance in
    degree space, this accounts for meridians converging away from the equator
    and stays correct for pairs that straddle the +/-180 degree meridian.
    """
    phi1, lam1, phi2, lam2 = np.radians([lat1, lon1, lat2, lon2])
    a = (np.sin((phi2 - phi1) / 2) ** 2
         + np.cos(phi1) * np.cos(phi2) * np.sin((lam2 - lam1) / 2) ** 2)
    # Clamp against floating-point overshoot before taking the arcsin
    return float(2 * EARTH_RADIUS_KM * np.arcsin(np.sqrt(min(1.0, a))))


# Reset the results first so a failed query cannot leave a previous point's
# data behind for the plotting cell to pick up.
df = None
closest_point = None

print(f"Querying point: ({QUERY_LAT:.6f}, {QUERY_LON:.6f})")
print("=" * 80)

# Find nearest grid (used for file lookup only)
result = query.query_nearest_point(lat=QUERY_LAT, lon=QUERY_LON, load_details=True)

if result is None:
    print("No data found near this location")
elif not result['details']['points']:
    print("No data points are listed for the nearest location")
else:
    details = result['details']

    # Find closest point within grid based on filename coordinates
    # (min() keeps the first point on ties, like a strict '<' scan would)
    closest_point = min(
        details['points'],
        key=lambda p: haversine_km(QUERY_LAT, QUERY_LON, p['lat'], p['lon']),
    )
    distance_km = haversine_km(
        QUERY_LAT, QUERY_LON, closest_point['lat'], closest_point['lon']
    )

    print(f"\nNearest Data Point:")
    print(f"  Face ID: {closest_point['face']}")
    print(f"  Location: {details['location']}")
    print(f"  Temporal Resolution: {details['temporal']}")
    print(f"  Coordinates: ({closest_point['lat']:.6f}, {closest_point['lon']:.6f})")
    print(f"  Distance from query: {distance_km:.2f} km (great-circle)")

    # Construct file path: drop a leading "<location>/" from the manifest path
    # so the location directory is not repeated when joining below.
    location = details['location']
    file_path_str = closest_point['file_path']

    if file_path_str.startswith(location + '/'):
        relative_path = file_path_str[len(location) + 1:]
    else:
        relative_path = file_path_str

    parquet_file = BASE_DATA_DIR / location / "b4_vap_partition" / relative_path

    print(f"\nParquet File:")
    print(f"  {parquet_file}")

    if not parquet_file.exists():
        print(f"\nERROR: File not found at {parquet_file}")
    else:
        # Load data
        print(f"\nLoading data...")
        df = pd.read_parquet(parquet_file)

        # Set time as index if it exists
        has_time_index = 'time' in df.columns
        if has_time_index:
            df = df.set_index('time')
            print("  Set 'time' column as index")
        else:
            # The file may already carry a datetime index
            has_time_index = isinstance(df.index, pd.DatetimeIndex)

        # Check if lat/lon columns exist in the data
        has_coords = 'lat' in df.columns and 'lon' in df.columns

        if has_coords:
            # Use actual coordinates from the data
            actual_lat = df['lat'].iloc[0] if len(df) > 0 else closest_point['lat']
            actual_lon = df['lon'].iloc[0] if len(df) > 0 else closest_point['lon']

            # Recalculate distance using actual coordinates from file
            actual_distance_km = haversine_km(QUERY_LAT, QUERY_LON, actual_lat, actual_lon)

            print(f"\nActual Data Point Coordinates (from file):")
            print(f"  Latitude: {actual_lat:.6f}")
            print(f"  Longitude: {actual_lon:.6f}")
            print(f"  Distance from query: {actual_distance_km:.2f} km (great-circle)")

        # Basic info
        print(f"\nDataset Information:")
        print(f"  Rows: {len(df):,}")
        print(f"  Columns: {len(df.columns)}")
        if has_time_index:
            print(f"  Time range: {df.index.min()} to {df.index.max()}")
        else:
            print("  Time range: unavailable (no 'time' column or datetime index)")
        print(f"  File size: {parquet_file.stat().st_size / (1024**2):.2f} MB")

        # Display column names
        print(f"\nColumns:")
        for col in df.columns:
            print(f"  - {col}")

        # Show head
        print(f"\nData Preview (first 5 rows):")
        display(df.head(5))

        # Show summary statistics.
        # numeric_only=True keeps text columns from raising in the reductions;
        # such columns still appear in the table with their NaN count.
        print(f"\nSummary Statistics:")
        summary = pd.DataFrame({
            'min': df.min(numeric_only=True),
            'mean': df.mean(numeric_only=True),
            'max': df.max(numeric_only=True),
            'std': df.std(numeric_only=True),
            'nan_count': df.isna().sum()
        }).reindex(df.columns)  # restore the file's column order
        display(summary)

<cell_type>markdown</cell_type>## Step 4: Visualize Time Series

Plot the tidal data variables over time.

In [None]:
import matplotlib.pyplot as plt

# Only plot if data was successfully loaded.
# globals().get() avoids a NameError when the query cell has not been run yet.
if globals().get('df') is not None and len(df) > 0:
    # Get numeric columns (exclude lat/lon if they exist)
    plot_cols = [col for col in df.select_dtypes(include=[np.number]).columns 
                 if col not in ['lat', 'lon']]
    
    if len(plot_cols) == 0:
        print("No numeric columns to plot")
    else:
        # Determine number of subplots (a single variable gets one full-width panel)
        n_plots = len(plot_cols)
        n_cols = min(2, n_plots)
        n_rows = (n_plots + n_cols - 1) // n_cols  # Ceiling division
        
        # Face id for the title; guarded in case only df is present in the kernel
        nearest_point = globals().get('closest_point')
        face_label = nearest_point['face'] if nearest_point else 'unknown'
        
        # Create figure with subplots.
        # squeeze=False always returns a 2-D array of Axes, so the flatten
        # below works for every grid shape, including a single panel.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4*n_rows), squeeze=False)
        fig.suptitle(f'Tidal Data Time Series - Face {face_label}', fontsize=16, y=1.001)
        
        # Flatten axes array for easier iteration
        axes = axes.flatten()
        
        # Plot each variable
        for idx, col in enumerate(plot_cols):
            ax = axes[idx]
            df[col].plot(ax=ax, linewidth=0.8)
            ax.set_title(f'{col}', fontsize=12)
            ax.set_xlabel('Time')
            ax.set_ylabel(col)
            ax.grid(True, alpha=0.3)
            
            # Format x-axis for better readability
            ax.tick_params(axis='x', rotation=45)
        
        # Hide any unused subplots (odd number of variables leaves one empty)
        for idx in range(len(plot_cols), len(axes)):
            axes[idx].set_visible(False)
        
        plt.tight_layout()
        plt.show()
        
        # Additional plots: Correlation matrix if multiple variables
        if len(plot_cols) > 1:
            print("\nVariable Correlations:")
            corr = df[plot_cols].corr()
            display(corr)
            
            # Plot correlation heatmap
            fig, ax = plt.subplots(figsize=(10, 8))
            im = ax.imshow(corr, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
            
            # Set ticks and labels
            ax.set_xticks(np.arange(len(plot_cols)))
            ax.set_yticks(np.arange(len(plot_cols)))
            ax.set_xticklabels(plot_cols, rotation=45, ha='right')
            ax.set_yticklabels(plot_cols)
            
            # Add colorbar
            cbar = plt.colorbar(im, ax=ax)
            cbar.set_label('Correlation', rotation=270, labelpad=20)
            
            # Add correlation values as text
            for i in range(len(plot_cols)):
                for j in range(len(plot_cols)):
                    ax.text(j, i, f'{corr.iloc[i, j]:.2f}',
                            ha="center", va="center", color="black", fontsize=10)
            
            ax.set_title('Variable Correlation Matrix')
            plt.tight_layout()
            plt.show()
else:
    print("No data available to plot. Please run the query cell above first.")

## Export Data

Save the queried data to a file: