# Interactive Tidal Data Point Query

This notebook provides an interactive map to:
- Visualize the tidal data grid coverage
- Click on the map to query data at specific coordinates
- Load and preview parquet data for selected points

## Setup

In [None]:
import json
from pathlib import Path
import pandas as pd
import numpy as np
import folium
from folium import plugins
from IPython.display import display, HTML
import matplotlib.pyplot as plt

from query_tidal_manifest import TidalManifestQuery

## Configuration

In [None]:
# Root directory of the hindcast datasets; parquet paths are built from it.
BASE_DATA_DIR = Path(
    "/projects/hindcastra/Tidal/datasets/high_resolution_tidal_hindcast"
)

# JSON manifest handed to TidalManifestQuery.
MANIFEST_PATH = Path("/projects/hindcastra/Tidal/datasets/high_resolution_tidal_hindcast/manifests/v0.3.0/manifest.json")

# Local-development overrides -- uncomment and adjust as needed:
# MANIFEST_PATH = Path("./manifests/v0.3.0/manifest.json")
# BASE_DATA_DIR = Path("./data")

## Load Manifest and Initialize Query

In [None]:
# Load the manifest and build the query helper used by every later cell.
print(f"Loading manifest from: {MANIFEST_PATH}")

# Fail early with an actionable message instead of a traceback from inside
# the query helper when the configured path is wrong for this machine.
if not MANIFEST_PATH.exists():
    raise FileNotFoundError(
        f"Manifest not found at {MANIFEST_PATH}. "
        "Update MANIFEST_PATH in the Configuration cell."
    )

query = TidalManifestQuery(MANIFEST_PATH)

# Summarise what was loaded.
print("\nManifest loaded successfully!")
print(f"Total grids: {query.total_grids:,}")
print(f"Grid resolution: {query.grid_resolution_deg}°")
print(f"Spatial bounds: {query.manifest['spatial_bounds']}")

## Create Interactive Map with Grid Overlay

This map shows all available data grids. The grid is visualized using:
- **Heat map layer**: Shows density of data coverage
- **Click handler**: Click anywhere on the map to query nearest data point

In [None]:
# Centre the map on the midpoint of the manifest's bounding box.
bounds = query.manifest['spatial_bounds']
center_lat = (bounds['lat_min'] + bounds['lat_max']) / 2
center_lon = (bounds['lon_min'] + bounds['lon_max']) / 2

# Create base map
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=5,
    tiles='OpenStreetMap'
)

# Add alternative base maps (selectable once a LayerControl is added).
folium.TileLayer('cartodbpositron', name='CartoDB Positron').add_to(m)

# Terrain layer. The former 'Stamen Terrain' preset is not usable any more
# (the Stamen-hosted tiles were retired and recent folium releases reject the
# name as a custom tile set without attribution), so an explicit tile URL with
# its attribution is used instead; this form works across folium versions.
folium.TileLayer(
    tiles='https://{s}.tile.opentopomap.org/{z}/{x}/{y}.png',
    attr=(
        'Map data: &copy; OpenStreetMap contributors, SRTM | '
        'Map style: &copy; OpenTopoMap (CC-BY-SA)'
    ),
    name='Terrain',
    max_zoom=17,
).add_to(m)

print(f"Creating map centered at ({center_lat:.2f}, {center_lon:.2f})")
print(f"Processing {len(query.grid_lats):,} grid points...")

In [None]:
# Prepare grid data for visualization.
# For performance, the grid is downsampled when it has too many points.
MAX_POINTS_TO_DISPLAY = 50000

n_grid_points = len(query.grid_lats)
if n_grid_points > MAX_POINTS_TO_DISPLAY:
    # Ceiling division: floor division would give step == 1 (no downsampling
    # at all) for any count between 1x and 2x the cap.
    step = -(-n_grid_points // MAX_POINTS_TO_DISPLAY)
    display_lats = query.grid_lats[::step]
    display_lons = query.grid_lons[::step]
    print(f"Downsampling to {len(display_lats):,} points for visualization")
else:
    display_lats = query.grid_lats
    display_lons = query.grid_lons

# Heat map input: one [lat, lon] pair per displayed grid point, as plain floats.
heat_data = [[float(lat), float(lon)] for lat, lon in zip(display_lats, display_lons)]

# Add heat map layer.
# Gradient stops are keyed by strings: some folium releases post-process option
# keys as strings and fail on float keys, while string keys render to the same
# JSON in every release.
plugins.HeatMap(
    heat_data,
    name='Data Coverage',
    min_opacity=0.3,
    radius=8,
    blur=10,
    gradient={
        '0.0': 'blue',
        '0.5': 'lime',
        '1.0': 'red'
    }
).add_to(m)

# Clicking the map opens a popup with the clicked latitude/longitude, which is
# what the instruction panel below tells the reader to do.
m.add_child(folium.LatLngPopup())

# Add layer control (after all layers so every one is listed).
folium.LayerControl().add_to(m)

# Instruction panel overlaid on the map.
title_html = '''
<div style="position: fixed;
            top: 10px; left: 50px; width: 300px; height: 90px;
            background-color: white; border:2px solid grey; z-index:9999;
            font-size:14px; padding: 10px">
<b>Interactive Tidal Data Query</b><br>
Heat map shows data coverage.<br>
Click on the map to see the coordinates of a point.<br>
Enter them in the query cell below to load data.
</div>
'''
m.get_root().html.add_child(folium.Element(title_html))

print("Map created!")
m

## Query Function

Run this cell to define the query function, then enter the coordinates from your map click in the query cell below.

In [None]:
def _nearest_point(points, lat, lon):
    """Return ``(point, distance_deg)`` for the entry of ``points`` closest to (lat, lon)."""
    # A degree of longitude shrinks with cos(latitude); without this factor the
    # east-west separation is overweighted and the wrong point can win.
    lon_scale = np.cos(np.radians(lat))

    def _distance(point):
        lat_diff = point['lat'] - lat
        # Wrap into [-180, 180) so points on the other side of the
        # antimeridian are not treated as ~360 degrees away.
        lon_diff = (point['lon'] - lon + 180.0) % 360.0 - 180.0
        return float(np.hypot(lat_diff, lon_diff * lon_scale))

    closest = min(points, key=_distance)
    return closest, _distance(closest)


def query_and_load_data(lat: float, lon: float, preview_rows: int = 10) -> "pd.DataFrame | None":
    """
    Query data for a clicked point and load the parquet file.

    Looks up the nearest grid through the module-level ``query`` object, picks
    the point of that grid closest to the requested location, loads its parquet
    file from ``BASE_DATA_DIR`` and prints/displays a preview and summary.

    Parameters
    ----------
    lat : float
        Latitude
    lon : float
        Longitude
    preview_rows : int
        Number of rows to preview

    Returns
    -------
    pd.DataFrame or None
        Loaded parquet data, or None when no grid is found, the grid holds no
        points, or the parquet file does not exist.
    """
    print("=" * 80)
    print(f"QUERY: ({lat:.6f}, {lon:.6f})")
    print("=" * 80)

    # Query nearest grid
    result = query.query_nearest_point(lat=lat, lon=lon, load_details=True)

    if result is None:
        print("❌ No data found near this location")
        return None

    # Display results (111 km per degree is the rough conversion used throughout)
    print(f"\n✓ Grid Found: {result['grid_id']}")
    print(f"  Centroid: ({result['centroid'][0]:.6f}, {result['centroid'][1]:.6f})")
    print(f"  Distance: {result['distance_deg']:.6f}° (~{result['distance_deg'] * 111:.2f} km)")

    details = result['details']
    points = details['points']
    print(f"\n📍 Location: {details['location']}")
    print(f"⏱️  Temporal Resolution: {details['temporal']}")
    print(f"📊 Points in grid: {len(points)}")

    # An empty grid would otherwise surface as a TypeError further down.
    if not points:
        print("\n❌ ERROR: Grid contains no data points")
        return None

    # Find closest point
    closest_point, min_distance = _nearest_point(points, lat, lon)

    print("\n🎯 Closest Point:")
    print(f"  Face ID: {closest_point['face']}")
    print(f"  Coordinates: ({closest_point['lat']:.6f}, {closest_point['lon']:.6f})")
    print(f"  Distance: {min_distance:.6f}° (~{min_distance * 111:.2f} km)")

    # Construct file path: strip a leading "<location>/" from the manifest
    # entry, because the location is re-inserted ahead of the partition folder.
    location = details['location']
    file_path_str = closest_point['file_path']

    if file_path_str.startswith(location + '/'):
        relative_path = file_path_str[len(location) + 1:]
    else:
        relative_path = file_path_str

    parquet_file = BASE_DATA_DIR / location / "b4_vap_partition" / relative_path

    print(f"\n📁 File: {parquet_file.name}")
    print(f"   Path: {parquet_file.parent}")

    if not parquet_file.exists():
        print(f"\n❌ ERROR: File not found at {parquet_file}")
        return None

    # Load parquet file
    print("\n📖 Loading data...")
    df = pd.read_parquet(parquet_file)

    print(f"  Rows: {len(df):,}")
    print(f"  Columns: {len(df.columns)}")
    print(f"  Variables: {list(df.columns)}")
    # NOTE(review): assumes the frame is indexed by time -- confirm against the parquet schema.
    print(f"  Time range: {df.index.min()} to {df.index.max()}")

    # Display preview
    print(f"\n📋 Data Preview (first {preview_rows} rows):")
    display(df.head(preview_rows))

    # Show basic statistics
    print("\n📈 Summary Statistics:")
    display(df.describe())

    return df

print("Query function loaded! Use the cell below to query specific coordinates.")

## Interactive Query

Click on the map above to identify coordinates, then enter them below:

In [None]:
# Coordinates to look up -- replace with the values read off the map.
QUERY_LAT, QUERY_LON = 49.94, -174.96

# Run the lookup; df is None when nothing could be loaded.
df = query_and_load_data(lat=QUERY_LAT, lon=QUERY_LON)

## Plot Time Series

Visualize the data loaded from the query above:

In [None]:
if df is not None and len(df.columns) > 0:
    # One stacked panel per column, all sharing the time axis.
    n_vars = len(df.columns)
    # squeeze=False always returns a 2-D array of Axes, so the single-column
    # case needs no special handling; take the only grid column.
    fig, axes = plt.subplots(
        n_vars, 1, figsize=(12, 4 * n_vars), sharex=True, squeeze=False
    )
    axes = axes[:, 0]

    for ax, col in zip(axes, df.columns):
        ax.plot(df.index, df[col], linewidth=0.5)
        ax.set_ylabel(col)
        ax.grid(True, alpha=0.3)
        ax.set_title(f"{col} Time Series")

    axes[-1].set_xlabel('Time')
    plt.tight_layout()
    plt.show()
elif df is not None:
    # plt.subplots(0, 1) raises, so a frame without columns is reported instead.
    print("Loaded data has no columns to plot.")
else:
    print("No data loaded. Run the query cell above first.")

## Advanced: Create Custom Map with Click Handler

This cell creates a map with a click handler that drops a marker at the clicked location; the marker's popup shows the coordinates to copy into the query cell:

In [None]:
# Second map: same coverage overlay, plus click-to-drop markers.
m2 = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=5,
)

# Add grid overlay (reuses heat_data built for the first map)
plugins.HeatMap(
    heat_data,
    name='Data Coverage',
    min_opacity=0.3,
    radius=8,
    blur=10,
).add_to(m2)

# Click handling is delegated to folium's ClickForMarker: each click drops a
# marker, and with popup=None the marker's popup shows its latitude/longitude.
# (A hand-written JavaScript handler string used to be defined here but was
# never attached to the map, so it has been removed.)
m2.add_child(folium.ClickForMarker(popup=None))

# Add instructions
instructions = '''
<div style="position: fixed; 
top: 10px; left: 50px; width: 350px; height: 110px; 
background-color: white; border:2px solid grey; z-index:9999; 
font-size:14px; padding: 10px">
<b>📍 Click to Query Data</b><br>
1. Click anywhere on the map<br>
2. A marker will appear with coordinates<br>
3. Copy the coordinates to the query cell above<br>
4. Run the query cell to load data
</div>
'''
m2.get_root().html.add_child(folium.Element(instructions))

m2

## Export Data

Save the queried data to a file:

In [None]:
if df is not None:
    # File name encodes the queried coordinates; written to the current working directory.
    output_file = f"tidal_data_lat{QUERY_LAT:.4f}_lon{QUERY_LON:.4f}.parquet"
    df.to_parquet(output_file)
    # The check mark was mis-encoded in the original message; restored here.
    print(f"✓ Data saved to: {output_file}")
    print(f"  Size: {Path(output_file).stat().st_size / 1024:.2f} KB")
else:
    print("No data to export. Run a query first.")

## Grid Statistics

Analyze the data grid distribution:

In [None]:
# Plot grid distribution: one histogram per coordinate, side by side.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (axes, values, axis label, extra hist kwargs) for each panel; the longitude
# panel only differs by its colour.
panels = [
    (axes[0], query.grid_lats, 'Latitude', {}),
    (axes[1], query.grid_lons, 'Longitude', {'color': 'orange'}),
]

for ax, values, label, extra in panels:
    ax.hist(values, bins=100, edgecolor='black', alpha=0.7, **extra)
    ax.set_xlabel(label)
    ax.set_ylabel('Number of Grids')
    ax.set_title(f'{label} Distribution of Data Grids')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Numeric summary of the plotted coordinates.
print(f"Total grids: {len(query.grid_lats):,}")
print(f"Latitude range: {query.grid_lats.min():.2f}° to {query.grid_lats.max():.2f}°")
print(f"Longitude range: {query.grid_lons.min():.2f}° to {query.grid_lons.max():.2f}°")