# Filter Daratan dari GeoJSON

Notebook ini untuk filter titik-titik daratan dari GeoJSON menggunakan data batimetri.

## Langkah-langkah:
1. Load data batimetri
2. Resample batimetri ke grid yang sama dengan HSI data
3. Filter GeoJSON: hanya export points dengan elevation <= 0 (lautan)
4. Re-export GeoJSON tanpa daratan

## 1. Import Libraries & Load Data

In [1]:
import numpy as np
import netCDF4
import json
import os
from scipy.interpolate import griddata
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
# File paths
BATIMETRI_FILE = '../BatimetriSelatSunda.nc'
MONTHLY_DATA_FILE = '../data/processed/monthly_hsi_data.npz'
GEOJSON_DIR = '../data/geojson'

# Load monthly data untuk mendapatkan grid coordinates
monthly_data = np.load(MONTHLY_DATA_FILE)
lat_grid = monthly_data['lat_grid']
lon_grid = monthly_data['lon_grid']
months = monthly_data['months']

if isinstance(months, np.ndarray):
    months = months.tolist()

print(f"✅ Monthly data loaded!")
print(f"Grid: {len(lat_grid)} x {len(lon_grid)}")
print(f"Months: {len(months)}")

✅ Monthly data loaded!
Grid: 28 x 29
Months: 48


## 2. Load & Resample Batimetri Data

In [3]:
# Load batimetri data
nc_batimetri = netCDF4.Dataset(BATIMETRI_FILE, 'r')

# Get variables
lat_bat = nc_batimetri.variables['lat'][:]
lon_bat = nc_batimetri.variables['lon'][:]
elevation = nc_batimetri.variables['elevation'][:]

print(f"✅ Batimetri data loaded!")
print(f"Original grid: {len(lat_bat)} x {len(lon_bat)}")
print(f"Elevation range: {np.nanmin(elevation):.1f} to {np.nanmax(elevation):.1f} m")
print(f"  (Negative = lautan, Positive = daratan)")

# Create meshgrid for original batimetri
lon_bat_mesh, lat_bat_mesh = np.meshgrid(lon_bat, lat_bat)

# Flatten for interpolation
points_bat = np.column_stack((lon_bat_mesh.ravel(), lat_bat_mesh.ravel()))
values_bat = elevation.ravel()

# Create target grid (same as HSI data)
lon_target_mesh, lat_target_mesh = np.meshgrid(lon_grid, lat_grid)
points_target = np.column_stack((lon_target_mesh.ravel(), lat_target_mesh.ravel()))

# Interpolate batimetri to target grid
print(f"\nInterpolating batimetri to target grid...")
elevation_resampled = griddata(
    points_bat,
    values_bat,
    points_target,
    method='linear',
    fill_value=np.nan
)

elevation_resampled = elevation_resampled.reshape(lon_target_mesh.shape)

print(f"✅ Batimetri resampled!")
print(f"Resampled elevation range: {np.nanmin(elevation_resampled):.1f} to {np.nanmax(elevation_resampled):.1f} m")
print(f"Points in ocean (elevation <= 0): {np.sum(elevation_resampled <= 0)} / {elevation_resampled.size}")
print(f"Points on land (elevation > 0): {np.sum(elevation_resampled > 0)} / {elevation_resampled.size}")

nc_batimetri.close()

✅ Batimetri data loaded!
Original grid: 588 x 725
Elevation range: -6746.0 to 2946.0 m
  (Negative = lautan, Positive = daratan)

Interpolating batimetri to target grid...
✅ Batimetri resampled!
Resampled elevation range: -2055.5 to 945.5 m
Points in ocean (elevation <= 0): 544 / 812
Points on land (elevation > 0): 181 / 812


## 3. Filter GeoJSON Files (Remove Land Points)

In [4]:
import time

# Create mask: True for ocean points (elevation <= 0), False for land
ocean_mask = elevation_resampled <= 0

print(f"=== Filtering GeoJSON Files ===")
print(f"Ocean mask: {np.sum(ocean_mask)} ocean points, {np.sum(~ocean_mask)} land points")

# Create meshgrid for coordinates
lon_mesh, lat_mesh = np.meshgrid(lon_grid, lat_grid)

start_time = time.time()
filtered_files = []

for i, month_str in enumerate(months):
    # Load original GeoJSON
    filename = f"hsi_{month_str.replace('-', '_')}.geojson"
    filepath = os.path.join(GEOJSON_DIR, filename)
    
    if not os.path.exists(filepath):
        print(f"⚠️  File not found: {filename}")
        continue
    
    with open(filepath, 'r', encoding='utf-8') as f:
        geojson = json.load(f)
    
    # Filter features: only keep ocean points
    filtered_features = []
    
    for feature in geojson['features']:
        coords = feature['geometry']['coordinates']
        lon = coords[0]
        lat = coords[1]
        
        # Find closest grid point
        lat_idx = np.argmin(np.abs(lat_grid - lat))
        lon_idx = np.argmin(np.abs(lon_grid - lon))
        
        # Check if point is in ocean
        if ocean_mask[lat_idx, lon_idx]:
            filtered_features.append(feature)
    
    # Create filtered GeoJSON
    filtered_geojson = {
        "type": "FeatureCollection",
        "features": filtered_features
    }
    
    # Save filtered GeoJSON (overwrite original)
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(filtered_geojson, f, indent=2, ensure_ascii=False)
    
    filtered_files.append({
        'month': month_str,
        'original_features': len(geojson['features']),
        'filtered_features': len(filtered_features),
        'removed': len(geojson['features']) - len(filtered_features)
    })
    
    if (i + 1) % 6 == 0 or i == 0:
        print(f"  Filtered {i+1}/{len(months)}: {month_str} ({len(filtered_features)}/{len(geojson['features'])} features)")

elapsed = time.time() - start_time
print(f"\n✅ Filtering complete in {elapsed:.1f}s!")
print(f"\nSummary:")
total_original = sum(f['original_features'] for f in filtered_files)
total_filtered = sum(f['filtered_features'] for f in filtered_files)
total_removed = sum(f['removed'] for f in filtered_files)
print(f"  Total original features: {total_original:,}")
print(f"  Total filtered features: {total_filtered:,}")
print(f"  Total removed (land): {total_removed:,} ({100*total_removed/total_original:.1f}%)")

=== Filtering GeoJSON Files ===
Ocean mask: 544 ocean points, 268 land points
  Filtered 1/48: 2021-01 (544/812 features)
  Filtered 6/48: 2021-06 (544/812 features)
  Filtered 12/48: 2021-12 (544/812 features)
  Filtered 18/48: 2022-06 (544/812 features)
  Filtered 24/48: 2022-12 (544/812 features)
  Filtered 30/48: 2023-06 (544/812 features)
  Filtered 36/48: 2023-12 (544/812 features)
  Filtered 42/48: 2024-06 (544/812 features)
  Filtered 48/48: 2024-12 (544/812 features)

✅ Filtering complete in 9.5s!

Summary:
  Total original features: 38,976
  Total filtered features: 26,112
  Total removed (land): 12,864 (33.0%)


## 4. Update Metadata

In [5]:
# Update metadata
metadata_file = os.path.join(GEOJSON_DIR, 'metadata.json')

with open(metadata_file, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Update available_data with filtered feature counts
for item in metadata['available_data']:
    month_str = f"{item['year']}-{str(item['month']).zfill(2)}"
    filtered_info = next((f for f in filtered_files if f['month'] == month_str), None)
    if filtered_info:
        item['features'] = filtered_info['filtered_features']
        item['original_features'] = filtered_info['original_features']
        item['land_points_removed'] = filtered_info['removed']

# Add filter info to metadata
metadata['filter_applied'] = {
    'description': 'Land points filtered using bathymetry data',
    'threshold': 'elevation <= 0 (ocean only)',
    'total_land_points_removed': int(total_removed)
}

# Save updated metadata
with open(metadata_file, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2, ensure_ascii=False)

print(f"✅ Metadata updated!")

✅ Metadata updated!


## 5. Summary

In [6]:
print("=== FILTERING SUMMARY ===")
print("\n✅ Land points filtered successfully!")
print("\nWhat was done:")
print("1. ✅ Loaded batimetri data")
print("2. ✅ Resampled batimetri to HSI grid")
print("3. ✅ Filtered all GeoJSON files (removed land points)")
print("4. ✅ Updated metadata")
print("\nResult:")
print(f"  - {len(filtered_files)} GeoJSON files filtered")
print(f"  - {total_removed:,} land points removed")
print(f"  - {total_filtered:,} ocean points remaining")
print("\nNext Steps:")
print("- Refresh frontend to see filtered data")
print("- Map will now only show ocean points")

=== FILTERING SUMMARY ===

✅ Land points filtered successfully!

What was done:
1. ✅ Loaded batimetri data
2. ✅ Resampled batimetri to HSI grid
3. ✅ Filtered all GeoJSON files (removed land points)
4. ✅ Updated metadata

Result:
  - 48 GeoJSON files filtered
  - 12,864 land points removed
  - 26,112 ocean points remaining

Next Steps:
- Refresh frontend to see filtered data
- Map will now only show ocean points
