# Fase 5: GeoJSON Export

Notebook ini mengonversi data HSI bulanan ke format GeoJSON.

## Langkah-langkah:
1. Load monthly aggregated data
2. Convert ke format GeoJSON (Point geometry)
3. Generate satu file GeoJSON per bulan (48 file untuk rentang 2021-01 s.d. 2024-12)
4. Save ke folder data/geojson/
5. Generate metadata.json

## 1. Import Libraries & Load Monthly Data

In [8]:
import numpy as np
import json
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

Libraries imported successfully!


In [9]:
# Load the monthly aggregates written by the monthly aggregation notebook.
MONTHLY_DATA_FILE = '../data/processed/monthly_hsi_data.npz'

if not os.path.exists(MONTHLY_DATA_FILE):
    raise FileNotFoundError(
        f"Monthly data file not found: {MONTHLY_DATA_FILE}. "
        "Please run monthly aggregation notebook first."
    )

# np.load returns a lazy NpzFile that keeps the archive open until it is
# closed. Every array is read inside the `with` block so the file handle is
# released as soon as the data is in memory.
with np.load(MONTHLY_DATA_FILE) as data:
    monthly_hsi = data['hsi_total']
    monthly_hsi_chl = data['hsi_chl']
    monthly_hsi_sst = data['hsi_sst']
    monthly_hsi_so = data['hsi_so']
    monthly_chl = data['chl']
    monthly_sst = data['sst']
    monthly_salinity = data['salinity']
    lat_grid = data['lat_grid']
    lon_grid = data['lon_grid']
    months = data['months']

# Work with a plain Python list of month labels from here on
if isinstance(months, np.ndarray):
    months = months.tolist()

print(f"✅ Monthly data loaded successfully!")
print(f"\nData shapes:")
print(f"  HSI: {monthly_hsi.shape}")
print(f"  Grid: {len(lat_grid)} x {len(lon_grid)}")
print(f"  Months: {len(months)}")
print(f"\nDate range: {months[0]} to {months[-1]}")

✅ Monthly data loaded successfully!

Data shapes:
  HSI: (48, 28, 29)
  Grid: 28 x 29
  Months: 48

Date range: 2021-01 to 2024-12


## 2. Create GeoJSON from Monthly Data

In [10]:
def create_geojson_from_monthly_data(month_index, hsi_data, chl_data, sst_data, salinity_data,
                                      hsi_chl_data, hsi_sst_data, hsi_so_data,
                                      lat_grid, lon_grid, month_str):
    """
    Create a GeoJSON FeatureCollection of Point features for one month.

    Parameters:
    - month_index: index of the month along the first axis of the data arrays
    - hsi_data: combined HSI, indexed as [month, lat, lon]
    - chl_data, sst_data, salinity_data: original parameters, same indexing
    - hsi_chl_data, hsi_sst_data, hsi_so_data: per-parameter HSI, same indexing
    - lat_grid, lon_grid: 1-D grid coordinates (rows follow lat_grid,
      columns follow lon_grid)
    - month_str: month label formatted as 'YYYY-MM' (e.g., '2021-01')

    Returns:
    - GeoJSON dict ({"type": "FeatureCollection", "features": [...]})

    Notes:
    - NaN and +/-inf values are written as None (JSON null). Letting inf
      through would make json.dump emit the non-standard `Infinity` token.
    - A grid point is dropped when hsi, chl, sst and salinity are all
      missing; the per-parameter HSI values do not affect that decision.
    """
    def _finite_or_none(value):
        # Plain float for real numbers, None for NaN / +inf / -inf
        return float(value) if np.isfinite(value) else None

    # Parse the label once instead of re-splitting it for every grid point
    label_parts = month_str.split('-')
    year = int(label_parts[0])
    month = int(label_parts[1])

    # Month slices keyed by output property name; insertion order is the
    # order of the keys in each feature's "properties"
    layers = {
        "hsi": hsi_data[month_index, :, :],
        "chl": chl_data[month_index, :, :],
        "sst": sst_data[month_index, :, :],
        "salinity": salinity_data[month_index, :, :],
        "hsi_chl": hsi_chl_data[month_index, :, :],
        "hsi_sst": hsi_sst_data[month_index, :, :],
        "hsi_so": hsi_so_data[month_index, :, :],
    }
    required = ("hsi", "chl", "sst", "salinity")

    features = []
    for i, lat in enumerate(lat_grid):
        for j, lon in enumerate(lon_grid):
            properties = {
                name: _finite_or_none(layer[i, j])
                for name, layer in layers.items()
            }

            # Skip points that carry no combined HSI and no source parameter
            if all(properties[name] is None for name in required):
                continue

            properties["year"] = year
            properties["month"] = month

            features.append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    # GeoJSON positions are ordered [longitude, latitude]
                    "coordinates": [float(lon), float(lat)]
                },
                "properties": properties
            })

    return {
        "type": "FeatureCollection",
        "features": features
    }

print("✅ GeoJSON creation function defined!")

✅ GeoJSON creation function defined!


## 3. Export All Months to GeoJSON

In [11]:
import time

# Destination folder for the per-month files; created if it does not exist
OUTPUT_DIR = '../data/geojson'
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"=== Exporting {len(months)} months to GeoJSON ===")
print(f"Output directory: {OUTPUT_DIR}")

exported_files = []  # one summary record per written file, reused by the metadata step
start_time = time.time()

for i, month_str in enumerate(months):
    # Build the FeatureCollection for slice i of every monthly array
    geojson = create_geojson_from_monthly_data(
        month_index=i,
        hsi_data=monthly_hsi,
        chl_data=monthly_chl,
        sst_data=monthly_sst,
        salinity_data=monthly_salinity,
        hsi_chl_data=monthly_hsi_chl,
        hsi_sst_data=monthly_hsi_sst,
        hsi_so_data=monthly_hsi_so,
        lat_grid=lat_grid,
        lon_grid=lon_grid,
        month_str=month_str,
    )
    feature_count = len(geojson['features'])

    # Files are named hsi_YYYY_MM.geojson
    filename = f"hsi_{month_str.replace('-', '_')}.geojson"
    filepath = os.path.join(OUTPUT_DIR, filename)

    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=2, ensure_ascii=False)

    # Record the file only after it has been written successfully
    label_parts = month_str.split('-')
    exported_files.append({
        'year': int(label_parts[0]),
        'month': int(label_parts[1]),
        'file': filename,
        'features': feature_count
    })

    # Progress line for the first month and then every sixth month
    if i == 0 or (i + 1) % 6 == 0:
        print(f"  Exported {i+1}/{len(months)}: {month_str} ({feature_count} features)")

elapsed = time.time() - start_time
total_features = sum(entry['features'] for entry in exported_files)

print(f"\n✅ Export complete in {elapsed:.1f}s!")
print(f"Total files: {len(exported_files)}")
print(f"Total features: {total_features:,}")

=== Exporting 48 months to GeoJSON ===
Output directory: ../data/geojson
  Exported 1/48: 2021-01 (812 features)
  Exported 6/48: 2021-06 (812 features)
  Exported 12/48: 2021-12 (812 features)
  Exported 18/48: 2022-06 (812 features)
  Exported 24/48: 2022-12 (812 features)
  Exported 30/48: 2023-06 (812 features)
  Exported 36/48: 2023-12 (812 features)
  Exported 42/48: 2024-06 (812 features)
  Exported 48/48: 2024-12 (812 features)

✅ Export complete in 19.9s!
Total files: 48
Total features: 38,976


## 4. Generate Metadata File

In [12]:
# Assemble the metadata section by section, then write it next to the GeoJSON files

# Extent of the coordinate grids
spatial_bounds = {
    "min_lat": float(lat_grid.min()),
    "max_lat": float(lat_grid.max()),
    "min_lon": float(lon_grid.min()),
    "max_lon": float(lon_grid.max())
}

# Spacing between the first two grid coordinates; None when an axis has a single point
lat_resolution = None
if len(lat_grid) > 1:
    lat_resolution = float(abs(lat_grid[1] - lat_grid[0]))

lon_resolution = None
if len(lon_grid) > 1:
    lon_resolution = float(abs(lon_grid[1] - lon_grid[0]))

# Human-readable description of each feature property
property_descriptions = {
    "hsi": "Habitat Suitability Index (0-1)",
    "chl": "Chlorophyll-a concentration (mg/m³)",
    "sst": "Sea Surface Temperature (°C)",
    "salinity": "Salinity (PSU)",
    "hsi_chl": "HSI for Chlorophyll-a (0-1)",
    "hsi_sst": "HSI for Sea Surface Temperature (0-1)",
    "hsi_so": "HSI for Salinity (0-1)"
}

metadata = {
    "title": "HSI Data for Sunda Strait",
    "description": "Monthly Habitat Suitability Index data derived from CHL, SST, and Salinity",
    "date_created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "data_range": {
        "start": months[0],
        "end": months[-1]
    },
    "spatial_bounds": spatial_bounds,
    "grid_resolution": {
        "lat_resolution": lat_resolution,
        "lon_resolution": lon_resolution
    },
    "available_data": exported_files,
    "total_months": len(exported_files),
    "properties": property_descriptions
}

# Write metadata.json into the GeoJSON output folder
metadata_file = os.path.join(OUTPUT_DIR, 'metadata.json')
with open(metadata_file, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2, ensure_ascii=False)

date_span = metadata['data_range']
bounds = metadata['spatial_bounds']

print(f"✅ Metadata saved to {metadata_file}")
print(f"\nMetadata summary:")
print(f"  Date range: {date_span['start']} to {date_span['end']}")
print(f"  Total months: {metadata['total_months']}")
print(f"  Spatial bounds: {bounds['min_lat']:.4f} to {bounds['max_lat']:.4f} lat, ")
print(f"                {bounds['min_lon']:.4f} to {bounds['max_lon']:.4f} lon")

✅ Metadata saved to ../data/geojson\metadata.json

Metadata summary:
  Date range: 2021-01 to 2024-12
  Total months: 48
  Spatial bounds: -6.7750 to -5.4250 lat, 
                104.5625 to 105.9625 lon


## 5. Verify GeoJSON Files

In [13]:
# Verify the exported files: count, size on disk, and the structure of one sample
print("=== Verifying GeoJSON Files ===")

# Every .geojson file in the output folder, in name order
geojson_files = sorted(f for f in os.listdir(OUTPUT_DIR) if f.endswith('.geojson'))

print(f"\nFound {len(geojson_files)} GeoJSON files")

if len(geojson_files) == len(months):
    print("✅ All files present!")
else:
    print(f"⚠️  Expected {len(months)} files, found {len(geojson_files)}")

# Combined size of all GeoJSON files, in bytes
total_size = sum(
    os.path.getsize(os.path.join(OUTPUT_DIR, name)) for name in geojson_files
)

print(f"\nTotal size: {total_size / (1024*1024):.2f} MB")

if geojson_files:
    # The average is only defined when at least one file exists; an empty
    # folder would otherwise raise ZeroDivisionError here
    print(f"Average per file: {total_size / len(geojson_files) / (1024*1024):.2f} MB")

    # Open the first file and report its top-level structure
    sample_file = os.path.join(OUTPUT_DIR, geojson_files[0])
    with open(sample_file, 'r', encoding='utf-8') as f:
        sample_data = json.load(f)

    print(f"\nSample file: {geojson_files[0]}")
    print(f"  Type: {sample_data.get('type')}")
    print(f"  Features: {len(sample_data.get('features', []))}")
    if sample_data.get('features'):
        sample_feature = sample_data['features'][0]
        print(f"  Sample feature properties: {list(sample_feature.get('properties', {}).keys())}")

=== Verifying GeoJSON Files ===

Found 48 GeoJSON files
✅ All files present!

Total size: 19.86 MB
Average per file: 0.41 MB

Sample file: hsi_2021_01.geojson
  Type: FeatureCollection
  Features: 812
  Sample feature properties: ['hsi', 'chl', 'sst', 'salinity', 'hsi_chl', 'hsi_sst', 'hsi_so', 'year', 'month']


## 6. Summary & Next Steps

In [14]:
# Final recap of what this notebook produced and what comes next
print("=== GEOJSON EXPORT SUMMARY ===")
print("\n✅ GeoJSON export completed successfully!")
print("\nWhat was done:")
print("1. ✅ Loaded monthly aggregated data")
print("2. ✅ Converted to GeoJSON format (Point geometry)")
# Report the number of files actually written instead of a hard-coded count
print(f"3. ✅ Generated {len(exported_files)} GeoJSON files (one per month)")
print("4. ✅ Saved to data/geojson/ folder")
print("5. ✅ Generated metadata.json")
print("\nOutput files:")
print(f"  - {len(geojson_files)} GeoJSON files in data/geojson/")
print(f"  - metadata.json with data information")
print("\nNext Steps:")
print("- Backend: Setup Node.js API")
print("  - Create Express server")
print("  - Create endpoints to serve GeoJSON files")
print("  - GET /api/hsi?year=2021&month=1")
print("  - GET /api/hsi/available")
print("\n✅ Jupyter processing phase complete!")

=== GEOJSON EXPORT SUMMARY ===

✅ GeoJSON export completed successfully!

What was done:
1. ✅ Loaded monthly aggregated data
2. ✅ Converted to GeoJSON format (Point geometry)
3. ✅ Generated 36 GeoJSON files (one per month)
4. ✅ Saved to data/geojson/ folder
5. ✅ Generated metadata.json

Output files:
  - 48 GeoJSON files in data/geojson/
  - metadata.json with data information

Next Steps:
- Backend: Setup Node.js API
  - Create Express server
  - Create endpoints to serve GeoJSON files
  - GET /api/hsi?year=2021&month=1
  - GET /api/hsi/available

✅ Jupyter processing phase complete!
