## download data

In [1]:
base_url = "https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02"
# save_dir = "/home/geethen/invasives/hypinvalimap/data"
save_dir = "/mnt/hdd1/invasives/hypinvalimap/data"

files_to_download = []

for x in range(32, 36):
    for y in range(16, 22):
        filename = f"AVIRIS-NG_BIOSCAPE_V02_L3_{x}_{y}_RFL.nc"
        url = f"{base_url}/{x}_{y}/{filename}"
        output_path = f"{save_dir}/{filename}"
        files_to_download.append((url, output_path))

# Print to verify
for url, path in files_to_download:
    print(f"('{url}', '{path}'),")


('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02/32_16/AVIRIS-NG_BIOSCAPE_V02_L3_32_16_RFL.nc', '/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_16_RFL.nc'),
('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02/32_17/AVIRIS-NG_BIOSCAPE_V02_L3_32_17_RFL.nc', '/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_17_RFL.nc'),
('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02/32_18/AVIRIS-NG_BIOSCAPE_V02_L3_32_18_RFL.nc', '/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_18_RFL.nc'),
('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02/32_19/AVIRIS-NG_BIOSCAPE_V02_L3_32_19_RFL.nc', '/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_19_RFL.nc'),
('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02/32_20/AVIRIS-NG_BIOSCAPE_V02_L3_32_20_RFL.nc', '/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_20_RFL.nc'),
('https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape

In [2]:
import os
import requests
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor
from time import sleep

MAX_RETRIES = 3
PARALLEL_DOWNLOADS = 2

def download_file(url, output_path):
    if os.path.exists(output_path):
        return f"⏭️ Skipped (already exists): {output_path}"

    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
    for attempt in range(MAX_RETRIES):
        try:
            with requests.get(url, stream=True, timeout=30) as r:
                r.raise_for_status()
                total = int(r.headers.get('content-length', 0))
                with open(output_path, 'wb') as f, tqdm(
                    total=total, unit='B', unit_scale=True, desc=os.path.basename(output_path), ncols=80
                ) as pbar:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                            pbar.update(len(chunk))
            return f"✅ Downloaded: {output_path}"
        except Exception as e:
            print(f"⚠️ Retry {attempt+1} failed for {url}: {e}")
            sleep(2)
    return f"❌ Failed to download: {url}"

# Parallel download with progress bars
with ThreadPoolExecutor(max_workers=PARALLEL_DOWNLOADS) as executor:
    futures = [executor.submit(download_file, url, path) for url, path in files_to_download]
    for future in futures:
        print(future.result())


⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_16_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_17_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_18_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_19_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_20_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_21_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_33_16_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_33_17_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_33_18_RFL.nc
⏭️ Skipped (already exists): /mnt/hdd1/invasiv

In [5]:
from tqdm.auto import tqdm
import xarray as xr
from pathlib import Path

# Base input and output paths
save_dir = Path("/mnt/hdd1/invasives/hypinvalimap/data")
zarr_root = save_dir / "2023_bioscape_invasives_tiles"

# Ensure output directory exists
zarr_root.mkdir(parents=True, exist_ok=True)

# Build list of NetCDF files and determine which need to be converted
files_to_convert = []
already_converted = []

for x in range(32, 36):
    for y in range(16, 22):
        filename = f"AVIRIS-NG_BIOSCAPE_V02_L3_{x}_{y}_RFL.nc"
        nc_path = save_dir / filename
        zarr_path = zarr_root / f"tile_{x}_{y}.zarr"
        if zarr_path.exists():
            already_converted.append(zarr_path)
        else:
            files_to_convert.append((nc_path, x, y))

# Print already converted files
print("Already converted:")
for z in already_converted:
    print(f" - {z}")

# Optional: convert remaining files
if len(files_to_convert) >0:
    # Print files still needing conversion
    print("\nFiles still to convert:")
    for nc_file, x, y in files_to_convert:
        print(f" - {nc_file}")
        
    print(f"\nConverting {len(files_to_convert)} files to Zarr format...")
    for nc_file, x, y in tqdm(files_to_convert, desc="Writing tiles to Zarr"):
        ds = xr.open_dataset(nc_file, engine='rasterio', chunks={"x": 128, "y": 128})
        tile_zarr_path = zarr_root / f"tile_{x}_{y}.zarr"
        ds.to_zarr(tile_zarr_path, mode="w", consolidated=True)


Already converted:
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_16.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_17.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_18.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_19.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_20.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_21.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_33_16.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_33_17.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_33_18.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_33_19.zarr
 - /mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_33_20.zarr
 - /mnt/hdd1/invasives/h

# 2023: Extract spectral signatures

In [1]:
import geopandas as gpd
import xarray as xr
from shapely.geometry import box, mapping
import rioxarray as riox
import numpy as np
import hvplot.xarray
import holoviews as hv
import xvec
import matplotlib.pyplot as plt
import pandas as pd
from dask.diagnostics import ProgressBar
import warnings

warnings.filterwarnings('ignore')
hvplot.extension('bokeh')

In [2]:
ds = xr.open_dataset(r"/mnt/hdd1/invasives/hypinvalimap/data/AVIRIS-NG_BIOSCAPE_V02_L3_32_16_RFL.nc", engine="rasterio", chunks="auto")
dscrs = ds.rio.crs
ds


Unnamed: 0,Array,Chunk
Bytes,6.33 GiB,122.07 MiB
Shape,"(425, 2000, 2000)","(8, 2000, 2000)"
Dask graph,54 chunks in 2 graph layers,54 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 6.33 GiB 122.07 MiB Shape (425, 2000, 2000) (8, 2000, 2000) Dask graph 54 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  425,

Unnamed: 0,Array,Chunk
Bytes,6.33 GiB,122.07 MiB
Shape,"(425, 2000, 2000)","(8, 2000, 2000)"
Dask graph,54 chunks in 2 graph layers,54 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 15.26 MiB 15.26 MiB Shape (1, 2000, 2000) (1, 2000, 2000) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,15.26 MiB,15.26 MiB
Shape,"(1, 2000, 2000)","(1, 2000, 2000)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [28]:
ds = xr.open_zarr(r"/mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles/tile_32_21.zarr", chunks="auto")
ds.rio.set_crs(dscrs, inplace=True)
ds

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12.67 GiB,128.00 kiB
Shape,"(425, 2000, 2000)","(1, 128, 128)"
Dask graph,108800 chunks in 2 graph layers,108800 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 12.67 GiB 128.00 kiB Shape (425, 2000, 2000) (1, 128, 128) Dask graph 108800 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  425,

Unnamed: 0,Array,Chunk
Bytes,12.67 GiB,128.00 kiB
Shape,"(425, 2000, 2000)","(1, 128, 128)"
Dask graph,108800 chunks in 2 graph layers,108800 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 30.52 MiB 128.00 kiB Shape (1, 2000, 2000) (1, 128, 128) Dask graph 256 chunks in 2 graph layers Data type float64 numpy.ndarray",2000  2000  1,

Unnamed: 0,Array,Chunk
Bytes,30.52 MiB,128.00 kiB
Shape,"(1, 2000, 2000)","(1, 128, 128)"
Dask graph,256 chunks in 2 graph layers,256 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [29]:
print(ds.rio.crs)

PROJCS["unnamed",GEOGCS["Ellipse Based",DATUM["Ellipse Based",SPHEROID["Unnamed",6378137,298.257223562997]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["latitude_of_center",-30],PARAMETER["longitude_of_center",25],PARAMETER["standard_parallel_1",-22],PARAMETER["standard_parallel_2",-38],PARAMETER["false_easting",1400000],PARAMETER["false_northing",1300000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


In [30]:
import xarray as xr
import geopandas as gpd
from shapely.geometry import box

def get_bounds_as_gdf(ds):
    """
    Given a NetCDF file with georeferenced raster data, returns a GeoDataFrame
    with a single bounding box geometry representing the dataset extent.
    
    Parameters:
        nc_path (str): Path to the NetCDF file.
    
    Returns:
        gpd.GeoDataFrame: A GeoDataFrame with one row containing the bounding box.
    """
    # Extract coordinate variables (commonly named 'x' and 'y')
    try:
        x = ds['x'].values
        y = ds['y'].values
    except KeyError:
        raise ValueError("Could not find coordinate variables 'x' and 'y' in the dataset.")

    # Compute bounds
    bounds = (x.min(), y.min(), x.max(), y.max())
    geom = box(*bounds)

    # Get CRS from dataset
    try:
        crs = ds.rio.crs
    except AttributeError:
        crs = None  # Will be set to None if CRS is missing

    return gpd.GeoDataFrame(index=[0], geometry=[geom], crs=crs).to_crs(epsg=4326)

geo = get_bounds_as_gdf(ds)

# Clip the raw data to the bounding box
# points = joined.clip(geo)
# m = points.explore()
# m = geo.explore(m=m, color="red", name="Bounding Box")
# m


In [None]:
# Divide reflectance values by 1000 before visualization
ds_scaled = ds / 1000
h = ds_scaled.sel(wavelength=[660, 570, 480], method="nearest").hvplot.rgb(
    'x', 'y',
    rasterize=True,
    data_aspect=1,
    bands='wavelength',
    frame_width=400
)
h

In [31]:
# Load invasive species point data
# gdf = gpd.read_file(r'C:\Users\coach\myfiles\postdoc\Invasives\code\hypinvalimap\data\aliens_sep2018.shp')
# gdf = gpd.read_file(r'/home/geethen/invasives/hypinvalimap/data/2018_2023_MgnChg.shp')
# gdf = gdf[gdf.geometry.notnull()]

# gdf2 = gpd.read_file(r'/home/geethen/invasives/hypinvalimap/data/aliens_sep2018.shp')
# gdf2.dropna(subset=['geometry'], inplace=True)
# gdf2 = gdf2[gdf2.geometry != 'None']
# gdf2.shape

# joined = gpd.sjoin_nearest(gdf2[['fid', 'class', 'group', 'geometry']], gdf[['geometry','change', 'notes']], how='inner', distance_col='dist')

# joined.isna().sum()

# gdf2
# gdf.head()

# v2
joined = gpd.read_file(r'/home/geethen/invasives/hypinvalimap/data/aliens_sep2018_bioscape2023.shp')
joined.columns

Index(['fid', 'class', 'group', 'layer', 'path', '2018_2023', 'change',
       'notes', '2023_class', 'geometry'],
      dtype='object')

In [32]:
m = joined.explore()
geo = get_bounds_as_gdf(ds)
m = geo.explore(m=m, color="red", name="Bounding Box")
m

In [33]:
def extract_points(ds, points, crs):
    """
    Extracts data values at specified points from a locally stored dataset.

    Parameters:
    - ds: str, path to the dataset (e.g., netCDF or GeoTIFF) or dataset.
    - points: GeoDataFrame, point locations to extract data.

    Returns:
    - DataFrame containing extracted data values and point indices in the same crs as ds.
    """

    if isinstance(ds, str):
        ds = xr.open_zarr(ds)
        ds.rio.set_crs(crs, inplace=True)
    
    # get the bounding box of the dataset
    geo = get_bounds_as_gdf(ds).to_crs(crs)

    # Reproject points to match the CRS of the dataset
    if points.crs != crs:
        points = points.to_crs(crs)

    # Clip the raw data to the bounding box
    points = points.clip(geo)
    print(f'got {points.shape[0]} point from {ds.title}')

    # Extract data at points
    extracted = ds.xvec.extract_points(
        points['geometry'], 
        x_coords="x", 
        y_coords="y", 
        index=True
    )
    
    return extracted

df = extract_points(ds, joined, crs = dscrs)
df

got 14 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_21)


Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.48 kiB,112 B
Shape,"(425, 14)","(1, 14)"
Dask graph,425 chunks in 4 graph layers,425 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 46.48 kiB 112 B Shape (425, 14) (1, 14) Dask graph 425 chunks in 4 graph layers Data type float64 numpy.ndarray",14  425,

Unnamed: 0,Array,Chunk
Bytes,46.48 kiB,112 B
Shape,"(425, 14)","(1, 14)"
Dask graph,425 chunks in 4 graph layers,425 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [34]:
class_xr =joined[['2023_class','group']].to_xarray()
ds = df.merge(class_xr.astype(int),join='left')
ds

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.48 kiB,112 B
Shape,"(425, 14)","(1, 14)"
Dask graph,425 chunks in 4 graph layers,425 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 46.48 kiB 112 B Shape (425, 14) (1, 14) Dask graph 425 chunks in 4 graph layers Data type float64 numpy.ndarray",14  425,

Unnamed: 0,Array,Chunk
Bytes,46.48 kiB,112 B
Shape,"(425, 14)","(1, 14)"
Dask graph,425 chunks in 4 graph layers,425 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 112 B 112 B Shape (1, 14) (1, 14) Dask graph 1 chunks in 4 graph layers Data type float64 numpy.ndarray",14  1,

Unnamed: 0,Array,Chunk
Bytes,112 B,112 B
Shape,"(1, 14)","(1, 14)"
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [35]:
with ProgressBar():
    ds = ds.persist()

[########################################] | 100% Completed | 35.16 s


In [43]:
ds.reflectance.values

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]], shape=(425, 14))

In [None]:
# base_url = "https://popo.jpl.nasa.gov/pub/PBrodrick/bioscape/mosaic_v02"
# # save_dir = "/home/geethen/invasives/hypinvalimap/data"
# save_dir = "/mnt/hdd1/invasives/hypinvalimap/data/2023_bioscape_invasives_tiles"

# files_to_download = []

# for x in range(32, 36):
#     for y in range(16, 22):
#         filename = f"AVIRIS-NG_BIOSCAPE_V02_L3_{x}_{y}_RFL.zarr"
#         url = f"{base_url}/{x}_{y}/{filename}"
#         output_path = f"{save_dir}/{filename}"
#         files_to_download.append((url, output_path))
        
# df_all = [extract_points(file, joined, crs=dscrs) for _, file in files_to_download]

# #combine the results into a single xarray
# df_all  = xr.concat(df_all, dim='file')
# df_all

In [36]:
def xr_to_gdf(ds, crs=None):
    """
    Convert an xarray DataArray to a GeoDataFrame.
    
    Parameters:
        xr_data (xarray.DataArray): The xarray data to convert.
        crs (str or dict, optional): Coordinate reference system for the GeoDataFrame.
        
    Returns:
        gpd.GeoDataFrame: The converted GeoDataFrame.
    """
    df = ds.xvec.to_geodataframe(long=True).pivot_table(
    index=['geometry'],  # Replace with your actual spatial and other relevant dimensions
    columns='wavelength',
    values='reflectance'
    ).reset_index()
    df.columns = [str(col) if col != 'geometry' else col for col in df.columns]
    gdf = gpd.GeoDataFrame(df, geometry=df['geometry'], crs=crs).to_crs(epsg=4326)
    return gdf

test = xr_to_gdf(ds, crs = dscrs)

In [37]:
test.explore()

In [25]:
# Extract data for all points
import os
import xarray as xr
from tqdm.auto import tqdm

save_dir = "/mnt/hdd1/invasives/hypinvalimap/data"
zarr_root = f"{save_dir}/2023_bioscape_invasives_tiles"

files = [
    f"{zarr_root}/tile_{x}_{y}.zarr"
    for x in range(32, 36)
    for y in range(16, 22)
]

for file in tqdm(files):
    # Extract tile name and construct save path in save_dir
    tile_name = os.path.basename(file).replace('.zarr', '.geojson')
    save_path = os.path.join(save_dir, tile_name)
    
    if os.path.exists(save_path):
    #     print(f"{tile_name} exists. Skipping.")
    # else:
        print(f"Processing {tile_name}...")
        gdf = xr_to_gdf(extract_points(file, joined, crs= dscrs), dscrs)
        print(gdf.shape)
        
        gdf.to_file(save_path)


  0%|          | 0/24 [00:00<?, ?it/s]

Processing tile_32_16.geojson...
got 6 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_16)
(6, 426)
Processing tile_32_17.geojson...
got 174 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_17)
(174, 426)
Processing tile_32_18.geojson...
got 248 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_18)
(248, 426)
Processing tile_32_19.geojson...
got 151 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_19)
(151, 426)
Processing tile_32_20.geojson...
got 34 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_20)
(34, 426)
Processing tile_32_21.geojson...
got 14 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 32_21)
(0, 1)
Processing tile_33_16.geojson...
got 11 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 33_16)
(11, 426)
Processing tile_33_17.geojson...
got 83 point from AVIRIS-NG L3 Mosaiced Surface Reflectance (grid cell: 33_17)
(83, 426)
Processing tile_33_18.g

In [26]:
import os
import geopandas as gpd
from tqdm.auto import tqdm

save_dir = "/mnt/hdd1/invasives/hypinvalimap/data"

# List of GeoJSON file paths
files = [
    os.path.join(save_dir, f"tile_{x}_{y}.geojson")
    for x in range(32, 36)
    for y in range(16, 22)
]

# Combine all GeoJSONs into a single GeoDataFrame
gdfs = []
for file in tqdm(files):
    gdf = gpd.read_file(file).set_crs(epsg=4326, allow_override=True)
    gdfs.append(gdf)

xdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs='EPSG:4326')
xdf


  0%|          | 0/24 [00:00<?, ?it/s]

Unnamed: 0,377.19565,382.20566,387.21564,392.22565,397.22565,402.23566,407.24564,412.25565,417.26566,422.27563,...,2460.8057,2465.8157,2470.8157,2475.8257,2480.8357,2485.8457,2490.8557,2495.8657,2500.8757,geometry
0,-0.001360,0.001746,0.009233,0.011261,0.012637,0.013316,0.012942,0.014665,0.016175,0.017520,...,0.023868,0.022432,0.031687,0.039885,0.049506,0.060027,0.069544,0.078307,0.094685,POINT (18.99064 -33.78642)
1,-0.013990,-0.004646,-0.002463,0.006976,0.009752,0.006681,0.010878,0.012079,0.012941,0.015146,...,0.032015,0.033710,0.033165,0.040219,0.052750,0.062266,0.071139,0.080009,0.069628,POINT (18.99329 -33.77454)
2,-0.004312,-0.003247,0.007766,0.007847,0.007336,0.012901,0.013843,0.015823,0.016952,0.016849,...,0.038082,0.044695,0.046063,0.054095,0.069160,0.080419,0.092032,0.103193,0.096860,POINT (18.99461 -33.77531)
3,-0.009425,-0.003676,0.004743,0.007010,0.006713,0.010284,0.011440,0.013126,0.014715,0.015523,...,0.039720,0.043391,0.048259,0.057994,0.066670,0.077617,0.086705,0.095655,0.102966,POINT (18.99528 -33.7755)
4,-0.000792,0.001331,0.016432,0.013918,0.013654,0.018442,0.021342,0.024033,0.024784,0.026299,...,0.078187,0.079905,0.088660,0.106881,0.126884,0.151404,0.171975,0.191847,0.171001,POINT (19.00378 -33.77434)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,-0.026699,-0.008511,0.003038,0.006947,0.010898,0.012562,0.016860,0.018258,0.019699,0.020430,...,0.002770,0.007171,-0.002427,0.007888,0.016347,0.036395,0.073807,0.016607,0.011262,POINT (19.14737 -33.99141)
1616,-0.006539,-0.003237,0.012373,0.008805,0.008135,0.014811,0.017723,0.022906,0.020922,0.021219,...,-0.000002,-0.004252,0.002824,-0.000377,0.013356,-0.004699,0.101039,-0.022947,-0.015435,POINT (19.15609 -33.99034)
1617,-0.009968,-0.008733,0.002173,0.005276,0.011025,0.011454,0.017868,0.018317,0.018992,0.019012,...,-0.002678,0.004468,-0.000922,-0.002775,-0.024097,-0.044038,0.068780,0.005774,0.010309,POINT (19.16467 -33.99555)
1618,0.014812,0.026067,0.024296,0.026962,0.029755,0.030520,0.032404,0.034234,0.034657,0.035751,...,0.133986,0.135523,0.137910,0.149332,0.162028,0.173455,0.185647,0.197027,0.208798,POINT (19.34197 -34.00036)


In [27]:
sjoined = gpd.sjoin_nearest(joined[['fid', 'class', 'group','change', 'notes', 'geometry']], xdf, how='inner', distance_col='dist')
sjoined.to_file(r"/home/geethen/invasives/hypinvalimap/data/2023_extracted.geojson")