update regrid feature, add suffix argument
sebhahn committed May 12, 2024
1 parent 0f759a9 commit 10b6bf4
Showing 3 changed files with 154 additions and 216 deletions.
121 changes: 67 additions & 54 deletions src/ascat/regrid/interface.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, TU Wien, Department of Geodesy and Geoinformation
+# Copyright (c) 2024, TU Wien
 # All rights reserved.
 
 # Redistribution and use in source and binary forms, with or without
@@ -35,41 +35,73 @@
 from ascat.read_native.xarray_io import swath_io_catalog
 from ascat.regrid.regrid import regrid_swath_ds
 from ascat.regrid.regrid import retrieve_or_store_grid_lut
-from ascat.regrid.regrid import grid_to_regular_grid
 
 
 def parse_args_swath_regrid(args):
     """
-    Parse command line arguments for regridding an ASCAT swath file to a regular grid.
+    Parse command line arguments for regridding an ASCAT swath file
+    to a regular grid.
 
     Parameters
     ----------
     args : list
         Command line arguments.
 
     Returns
     -------
     parser : ArgumentParser
         Argument Parser object.
     """
     parser = argparse.ArgumentParser(
-        description='Regrid an ASCAT swath file to a regular grid'
-    )
-    parser.add_argument('filepath', metavar='FILEPATH', help='Path to the data')
-    parser.add_argument('outpath', metavar='OUTPATH', help='Path to the output data')
-    parser.add_argument(
-        'regrid_deg',
-        metavar='REGRID_DEG',
-        help='Regrid the data to a regular grid with the given spacing in degrees'
-    )
-    parser.add_argument(
-        '--grid_store',
-        metavar='GRID_STORE',
-        help='Path to a directory for storing grids and lookup tables between them'
-    )
+        description="Regrid an ASCAT swath file to a regular grid")
+    parser.add_argument(
+        "filepath", metavar="FILEPATH", help="Path to file or folder")
+    parser.add_argument(
+        "outpath", metavar="OUTPATH", help="Path to the output data")
+    parser.add_argument(
+        "regrid_deg",
+        metavar="REGRID_DEG",
+        help="Target grid spacing in degrees")
+    parser.add_argument(
+        "--grid_store",
+        metavar="GRID_STORE",
+        help="Path for storing/loading lookup tables")
+    parser.add_argument(
+        "--suffix",
+        metavar="SUFFIX",
+        help="File suffix (default: _REGRID_DEGdeg)")
 
     return parser.parse_args(args)
 
 
 def swath_regrid_main(cli_args):
     """
-    Regrid an ASCAT swath file or directory of swath files to a regular grid and write
-    the results to disk.
+    Regrid an ASCAT swath file or directory of swath files
+    to a regular grid and write the results to disk.
 
     Parameters
     ----------
     cli_args : list
         Command line arguments.
     """
     args = parse_args_swath_regrid(cli_args)
 
     filepath = Path(args.filepath)
     outpath = Path(args.outpath)
-    new_grid_size = float(args.regrid_deg)
-    new_grid = None
+    outpath.parent.mkdir(parents=True, exist_ok=True)
+
+    trg_grid_size = float(args.regrid_deg)
+
+    if args.grid_store:
+        grid_store = args.grid_store
+    else:
+        grid_store = None
+
+    if args.suffix:
+        suffix = args.suffix
+    else:
+        suffix = f"_{args.regrid_deg}deg"
 
     if filepath.is_dir():
         files = list(filepath.glob("**/*.nc"))
@@ -79,47 +111,28 @@ def swath_regrid_main(cli_args):
         files = None
 
     if files is None:
-        raise ValueError("No .nc files found at the provided filepath.")
+        raise ValueError("No files found at the provided filepath.")
 
     first_file = files[0]
     product = swath_io_catalog[get_swath_product_id(str(first_file.name))]
-    old_grid = product.grid
-    old_grid_size = product.grid_sampling_km
-
-    if args.grid_store:
-        grid_store = args.grid_store
-    else:
-        grid_store = None
+    src_grid = product.grid
+    src_grid_size = product.grid_sampling_km
 
-    for file in files:
-        if new_grid is None and grid_store is None:
-            new_grid, _, new_grid_lut = grid_to_regular_grid(
-                old_grid,
-                new_grid_size
-            )
-        elif new_grid is None:
-            old_grid_id = f"fib_grid_{old_grid_size}km"
-            new_grid_id = f"reg_grid_{args.regrid_deg}deg"
-            new_grid, _, new_grid_lut = retrieve_or_store_grid_lut(
-                grid_store,
-                old_grid,
-                old_grid_id,
-                new_grid_id,
-                new_grid_size
-            )
-        swath_ds = xr.open_dataset(file, decode_cf=False, mask_and_scale=False)
-        regridded_ds = regrid_swath_ds(swath_ds, new_grid, new_grid_lut)
-
-        # figure out directories between filepath and file, if any
-        relative_path = file.relative_to(filepath)
-        if relative_path != Path("."):
-            outfile = outpath / relative_path
-            outfile.parent.mkdir(parents=True, exist_ok=True)
-        else:
-            outfile = outpath / file.name
-        regridded_ds.to_netcdf(outfile)
+    src_grid_id = f"fib_grid_{src_grid_size}km"
+    trg_grid_id = f"reg_grid_{trg_grid_size}deg"
+
+    trg_grid, grid_lut = retrieve_or_store_grid_lut(grid_store, src_grid,
+                                                    src_grid_id, trg_grid_id,
+                                                    trg_grid_size)
+
+    for f in files:
+        outfile = outpath / Path(f.stem + suffix + f.suffix)
+
+        with xr.open_dataset(f, decode_cf=False, mask_and_scale=False) as ds:
+            regrid_ds = regrid_swath_ds(ds, src_grid, trg_grid, grid_lut)
+            regrid_ds.to_netcdf(outfile)
 
 
 def run_swath_regrid():
-    """ Run command line interface for temporal aggregation of ASCAT data. """
+    """Run command line interface for regridding ASCAT data."""
     swath_regrid_main(sys.argv[1:])
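
A minimal usage sketch of the updated interface, calling swath_regrid_main
directly rather than through the console entry point (the paths and the 0.1
degree spacing below are hypothetical):

    from ascat.regrid.interface import swath_regrid_main

    # Regrid all .nc swath files under /data/swaths to a 0.1 deg regular grid,
    # caching the target grid and lookup table in /data/grid_store. Output
    # files get the default suffix "_0.1deg" since --suffix is not passed.
    swath_regrid_main([
        "/data/swaths",
        "/data/regridded",
        "0.1",
        "--grid_store", "/data/grid_store",
    ])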
178 changes: 87 additions & 91 deletions src/ascat/regrid/regrid.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, TU Wien, Department of Geodesy and Geoinformation
+# Copyright (c) 2024, TU Wien
 # All rights reserved.
 
 # Redistribution and use in source and binary forms, with or without
@@ -28,120 +28,116 @@
 from pathlib import Path
 
 import numpy as np
+import xarray as xr
 
 from pygeogrids.grids import genreg_grid
-from pygeogrids.netcdf import save_grid, load_grid
+from pygeogrids.netcdf import load_grid, save_grid
 
 
-# move this to pygeogrids
-
-def grid_to_regular_grid(old_grid, new_grid_size):
-    """ Create a regular grid of a given size and a lookup table from it to another grid.
-
-    Parameters
-    ----------
-    old_grid : pygeogrids.grids.BasicGrid
-        The grid to create a lookup table to.
-    new_grid_size : int
-        Size of the new grid in degrees.
-    """
-    new_grid = genreg_grid(new_grid_size, new_grid_size)
-    old_grid_lut = new_grid.calc_lut(old_grid)
-    new_grid_lut = old_grid.calc_lut(new_grid)
-    return new_grid, old_grid_lut, new_grid_lut
-
-
-def retrieve_or_store_grid_lut(
-        store_path,
-        old_grid,
-        old_grid_id,
-        new_grid_id,
-        regrid_degrees
-):
-    """Get a grid and its lookup table from a store directory or create, store, and return them.
+def retrieve_or_store_grid_lut(store_path, src_grid, src_grid_id, trg_grid_id,
+                               trg_grid_size):
+    """
+    Get a grid and its lookup table from a store directory or
+    create, store, and return them.
 
     Parameters
     ----------
     store_path : str
         Path to the store directory.
-    old_grid : pygeogrids.BasicGrid
-        The old grid.
-    old_grid_id : str
-        The old grid's id.
-    new_grid_id : str
-        The new grid's id.
-    regrid_degrees : int
-        The size of the new grid in degrees.
+    src_grid : pygeogrids.BasicGrid
+        Source grid.
+    src_grid_id : str
+        The source grid's id.
+    trg_grid_id : str
+        The target grid's id.
+    trg_grid_size : int
+        The size of the target grid in degrees.
+
+    Returns
+    -------
+    trg_grid : pygeogrids.grids.BasicGrid
+        Target grid.
+    grid_lut : numpy.ndarray
+        Look-up table.
     """
     store_path = Path(store_path)
-    old_lut_path = store_path / f"lut_{old_grid_id}_{new_grid_id}.npy"
-    new_lut_path = store_path / f"lut_{new_grid_id}_{old_grid_id}.npy"
-    grid_path = store_path / f"grid_{new_grid_id}.nc"
-    if grid_path.exists() and cur_new_lut_path.exists() and new_cur_lut_path.exists():
-        new_grid = load_grid(grid_path)
-        old_grid_lut = np.load(old_lut_path, allow_pickle=True)
-        new_grid_lut = np.load(new_lut_path, allow_pickle=True)
-
-    else:
-        new_grid, old_grid_lut, new_grid_lut = grid_to_regular_grid(old_grid,
-                                                                    regrid_degrees)
-        old_lut_path.parent.mkdir(parents=True, exist_ok=True)
-        new_lut_path.parent.mkdir(parents=True, exist_ok=True)
-        old_grid_lut.dump(old_lut_path)
-        new_grid_lut.dump(new_lut_path)
-        save_grid(grid_path, new_grid)
-
-    return new_grid, old_grid_lut, new_grid_lut
+    trg_grid_file = store_path / f"{trg_grid_id}.nc"
+
+    if trg_grid_file.exists():
+        trg_grid = load_grid(trg_grid_file)
+    else:
+        store_path.mkdir(parents=True, exist_ok=True)
+        trg_grid = genreg_grid(trg_grid_size, trg_grid_size)
+        save_grid(trg_grid_file, trg_grid)
+
+    grid_lut_file = store_path / f"lut_{src_grid_id}_{trg_grid_id}.npy"
+
+    if grid_lut_file.exists():
+        grid_lut = np.load(grid_lut_file, allow_pickle=True)
+    else:
+        grid_lut = trg_grid.calc_lut(src_grid)
+        store_path.mkdir(parents=True, exist_ok=True)
+        grid_lut.dump(grid_lut_file)
+
+    return trg_grid, grid_lut
 
 
-def regrid_swath_ds(ds, new_grid, new_grid_lut):
-    """Convert a swath dataset's location_ids to their nearest neighbors in a new grid."""
-    new_gpis = new_grid_lut[ds["location_id"].values]
-    new_lons = new_grid.arrlon[new_gpis]
-    new_lats = new_grid.arrlat[new_gpis]
-    ds["location_id"] = ("obs", new_gpis)
-    ds["longitude"] = ("obs", new_lons)
-    ds["latitude"] = ("obs", new_lats)
-    return ds
-
-
-def regrid_global_raster_ds(ds, new_grid, old_grid_lut):
-    """Convert a global dataset from a Fibonacci grid to a standard grid.
-
-    Assumes the input dataset has a unique location_id dimension.
-
-    The output data will cover the entire globe and have lat and lon dimensions. The data
-    for each point will be taken from the nearest location on the old grid. If multiple
-    new points have the same nearest location, that data will be duplicated for each. If
-    the nearest old location is NaN or does not exist in `ds`, the new point will be NaN.
+def regrid_swath_ds(ds, src_grid, trg_grid, grid_lut):
+    """
+    Convert a swath dataset's observations to their nearest neighbors
+    on a regular lat/lon grid.
 
     Parameters
     ----------
     ds : xarray.Dataset
-        Dataset with a location_id variable derived from a pygeogrids.grids.BasicGrid.
-    new_grid : pygeogrids.grids.BasicGrid
-        Instance of BasicGrid that the dataset should be regridded to.
-    ds_grid_lut : dict
-        Lookup table from the new grid to the dataset's grid.
+        Swath dataset.
+    src_grid : pygeogrids.grids.BasicGrid
+        Source grid.
+    trg_grid : pygeogrids.grids.BasicGrid
+        Target grid.
+    grid_lut : numpy.ndarray
+        Grid look-up table.
 
     Returns
     -------
-    xarray.Dataset
-        Dataset with lon and lat dimensions according to the new grid system.
+    ds : xarray.Dataset
+        Swath dataset resampled on a regular lat/lon grid.
     """
-    new_gpis = new_grid.gpis
-    new_lons = new_grid.arrlon
-    new_lats = new_grid.arrlat
-    nearest_ds_gpis = old_grid_lut[new_gpis]
-    ds = ds.reindex(location_id=nearest_ds_gpis)
-
-    # put the new gpi/lon/lat data onto the grouped_ds as well
-    ds["location_id"] = ("location_id", new_gpis)
-    ds["lon"] = ("location_id", new_lons)
-    ds["lat"] = ("location_id", new_lats)
-
-    # finally we turn lat and lon into their own dimensions by making a multiindex
-    # out of them and then unstacking it.
-    ds = ds.set_index(location_id=["lat", "lon"])
-    ds = ds.unstack()
-
-    return ds
+    index_lut = np.zeros(src_grid.n_gpi, dtype=np.int32) - 1
+    index_lut[ds["location_id"].data] = np.arange(ds["location_id"].size)
+    idx = index_lut[grid_lut]
+    nan_pos = idx == -1
+
+    missing_value = {
+        "time": 0,
+        "backscatter_flag": 255,
+        "correction_flag": 255,
+        "processing_flag": 255,
+        "surface_flag": 255
+    }
+
+    coords = {"lat": np.int32(trg_grid.lat2d[:, 0] / 1e-6),
+              "lon": np.int32(trg_grid.lon2d[0] / 1e-6)}
+
+    regrid_ds = xr.Dataset(coords=coords)
+    regrid_ds.attrs = ds.attrs
+
+    regrid_ds["lat"].attrs = ds["latitude"].attrs
+    regrid_ds["lon"].attrs = ds["longitude"].attrs
+    dim = ("lat", "lon")
+
+    for var in ds.variables:
+        if var in ["latitude", "longitude"]:
+            continue
+
+        regrid_ds[var] = (dim, ds[var].data[idx])
+        regrid_ds[var].attrs = ds[var].attrs
+        regrid_ds[var].encoding = {"zlib": True, "complevel": 4}
+
+        if hasattr(ds[var], "missing_value"):
+            regrid_ds[var].data[nan_pos] = ds[var].missing_value
+        else:
+            regrid_ds[var].data[nan_pos] = missing_value[var]
+
+    return regrid_ds
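
The core of the new regrid_swath_ds is a two-step lookup: grid_lut maps each
target grid cell to its nearest source GPI, and index_lut maps source GPIs to
row positions within the swath file, with -1 marking GPIs the swath does not
cover. A toy numpy sketch of that mechanic (all sizes and values hypothetical):

    import numpy as np

    n_src_gpi = 10                     # points in the full source grid
    swath_gpis = np.array([2, 5, 7])   # GPIs present in one swath file
    swath_data = np.array([1.5, 2.5, 3.5])

    # grid_lut: nearest source GPI for each target grid cell
    grid_lut = np.array([2, 2, 5, 9])  # GPI 9 is not covered by this swath

    # index_lut: source GPI -> row in the swath arrays, -1 if absent
    index_lut = np.zeros(n_src_gpi, dtype=np.int32) - 1
    index_lut[swath_gpis] = np.arange(swath_gpis.size)

    idx = index_lut[grid_lut]          # swath row to pull for each target cell
    nan_pos = idx == -1                # target cells without swath coverage

    regridded = swath_data[idx]
    regridded[nan_pos] = np.nan        # real code uses each variable's missing_value
    print(regridded)                   # [1.5 1.5 2.5 nan]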
