/
coastal.py
1946 lines (1701 loc) · 74.9 KB
/
coastal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## dea_coastaltools.py
"""
Coastal analysis and tide modelling tools.
License: The code in this notebook is licensed under the Apache License,
Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0). Digital Earth
Australia data is licensed under the Creative Commons by Attribution 4.0
license (https://creativecommons.org/licenses/by/4.0/).
Contact: If you need assistance, post a question on the Open Data Cube
Slack channel (http://slack.opendatacube.org/) or the GIS Stack Exchange
(https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using
the `open-data-cube` tag (you can view previously asked questions here:
https://gis.stackexchange.com/questions/tagged/open-data-cube).
If you would like to report an issue with this script, you can file one
on GitHub (https://github.com/GeoscienceAustralia/dea-notebooks/issues/new).
Last modified: November 2023
"""
# Import required packages
import os
import pyproj
import pathlib
import warnings
import scipy.interpolate
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from scipy import stats
from warnings import warn
from functools import partial
from shapely.geometry import box, shape
from owslib.wfs import WebFeatureService
from datacube.utils.geometry import CRS
from dea_tools.datahandling import parallel_apply
# Fix converters for tidal plot
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
WFS_ADDRESS = "https://geoserver.dea.ga.gov.au/geoserver/wfs"
def transect_distances(transects_gdf, lines_gdf, mode="distance"):
    """
    Take a set of transects (e.g. shore-normal beach survey lines), and
    determine the distance along the transect to each object in a set of
    lines (e.g. shorelines). Distances are measured in the CRS of the
    input datasets.

    For coastal applications, transects should be drawn from land to
    water (with the first point being on land so that it can be used
    as a consistent location from which to measure distances).

    The distance calculation can be performed using two modes:

        - 'distance': Distances are measured from the start of the
          transect to where it intersects with each line. Any transect
          that intersects a line more than once is ignored. This mode is
          useful for measuring e.g. the distance to the shoreline over
          time from a consistent starting location.
        - 'width' Distances are measured between the first and last
          intersection between a transect and each line. Any transect
          that intersects a line only once is ignored. This is useful
          for e.g. measuring the width of a narrow area of coastline over
          time, e.g. the neck of a spit or tombolo.

    Parameters
    ----------
    transects_gdf : geopandas.GeoDataFrame
        A GeoDataFrame containing one or multiple vector profile lines.
        The GeoDataFrame's index column will be used to name the rows in
        the output distance table.
    lines_gdf : geopandas.GeoDataFrame
        A GeoDataFrame containing one or multiple vector line features
        that intersect the profile lines supplied to `transects_gdf`.
        The GeoDataFrame's index column will be used to name the columns
        in the output distance table.
    mode : string, optional
        Whether to use 'distance' (for measuring distances from the
        start of a profile) or 'width' mode (for measuring the width
        between two profile intersections). See docstring above for more
        info; defaults to 'distance'.

    Returns
    -------
    distance_df : pandas.DataFrame
        A DataFrame containing distance measurements for each profile
        line (rows) and line feature (columns). Transects that do not
        match the mode's intersection requirements produce None values.
    """
    # Local imports keep shapely warning-classes optional at module load
    import warnings
    from shapely.errors import ShapelyDeprecationWarning
    from shapely.geometry import Point

    def _intersect_dist(transect_gdf, lines_gdf, mode=mode):
        """
        Take an individual transect, and determine the distance along
        the transect to each object in a set of lines (e.g. shorelines).
        """
        # Identify intersections between transects and lines
        # (one geometry result per row of `lines_gdf`)
        intersect_points = lines_gdf.apply(
            lambda x: x.geometry.intersection(transect_gdf.geometry), axis=1
        )

        # In distance mode, identify transects with one intersection only,
        # and use this as the end point and the start of the transect as the
        # start point when measuring distances
        # NOTE(review): `.type` is deprecated in Shapely 2.x in favour of
        # `.geom_type` — confirm the pinned Shapely version before upgrading
        if mode == "distance":
            start_point = Point(transect_gdf.geometry.coords[0])
            point_df = intersect_points.apply(
                lambda x: pd.Series({"start": start_point, "end": x})
                if x.type == "Point"
                else pd.Series({"start": None, "end": None})
            )

        # In width mode, identify transects with multiple intersections, and
        # use the first intersection as the start point and the second
        # intersection for the end point when measuring distances
        if mode == "width":
            point_df = intersect_points.apply(
                lambda x: pd.Series({"start": x.geoms[0], "end": x.geoms[-1]})
                if x.type == "MultiPoint"
                else pd.Series({"start": None, "end": None})
            )

        # Calculate distances between valid start and end points; rows
        # with no valid intersection propagate None
        distance_df = point_df.apply(
            lambda x: x.start.distance(x.end) if x.start else None, axis=1
        )

        return distance_df

    # Run code after ignoring Shapely pre-v2.0 warnings
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)

        # Assert that both datasets use the same CRS
        assert transects_gdf.crs == lines_gdf.crs, (
            "Please ensure both " "input datasets use the same CRS."
        )

        # Run distance calculations for each transect row in turn
        distance_df = transects_gdf.apply(
            lambda x: _intersect_dist(x, lines_gdf), axis=1
        )

        return pd.DataFrame(distance_df)
def get_coastlines(
    bbox: tuple, crs="EPSG:4326", layer="shorelines_annual", drop_wms=True
) -> gpd.GeoDataFrame:
    """
    Load DEA Coastlines annual shorelines or rates of change points data
    for a provided bounding box using WFS.

    For a full description of the DEA Coastlines dataset, refer to the
    official Geoscience Australia product description:
    /data/product/dea-coastlines

    Parameters
    ----------
    bbox : (xmin, ymin, xmax, ymax), or geopandas object
        Bounding box expressed as a tuple. Alternatively, a bounding
        box can be automatically extracted by supplying a
        geopandas.GeoDataFrame or geopandas.GeoSeries.
    crs : str, optional
        Optional CRS for the bounding box. This is ignored if `bbox`
        is provided as a geopandas object.
    layer : str, optional
        Which DEA Coastlines layer to load. Options include the annual
        shoreline vectors ("shorelines_annual") and the rates of change
        points ("rates_of_change"). Defaults to "shorelines_annual".
    drop_wms : bool, optional
        Whether to drop WMS-specific attribute columns from the data.
        These columns are used for visualising the dataset on DEA Maps,
        and are unlikely to be useful for scientific analysis. Defaults
        to True.

    Returns
    -------
    gpd.GeoDataFrame
        A GeoDataFrame containing shoreline or point features and
        associated metadata.
    """
    # If bbox is a geopandas object, extract its CRS and bounds. Catch
    # only AttributeError (raised when `bbox` lacks `.crs`/`.total_bounds`)
    # so genuine errors are not silently swallowed by a bare except
    try:
        crs = str(bbox.crs)
        bbox = bbox.total_bounds
    except AttributeError:
        # `bbox` is already a plain (xmin, ymin, xmax, ymax) sequence
        pass

    # Query WFS for the requested layer within the bounding box
    wfs = WebFeatureService(url=WFS_ADDRESS, version="1.1.0")
    layer_name = f"dea:{layer}"
    response = wfs.getfeature(
        typename=layer_name,
        bbox=tuple(bbox) + (crs,),
        outputFormat="json",
    )

    # Load data as a geopandas.GeoDataFrame
    coastlines_gdf = gpd.read_file(response)

    # Clip to extent of bounding box (WFS may return features that
    # merely touch the requested bbox)
    extent = gpd.GeoSeries(box(*bbox), crs=crs).to_crs(coastlines_gdf.crs)
    coastlines_gdf = coastlines_gdf.clip(extent)

    # Optionally drop WMS-specific columns
    if drop_wms:
        coastlines_gdf = coastlines_gdf.loc[
            :, ~coastlines_gdf.columns.str.contains("wms_")
        ]

    return coastlines_gdf
def _model_tides(
    model,
    x,
    y,
    time,
    directory,
    crs,
    method,
    extrapolate,
    cutoff,
    output_units,
    mode,
):
    """
    Worker function applied in parallel by `model_tides`. Handles the
    extraction of tide modelling constituents and tide modelling using
    `pyTMD`.

    Returns a pandas.DataFrame indexed by (time, x, y) with
    "tide_model" and "tide_m" columns.
    """
    # Imports are local so that each multiprocessing worker imports
    # pyTMD in its own process
    import pyTMD.constants
    import pyTMD.eop
    import pyTMD.io
    import pyTMD.time
    import pyTMD.io.model
    import pyTMD.predict
    import pyTMD.spatial
    import pyTMD.utilities

    # Get parameters for tide model; use custom definition file for
    # FES2012 (leave this as an undocumented feature for now)
    if model == "FES2012":
        pytmd_model = pyTMD.io.model(directory).from_file(
            directory / "model_FES2012.def"
        )
    elif model == "TPXO8-atlas-v1":
        pytmd_model = pyTMD.io.model(directory).from_file(directory / "model_TPXO8.def")
    else:
        pytmd_model = pyTMD.io.model(
            directory, format="netcdf", compressed=False
        ).elevation(model)

    # Convert x, y to latitude/longitude (pyTMD expects EPSG:4326)
    transformer = pyproj.Transformer.from_crs(crs, "EPSG:4326", always_xy=True)
    lon, lat = transformer.transform(x.flatten(), y.flatten())

    # Convert datetime to pyTMD's internal timescale representation
    timescale = pyTMD.time.timescale().from_datetime(time.flatten())

    # Read tidal constants and interpolate to grid points; each branch
    # handles one family of tide model file formats
    if pytmd_model.format in ("OTIS", "ATLAS", "ESR"):
        amp, ph, D, c = pyTMD.io.OTIS.extract_constants(
            lon,
            lat,
            pytmd_model.grid_file,
            pytmd_model.model_file,
            pytmd_model.projection,
            type=pytmd_model.type,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            grid=pytmd_model.format,
        )

        # Use delta time at 2000.0 to match TMD outputs
        deltat = np.zeros((len(timescale)), dtype=np.float64)

    elif pytmd_model.format == "netcdf":
        amp, ph, D, c = pyTMD.io.ATLAS.extract_constants(
            lon,
            lat,
            pytmd_model.grid_file,
            pytmd_model.model_file,
            type=pytmd_model.type,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Use delta time at 2000.0 to match TMD outputs
        deltat = np.zeros((len(timescale)), dtype=np.float64)

    elif pytmd_model.format == "GOT":
        amp, ph, c = pyTMD.io.GOT.extract_constants(
            lon,
            lat,
            pytmd_model.model_file,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Delta time (TT - UT1)
        deltat = timescale.tt_ut1

    elif pytmd_model.format == "FES":
        amp, ph = pyTMD.io.FES.extract_constants(
            lon,
            lat,
            pytmd_model.model_file,
            type=pytmd_model.type,
            version=pytmd_model.version,
            method=method,
            extrapolate=extrapolate,
            cutoff=cutoff,
            scale=pytmd_model.scale,
            compressed=pytmd_model.compressed,
        )

        # Available model constituents
        c = pytmd_model.constituents

        # Delta time (TT - UT1)
        deltat = timescale.tt_ut1

    # Calculate complex phase in radians for Euler's
    cph = -1j * ph * np.pi / 180.0

    # Calculate constituent oscillation
    hc = amp * np.exp(cph)

    # Determine the number of points and times to process. If in
    # "one-to-many" mode, these counts are used to repeat our extracted
    # constituents and timesteps so we can extract tides for all
    # combinations of our input times and tide modelling points.
    # If in "one-to-one" mode, we avoid this step by setting counts to 1
    # (e.g. "repeat 1 times")
    points_repeat = len(x) if mode == "one-to-many" else 1
    time_repeat = len(time) if mode == "one-to-many" else 1

    # If in "one-to-many" mode, repeat constituents to length of time
    # and number of input coords before passing to `predict_tide_drift`.
    # The tile/repeat combination is chosen so each (point, time) pair
    # lines up element-wise across `t`, `hc` and `deltat`
    t, hc, deltat = (
        np.tile(timescale.tide, points_repeat),
        hc.repeat(time_repeat, axis=0),
        np.tile(deltat, points_repeat),
    )

    # Predict tidal elevations at time and infer minor corrections
    npts = len(t)
    tide = np.ma.zeros((npts), fill_value=np.nan)
    tide.mask = np.any(hc.mask, axis=1)

    # Predict tides
    tide.data[:] = pyTMD.predict.drift(
        t, hc, c, deltat=deltat, corrections=pytmd_model.format
    )
    minor = pyTMD.predict.infer_minor(
        t, hc, c, deltat=deltat, corrections=pytmd_model.format
    )
    tide.data[:] += minor.data[:]

    # Replace invalid values with fill value
    tide.data[tide.mask] = tide.fill_value

    # Convert data to pandas.DataFrame, and set index to our input
    # time/x/y values (ordering must match the tile/repeat above)
    tide_df = pd.DataFrame(
        {
            "time": np.tile(time, points_repeat),
            "x": np.repeat(x, time_repeat),
            "y": np.repeat(y, time_repeat),
            "tide_model": model,
            "tide_m": tide,
        }
    ).set_index(["time", "x", "y"])

    # Optionally convert outputs to integer units (can save memory)
    if output_units == "m":
        tide_df["tide_m"] = tide_df.tide_m.astype(np.float32)
    elif output_units == "cm":
        tide_df["tide_m"] = (tide_df.tide_m * 100).astype(np.int16)
    elif output_units == "mm":
        tide_df["tide_m"] = (tide_df.tide_m * 1000).astype(np.int16)

    return tide_df
def model_tides(
    x,
    y,
    time,
    model="FES2014",
    directory=None,
    crs="EPSG:4326",
    method="spline",
    extrapolate=True,
    cutoff=None,
    mode="one-to-many",
    parallel=True,
    parallel_splits=5,
    output_units="m",
    output_format="long",
    epsg=None,
):
    """
    Compute tides at multiple points and times using tidal harmonics.

    This function supports any tidal model supported by
    `pyTMD`, including the FES2014 Finite Element Solution
    tide model, and the TPXO8-atlas and TPXO9-atlas-v5
    TOPEX/POSEIDON global tide models.

    This function requires access to tide model data files.
    These should be placed in a folder with subfolders matching
    the formats specified by `pyTMD`:
    https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories

    For FES2014 (https://www.aviso.altimetry.fr/es/data/products/auxiliary-products/global-tide-fes/description-fes2014.html):
        - {directory}/fes2014/ocean_tide/
        - {directory}/fes2014/load_tide/

    For TPXO8-atlas (https://www.tpxo.net/tpxo-products-and-registration):
        - {directory}/tpxo8_atlas/

    For TPXO9-atlas-v5 (https://www.tpxo.net/tpxo-products-and-registration):
        - {directory}/TPXO9_atlas_v5/

    For EOT20 (https://www.seanoe.org/data/00683/79489/):
        - {directory}/EOT20/ocean_tides/
        - {directory}/EOT20/load_tides/

    For GOT4.10c (https://earth.gsfc.nasa.gov/geo/data/ocean-tide-models):
        - {directory}/GOT4.10c/grids_oceantide_netcdf/

    For HAMTIDE (https://www.cen.uni-hamburg.de/en/icdc/data/ocean/hamtide.html):
        - {directory}/hamtide/

    This function is a modification of the `pyTMD` package's
    `compute_tide_corrections` function. For more info:
    https://pytmd.readthedocs.io/en/stable/user_guide/compute_tide_corrections.html

    Parameters:
    -----------
    x, y : float or list of floats
        One or more x and y coordinates used to define
        the location at which to model tides. By default these
        coordinates should be lat/lon; use "crs" if they
        are in a custom coordinate reference system.
    time : A datetime array or pandas.DatetimeIndex
        An array containing `datetime64[ns]` values or a
        `pandas.DatetimeIndex` providing the times at which to
        model tides in UTC time.
    model : string, optional
        The tide model used to model tides. Options include:
        - "FES2014" (only pre-configured option on DEA Sandbox)
        - "TPXO9-atlas-v5"
        - "TPXO8-atlas"
        - "EOT20"
        - "HAMTIDE11"
        - "GOT4.10"
    directory : string, optional
        The directory containing tide model data files. If no path is
        provided, this will default to the environment variable
        "DEA_TOOLS_TIDE_MODELS" if set, otherwise "/var/share/tide_models".
        Tide modelling files should be stored in sub-folders for each
        model that match the structure provided by `pyTMD`:
        https://pytmd.readthedocs.io/en/latest/getting_started/Getting-Started.html#directories
        For example:
        - {directory}/fes2014/ocean_tide/
          {directory}/fes2014/load_tide/
        - {directory}/tpxo8_atlas/
        - {directory}/TPXO9_atlas_v5/
    crs : str, optional
        Input coordinate reference system for x and y coordinates.
        Defaults to "EPSG:4326" (WGS84; degrees latitude, longitude).
    method : string, optional
        Method used to interpolate tidal constituents
        from model files. Options include:
        - "spline": scipy bivariate spline interpolation (default)
        - "bilinear": quick bilinear interpolation
        - "linear", "nearest": scipy regular grid interpolations
    extrapolate : bool, optional
        Whether to extrapolate tides for x and y coordinates outside of
        the valid tide modelling domain using nearest-neighbor.
    cutoff : int or float, optional
        Extrapolation cutoff in kilometers. The default is None, which
        will extrapolate for all points regardless of distance from the
        valid tide modelling domain.
    mode : string, optional
        The analysis mode to use for tide modelling. Supports two options:
        - "one-to-many": Models tides for every timestep in "time" at
          every input x and y coordinate point. This is useful if you
          want to model tides for a specific list of timesteps across
          multiple spatial points (e.g. for the same set of satellite
          acquisition times at various locations across your study area).
        - "one-to-one": Model tides using a different timestep for each
          x and y coordinate point. In this mode, the number of x and
          y points must equal the number of timesteps provided in "time".
    parallel : boolean, optional
        Whether to parallelise tide modelling using `concurrent.futures`.
        If multiple tide models are requested, these will be run in
        parallel. Optionally, tide modelling can also be run in parallel
        across input x and y coordinates (see "parallel_splits" below).
        Default is True.
    parallel_splits : int, optional
        Whether to split the input x and y coordinates into smaller,
        evenly-sized chunks that are processed in parallel. This can
        provide a large performance boost when processing large numbers
        of coordinates. The default is 5 chunks, which will split
        coordinates into 5 parallelised chunks.
    output_units : str, optional
        Whether to return modelled tides in floating point metre units,
        or integer centimetre units (i.e. scaled by 100) or integer
        millimetre units (i.e. scaled by 1000). Returning outputs in
        integer units can be useful for reducing memory usage.
        Defaults to "m" for metres; set to "cm" for centimetres or "mm"
        for millimetres.
    output_format : str, optional
        Whether to return the output dataframe in long format (with
        results stacked vertically along "tide_model" and "tide_m"
        columns), or wide format (with a column for each tide model).
        Defaults to "long".
    epsg : int, DEPRECATED
        Deprecated; use "crs" instead.

    Returns
    -------
    A pandas.DataFrame containing tide heights for every
    combination of time and point coordinates.
    """
    # Deprecate `epsg` param
    if epsg is not None:
        warn(
            "The `epsg` parameter is deprecated; please use `crs` to "
            "provide CRS information in the form 'EPSG:XXXX'",
            DeprecationWarning,
            stacklevel=2,
        )

    # Set tide modelling files directory. If no custom path is provided,
    # first try global environmental var, then "/var/share/tide_models"
    if directory is None:
        if "DEA_TOOLS_TIDE_MODELS" in os.environ:
            directory = os.environ["DEA_TOOLS_TIDE_MODELS"]
        else:
            directory = "/var/share/tide_models"

    # Verify path exists
    directory = pathlib.Path(directory).expanduser()
    if not directory.exists():
        raise FileNotFoundError("Invalid tide directory")

    # If time passed as a single Timestamp, convert to datetime64
    if isinstance(time, pd.Timestamp):
        time = time.to_datetime64()

    # Turn inputs into arrays for consistent handling
    model = np.atleast_1d(model)
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    time = np.atleast_1d(time)

    # Validate input arguments
    # NOTE(review): `assert`-based validation is stripped when Python is
    # run with the -O flag; raising ValueError would be more robust
    assert method in ("bilinear", "spline", "linear", "nearest")
    assert output_units in (
        "m",
        "cm",
        "mm",
    ), "Output units must be either 'm', 'cm', or 'mm'."
    assert output_format in (
        "long",
        "wide",
    ), "Output format must be either 'long' or 'wide'."
    assert len(x) == len(y), "x and y must be the same length."
    if mode == "one-to-one":
        assert len(x) == len(time), (
            "The number of supplied x and y points and times must be "
            "identical in 'one-to-one' mode. Use 'one-to-many' mode if "
            "you intended to model multiple timesteps at each point."
        )

    # Verify that all provided models are in list of supported models
    valid_models = [
        "FES2014",
        "TPXO9-atlas-v5",
        "TPXO8-atlas",
        "EOT20",
        "HAMTIDE11",
        "GOT4.10",
        "FES2012",  # Requires custom tide model definition file
        "TPXO8-atlas-v1",  # Requires custom tide model definition file
    ]
    if not all(m in valid_models for m in model):
        raise ValueError(
            f"One or more of the models requested {model} is not valid. "
            f"The following models are currently supported: {valid_models}"
        )

    # Update tide modelling func to add default keyword arguments that
    # are used for every iteration during parallel processing
    iter_func = partial(
        _model_tides,
        directory=directory,
        crs=crs,
        method=method,
        extrapolate=extrapolate,
        cutoff=np.inf if cutoff is None else cutoff,
        output_units=output_units,
        mode=mode,
    )

    # Ensure requested parallel splits is not smaller than number of points
    parallel_splits = min(parallel_splits, len(x))

    # Parallelise if either multiple models or multiple splits requested.
    # Bitwise `&`/`|` behave like `and`/`or` here because both operands
    # are plain Python bools
    if parallel & ((len(model) > 1) | (parallel_splits > 1)):
        from concurrent.futures import ProcessPoolExecutor
        from tqdm import tqdm

        with ProcessPoolExecutor() as executor:
            print(f"Modelling tides using {', '.join(model)} in parallel")

            # Optionally split lon/lat points into `splits_n` chunks
            # that will be applied in parallel
            x_split = np.array_split(x, parallel_splits)
            y_split = np.array_split(y, parallel_splits)

            # Get every combination of models and lat/lon points, and
            # extract as iterables that can be passed to `executor.map()`
            # In "one-to-many" mode, pass entire set of timesteps to each
            # parallel iteration by repeating timesteps by number of total
            # parallel iterations. In "one-to-one" mode, split up
            # timesteps into smaller parallel chunks too.
            if mode == "one-to-many":
                model_iters, x_iters, y_iters = zip(
                    *[
                        (m, x_split[i], y_split[i])
                        for m in model
                        for i in range(parallel_splits)
                    ]
                )
                time_iters = [time] * len(model_iters)
            elif mode == "one-to-one":
                time_split = np.array_split(time, parallel_splits)
                model_iters, x_iters, y_iters, time_iters = zip(
                    *[
                        (m, x_split[i], y_split[i], time_split[i])
                        for m in model
                        for i in range(parallel_splits)
                    ]
                )
            # NOTE(review): any other `mode` value reaches the call below
            # with `time_iters` undefined, raising NameError — consider
            # validating `mode` alongside the other asserts above

            # Apply func in parallel, iterating through each input param
            model_outputs = list(
                tqdm(
                    executor.map(iter_func, model_iters, x_iters, y_iters, time_iters),
                    total=len(model_iters),
                )
            )

    # Model tides in series if parallelisation is off
    else:
        model_outputs = []

        for model_i in model:
            print(f"Modelling tides using {model_i}")
            tide_df = iter_func(model_i, x, y, time)
            model_outputs.append(tide_df)

    # Combine outputs into a single dataframe
    tide_df = pd.concat(model_outputs, axis=0)

    # Optionally convert to a wide format dataframe with a tide model in
    # each dataframe column
    if output_format == "wide":
        # Pivot into wide format with each time model as a column
        print("Converting to a wide format dataframe")
        tide_df = tide_df.pivot(columns="tide_model", values="tide_m")

        # If in 'one-to-one' mode, reindex using our input time/x/y
        # values to ensure the output is sorted the same as our inputs
        if mode == "one-to-one":
            output_indices = pd.MultiIndex.from_arrays(
                [time, x, y], names=["time", "x", "y"]
            )
            tide_df = tide_df.reindex(output_indices)

    return tide_df
def _pixel_tides_resample(
    tides_lowres,
    ds,
    resample_method="bilinear",
    dask_chunks="auto",
    dask_compute=True,
):
    """
    Upsample low resolution tides modelled by `pixel_tides` onto the
    geobox (i.e. spatial resolution and extent) of the original higher
    resolution satellite dataset.

    Parameters:
    -----------
    tides_lowres : xarray.DataArray
        The low resolution tide modelling data array to be resampled.
    ds : xarray.Dataset
        The dataset whose geobox will be used as the template for the
        resampling operation. This is typically the same satellite
        dataset originally passed to `pixel_tides`.
    resample_method : string, optional
        The resampling method to use. Defaults to "bilinear"; valid
        options include "nearest", "cubic", "min", "max", "average" etc.
    dask_chunks : str or tuple, optional
        Custom Dask chunking for the final resampling step. The default
        "auto" will reuse x/y chunks from `ds` if they exist, otherwise
        it will use x/y chunks covering the full dataset extent. For
        custom chunks, provide a `(y, x)` tuple, e.g. `(2048, 2048)`.
    dask_compute : bool, optional
        Whether to compute results of the resampling step using Dask.
        If False, this will return `tides_highres` as a Dask array.

    Returns:
    --------
    tides_highres, tides_lowres : tuple of xr.DataArrays
        In addition to `tides_lowres` (see above), a high resolution
        array of tide heights will be generated matching the
        exact spatial resolution and extent of `ds`.
    """
    # Identify the names of the spatial (y, x) dimensions
    dim_y, dim_x = ds.odc.spatial_dims

    # Convert to Dask with single chunks along the spatial dims and a
    # chunk of 1 along every other dim (timestep/quantile/tide model)
    chunk_spec = {
        dim: None if dim in (dim_y, dim_x) else 1 for dim in tides_lowres.dims
    }
    lowres_dask = tides_lowres.chunk(chunk_spec)

    # Resolve "auto" chunking for the reprojection: reuse `ds`'s x/y
    # chunks where they exist, otherwise fall back to one chunk spanning
    # the entire geobox so we don't end up with hundreds of tiny x/y
    # chunks due to the small size of `tides_lowres` (possible odc.geo
    # bug?)
    if dask_chunks == "auto":
        ds_chunks = ds.chunks
        if ds_chunks is not None and (dim_y in ds_chunks) and (dim_x in ds_chunks):
            dask_chunks = (ds_chunks[dim_y], ds_chunks[dim_x])
        else:
            dask_chunks = ds.odc.geobox.shape

    # Lazily reproject into the GeoBox of `ds` using odc.geo and Dask
    tides_highres = lowres_dask.odc.reproject(
        how=ds.odc.geobox,
        chunks=dask_chunks,
        resampling=resample_method,
    ).rename("tide_m")

    # Optionally process and load into memory with Dask
    if dask_compute:
        tides_highres.load()

    return tides_highres, tides_lowres
def pixel_tides(
ds,
times=None,
resample=True,
calculate_quantiles=None,
resolution=None,
buffer=None,
resample_method="bilinear",
model="FES2014",
dask_chunks="auto",
dask_compute=True,
**model_tides_kwargs,
):
"""
Obtain tide heights for each pixel in a dataset by modelling
tides into a low-resolution grid surrounding the dataset,
then (optionally) spatially resample this low-res data back
into the original higher resolution dataset extent and resolution.
Parameters:
-----------
ds : xarray.Dataset
A dataset whose geobox (`ds.odc.geobox`) will be used to define
the spatial extent of the low resolution tide modelling grid.
times : pandas.DatetimeIndex or list of pandas.Timestamps, optional
By default, the function will model tides using the times
contained in the `time` dimension of `ds`. Alternatively, this
param can be used to model tides for a custom set of times
instead. For example:
`times=pd.date_range(start="2000", end="2001", freq="5h")`
resample : bool, optional
Whether to resample low resolution tides back into `ds`'s original
higher resolution grid. Set this to `False` if you do not want
low resolution tides to be re-projected back to higher resolution.
calculate_quantiles : list or np.array, optional
Rather than returning all individual tides, low-resolution tides
can be first aggregated using a quantile calculation by passing in
a list or array of quantiles to compute. For example, this could
be used to calculate the min/max tide across all times:
`calculate_quantiles=[0.0, 1.0]`.
resolution: int, optional
The desired resolution of the low-resolution grid used for tide
modelling. The default None will create a 5000 m resolution grid
if `ds` has a projected CRS (i.e. metre units), or a 0.05 degree
resolution grid if `ds` has a geographic CRS (e.g. degree units).
Note: higher resolutions do not necessarily provide better
tide modelling performance, as results will be limited by the
resolution of the underlying global tide model (e.g. 1/16th
degree / ~5 km resolution grid for FES2014).
buffer : int, optional
The amount by which to buffer the higher resolution grid extent
when creating the new low resolution grid. This buffering is
important as it ensures that ensure pixel-based tides are seamless
across dataset boundaries. This buffer will eventually be clipped
away when the low-resolution data is re-projected back to the
resolution and extent of the higher resolution dataset. To
ensure that at least two pixels occur outside of the dataset
bounds, the default None applies a 12000 m buffer if `ds` has a
projected CRS (i.e. metre units), or a 0.12 degree buffer if
`ds` has a geographic CRS (e.g. degree units).
resample_method : string, optional
If resampling is requested (see `resample` above), use this
resampling method when converting from low resolution to high
resolution pixels. Defaults to "bilinear"; valid options include
"nearest", "cubic", "min", "max", "average" etc.
model : string or list of strings
The tide model or a list of models used to model tides, as
supported by the `pyTMD` Python package. Options include:
- "FES2014" (default; pre-configured on DEA Sandbox)
- "TPXO8-atlas"
- "TPXO9-atlas-v5"
- "EOT20"
- "HAMTIDE11"
- "GOT4.10"
dask_chunks : str or tuple, optional
Can be used to configure custom Dask chunking for the final
resampling step. The default of "auto" will automatically set
x/y chunks to match those in `ds` if they exist, otherwise will
set x/y chunks that cover the entire extent of the dataset.
For custom chunks, provide a tuple in the form `(y, x)`, e.g.
`(2048, 2048)`.
dask_compute : bool, optional
Whether to compute results of the resampling step using Dask.
If False, this will return `tides_highres` as a Dask array.
**model_tides_kwargs :
Optional parameters passed to the `dea_tools.coastal.model_tides`
function. Important parameters include "directory" (used to
specify the location of input tide modelling files) and "cutoff"
(used to extrapolate modelled tides away from the coast; if not
specified here, cutoff defaults to `np.inf`).
Returns:
--------
If `resample` is False:
tides_lowres : xr.DataArray
A low resolution data array giving either tide heights every
timestep in `ds` (if `times` is None), tide heights at every
time in `times` (if `times` is not None), or tide height quantiles
for every quantile provided by `calculate_quantiles`.
If `resample` is True:
tides_highres, tides_lowres : tuple of xr.DataArrays
In addition to `tides_lowres` (see above), a high resolution
array of tide heights will be generated that matches the
exact spatial resolution and extent of `ds`. This will contain
either tide heights every timestep in `ds` (if `times` is None),
tide heights at every time in `times` (if `times` is not None),
or tide height quantiles for every quantile provided by
`calculate_quantiles`.
"""
import odc.geo.xr
from odc.geo.geobox import GeoBox
# First test if no time dimension and nothing passed to `times`
if ("time" not in ds.dims) & (times is None):
raise ValueError(
"`ds` does not contain a 'time' dimension. Times are required "
"for modelling tides: please pass in a set of custom tides "
"using the `times` parameter. For example: "
"`times=pd.date_range(start='2000', end='2001', freq='5h')`"
)
# If custom times are provided, convert them to a consistent
# pandas.DatetimeIndex format
if times is not None:
if isinstance(times, list):
time_coords = pd.DatetimeIndex(times)
elif isinstance(times, pd.Timestamp):
time_coords = pd.DatetimeIndex([times])
else:
time_coords = times
# Otherwise, use times from `ds` directly
else:
time_coords = ds.coords["time"]
# Set defaults passed to `model_tides`
model_tides_kwargs.setdefault("cutoff", np.inf)
# Standardise model into a list for easy handling
model = [model] if isinstance(model, str) else model
# Test if no time dimension and nothing passed to `times`
if ("time" not in ds.dims) & (times is None):
raise ValueError(
"`ds` does not contain a 'time' dimension. Times are required "
"for modelling tides: please pass in a set of custom tides "
"using the `times` parameter. For example: "
"`times=pd.date_range(start='2000', end='2001', freq='5h')`"
)
# If custom times are provided, convert them to a consistent
# pandas.DatetimeIndex format
if times is not None:
if isinstance(times, list):
time_coords = pd.DatetimeIndex(times)
elif isinstance(times, pd.Timestamp):
time_coords = pd.DatetimeIndex([times])
else:
time_coords = times
# Otherwise, use times from `ds` directly
else:
time_coords = ds.coords["time"]
# Determine spatial dimensions
y_dim, x_dim = ds.odc.spatial_dims
# Determine resolution and buffer, using different defaults for
# geographic (i.e. degrees) and projected (i.e. metres) CRSs:
crs_units = ds.odc.geobox.crs.units[0][0:6]
if ds.odc.geobox.crs.geographic:
if resolution is None:
resolution = 0.05
elif resolution > 360:
raise ValueError(
f"A resolution of greater than 360 was "
f"provided, but `ds` has a geographic CRS "
f"in {crs_units} units. Did you accidently "
f"provide a resolution in projected "
f"(i.e. metre) units?"
)
if buffer is None:
buffer = 0.12
else:
if resolution is None:
resolution = 5000
elif resolution < 1:
raise ValueError(
f"A resolution of less than 1 was provided, "
f"but `ds` has a projected CRS in "
f"{crs_units} units. Did you accidently "
f"provide a resolution in geographic "
f"(degree) units?"
)
if buffer is None:
buffer = 12000
# Raise error if resolution is less than dataset resolution
dataset_res = ds.odc.geobox.resolution.x
if resolution < dataset_res:
raise ValueError(
f"The resolution of the low-resolution tide "
f"modelling grid ({resolution:.2f}) is less "
f"than `ds`'s pixel resolution ({dataset_res:.2f}). "
f"This can cause extremely slow tide modelling "