In [1]:
import os
import sys
import datacube
import numpy as np
import pandas as pd
import xarray as xr
from otps import TimePoint
from otps import predict_tide
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from datacube_stats.statistics import GeoMedian
from dateutil.relativedelta import relativedelta
from datacube.helpers import ga_pq_fuser
from datacube.storage import masking
from collections import defaultdict
import itertools

# Import external functions from dea-notebooks by appending its Scripts directory
# (given here as an absolute filesystem path) to the module search path.
# NOTE(review): the path looks user-specific; presumably it must be edited to
# point at a local dea-notebooks checkout before running elsewhere - confirm.
sys.path.append('/g/data/r78/rt1527/dea-notebooks/Scripts')
import DEAPlotting

# Create the datacube instance used by every `dc.load` call in this notebook
dc = datacube.Datacube(app='Tidal tagging')

def date_range(start_date, end_date, increment, period):
    """Generate dates separated by a given time increment/period.

    Parameters
    ----------
    start_date : datetime
        First date of the sequence.
    end_date : datetime
        Inclusive upper bound; no returned date is later than this.
    increment : int
        Positive number of `period` units between consecutive dates.
    period : str
        Keyword name passed to `relativedelta` (e.g. 'years', 'months',
        'days'); it must describe a relative offset so that dates advance.

    Returns
    -------
    list
        `start_date`, `start_date + 1 step`, `start_date + 2 steps`, ... up to
        and including `end_date`. Empty when `start_date` is after `end_date`.

    Raises
    ------
    ValueError
        If `increment` is not positive, or if the resulting offset does not
        move the date forward (either case would otherwise loop forever).
    """

    if increment <= 0:
        raise ValueError('increment must be a positive number, got {!r}'.format(increment))

    delta = relativedelta(**{period: increment})
    result = []
    nxt = start_date
    while nxt <= end_date:
        result.append(nxt)

        # Offset from the start date instead of accumulating onto the previous
        # value: a date clipped to a short month (31 Jan -> 28 Feb) would
        # otherwise stay on the clipped day for every later step.
        nxt = start_date + delta * len(result)

        # Guard against offsets that never advance (e.g. an absolute keyword
        # such as 'year' instead of 'years').
        if nxt <= result[-1]:
            raise ValueError('period {!r} with increment {!r} does not advance '
                             'the date'.format(period, increment))
    return result

Failed to resolve driver datacube.plugins.index::s3aio_index


## Setup

In [2]:
# Centre of the analysis area (decimal degrees, passed to the query as WGS84)
lat = -13.4225701889
lon = 130.28851991

# Tide post: the location handed to the OTPS tide model together with each
# observation datetime
tidepost_lat = -13.3079256385
tidepost_lon = 130.187740555

# Tidal limits, expressed as fractions of the observed tidal range
lower_tideheight, upper_tideheight = 0.40, 0.60

# Overall time period to analyse, and the length of each epoch in years
time_period = ('1988-01-01', '2018-01-01')
epoch_years = 5

# Sensor prefixes used to build the datacube product names
sensors = ['ls5',
           'ls7',
           'ls8']

## Analysis
Inputs:

- Three sensors (`ls5`, `ls7`, `ls8`)
- X time periods (epochs of `epoch_years` years)

Processing steps:

- Filter by tide height
- Filter by date

In [3]:
# For each epoch between start and end of timeperiod, create list of datetimes
start = datetime.strptime(time_period[0], "%Y-%m-%d")
end = datetime.strptime(time_period[1], "%Y-%m-%d")
epochs = date_range(start, end, epoch_years, 'years')

# Set up query: project the analysis point to EPSG:3577 and take a window
# extending 5000 CRS units either side of it in x and y
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - 5000, x + 5000),
         'y': (y - 5000, y + 5000),
         'time': time_period,
         'crs': 'EPSG:3577'}

# Output dicts to hold entire time-series for each sensor
sensor_dict = {}
pq_dict = {}

# For each sensor, dask load data and compute tide heights for each sensor
for sensor in sensors:

    # Return observations that match our query without actually loading them using dask
    sensor_all = dc.load(product='{}_nbart_albers'.format(sensor),
                         group_by='solar_day',
                         dask_chunks={'time': 1},
                         **query)

    # Load PQ data
    pq_all = dc.load(product='{}_pq_albers'.format(sensor),
                     group_by='solar_day',
                     fuse_func=ga_pq_fuser,
                     dask_chunks={'time': 1},
                     **query)

    # `dc.load` hands back an empty Dataset (no variables, no `time`) when
    # nothing matches the query; skip the sensor instead of failing on `.time`
    if len(sensor_all.data_vars) == 0 or len(pq_all.data_vars) == 0:
        print('No {} data found for this query; skipping sensor'.format(sensor))
        continue

    # Return Landsat observations that have matching PQ data. Subtracting the
    # two time coordinates aligns them, leaving only timestamps found in both.
    time = (sensor_all.time - pq_all.time).time
    if time.size == 0:
        print('No {} observations with matching PQ data; skipping sensor'.format(sensor))
        continue
    sensor_all = sensor_all.sel(time=time)
    pq_all = pq_all.sel(time=time)

    # Use the tidal model to extract tide heights for each observation:
    obs_datetimes = sensor_all.time.data.astype('M8[s]').astype('O').tolist()
    obs_timepoints = [TimePoint(tidepost_lon, tidepost_lat, dt) for dt in obs_datetimes]
    obs_predictedtides = predict_tide(obs_timepoints)
    obs_tideheights = [predictedtide.tide_m for predictedtide in obs_predictedtides]

    # Assign these tide heights back into the dataset:
    sensor_all['tide_heights'] = xr.DataArray(obs_tideheights, [('time', sensor_all.time)])

    # Append to output datasets
    sensor_dict[sensor] = sensor_all
    pq_dict[sensor] = pq_all


In [8]:
# Gather the per-observation tide heights of every loaded sensor
tide_arrays = [sensor_ds.tide_heights for sensor_ds in sensor_dict.values()]

# Lowest and highest tide height seen by any sensor, and the span between them
obs_min = np.min([tides.min() for tides in tide_arrays])
obs_max = np.max([tides.max() for tides in tide_arrays])
obs_range = obs_max - obs_min

# Convert the fractional tidal limits into absolute heights used for data selection
sel_min, sel_max = (obs_min + (obs_range * fraction)
                    for fraction in (lower_tideheight, upper_tideheight))

message = ('Analysing tidal heights of {0:.2f} m to {1:.2f} m out of an observed local tidal '
           'range of {2:.2f} m to {3:.2f} m')
print(message.format(sel_min, sel_max, obs_min, obs_max))

Analysing tidal heights of -0.58 m to 0.40 m out of an observed local tidal range of -2.55 m to 2.38 m


In [None]:
# Tidally filtered sensor subsets, keyed by epoch start date ('YYYY-MM-DD')
data_dict = defaultdict(list)

# Matching pixel-quality subsets, stored under the same key and in the same order
pq_data_dict = defaultdict(list)

# Filter every sensor by tidal stage once, up front: the result does not
# depend on the epoch, so there is no need to rebuild it for each one
tide_filtered = {}
for sensor, sensor_ds in sensor_dict.items():

    in_tide_range = ((sensor_ds.tide_heights >= sel_min) &
                     (sensor_ds.tide_heights <= sel_max))
    sensor_tidal = sensor_ds.where(in_tide_range, drop=True)

    # Pick PQ observations by timestamp rather than with `where`, which would
    # promote the integer pixel-quality bit flags to float
    pq_tidal = pq_dict[sensor].sel(time=sensor_tidal.time)

    tide_filtered[sensor] = (sensor_tidal, pq_tidal)

for epoch, sensor in itertools.product(epochs[:-1], tide_filtered.keys()):

    # Select dataset
    sensor_tidal, pq_tidal = tide_filtered[sensor]

    # Identify from and to date strings
    epoch_end = epoch + relativedelta(years=epoch_years)
    from_date = epoch.strftime('%Y-%m-%d')
    to_date = epoch_end.strftime('%Y-%m-%d')

    # Date-string slices include both end days, so stop at the day before the
    # next epoch starts; otherwise an observation on the boundary day would be
    # placed in two epochs
    last_day = (epoch_end - timedelta(days=1)).strftime('%Y-%m-%d')

    print('Filtering {} from {} to {}'.format(sensor, from_date, to_date))
    sensor_subset = sensor_tidal.sel(time=slice(from_date, last_day))
    pq_subset = pq_tidal.sel(time=slice(from_date, last_day))

    # Add to dicts (the original appended `sel_ds`, which is not defined here)
    data_dict[from_date].append(sensor_subset)
    pq_data_dict[from_date].append(pq_subset)

In [None]:
# Debug check: display the value currently held by the `epoch` loop variable
epoch

In [None]:
# Build one time-ordered dataset per listed epoch start date from all of the
# datasets stored for it in `data_dict`
for from_date in ['2013-01-01']:

    # Trigger the actual load of each lazily evaluated subset
    loaded_subsets = []
    for lazy_subset in data_dict[from_date]:
        loaded_subsets.append(lazy_subset.compute())

    # Stack along time, then order the observations chronologically
    combined_ds = xr.concat(loaded_subsets, dim='time').sortby('time')

In [None]:
# Debug check: display the list of datasets stored under the current `from_date`
data_dict[from_date]

In [None]:
# Project the analysis point from WGS84 into EPSG:3577 and build a query
# window extending 5000 CRS units either side of it
point_albers = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577'))
x, y = point_albers.points[0]
query = dict(x=(x - 5000, x + 5000),
             y=(y - 5000, y + 5000),
             crs='EPSG:3577',
             time=time_period)

# Options shared by both loads; `dask_chunks` keeps them lazy so that nothing
# is actually read until `compute()` is called
lazy_load_options = dict(group_by='solar_day', dask_chunks={'time': 1})
lazy_load_options.update(query)

# Surface reflectance observations matching the query
obs_ds = dc.load(product='ls5_nbart_albers', **lazy_load_options)

# Pixel-quality data for the same query
pq = dc.load(product='ls5_pq_albers', fuse_func=ga_pq_fuser, **lazy_load_options)

# Keep only the timestamps found in both datasets: subtracting the two time
# coordinates aligns them, and the result carries just the shared times
time = (obs_ds.time - pq.time).time
obs_ds = obs_ds.sel(time=time)
pq = pq.sel(time=time)
print(pq, obs_ds)

# Use the tidal model to predict a tide height at the tide post for each
# observation time
obs_datetimes = obs_ds.time.data.astype('M8[s]').astype('O').tolist()
obs_timepoints = [TimePoint(tidepost_lon, tidepost_lat, obs_time)
                  for obs_time in obs_datetimes]
obs_predictedtides = predict_tide(obs_timepoints)
obs_tideheights = [prediction.tide_m for prediction in obs_predictedtides]

# Attach the tide heights to the dataset as a variable along `time`
tide_coords = [('time', obs_ds.time)]
obs_ds['tide_heights'] = xr.DataArray(obs_tideheights, tide_coords)

In [None]:
# Extract observed tidal height ranges
obs_min, obs_max = obs_ds['tide_heights'].quantile(q=[0.0, 1.0]).values
obs_range = obs_max - obs_min

# Calculate tidal limits used for data selection
sel_min = obs_min + (obs_range * lower_tideheight)
sel_max = obs_min + (obs_range * upper_tideheight)
print('Analysing tidal heights of {0:.2f} m to {1:.2f} m out of an observed local tidal '
      'range of {2:.2f} m to {3:.2f} m'.format(sel_min, sel_max, obs_min, obs_max))

# Filter by tidal stage
tide_ds = obs_ds.where((obs_ds.tide_heights >= sel_min) &
                       (obs_ds.tide_heights <= sel_max), drop=True)

# Select PQ by timestamp so its integer bit flags are left untouched
tide_pq = pq.sel(time=tide_ds.time)
print(tide_ds, tide_pq)

# For each epoch between start and end of timeperiod, create list of datetimes.
# The step is the configured epoch length `epoch_years`; the original passed
# `epoch`, a leftover datetime loop variable from an earlier cell.
start = datetime.strptime(time_period[0], "%Y-%m-%d")
end = datetime.strptime(time_period[1], "%Y-%m-%d")
epochs = date_range(start, end, epoch_years, 'years')

for from_date in epochs[:-1]:

    # Filter by time period
    to_date = from_date + relativedelta(years=epoch_years)
    from_str = from_date.strftime('%Y-%m-%d')
    to_str = to_date.strftime('%Y-%m-%d')

    # Date-string slices include both end days, so stop at the day before the
    # next epoch starts to avoid using a boundary-day observation twice
    last_day_str = (to_date - timedelta(days=1)).strftime('%Y-%m-%d')

    print('Filtering from {} to {}'.format(from_str, to_str))
    sel_ds = tide_ds.sel(time=slice(from_str, last_day_str))
    sel_pq = tide_pq.sel(time=slice(from_str, last_day_str))

    # Nothing to composite if the sensor has no observations in this epoch
    if sel_ds.time.size == 0:
        print('No observations between {} and {}; skipping'.format(from_str, to_str))
        continue

    # Run `compute()` to load only our filtered datasets:
    sel_ds = sel_ds.compute()
    sel_pq = sel_pq.compute()

    # Identify pixels with no clouds/shadows in either ACCA or Fmask, no
    # saturated bands, and contiguous data
    good_quality = masking.make_mask(sel_pq.pixelquality,
                                     cloud_acca='no_cloud',
                                     cloud_shadow_acca='no_cloud_shadow',
                                     cloud_shadow_fmask='no_cloud_shadow',
                                     cloud_fmask='no_cloud',
                                     blue_saturated=False,
                                     green_saturated=False,
                                     red_saturated=False,
                                     nir_saturated=False,
                                     swir1_saturated=False,
                                     swir2_saturated=False,
                                     contiguous=True)

    sel_ds = sel_ds.where(good_quality)

    # Combine this epoch's tidally filtered observations into one composite
    # using the GeoMedian statistic. A plain per-band median would be:
    #     geomedian_ds = sel_ds.median(dim = 'time', keep_attrs = True)
    geomedian_ds = GeoMedian().compute(sel_ds)
    fig, ax = DEAPlotting.three_band_image(ds=geomedian_ds, bands=['swir1', 'nir', 'green'],
                                           title='Geomedian', reflect_stand=4000)
    fig.savefig('output_{}.png'.format(from_str))

In [None]:
# Display the pixel-quality observations at the timestamps kept in `tide_ds`.
# Earlier experiment, kept for reference:
# pq.where(obs_ds.tide_heights >= sel_min, drop = True)
pq.sel(time=tide_ds.time)

In [None]:
# Project the analysis point into EPSG:3577 and build a query window around
# it, this time restricted to 1988-01-01 .. 1990-01-01
site_xy = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
x, y = site_xy
query = dict(x=(x - 5000, x + 5000),
             y=(y - 5000, y + 5000),
             crs='EPSG:3577',
             time=('1988-01-01', '1990-01-01'))

# Load PQ data (no `dask_chunks` is given for this load)
pq = dc.load(product='ls5_pq_albers',
             group_by='solar_day',
             fuse_func=ga_pq_fuser,
             **query)

# Flag values a pixel must have to count as good quality: no cloud or cloud
# shadow under either ACCA or Fmask, no saturated band, and contiguous data
good_pixel_flags = dict(cloud_acca='no_cloud',
                        cloud_shadow_acca='no_cloud_shadow',
                        cloud_shadow_fmask='no_cloud_shadow',
                        cloud_fmask='no_cloud',
                        blue_saturated=False,
                        green_saturated=False,
                        red_saturated=False,
                        nir_saturated=False,
                        swir1_saturated=False,
                        swir2_saturated=False,
                        contiguous=True)

# The resulting mask is the cell's displayed output
masking.make_mask(pq['pixelquality'], **good_pixel_flags)

In [None]:
# Debug check: display the current `pq` dataset
pq 