In [None]:
import pandas as pd
from otps import TimePoint
from otps import predict_tide

# Tide post location and the timestamp to model. The other tide posts that
# were tried are kept below (commented out) for reference.
tidepost_lat = -15.92129443
tidepost_lon = 145.36472283
date = "2010-11-13T00:40:58.612213Z"
# tidepost_lat, tidepost_lon, date = -18.53226273, 146.35253872, "2011-05-10T00:58:07.007102Z"  # 80
# tidepost_lat, tidepost_lon, date = -20.86943927, 148.91656844, "2011-05-27T00:36:48.717232Z"  # 100

# Use the tidal model to compute a tide height for each observation time.
# Only the single timestamp above is modelled here; the dataset-driven
# alternative is:
# obs_datetimes = ds.time.data.astype('M8[s]').astype('O').tolist()
obs_datetimes = [pd.to_datetime(date)]
obs_timepoints = [
    TimePoint(tidepost_lon, tidepost_lat, obs_time) for obs_time in obs_datetimes
]
obs_predictedtides = predict_tide(obs_timepoints)

# Pull the `tide_m` value out of every prediction and display the list
obs_tideheights = [prediction.tide_m for prediction in obs_predictedtides]
obs_tideheights

In [None]:
# Scratch check: parse the same ISO-8601 timestamp string that the tide cell
# passes to `pd.to_datetime`
pd.to_datetime("2010-11-13T00:40:58.612213Z")

In [1]:
from datacube.storage import masking
from datacube.helpers import ga_pq_fuser
import datacube
# import numpy as np
# from datetime import time, datetime
# import pandas as pd
from otps import TimePoint
from otps import predict_tide

import sys
sys.path.append('../10_Scripts')
import SpatialTools, DEADataHandling

def load_cloudmaskedlandsat(dc, query, platform='ls8', bands=['red', 'green', 'blue']):

    '''
    Load Landsat `*_nbar_scene` data for one platform and mask it with the
    matching `*_pq_scene` pixel quality product. Pixels flagged as cloud,
    cloud shadow, saturated in any band, or non-contiguous (missing data in
    any band) are masked out. The numbered bands of the source product are
    renamed to descriptive names (e.g. 'red', 'green', 'blue') before the
    data is returned.

    Last modified: March 2019
    Author: Robbi Bishop-Taylor

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to load from, e.g.
        `dc = datacube.Datacube(app='Clear Landsat')`.
    query : dict
        Query terms (spatial bounds, time, output CRS, resolution, ...) that
        are passed unchanged to `dc.find_datasets` and `dc.load`.
    platform : str, optional
        Platform prefix used to build the product names, e.g. 'ls5', 'ls7'
        or 'ls8'. 'ls5' and 'ls7' share one band numbering; every other
        value uses the numbering that also includes a 'coastal' band.
        Defaults to 'ls8'.
    bands : list, optional
        Names of the bands to load; options are 'red', 'green', 'blue',
        'nir', 'swir1', 'swir2' (plus 'coastal' for the non-ls5/ls7
        numbering). Defaults to `['red', 'green', 'blue']`.

    Returns
    -------
    landsat_ds : xarray Dataset
        Pixel-quality masked Landsat observations with named bands.

    Raises
    ------
    Exception
        If `dc.find_datasets` returns no `*_nbar_scene` datasets for the
        query.

    '''

    # Lookup from descriptive band name to the band number used by the
    # scene products; the numbering differs between sensors
    if platform in ('ls5', 'ls7'):
        band_nametonum = dict(blue='1', green='2', red='3',
                              nir='4', swir1='5', swir2='7')
    else:
        band_nametonum = dict(coastal='1', blue='2', green='3',
                              red='4', nir='5', swir1='6', swir2='7')

    nbar_product = f'{platform}_nbar_scene'
    pq_product = f'{platform}_pq_scene'

    # Stop early when nothing matches the query
    n_obs = len(dc.find_datasets(product=nbar_product, **query))
    if n_obs <= 0:
        raise Exception(f'No data was returned for the query {query}. '
                        'Please change lat, lon and time extents to an area with data.')

    print(f'Loading data for {n_obs} {platform} observations')
    band_numbers = [band_nametonum[band_name] for band_name in bands]
    landsat_ds = dc.load(product=nbar_product,
                         measurements=band_numbers,
                         group_by='solar_day',
                         **query)

    print(f'Loading pixel quality data for {n_obs} {platform} observations')
    landsat_pq = dc.load(product=pq_product,
                         group_by='solar_day',
                         fuse_func=ga_pq_fuser,
                         **query)

    print('Masking out poor quality pixels (e.g. cloud)')
    pq_flags = {
        'cloud_acca': 'no_cloud',
        'cloud_shadow_acca': 'no_cloud_shadow',
        'cloud_shadow_fmask': 'no_cloud_shadow',
        'cloud_fmask': 'no_cloud',
        'blue_saturated': False,
        'green_saturated': False,
        'red_saturated': False,
        'nir_saturated': False,
        'swir1_saturated': False,
        'swir2_saturated': False,
        'contiguous': True,
    }
    good_quality = masking.make_mask(landsat_pq.pqa, **pq_flags)

    # Keep only the good-quality pixels
    landsat_ds = landsat_ds.where(good_quality)

    # Map the loaded band numbers back to the names that were requested
    band_numtoname = {number: band_name
                      for band_name, number in band_nametonum.items()
                      if band_name in bands}
    return landsat_ds.rename(band_numtoname)
        
        

def load_landsatscenes(dc, query, platforms=['ls5', 'ls7', 'ls8'], bands=['red', 'green', 'blue']):

    '''
    Load cloud-masked Landsat data for several platforms and combine the
    results into a single dataset ordered by time.

    Each platform is loaded with `load_cloudmaskedlandsat`. Loading is
    best-effort: a platform whose load raises (for example because no data
    matches the query) is skipped and the reason is printed.

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to load from.
    query : dict
        Query terms passed unchanged to `load_cloudmaskedlandsat`.
    platforms : list, optional
        Platform names to try, in order; defaults to
        `['ls5', 'ls7', 'ls8']`.
    bands : list, optional
        Band names to load for every platform; defaults to
        `['red', 'green', 'blue']`.

    Returns
    -------
    xarray Dataset
        The per-platform datasets concatenated along `time` and sorted by
        `time`.

    Raises
    ------
    ValueError
        If none of the platforms could be loaded.

    '''

    # Imported here so the function does not depend on a later notebook
    # cell having imported xarray first
    import xarray as xr

    out = []

    for platform in platforms:

        try:
            out.append(load_cloudmaskedlandsat(dc, query, platform=platform, bands=bands))

        except Exception as error:
            # Skip this platform but report why; `Exception` (not a bare
            # `except`) lets KeyboardInterrupt/SystemExit propagate
            print(f'Skipping {platform}: {error}')

    if not out:
        raise ValueError(f'No data could be loaded for any of the platforms {platforms}')

    # Concatenate
    return xr.concat(out, dim='time').sortby('time')
        
        

%load_ext autoreload
%autoreload 2

In [None]:
import xarray as xr

# Load in validation data: open the GeoTIFF in 500 x 500 chunks, split its
# `band` dimension into one variable per band, and give the eight bands
# descriptive names (band numbers start at 1)
wv2_xr = (
    xr.open_rasterio(
        '/g/data/r78/rt1527/dea-notebooks/Subpixel_paper/raw_data/052743823130_011_geo.tif',
        chunks=dict(x=500, y=500),
    )
    .to_dataset(dim='band')
    .rename(dict(enumerate(
        ['coastal', 'blue', 'green', 'yellow', 'red', 'red_edge', 'nir1', 'nir2'],
        start=1,
    )))
)

# Keep only strictly positive values; everything else is masked by `where`
wv2_xr = wv2_xr.where(wv2_xr > 0)
# Optional quick-look RGB plot of a 10 x 10 block-averaged copy:
# wv2_xr[['red', 'green', 'blue']].coarsen({'x': 10, 'y': 10}, boundary='trim').mean().to_array().plot.imshow(robust=True)

In [None]:
# Alias for the validation data with no coarsening applied; the disabled
# suffix is an optional 4 x 4 block-mean coarsening step
wv2_xr_test = wv2_xr   #.coarsen({'x': 4, 'y': 4}, boundary='trim').mean().compute()

In [None]:
# Normalised difference of the `red` and `nir1` bands, evaluated with
# `.compute()`.
# NOTE(review): the Landsat index computed later in this notebook uses green
# (not red) against NIR -- confirm that red is the intended band here.
ndwi = ((wv2_xr_test.red - wv2_xr_test.nir1) / (wv2_xr_test.red + wv2_xr_test.nir1)).compute()

In [None]:
# Display the computed index array
ndwi

In [None]:
# `affine` has to be imported for the call below to work on a fresh kernel
import affine

# Build an Affine transform from the coefficients stored in `wv2_xr.transform`
affine.Affine(*wv2_xr.transform)

In [None]:
import affine  # needed for `affine.Affine` below

# Index value at which the validation waterline contour is extracted
thresh = 0.2

# Extract the contour at `thresh` from the validation index.
# NOTE(review): `crs[6:]` drops the first six characters of the CRS string,
# presumably a '+init=' prefix -- confirm against the raster's metadata.
contours_gdf = SpatialTools.contour_extract(z_values=[thresh],
                                   ds_array=ndwi,
                                   ds_crs=wv2_xr.crs[6:],
                                   ds_affine=affine.Affine(*wv2_xr.transform),
                                   output_shp='waterline_wv2_7.shp',
                                   min_vertices=2,
                                   verbose=False)
contours_gdf

In [None]:
# wv2_xr[['red', 'green', 'blue']].isel(x=slice(100, 2000), y=slice(2000, 4000)).coarsen({'x': 2.5, 'y': 2.5}, boundary='trim').mean().to_array().plot.imshow(robust=True)

In [None]:
# Connect to the datacube
dc = datacube.Datacube(app='Coastal erosion')

# Create 'query': y/x bounds, a three-month time window in late 2010, and the
# output CRS (EPSG:28355) and resolution the data is loaded at
query = {'y': (-15.68847186, -16.04168290),
         'x': (145.34355739, 145.47173940),
         'time': ('2010-10-01', '2010-12-31'),
         'output_crs': 'EPSG:28355',
         'resolution': (-25, 25)}

# Load Landsat 5 data (platform='ls5') for the time and area in the query.
# This may take several minutes!
ds = load_cloudmaskedlandsat(dc=dc, 
                             query=query, 
                             platform='ls5', 
                             bands=['red', 'green', 'blue', 'nir', 'swir1'])
print(ds)

# NOTE(review): the variable is stored under the name 'MNDWI' but is computed
# from green and nir; the swir1-based variant is the disabled line below.
# Later cells read `ds.MNDWI`, so confirm which index is intended.
# ds['MNDWI'] = (ds.green - ds.swir1) / (ds.green + ds.swir1)
ds['MNDWI'] = (ds.green - ds.nir) / (ds.green + ds.nir)
        

### Compute tides

In [None]:
# Tide post location; `date` is kept for reference but the observation times
# below come from the loaded dataset
tidepost_lat = -15.92129443
tidepost_lon = 145.36472283
date = "2010-11-13T00:40:58.612213Z"

# Use the tidal model to compute a tide height for each observation:
# convert the dataset's time coordinate to second precision and then to a
# list of Python objects, one per timestep
obs_datetimes = (
    ds.time.data
    .astype('M8[s]')
    .astype('O')
    .tolist()
)
obs_timepoints = [
    TimePoint(tidepost_lon, tidepost_lat, obs_time) for obs_time in obs_datetimes
]
obs_predictedtides = predict_tide(obs_timepoints)

# Pull the `tide_m` value out of every prediction and display the list
obs_tideheights = [prediction.tide_m for prediction in obs_predictedtides]
obs_tideheights

In [None]:
# Select one timestep (position 2 along `time`), keeping only the MNDWI
# variable, and plot it.
# NOTE(review): the index 2 is hard-coded -- confirm it is the intended scene.
ds_sel = ds.isel(time=2)[['MNDWI']]
ds_sel.MNDWI.plot(size=10, aspect=0.6)

### Extract waterlines

In [None]:
# Index value at which the Landsat waterline contour is extracted.
# NOTE: `thresh` and `contours_gdf` reuse the names assigned in the
# validation-raster contour cell, so running this cell overwrites those values.
thresh = 0.0

# Extract the contour at `thresh` from the selected MNDWI layer, passing the
# CRS and affine transform of the loaded Landsat dataset
contours_gdf = SpatialTools.contour_extract(z_values=[thresh],
                                   ds_array=ds_sel.MNDWI,
                                   ds_crs=ds.crs,
                                   ds_affine=ds.geobox.transform,
                                   output_shp=f'waterline_subpixel.shp',
                                   min_vertices=2,  
                                   verbose=False)
contours_gdf


## Jagged contours

In [None]:
from rasterio.features import shapes
import geopandas as gpd
from shapely.geometry import LineString, Point, shape
import numpy as np

# Threshold the index: True where MNDWI exceeds `thresh`; pixels whose MNDWI
# is not finite are masked by `where`
ds_bool = (ds_sel.MNDWI > thresh).where(np.isfinite(ds_sel.MNDWI))

# Polygonise every region of the thresholded array (the filter that kept only
# value == 1 regions is disabled, so all values are retained).
# NOTE(review): masked (NaN) pixels are cast to int16 here; the integer they
# become is not obvious -- confirm they do not end up as a spurious class.
geoms = list(shapes(ds_bool.values.astype(np.int16), transform=ds.geobox.transform))
# geoms = [shape(a) for (a, b) in geoms if b == 1.0]
geoms_shape = [shape(a) for (a, b) in geoms]
data = [b for (a, b) in geoms]


polygon = gpd.GeoDataFrame(data={'value': data}, geometry = geoms_shape, crs = str(ds.crs))
# polygon_dissolved = polygon.dissolve(by='value')

# Replace each polygon by its exterior ring as a LineString. `LineString` is
# imported at the top of this cell so the cell runs on a fresh kernel.
polygon['geometry'] =  [LineString(i.coords) for i in polygon.geometry.exterior]
# polygon.to_file('waterline_blocky.shp')
polygon.plot()


In [None]:
from shapely.geometry import LineString

# One LineString per processed geometry is collected here
out_strings = []

# Row of `polygon` with the largest area; vertices are tested against it below.
# NOTE(review): the previous cell replaces the polygon geometries with their
# exterior lines, so `polygon.area` may be zero for every row at this point --
# confirm this still selects the intended geometry.
water_poly = polygon[polygon.area == max(polygon.area)].geometry.iloc[0]
# polygon_dissolved.iloc[1][0]

# NOTE(review): `polygon_dissolved` is only assigned in a commented-out line
# of the previous cell, so this loop raises NameError on a fresh kernel.
# Restore that assignment (or rewrite the loop) before relying on this cell.
for i in polygon_dissolved.iloc[0][0][0:500]:
    x, y = i.exterior.coords.xy
    
    good_coords = []
    all_coords = []
    
    # `zip(x, y)` is evaluated once before the loop rebinds `x` and `y`, so
    # the coordinate arrays are iterated correctly despite the reused names
    for (x, y) in zip(x, y):
    
        # Keep only the vertices that intersect `water_poly`
        on_border = water_poly.intersects(Point(x, y))
        
        if on_border:
            good_coords.append((x, y))
        # `all_coords` is filled but never read in this cell
        all_coords.append((x, y))
        
    out_strings.append(LineString(good_coords))


In [None]:
# Collect the extracted boundary lines, tagged with the same CRS string as the
# `polygon` GeoDataFrame they were derived from, so the output carries a CRS
line_boundaries = gpd.GeoDataFrame(geometry=out_strings, crs=str(ds.crs))

# Write only the lines with non-zero length
line_boundaries[line_boundaries.length > 0].to_file('waterline_blocky.shp')

In [None]:

# NOTE(review): scratch expression. In a comprehension the left-most `for` is
# the outer loop, so `ii` is read by the first clause before the second clause
# binds it (NameError unless `ii` already exists in the kernel). The first
# clause also iterates over the two coordinate arrays returned by `.xy`
# rather than over (x, y) pairs.
[polygon.iloc[[0]].geometry.intersects(Point(a, b))[0] for (a, b) in ii.exterior.coords.xy for ii in polygon.iloc[0][0]]


# polygon.iloc[[0]].geometry.intersects(Point(x[5], y[5]))

In [None]:
 # NOTE(review): scratch cell -- `s1` and `s2` are not assigned in any cell
 # shown here, so this raises NameError on a fresh kernel.
 [(j, k) for j in s1 for k in s2]

In [None]:
# Plot only the first row of `polygon`
polygon.iloc[[0]].plot()

## RMSE stats

In [None]:
# import numpy as np
# contours_gdf.interpolate(np.array([20, 40]))
import geopandas as gpd

# Import validation dataset: the study-area layer and the reference shoreline,
# both read from absolute paths
validation_studyarea = gpd.read_file('/g/data/r78/rt1527/dea-notebooks/Subpixel_paper/raw_data/woldview_validation_studyarea_sub.shp')
validation_shoreline = gpd.read_file('/g/data/r78/rt1527/dea-notebooks/Subpixel_paper/raw_data/woldview_validation_shoreline.shp')
# Merge all shoreline features into a single geometry so that one distance
# call measures to the whole reference shoreline
validation_shoreline_feat = validation_shoreline.geometry.unary_union
# validation_shoreline_buff = validation_shoreline_feat.buffer(500)


def points_along_line(line_gdf, dist=5):
    '''
    Generate regularly spaced points along the geometries of a GeoDataFrame.

    Points are placed at distances 0, `dist`, 2 * `dist`, ... along every
    geometry, stopping before the geometry's length truncated to a whole
    number. Rows with a missing or empty geometry contribute no points.

    Parameters
    ----------
    line_gdf : geopandas GeoDataFrame
        Lines to sample. Any number of rows and any index labels are
        accepted.
    dist : int, optional
        Spacing between consecutive points, in the units of the
        geometries' coordinates; defaults to 5. It is used as a `range`
        step, so it must be a non-zero integer.

    Returns
    -------
    subpixel_points : geopandas GeoDataFrame
        One row per generated point, with the CRS of `line_gdf`.

    '''

    out = []

    for line in line_gdf.geometry:

        # Skip rows without a usable geometry (e.g. after clipping)
        if line is None or line.is_empty:
            continue

        # Create a point every `dist` units along this geometry
        for distance in range(0, int(line.length), dist):
            out.append(line.interpolate(distance))

    subpixel_points = gpd.GeoDataFrame(geometry=out, crs=line_gdf.crs)
    return subpixel_points



In [None]:
# Clip the extracted contours to the validation study area. The two geometry
# columns are combined row by row; this overwrites `contours_gdf` in place.
contours_gdf['geometry'] = contours_gdf.geometry.intersection(validation_studyarea.geometry)

# Sample a point every 5 units along the clipped waterline
subpixel_points = points_along_line(contours_gdf, dist=5)

# Distance from every sample point to the reference shoreline, computed in one
# vectorised call instead of a row-by-row `apply`
subpixel_points['dist_val'] = subpixel_points.geometry.distance(validation_shoreline_feat)
subpixel_points.to_file('subpixel_dists.shp')

In [None]:
# Merge all pixel-edge lines into one geometry and clip it to the validation
# study area (the two geometry columns are combined row by row)
blocky_gdf = gpd.GeoDataFrame(geometry=[polygon.unary_union], crs=str(ds.crs))
blocky_gdf['geometry'] = blocky_gdf.geometry.intersection(validation_studyarea.geometry)

# Sample a point every 5 units along the clipped pixel-edge waterline
blocky_points = points_along_line(blocky_gdf, dist=5)

# Distance from every sample point to the reference shoreline, computed in one
# vectorised call instead of a row-by-row `apply`
blocky_points['dist_val'] = blocky_points.geometry.distance(validation_shoreline_feat)
blocky_points.to_file('blocky_dists.shp')

In [None]:
# Mean distance from the subpixel waterline points to the validation shoreline
subpixel_points.dist_val.mean()

In [None]:
# Mean distance from the blocky (pixel-edge) points to the validation shoreline
blocky_points.dist_val.mean()

In [None]:
# Standard deviation of the subpixel point distances
subpixel_points.dist_val.std()

In [None]:
# Standard deviation of the blocky (pixel-edge) point distances
blocky_points.dist_val.std()

In [None]:
# RMSE for the subpixel waterline: square root of the mean squared distance
np.sqrt((subpixel_points.dist_val ** 2).mean())

In [None]:
# RMSE for the blocky waterline: square root of the mean squared distance
np.sqrt((blocky_points.dist_val ** 2).mean())

In [None]:
# Histogram (50 bins) of the subpixel point distances
subpixel_points.dist_val.hist(bins=50)


In [None]:
# Histogram (50 bins) of the blocky (pixel-edge) point distances
blocky_points.dist_val.hist(bins=50)

In [None]:
# `datetime` is needed by this cell and is not imported anywhere else
from datetime import datetime

# Remove Landsat 7 scenes with the Scan Line Correction (SLC) missing data:
# only datasets whose centre time is before this date pass the predicate
LS7_BROKEN_DATE = datetime(2003, 5, 31)


def is_pre_slc_failure(dataset):
    """Return True when `dataset.center_time` is earlier than LS7_BROKEN_DATE.

    The virtual product YAML refers to this function by name
    (`dataset_predicate: __main__.is_pre_slc_failure`), so the name must stay
    unchanged.
    """
    return dataset.center_time < LS7_BROKEN_DATE


print(f"Cell finished at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Fuse function for the WOfS layer; the virtual product YAML refers to it by
# name (`fuse_func: __main__.wofls_fuser`)
def wofls_fuser(dest, src):
    """Copy `src` into `dest` wherever the lowest bit of `src` is 0.

    Parameters
    ----------
    dest : numpy array
        Array that is updated in place.
    src : numpy array
        Array whose values are copied over `dest` at the selected positions.
        NOTE(review): bit 0 is presumably the WOfS "no data" flag -- confirm
        against the product's flag definition.

    Returns
    -------
    The same `dest` array, after the in-place update.
    """
    low_bit_clear = np.equal(np.bitwise_and(src, 1), 0)
    np.copyto(dest, src, where=low_bit_clear)
    return dest

# `construct_from_yaml` is needed by this cell and is not imported anywhere else
from datacube.virtual import construct_from_yaml

#Create virtual product so that datacube data can be loaded effectively within memory.
# The recipe juxtaposes two things:
#   * a collation of the ls5, ls7 and ls8 fractional cover products (PV, NPV,
#     BS), each masked with the `ga_good_pixel` flag of its pixel quality
#     product; ls7 datasets are additionally filtered by `is_pre_slc_failure`
#   * a `water` mask built from the `wet` flag of `wofs_albers`, fused with
#     `wofls_fuser`
fc_and_water_yaml = """
        juxtapose:
          - collate:
              - transform: apply_mask
                mask_measurement_name: pixelquality
                preserve_dtype: false
                input:
                    juxtapose:
                      - product: ls5_fc_albers
                        group_by: solar_day
                        measurements: [PV, NPV, BS]
                      - transform: make_mask
                        input:
                            product: ls5_pq_albers
                            group_by: solar_day
                            fuse_func: datacube.helpers.ga_pq_fuser
                        flags:
                            ga_good_pixel: true
                        mask_measurement_name: pixelquality
              - transform: apply_mask
                mask_measurement_name: pixelquality
                preserve_dtype: false
                input:
                    juxtapose:
                      - product: ls7_fc_albers
                        group_by: solar_day
                        measurements: [PV, NPV, BS]
                        dataset_predicate: __main__.is_pre_slc_failure
                      - transform: make_mask
                        input:
                            product: ls7_pq_albers
                            group_by: solar_day
                            fuse_func: datacube.helpers.ga_pq_fuser
                        flags:
                            ga_good_pixel: true
                        mask_measurement_name: pixelquality
              - transform: apply_mask
                mask_measurement_name: pixelquality
                preserve_dtype: false
                input:
                    juxtapose:
                      - product: ls8_fc_albers
                        group_by: solar_day
                        measurements: [PV, NPV, BS]
                      - transform: make_mask
                        input:
                            product: ls8_pq_albers
                            group_by: solar_day
                            fuse_func: datacube.helpers.ga_pq_fuser
                        flags:
                            ga_good_pixel: true
                        mask_measurement_name: pixelquality
          - transform: make_mask
            input:
                product: wofs_albers
                group_by: solar_day
                fuse_func: __main__.wofls_fuser
            flags:
                wet: true
            mask_measurement_name: water
"""

# Build the virtual product object from the recipe
fc_and_water = construct_from_yaml(fc_and_water_yaml)

In [None]:
# Load the virtual product with the same `query` dict used for the Landsat load
out_test = fc_and_water.load(dc, **query)

In [None]:
# Plot the `water` variable for the timestep at position 6 along `time`
out_test.isel(time=6).water.plot()

In [None]:
# Display the loaded virtual product dataset
out_test