# Sentinel 2 + Landsat animation with hydrograph

In [1]:
# Import modules
import datacube 
import sys
import xarray as xr
import numpy as np
import calendar
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import matplotlib.patheffects as PathEffects
import matplotlib.patches as patches
import matplotlib.dates as mdates
import matplotlib.lines as mlines
from skimage import exposure
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from datetime import datetime, timedelta

# Import external dea-notebooks functions using relative link to Scripts directory
sys.path.append('../Scripts')
import DEADataHandling
import DEAPlotting
import TasseledCapTools

# Set up datacube instances
dc = datacube.Datacube(app='Time series animation')
s2dc = datacube.Datacube(config='/g/data/r78/dc_configs/sentinel2.conf')

# Study area name used for output file
study_area = 'hattah'

# Thresholds used for cloud masking (Sentinel 2 is more aggressive)
landsat_masked_prop = 0.99
sentinel_masked_prop = 0.90

# Set up analysis data query using a buffer around a lat-long point
lat, lon, buffer = -34.7275016476, 142.367987284, 5000
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - 10000, x + 10000),
         'y': (y - buffer, y + buffer),    
         'time': ('2014-01-01', '2018-07-30'),
         'crs': 'EPSG:3577'}

%load_ext autoreload
%autoreload 2

## Extract cloud-free clear Landsat and Sentinel observations
Use the `load_clearlandsat` function to load Landsat observations and PQ data for multiple sensors (i.e. ls5, ls7, ls8), and return a single xarray dataset containing only observations that contain greater than a specified proportion of clear pixels. 

Load in Sentinel data for S2A and S2B.

In [2]:
# Custom mask that includes only cloudy or cloud shadowed pixels with data for all bands
custom_mask = {'cloud_acca': 'no_cloud', 
               'cloud_fmask': 'no_cloud', 
               'cloud_shadow_acca': 'no_cloud_shadow',
               'cloud_shadow_fmask': 'no_cloud_shadow',
               'contiguous': True}

# Load in data
ls578_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query, product='nbart', 
                                        #bands_of_interest=['green', 'nir', 'swir1', 'red'], 
                                        masked_prop=landsat_masked_prop, mask_dict=custom_mask, apply_mask=False)  

# Remove problematic timesteps
problem_times = ['2015-08-02', '2015-08-18', '2016-03-13', '2016-04-30', '2017-05-03', '2015-04-28']
times_to_drop = [ls578_ds.time.loc[i].values for i in problem_times]

# Remove timesteps
ls578_ds = ls578_ds.drop(np.concatenate(times_to_drop), dim="time")


Loading ls5 PQ
    Skipping ls5
Loading ls7 PQ
    Loading 0 filtered ls7 timesteps
Loading ls8 PQ
    Loading 47 filtered ls8 timesteps
Combining and sorting ls5, ls7 and ls8 data


In [27]:
ls578_ds

<xarray.Dataset>
Dimensions:    (time: 44, x: 801, y: 401)
Coordinates:
  * y          (y) float64 -3.825e+06 -3.825e+06 -3.825e+06 -3.825e+06 ...
  * x          (x) float64 9.353e+05 9.353e+05 9.353e+05 9.353e+05 9.354e+05 ...
  * time       (time) datetime64[ns] 2014-01-03T00:22:34 2014-01-19T00:22:20 ...
Data variables:
    blue       (time, y, x) float64 438.0 440.0 411.0 439.0 452.0 445.0 ...
    green      (time, y, x) float64 683.0 679.0 635.0 705.0 669.0 676.0 ...
    red        (time, y, x) float64 976.0 987.0 900.0 995.0 967.0 990.0 ...
    nir        (time, y, x) float64 2.133e+03 2.145e+03 2.072e+03 2.212e+03 ...
    swir1      (time, y, x) float64 2.719e+03 2.739e+03 2.678e+03 2.826e+03 ...
    swir2      (time, y, x) float64 1.931e+03 1.94e+03 1.869e+03 1.971e+03 ...
    data_perc  (time) float64 0.997 1.0 1.0 1.0 1.0 1.0 0.9995 0.998 1.0 1.0 ...
Attributes:
    crs:      EPSG:3577

## Import Sentinel 2

In [3]:
# Load Sentinel
s2a_ds = s2dc.load(product='s2a_ard_granule', group_by='solar_day', 
                          #measurements = ['swir1', 'nir1', 'green', 'pixel_quality'], 
                   like=ls578_ds)

s2b_ds = s2dc.load(product='s2b_ard_granule', group_by='solar_day', 
                          #measurements = ['swir1', 'nir1', 'green', 'pixel_quality'], 
                   like=ls578_ds)

# Keep only cloud-free Sentinel 2A data
PQmask = s2a_ds.where(s2a_ds.pixel_quality == 1)
threshold = s2a_ds.x.count() * s2a_ds.y.count() * (1.0 - sentinel_masked_prop)
KeepTimeMask = PQmask.green.isnull().sum(axis=(1, 2)) <= int(threshold)
s2a_ds = s2a_ds.where(KeepTimeMask).dropna(dim='time')

# Keep only cloud-free Sentinel 2B data
PQmask = s2b_ds.where(s2b_ds.pixel_quality == 1)
threshold = s2b_ds.x.count() * s2b_ds.y.count() * (1.0 - sentinel_masked_prop)
KeepTimeMask = PQmask.green.isnull().sum(axis=(1, 2)) <= int(threshold)
s2b_ds = s2b_ds.where(KeepTimeMask).dropna(dim='time')


  if like:
  if like:


In [4]:
# Combine into single dataset
s2a_ds.rename({'nir1': 'nir'}, inplace=True)
s2b_ds.rename({'nir1': 'nir'}, inplace=True)
combined_ds = xr.auto_combine([ls578_ds, s2a_ds, s2b_ds])
combined_ds = combined_ds.sortby('time')
combined_ds

<xarray.Dataset>
Dimensions:        (time: 65, x: 801, y: 401)
Coordinates:
  * time           (time) datetime64[ns] 2014-01-03T00:22:34 ...
  * y              (y) float64 -3.825e+06 -3.825e+06 -3.825e+06 -3.825e+06 ...
  * x              (x) float64 9.353e+05 9.353e+05 9.353e+05 9.353e+05 ...
Data variables:
    blue           (time, y, x) float64 438.0 440.0 411.0 439.0 452.0 445.0 ...
    green          (time, y, x) float64 683.0 679.0 635.0 705.0 669.0 676.0 ...
    red            (time, y, x) float64 976.0 987.0 900.0 995.0 967.0 990.0 ...
    nir            (time, y, x) float64 2.133e+03 2.145e+03 2.072e+03 ...
    swir1          (time, y, x) float64 2.719e+03 2.739e+03 2.678e+03 ...
    swir2          (time, y, x) float64 1.931e+03 1.94e+03 1.869e+03 ...
    data_perc      (time) float64 0.997 1.0 1.0 1.0 1.0 1.0 0.9995 0.998 1.0 ...
    pixel_quality  (time, y, x) float64 nan nan nan nan nan nan nan nan nan ...
    contiguity     (time, y, x) float64 nan nan nan nan nan nan nan

In [12]:
from datacube.storage.storage import write_dataset_to_netcdf
#drop unitless bands because they don't write to netcdf - other option is to give them a unitless unit
combined_ds =combined_ds.drop('data_perc')

AttributeError: 'DataVariables' object has no attribute 'isel'

In [30]:
try:
    ls578_ds = ls578_ds.drop('data_perc')
    DEADataHandling.write_your_netcdf(ls578_ds, 'ls578_ds', '/g/data/r78/tmp/'+'ls578_ds'+'.nc', crs = ls578_ds.crs)
#complain if the file already exists but don't fail    
except RuntimeError as err:
    print("RuntimeError: {0}".format(err))
print('wrote to netCDF' )

RuntimeError: Storage Unit already exists: /g/data/r78/tmp/ls578_ds.nc
wrote to netCDF


## Import flow data
This is used to plot a line on the right panel of the animation. Must be imported as a geopandas dataframe with a datetime index column

In [None]:
# Set area over which index values should be averaged
line_df = pd.read_csv('/g/data/r78/rt1527/dea-notebooks/Enviroflows/raw_data/euston_flow.csv', index_col='date', parse_dates=True, dayfirst=True)

# Subset to time covered by datacube query
date_min, date_max = query['time']
line_df = line_df[(line_df.index >= date_min) & (line_df.index <= date_max)]

# Give informative name
# line_df['Max discharge (Ml/d)'] = line_df['mean'] * 2
line_df.rename(columns={'mean':'Mean discharge (Ml/d) at Euston, Murray River'}, inplace=True)
line_df.plot()


## Plot data as animation

In [None]:
# Combine into single dataset
s2a_ds.rename({'nir1': 'nir'}, inplace=True)
s2b_ds.rename({'nir1': 'nir'}, inplace=True)
combined_ds = xr.auto_combine([ls578_ds, s2a_ds, s2b_ds])
combined_ds = combined_ds.sortby('time')
combined_ds

In [None]:
#run tasselled cap and threshold (non-binary?)

In [None]:
tci = TasseledCapTools.thresholded_tasseled_cap(combined_ds, wetness_threshold=-350, drop_tc_bands=False)

In [None]:
tci.wetness_thresholded.max()

In [None]:


# Create animation
DEAPlotting.animated_timeseriesline(ds=combined_ds, # .isel(time=[0,1,2,3,4,5]), 
                        df=line_df, 
                        output_path='{}_audio.gif'.format(study_area), 
                        bands=['swir1', 'nir', 'green'], 
                        interval=220, 
                        annotation_kwargs={'fontsize':17},
                        reflect_stand=4200)

In [None]:
# Create animation
DEAPlotting.animated_timeseriesline(ds=tci,#.isel(time=[0,1,2,3,4,5]), 
                        df=line_df, 
                        output_path='{}_wetness_sentinel2.gif'.format(study_area), 
                        bands=['wetness_thresholded'], 
                        onebandplot_cbar=False, 
                        onebandplot_kwargs={'cmap':'gist_earth_r','vmin':tci.wetness_thresholded.min(),
                                           'vmax':tci.wetness_thresholded.max()},
                        interval=220, 
                        annotation_kwargs={'fontsize':17},
                        reflect_stand=4200
                                   )