This notebook loads and/or generates vegetation-related products (NDVI, FC, TC, WOfS) for a specific area
and exports them as GeoTIFFs (each with a companion metadata text file) for further analysis/viewing in ArcGIS

In [13]:
%matplotlib inline
import sys
import warnings
import matplotlib.pyplot as plt
import calendar
import os
import numpy as np
import xarray as xr

import dask
from dask.utils import parse_bytes

import datacube
from datacube.storage import masking
from datacube.helpers import write_geotiff
from datacube.utils.rio import configure_s3_access
from datacube.utils.dask import start_local_dask

# Load custom DEA notebook functions
sys.path.append('../dea-notebooks/Scripts')
import dea_datahandling
import dea_plotting
import DEADataHandling
from dea_bandindices import calculate_indices

Set up a dask cluster

This will help keep our memory use down and conduct the analysis in parallel. If you'd like to view the dask dashboard, click on the hyperlink that prints below the cell.

The parameters for generating the local dask cluster are automatically generated, but if you wish to alter them use the documentation here - https://distributed.dask.org/en/stable/local-cluster.html. Put simply, the code below identifies how many cpus and how much RAM the computer has and generates a local cluster using those variables.

In [14]:
# Route the dask dashboard hyperlink through the JupyterHub proxy
dashboard_link = os.environ.get('JUPYTERHUB_SERVICE_PREFIX', '/') + "proxy/{port}/status"
dask.config.set({"distributed.dashboard.link": dashboard_link})

# Resources granted by JupyterHub; a missing or zero value means "not set"
mem_limit = int(os.environ.get('MEM_LIMIT', '0'))
cpu_limit = float(os.environ.get('CPU_LIMIT', '0'))

# Fall back to 4 threads / 8Gb when the environment gives no limit
if cpu_limit > 0:
    cpu_limit = int(cpu_limit)
else:
    cpu_limit = 4
if mem_limit <= 0:
    mem_limit = parse_bytes('8Gb')

# Reserve 3Gb for the notebook kernel itself
mem_limit = mem_limit - parse_bytes('3Gb')

# Shut down a client left over from a previous run of this cell so the
# cell can be re-executed safely
if locals().get('client', None) is not None:
    client.close()
    del client

# Single-worker local cluster using every available thread
client = start_local_dask(
    n_workers=1,
    threads_per_worker=cpu_limit,
    memory_limit=mem_limit,
)

# Show the cluster summary (includes the dashboard link)
display(client)

0,1
Client  Scheduler: tcp://127.0.0.1:38159  Dashboard: /proxy/8787/status,Cluster  Workers: 1  Cores: 4  Memory: 5.00 GB


Initialise the data cube. The 'app' argument is used to identify this app; it does not influence the analysis.
Note: Fractional Cover is not in DEA Collection 3 yet, so for the time being it is loaded from
Collection 2 using the older functions.

In [15]:
# Connect to the datacube. `app` is only a label identifying this notebook's
# session; it has no effect on the analysis.
dc_landsat2 = datacube.Datacube(app='VegAnalysis-WD')

Create spatial and temporal query. This is used for Collection 2 data (i.e query_2). 
If running this notebook locally, use the smaller spatial extent and subset of the time series. If running on gadi, the larger extent covers the full Western Davenport study area and the full time-series should be used. 
Note, the Fractional Cover products (defined in query_2) are from Collection 2 and only go up to 2018. This can be updated once FC is added to Collection 3. FC and Wofs also start from 1987.

In [21]:
# Spatial/temporal query used for the Collection 2 loads (FC and WOfS).
# Swap the commented lines to switch between the small test area / full
# study area and between a time subset / the full time-series.
query_2 = {
        'lon': (132.07, 133.00),             # small test area
        'lat': (-20.31, -21.00),             # small test area
#        'lon': (132.07, 135.36),             # full study area
#        'lat': (-20.31, -22.11),             # full study area
#        'time':('2018-12', '2018-12'),       # subset of time-series
        'time':('1987-01', '2018-12'),       # full time-series
        'output_crs': 'EPSG:3577',
        # NOTE(review): DEA examples usually pass (-25, 25) for EPSG:3577 so the
        # y axis runs north-to-south; confirm the positive y step is intended.
        'resolution': (25, 25),
        'group_by': 'solar_day'
}

In [17]:
# Dry-season months as 0-based indices (0 = January ... 11 = December);
# the export cells add 1 to these values when building file names.
dry_months = [5,6,7]

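Specify the months that represent the dry season for the area of interest, as zero-based month indices (0 = January … 11 = December). For example, `[5, 6, 7]` means June, July and August.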

Load fractional cover data and wofs using `dc.load`:

In [22]:
# Chunking shared by both lazy (dask-backed) loads
fc_wofs_chunks = {'x': 500, 'y': 500}

# Fractional Cover for the extent/time range defined in query_2
dataset_fc = dc_landsat2.load(
    product='ls8_fc_albers',
    dask_chunks=fc_wofs_chunks,
    **query_2,
)

# WOfS loaded onto the same grid as the FC dataset via `like`
dataset_wofs = dc_landsat2.load(
    product='wofs_albers',
    dask_chunks=fc_wofs_chunks,
    like=dataset_fc,
)

Wofs is then used to mask out standing water from Fractional Cover. The resulting FC data is then converted to FC out of 1 (rather than 100)

In [27]:
# Keep only the time steps that exist in both the FC and WOfS datasets
shared_times = np.intersect1d(dataset_fc.time, dataset_wofs.time)

ds_fc_matched = dataset_fc.sel(time=shared_times)
ds_wofs_matched = dataset_wofs.sel(time=shared_times)

# Replace each FC band's nodata value with NaN first; otherwise the nodata
# sentinel survives the water mask below and is averaged into the monthly
# means as if it were a real cover fraction.
ds_fc_valid = masking.mask_invalid_data(ds_fc_matched)

# Boolean mask of pixels WOfS flags as dry
dry_mask = masking.make_mask(ds_wofs_matched, dry=True)

# Keep FC only where the pixel is dry, and rescale from percent (0-100)
# to a proportion (0-1)
ds_fc_masked = ds_fc_valid.where(dry_mask.water) / 100

# Monthly resample. Mean is used because median is not currently supported
# by dask.
#ds_resampled = ds_fc_masked.resample(time="1M").median()    # median not currently supported by dask
ds_resampled = ds_fc_masked.resample(time="1M").mean()

# Carry the CRS of the source data over to the resampled dataset
ds_resampled.attrs["crs"] = dataset_fc.crs

Calculate water frequency (percentage of wet observations) from WofS time series.
The box below will load the selected WOfS images with `.compute()` and then cloud filter the images, meaning it will take out images that had too much cloud to see anything. 
It does this by using the `.make_mask()` function to calculate the fraction of cloud pixels in each image.

In [None]:
# Pull every available WOfS time step into memory
date_list = dataset_wofs.time.values
time_steps = dataset_wofs.sel(time=date_list).compute()

# Per-time-step count of pixels flagged as cloud
cc = masking.make_mask(time_steps.water, cloud=True)
ncloud_pixels = cc.sum(dim=['x', 'y'])

# Pixels in one time slice: product of the second and third axis lengths
n_rows, n_cols = time_steps.water.shape[1:3]
npixels_per_slice = n_rows * n_cols

# Cloudy proportion of each time step
cloud_pixels_fraction = ncloud_pixels / npixels_per_slice

# Keep only passes that are less than 50% cloud
is_clear_enough = cloud_pixels_fraction < 0.5
clear_time_steps = time_steps.water.isel(time=is_clear_enough)



In [None]:
# Count, per pixel, how many clear observations were wet and how many dry
wet = masking.make_mask(clear_time_steps, wet=True).sum(dim='time')
dry = masking.make_mask(clear_time_steps, dry=True).sum(dim='time')

# Share of valid (wet or dry) observations in which the pixel was wet
clear = wet + dry
frequency = wet / clear

# Pixels never observed clearly (e.g. persistent mountain shadow) give NaN;
# treat them as never wet
frequency = frequency.fillna(0)

# Blank out pixels that were never wet so they render as nodata (white)
never_wet = frequency == 0
frequency = frequency.where(~never_wet)


Calculate standard deviations and medians for the dry season

In [10]:
# NOTE(review): `ds` (source of TCW/TCB/TCG/NDVI/LAI) is not created in any
# cell visible in this notebook; it must already exist in the kernel.

# dry_months holds 0-based month indices (0 = January), whereas xarray's
# 'time.month' component uses calendar months 1-12, so shift by one before
# comparing.
dry_months_calendar = [m + 1 for m in dry_months]


def _seasonal_stat(da, months, stat):
    """Reduce `da` over time for the given calendar months.

    The time steps whose month is in `months` are selected, reduced per
    month with `stat`, and the monthly results are reduced again with the
    same `stat` across months.

    Parameters
    ----------
    da : xarray.DataArray
        Array with a datetime `time` dimension.
    months : list of int
        Calendar months (1-12) to keep.
    stat : str
        Name of the reduction method to apply, e.g. 'median' or 'std'.

    Returns
    -------
    xarray.DataArray
        `da` with the `time` dimension reduced away.
    """
    in_season = da['time.month'].isin(months)
    monthly = getattr(da.isel(time=in_season).groupby('time.month'), stat)(dim='time')
    return getattr(monthly, stat)(dim='month')


# Dry-season medians of the Tasselled Cap indices.
# (Previously the dry-month mask was computed and then overwritten, so these
# were medians over all months.)
median_TCW_dry = _seasonal_stat(ds.TCW, dry_months_calendar, 'median')
median_TCB_dry = _seasonal_stat(ds.TCB, dry_months_calendar, 'median')
median_TCG_dry = _seasonal_stat(ds.TCG, dry_months_calendar, 'median')

# NDVI statistics over all months
ndvi = ds.NDVI
median_ndvi = ndvi.groupby('time.month').median(dim='time').median(dim='month')

ndvi_monthly_std = ndvi.groupby('time.month').std(dim='time')
std_ndvi = ndvi_monthly_std.std(dim='month')

# NDVI statistics restricted to the dry season
std_ndvi_dry = _seasonal_stat(ndvi, dry_months_calendar, 'std')
median_ndvi_dry = _seasonal_stat(ndvi, dry_months_calendar, 'median')

# January minus August standard deviation. Selecting by month label rather
# than by position stays correct even if a month has no observations.
std_ndvi_diff1 = ndvi_monthly_std.sel(month=1)
std_ndvi_diff2 = ndvi_monthly_std.sel(month=8)
std_ndvi_diff = std_ndvi_diff1 - std_ndvi_diff2

# Dry-season median Leaf Area Index
median_LAI_dry = _seasonal_stat(ds.LAI, dry_months_calendar, 'median')

# Dry-season medians of the monthly Fractional Cover bands
BS_dry = _seasonal_stat(ds_resampled.BS, dry_months_calendar, 'median')
PV_dry = _seasonal_stat(ds_resampled.PV, dry_months_calendar, 'median')
NPV_dry = _seasonal_stat(ds_resampled.NPV, dry_months_calendar, 'median')


Exporting data

In order to use the datacube.helpers write_geotiff function to export a simple single-band, single time-slice geotiff, the above xarray DataArrays need to be converted to xarray Datasets. We do this by using the xarray function .to_dataset. If you don't do this, the write_geotiff function will return an error. 
We also need to reassign the coordinate reference system before the write_geotiff function will work. This is done via the .attrs attribute. We take the crs from the original imported data (ds).
Each file will be exported as a geotiff and saved in the directory given by `savefilepath`. It can be downloaded from this location to the GA network using FileZilla.

In [157]:
# Directory the exported files are written to
savefilepath = '/g/data/zk34/ljg547/Outputs/'

# Project prefix used at the start of every exported file name
Proj = 'SSC_WD_'


def _compact_date(timestamp):
    """Format a numpy datetime64 value as 'YYYYMMDD'.

    The first ten characters of the value's string form are 'YYYY-MM-DD';
    the dashes are removed. (The earlier slicing kept only the last digit
    of the month and day, e.g. '2018-12-25' became '20180205'.)
    """
    return str(timestamp)[0:10].replace('-', '')


# First/last acquisition dates of the main dataset (ds)
ds_startDate = _compact_date(ds.isel(time=0).time.values)
ds_endDate = _compact_date(ds.isel(time=-1).time.values)

# First/last acquisition dates of the Fractional Cover dataset
fc_startDate = _compact_date(dataset_fc.isel(time=0).time.values)
fc_endDate = _compact_date(dataset_fc.isel(time=-1).time.values)


Generating naming convention for dry season files based on Project area (Proj), specified dry season and time series start and end dates. Note some products (e.g. FC and Wofs) have different time-series period as they are imported from Collection 2 rather than Collection 3 (current collection of Landsat data).

In [179]:
# Export the dry-season median Tasselled Cap Wetness as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_TCW_dry.to_dataset(name='median_TCW_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'MedianTCW_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Tasselled Cap Wetness for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "TCW_dry_median is the median value of TCW over the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [180]:
# Export the dry-season median Tasselled Cap Brightness as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_TCB_dry.to_dataset(name='median_TCB_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'MedianTCB_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Tasselled Cap Brigthness for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "TCB_dry_median is the median value of TCB over the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")

In [181]:
# Export the dry-season median Tasselled Cap Greenness as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_TCG_dry.to_dataset(name='median_TCG_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'MedianTCG_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Tasselled Cap Greenness for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "TCG_dry_median is the median value of TCG over the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")

In [182]:
# Export the all-months median NDVI as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_ndvi.to_dataset(name='median_ndvi')
arr.attrs = ds.attrs

fname = (savefilepath + Proj + 'MedianNDVI_' +
         ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Median NDVI for all months" + " from " + ds_startDate +
            "-" + ds_endDate + "." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")

In [183]:
# Export the all-months NDVI standard deviation as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = std_ndvi.to_dataset(name='std_ndvi')
arr.attrs = ds.attrs

fname = (savefilepath + Proj + 'stdNDVI_' +
         ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Standard deviation of NDVI for all months" + " from " + ds_startDate +
            "-" + ds_endDate + "." + "\n" +
            "Higher standard deviation suggests greater variation in vegetation greenness and therefore inferred water supply." +
            # newline added: the two sentences were previously fused together
            "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [184]:
# Export the dry-season NDVI standard deviation as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = std_ndvi_dry.to_dataset(name='std_ndvi_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'stdNDVI_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Standard deviation of NDVI for the dry season(" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "Higher standard deviation suggests greater variation in vegetation greenness and therefore inferred water supply." +
            # newline added: the two sentences were previously fused together
            "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [192]:
# Export the January-minus-August NDVI standard deviation difference as a
# GeoTIFF. write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = std_ndvi_diff.to_dataset(name='std_ndvi_diff')
arr.attrs = ds.attrs

fname = (savefilepath + Proj + 'stdNDVI_DiffJanAug_' +
         ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Comparison between NDVI standard deviation during the wet season (January) ad at the end of the dry season (August)." + "\n" +
            "Time series includes imagery from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "Where vegetation is accessing more reliable water sources (e.g. groundwater), residual standard deviation is low." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [186]:
# Export the dry-season median NDVI as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_ndvi_dry.to_dataset(name='median_ndvi_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels.
# (File stem corrected from the misspelt 'mediandNDVI'.)
fname = (savefilepath + Proj + 'medianNDVI_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# Previously this was written to the 'MedianTCW_DrySeason...' path, which
# overwrote the TCW product's metadata file.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("NDVI of dry period (" + str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "NDVI_dry_median is the median value of NDVI over the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [187]:
# Export the dry-season median Leaf Area Index as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the source data (ds) onto it.
arr = median_LAI_dry.to_dataset(name='median_LAI_dry')
arr.attrs = ds.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'medianLAI_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + ds_startDate + '_' + ds_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: derived from fname so both files share exactly
# the same stem (they previously differed in case: medianLAI vs MedianLAI).
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Leaf Area Index for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + ds_startDate + "-" + ds_endDate + "." + "\n" +
            "LAI_dry_median is the median value of LAI over the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [188]:
# Export the dry-season Bare Soil fraction as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the Fractional Cover source data onto it.
arr = BS_dry.to_dataset(name='BS_dry')
arr.attrs = dataset_fc.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'medianBS_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + fc_startDate + '_' + fc_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The reported period uses the FC dates (matching the file name) rather
# than the dates of the unrelated `ds` dataset.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Fraction of Bare Soil (as compared to photo-synthetic vegetation and non-photosynthetic vegetation) for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + fc_startDate + "-" + fc_endDate + "." + "\n" +
            "The BS value was calculated for the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [189]:
# Export the dry-season photosynthetic vegetation fraction as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the Fractional Cover source data onto it.
arr = PV_dry.to_dataset(name='PV_dry')
arr.attrs = dataset_fc.attrs

# dry_months are 0-based indices, hence the +1 for the month labels
fname = (savefilepath + Proj + 'medianPV_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + fc_startDate + '_' + fc_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The reported period uses the FC dates (matching the file name) rather
# than the dates of the unrelated `ds` dataset.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Fraction of photosynthetic vegetation (as compared to base soil and non-photosynthetic vegetation) for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + fc_startDate + "-" + fc_endDate + "." + "\n" +
            "The PV value was calculated for the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [190]:
# Export the dry-season non-photosynthetic vegetation fraction as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the Fractional Cover source data onto it.
arr = NPV_dry.to_dataset(name='NPV_dry')
arr.attrs = dataset_fc.attrs

# dry_months are 0-based indices, hence the +1 for the month labels.
# (File stem corrected from 'medianNPB' so the GeoTIFF matches its
# 'medianNPV' metadata file.)
fname = (savefilepath + Proj + 'medianNPV_DrySeason' +
         str(dry_months[0]+1) + 'to' + str(dry_months[-1]+1) +
         '_' + fc_startDate + '_' + fc_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The reported period uses the FC dates (matching the file name) rather
# than the dates of the unrelated `ds` dataset.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("Fraction of non-photosynthetic vegetation (as compared to photo-synthetic vegetation and bare soil) for the dry season (" +
            str(dry_months[0]+1) + "-" + str(dry_months[-1]+1) + " month)" +
            " from " + fc_startDate + "-" + fc_endDate + "." + "\n" +
            "The NPV value was calculated for the dry months." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")


In [193]:
# Export the WOfS wet-observation frequency as a GeoTIFF.
# write_geotiff needs a Dataset, so wrap the DataArray and copy the
# attributes of the WOfS source data onto it.
arr = frequency.to_dataset(name='WofS_WetFrequency')
arr.attrs = dataset_wofs.attrs

fname = (savefilepath + Proj + 'WofS_WetFrequency_' +
         fc_startDate + '_' + fc_endDate + '.tif')
write_geotiff(dataset=arr, filename=fname)

# Companion metadata file: same stem as the GeoTIFF, '.txt' extension.
# The reported period uses the same FC-derived dates as the file name
# (WOfS is loaded `like` the FC dataset) instead of the `ds` dates, and a
# newline now separates the description from the time-series line.
# The context manager closes the file even if the write raises.
with open(os.path.splitext(fname)[0] + '.txt', 'w') as f:
    f.write("The frequence of wet, as calculated for each pixel using the Water Observations from Space algorithm." + "\n" +
            "Time series: " + fc_startDate + "-" + fc_endDate + "." + "\n" +
            "This product was derived from VegProducts_Export.ipynb")
