# Rolling Qutarerly NDVI climatology
Notebook is only compatible on the `NCI` as it uses Landsat Collection 2

In [59]:
import xarray as xr
from datacube.helpers import write_geotiff
import datacube
import matplotlib.pyplot as plt
from dask.distributed import Client
import geopandas as gpd
import sys
import os
sys.path.append('src')
from anomalies import calculate_anomalies, load_landsat, display_map, map_shapefile
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Analysis Parameters

The following cell sets the parameters, which define the area of interest and the season to conduct the analysis over. The parameters are:

* `veg_index`: The vegetation index to use for the analysis, either `'msavi'` or `'ndvi'`.
* `from_shape`: If providing a shapefile to the define the area of interest, set this parameter to `True`, otherwise set to `False`.
* `shp_fpath`: If you set `from_shape` to True, provide a filepath to the shapefile.
* `lat`, `lon`: If not using a shapefile to define the AOI, then use a latitide and longitude point. This point will be the centre point of your AOI.
* `buffer`: The length, in decimal degrees, either side of the `lat` and `lon` point that will define the AOI box. Keep this below `0.5` to avoid long load times
* `year`: The year of interest, e.g. `'2018'`
* `season`:  The season of interest, must be one of `'DJF'`, `'MAM'`, `'JJA'`, or `'SON'`
* `region`: This variable is to account for the climatology datasets being cacluated only for specific regions at this time.  Must be one of `'NMDB'` (for the Northern Murray Darling Basin) or `'NWQLD'` for NW Queensland

### Set up local dask cluster

Dask will create a local cluster of cpus for running this analysis in parallel. If you'd like to see what the dask cluster is doing, click on the hyperlink that prints after you run the cell and you can watch the cluster run.

In [60]:
#delete old client if one still exists
client = locals().get('client', None)
if client is not None:
    client.close()
    del client
    
client = Client(n_workers=3, threads_per_worker=1, memory_limit='9GB')
client

0,1
Client  Scheduler: tcp://127.0.0.1:34196  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 3  Cores: 3  Memory: 27.00 GB


In [61]:
#input parameters
veg_index='ndvi'
from_shape = False
shp_fpath = "data/nmdb_individual_catchments/CONDAMINE-CULGOA RIVERS.shp"  #NSW shapefile vs bounding box - speed?
lat, lon = -33.2, 149.1
buffer = 0.15
time = ('1992', '2008')
# year = '2018'
# season = 'JJA'
# region = 'NMDB'

#dask chunk size, shouldn't need to change
chunk_size = 750

In [None]:
# If your specifying a lat, lon and buffer, run this cell
display_map(y=(lat-buffer, lat + buffer), x=(lon-buffer, lon + buffer))

In [62]:
dc = datacube.Datacube(app='longterm-ndvi-anomaly')

query = {'lon': (lon - buffer, lon + buffer),
                 'lat': (lat - buffer, lat + buffer),
                 'time': time}

ds = load_landsat(dc=dc, query=query, sensors=['ls5','ls7','ls8'], 
                  bands_of_interest=['nir', 'red'], lazy_load=True,
                  dask_chunks = {'x': chunk_size, 'y': chunk_size})
ds


Loading ls5
Loading ls7
Loading ls8
    Skipping ls8; no valid data for query
Combining and sorting ls5, ls7 data


<xarray.Dataset>
Dimensions:  (time: 575, x: 1281, y: 1477)
Coordinates:
  * y        (y) float64 -3.709e+06 -3.709e+06 ... -3.746e+06 -3.746e+06
  * x        (x) float64 1.563e+06 1.563e+06 1.563e+06 ... 1.595e+06 1.595e+06
  * time     (time) datetime64[ns] 1992-01-10T23:20:18.500000 ... 2008-12-16T23:34:10.500000
Data variables:
    nir      (time, y, x) float32 dask.array<shape=(575, 1477, 1281), chunksize=(1, 750, 750)>
    red      (time, y, x) float32 dask.array<shape=(575, 1477, 1281), chunksize=(1, 750, 750)>
Attributes:
    crs:      EPSG:3577

In [73]:
def compute_climatology(data,quarter):
    
    quarter_dict= {'JFM': [1,2,3],
                   'FMA': [2,3,4],
                   'MAM': [3,4,5],
                   'AMJ': [4,5,6],
                   'MJJ': [5,6,7],
                   'JJA': [6,7,8],
                   'JAS': [7,8,9],
                   'ASO': [8,9,10],
                   'SON': [9,10,11],
                   'OND': [10,11,12],
                   'NDJ': [11,12,1],
                   'DJF': [12,1,2],
                  }
    #Scale reflectance values to 0-1
    nir = data.nir / 10000
    red = data.red / 10000
    #calculate ndvi
    ndvi=(nir-red)/(nir+red)
    ndvi = ndvi.astype('float32') #convert to reduce memory
    
    #modified from anomalies - return to function with input season? JFM=1,2,3 or just numeric loop
#calculate climatologies

#fixme - return to function, default behaviour - run both mean and std?
    ix=ndvi['time.month'].isin(quarter_dict[quarter])
    
    ndvi_clim_mean=ndvi[ix].mean(dim='time')   
    ndvi_clim_mean=ndvi_clim_mean.rename('ndvi_clim_mean').to_dataset()
    ndvi_clim_mean.to_zarr('results/ndvi_clim_mean_'+quarter+ '.nc')
    
    ndvi_clim_std=ndvi[ix].std(dim='time')
    ndvi_clim_std=ndvi_clim_std.rename('ndvi_clim_std').to_dataset()
    ndvi_clim_std.to_zarr('results/ndvi_clim_std_'+quarter+ '.nc')



In [75]:
compute_climatology(ds,'MAM')

In [71]:
#in a loop
for q in quarter_dict:
    compute_climatology(ds,q)


ValueError: path '' contains a group

In [None]:

ndvi_clim_mean.plot.imshow(cmap='YlGn',vmin=0,vmax=1,figsize=(10,8))

# ndvi = msavi.resample(time='QS-DEC').mean('time')
# climatology_mean = msavi.groupby('time.season').mean('time')
# djf = climatology_mean.sel(season='DJF')