# Create MNDWI area extract to csv file
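This code has been specifically written for Landsat ARD data. The idea is that you provide the date range you wish to process over a given area (defined by the polygon). Always try to stay about 1 GB under your threshold to ensure your work gets processed. The code loops over every polygon in the vector file, so be careful of file sizes. Output is a csv with calculated areas at all time steps for inundated pixels (where MNDWI > 0), dry pixels, total pixels and NA pixels for the wetland polygon. **Note:** as currently written, the extraction loop below writes the polygon-mean value of each loaded band at every time step to the csv (one file per site); the MNDWI and wet/dry layers are calculated but not written out.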

In [1]:
%matplotlib inline

import datacube
import geopandas as gpd
import pandas as pd
import numpy as np
# from datacube.utils import geometry
from datacube.utils.cog import write_cog
from datacube.utils.geometry import Geometry, CRS

import sys
sys.path.insert(1, '../Tools/')
from dea_tools.datahandling import load_ard
from dea_tools.plotting import map_shapefile
from dea_tools.bandindices import calculate_indices
from dea_tools.spatial import xr_rasterize

### Connect to the datacube

In [2]:
# Temporary solution to account for Collection 3 data being in a different
# database on the NCI: try the 'c3-samples' environment first and fall back
# to the default datacube environment if that connection cannot be created.
try:
    dc = datacube.Datacube(app='Analyse_multiple_polygons', env='c3-samples')
except Exception:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not silently swallowed
    dc = datacube.Datacube(app='Analyse_multiple_polygons')

## Analysis parameters

* `time_range` : A tuple giving the start and end of the period to process, as date strings (`YYYY`, `YYYY-MM` or `YYYY-MM-DD`), e.g. `('2019-01-01', '2019-12-31')`
* `vector_file` : A path to a vector file (ESRI Shapefile or GeoJSON). The code below assumes your vector file lives in a directory called 'vectors' located in the same folder as this notebook. Please ensure it is projected to the Albers Equal Area projection.
* `attribute_col` : A column in the vector file used to label the output `xarray` datasets containing satellite images. Each row of this column should have a unique identifier
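* `products` : A list of satellite products to load. Here we are using Landsat 5 and Landsat 8 (`ga_ls5t_ard_3`, `ga_ls8c_ard_3`); Landsat 7 should be processed separately due to striping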
* `measurements` : A list of band names to load from the satellite product e.g. `['nbart_red', 'nbart_green']`
* `resolution` : The spatial resolution of the loaded satellite data e.g. for Landsat, this is `(-30, 30)`
* `output_crs` : The coordinate reference system/map projection to load data into, e.g. `'EPSG:3577'` to load data in the Albers Equal Area projection
* `align` : How to align the x, y coordinates with respect to each pixel. Landsat Collection 3 should be centre aligned `align = (15, 15)` if data is loaded in its native UTM zone projection, e.g. `'EPSG:32756'` 

In [3]:
# Period to process, as a (start, end) tuple
time_range = ('1987', '2021')

# Polygons to process and the column holding each polygon's identifier
vector_file = './vectors/ACE_ANDE.shp'
attribute_col = 'WCode'

# Satellite products to load (L7 do separately due to striping)
products = [
    'ga_ls5t_ard_3',
    'ga_ls8c_ard_3',
]

# Bands to load; choose only the bands you need to save space
measurements = [
    'nbart_blue',
    'nbart_green',
    'nbart_red',
    'nbart_nir',
    'nbart_swir_1',
    'nbart_swir_2',
]

# Output grid: pixel size, projection and pixel alignment
resolution = (-30, 30)
output_crs = 'EPSG:3577'
align = (0, 0)

In [4]:
# Echo the configured time range to confirm what will be queried
time_range

('1987-09', '1988-01')

In [5]:
# Read the polygons to process from `vector_file` into a GeoDataFrame
gdf = gpd.read_file(vector_file)
# Optional check (left disabled): build a datacube Geometry from the first
# feature; this needs the commented-out `from datacube.utils import geometry`
# import to be re-enabled
# geom = geometry.Geometry(gdf.iloc[0].geometry, 
#                           crs=gdf.crs)


# # visualise
# geom

In [6]:
# Load parameters shared by every polygon, collected into one dictionary
# that is unpacked into the data-loading call
query = dict(
    time=time_range,
    measurements=measurements,
    resolution=resolution,
    output_crs=output_crs,
    align=align,
)

In [7]:
# Loop through polygons in geodataframe and extract satellite data
for index, row in gdf.iterrows():
    
    print(f'Feature: {index + 1}/{len(gdf)}')
    
    # Extract the feature's geometry as a datacube geometry object
    geom = Geometry(geom=row.geometry, crs=gdf.crs)
    
    # Update the query to include our geopolygon
    query.update({'geopolygon': geom}) 
    
    # Load landsat
    ds = load_ard(dc=dc, 
                  products=products,
                  min_gooddata= 0.75,  # only take uncloudy scenes
                  # ls7_slc_off = False,                  
                  group_by='solar_day',
                  **query)
    
    # Generate a polygon mask to keep only data within the polygon
    mask = xr_rasterize(gdf.iloc[[index]], ds)
      
    # site = gdf.loc[0 ,attribute_col]
    site = str(row[attribute_col])
    
    ds['mndwi'] = (ds.nbart_green - ds.nbart_swir_1) / (ds.nbart_green + ds.nbart_swir_1)
    ds['is_valid'] = np.isfinite(ds.nbart_green)
    ds['mndwi'] = ds.mndwi.where(ds.mndwi > 0, 0)
    ds['wet'] = ds.mndwi.where(ds.mndwi <= 0, 1)
    ds['wet'] = ds.wet.where(ds.wet == 1, 0)
    
    # Mask dataset to set pixels outside the polygon to `NaN`
    ds = ds.where(mask)
    
    # Make csv output   
    blue = ds.nbart_blue.mean(dim=['x', 'y']) 
    green = ds.nbart_green.mean(dim=['x', 'y'])
    red = ds.nbart_red.mean(dim=['x', 'y'])
    nir = ds.nbart_nir.mean(dim=['x', 'y'])
    swir1 = ds.nbart_swir_1.mean(dim=['x', 'y'])
    swir2 = ds.nbart_swir_2.mean(dim=['x', 'y'])
    
    # Site
    site = str(row[attribute_col])

    # Add to a single dataframe ready to be written out as a CSV with time as an index
    wateranalysis_df = pd.DataFrame(data={'site': site, 'blue':blue, 'green':green, 'red': red,
                                         'nir':nir, 'swir1':swir1, 'swir2':swir2,},
                                    index=ds.time.values)

    # Write to file
    wateranalysis_df.to_csv(f'{site}_band_extracts_0_{time_range[0]}-{time_range[1]}.csv', index_label='time')

    # Preview data
    wateranalysis_df.head()
 

Feature: 1/5
Finding datasets
    ga_ls5t_ard_3
    ga_ls8c_ard_3


  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':


Counting good quality pixels for each time step using fmask
Filtering to 5 out of 6 time steps with at least 75.0% good quality pixels
Applying fmask pixel quality/cloud mask
Loading 5 time steps
Feature: 2/5
Finding datasets
    ga_ls5t_ard_3
    ga_ls8c_ard_3


  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':


Counting good quality pixels for each time step using fmask
Filtering to 8 out of 16 time steps with at least 75.0% good quality pixels
Applying fmask pixel quality/cloud mask
Loading 8 time steps
Feature: 3/5
Finding datasets
    ga_ls5t_ard_3
    ga_ls8c_ard_3
Counting good quality pixels for each time step using fmask


  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':


Filtering to 7 out of 15 time steps with at least 75.0% good quality pixels
Applying fmask pixel quality/cloud mask
Loading 7 time steps
Feature: 4/5
Finding datasets
    ga_ls5t_ard_3
    ga_ls8c_ard_3
Counting good quality pixels for each time step using fmask


  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':


Filtering to 4 out of 15 time steps with at least 75.0% good quality pixels
Applying fmask pixel quality/cloud mask
Loading 4 time steps
Feature: 5/5
Finding datasets
    ga_ls5t_ard_3
    ga_ls8c_ard_3
Counting good quality pixels for each time step using fmask


  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  if geom.type in ['Point', 'MultiPoint']:
  if geom.type in ['GeometryCollection', 'MultiPolygon', 'MultiLineString']:
  if geom.type in ['LineString', 'LinearRing']:
  if geom.type == 'Polygon':
  _reproject(


Filtering to 6 out of 15 time steps with at least 75.0% good quality pixels
Applying fmask pixel quality/cloud mask
Loading 6 time steps
