In [None]:
# the following script is an update of the original one created by Pablo Rozas Larraondo: 
# https://github.com/ANU-WALD/sentinel2_fmc/blob/main/fmc_s2_rf_inference.ipynb
# it contains portions of code used by Digital Earth Australia in their tutorials 
# (https://docs.dea.ga.gov.au/notebooks/Scientific_workflows/TSmask/TSmask.html), 
# under Apache License, Version 2.0 (https://www.apache.org/licenses/LICENSE-2.0)
# DEA data is under Creative Commons by Attribution 4.0 license 
# (https://creativecommons.org/licenses/by/4.0/)

In [1]:
%matplotlib inline
import sys
import matplotlib.pyplot as plt
import datacube
import xarray as xr
from datacube.utils.masking import make_mask
from datacube.drivers.netcdf import write_dataset_to_netcdf
import numpy as np
from numpy import inf
#sys.path

## Download and load the Random Forest model.

In [4]:
!pip3 install pickle5

You should consider upgrading via the '/env/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [2]:
# Prefer the pickle5 backport when it is installed, otherwise fall back to the
# standard-library pickle so the cell does not fail with ImportError.
try:
    import pickle5 as pickle
except ImportError:
    import pickle

# SECURITY: unpickling can execute arbitrary code. Only load 'rf_fmc.pickle' from a
# trusted source (see the scikit-learn model-persistence notes linked in the output below).
with open('rf_fmc.pickle', 'rb') as handle:
    rf = pickle.load(handle)

# Show the restored estimator and its hyper-parameters.
print(rf)

https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


RandomForestRegressor(criterion='mse', max_depth=25, n_estimators=25, n_jobs=8)


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


## Load DEA data for region of interest

In [6]:
# Create the Datacube handle used by the measurement listing and the data loads below.
dc = datacube.Datacube(app='fmc')

In [5]:
# Check the variable (measurement) names available for the Sentinel-2A product:
# list every measurement known to the datacube, then keep only the rows of `product`.
product = "s2a_ard_granule"
measurements = dc.list_measurements()
measurements.loc[product]

Unnamed: 0_level_0,name,dtype,units,nodata,aliases,flags_definition,spectral_definition
measurement,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
azimuthal_exiting,azimuthal_exiting,float32,1,-999,[azimuthal_exiting],,
azimuthal_incident,azimuthal_incident,float32,1,-999,[azimuthal_incident],,
exiting,exiting,float32,1,-999,[exiting],,
incident,incident,float32,1,-999,[incident],,
relative_azimuth,relative_azimuth,float32,1,-999,[relative_azimuth],,
relative_slope,relative_slope,float32,1,-999,[relative_slope],,
satellite_azimuth,satellite_azimuth,float32,1,-999,[satellite_azimuth],,
satellite_view,satellite_view,float32,1,-999,[satellite_view],,
solar_azimuth,solar_azimuth,float32,1,-999,[solar_azimuth],,
solar_zenith,solar_zenith,float32,1,-999,[solar_zenith],,


In [3]:
#check variables names
#product = "s2b_ard_granule"
#measurements = dc.list_measurements()
#measurements.loc[product]

In [7]:
# Load Sentinel-2A and Sentinel-2B over the same area/period; the two are merged further down.
# `query` is shared by both loads.
query = dict(
    y=(-34.600, -35.100),
    x=(138.500, 138.900),
    crs='EPSG:4326',
    output_crs='EPSG:4326',
    resolution=(-0.0002, 0.0002),  # some bands are 10m resolution, but others are 20m
    time=('2022-04-01', '2022-06-01'),
    measurements=[
        "fmask",
        "nbart_blue", "nbart_green", "nbart_red",
        "nbart_red_edge_1", "nbart_red_edge_2", "nbart_red_edge_3",
        "nbart_nir_1", "nbart_nir_2",
        "nbart_swir_2", "nbart_swir_3",
    ],
)

# Sentinel-2A acquisitions, grouped by solar day
s2a_ds = dc.load(product='s2a_ard_granule', group_by='solar_day', **query)

In [8]:
# Sentinel-2B acquisitions, loaded with exactly the same query as Sentinel-2A.
s2b_ds = dc.load(product='s2b_ard_granule', group_by='solar_day', **query)

In [9]:
# Merge both sensors along the time dimension and put the acquisitions in chronological order.
ds_nomask = xr.concat([s2a_ds, s2b_ds], dim='time').sortby('time')

In [20]:
# Summary of the merged dataset, followed by the raw acquisition timestamps.
print(ds_nomask)
print(ds_nomask.time.data)

<xarray.Dataset>
Dimensions:           (time: 22, latitude: 2500, longitude: 2000)
Coordinates:
  * time              (time) datetime64[ns] 2022-04-01T00:46:48.272966 ... 20...
  * latitude          (latitude) float64 -34.6 -34.6 -34.6 ... -35.1 -35.1 -35.1
  * longitude         (longitude) float64 138.5 138.5 138.5 ... 138.9 138.9
    spatial_ref       int32 4326
Data variables:
    fmask             (time, latitude, longitude) uint8 1 1 1 1 1 ... 2 2 2 2 2
    nbart_blue        (time, latitude, longitude) int16 836 844 835 ... 772 789
    nbart_green       (time, latitude, longitude) int16 1112 1117 ... 937 1038
    nbart_red         (time, latitude, longitude) int16 1654 1653 ... 1056 1067
    nbart_red_edge_1  (time, latitude, longitude) int16 1923 1907 ... 1525 1537
    nbart_red_edge_2  (time, latitude, longitude) int16 2095 2085 ... 2099 2319
    nbart_red_edge_3  (time, latitude, longitude) int16 2236 2182 ... 2229 2347
    nbart_nir_1       (time, latitude, longitude) int16 24

## Add NDVI and NDII normalised indices to the dataset

In [10]:
def _normalised_difference(band_a, band_b):
    """Return (band_a - band_b) / (band_a + band_b), computed in float32.

    The reflectance bands are stored as int16, so adding or subtracting two bands
    directly can wrap around for very bright pixels before the division happens.
    Casting to float32 first avoids that. Pixels where the denominator is 0 still
    yield inf/NaN, which the LFMC estimation cells mask out later.
    """
    band_a = band_a.astype('float32')
    band_b = band_b.astype('float32')
    return (band_a - band_b) / (band_a + band_b)


# NDVI: NIR vs red; NDII: NIR vs SWIR
ds_nomask['ndvi'] = _normalised_difference(ds_nomask.nbart_nir_1, ds_nomask.nbart_red)
ds_nomask['ndii'] = _normalised_difference(ds_nomask.nbart_nir_1, ds_nomask.nbart_swir_2)

## Save dataset as netCDF

In [10]:
# Persist the merged reflectance bands, fmask and indices; the following sections
# reopen this file from disk instead of reusing the in-memory dataset.
# NOTE(review): presumably this fails if 's2_reflectance_no_mask.nc' already exists,
# so the file may need deleting before a re-run — confirm.
write_dataset_to_netcdf(ds_nomask, 's2_reflectance_no_mask.nc')

## Read fmask flags

In [11]:
# Print the Sentinel-2 fmask flags, i.e. the meaning of each integer class in `fmask`.
# Code from https://docs.dea.ga.gov.au/notebooks/Scientific_workflows/TSmask/TSmask.html
# (Apache License, Version 2.0, https://www.apache.org/licenses/LICENSE-2.0)
ds_nomask.fmask.flags_definition

{'fmask': {'bits': [0, 1, 2, 3, 4, 5, 6, 7],
  'values': {'0': 'nodata',
   '1': 'valid',
   '2': 'cloud',
   '3': 'shadow',
   '4': 'snow',
   '5': 'water'},
  'description': 'Fmask'}}

## Visualise RGB images

In [44]:
# Reopen from disk the reflectance file written by write_dataset_to_netcdf above.
ds = xr.open_dataset('s2_reflectance_no_mask.nc')

In [115]:
#for i in range(ds.time.data.size):
#    ds_ = ds.isel(time=i)
#    ds_[['nbart_red', 'nbart_green', 'nbart_blue']].to_array().plot.imshow(robust=True, figsize=(8,8))

## Open reflectance dataset

In [108]:
# Open the saved reflectance dataset; `ds` is the input of both LFMC options below.
ds = xr.open_dataset('s2_reflectance_no_mask.nc')
ds

In [10]:
# Acquisition timestamps; the LFMC loops below run once per entry.
ds.time

# Option A: Export LFMC already masked

## Estimate LFMC

In [109]:
# Predictor variables stacked for the Random Forest, in the order they are passed to rf.predict.
# NOTE(review): this order presumably has to match the feature order used to train `rf` — confirm.
rf_features = ['ndvi','ndii','nbart_red','nbart_green','nbart_blue','nbart_nir_1','nbart_nir_2','nbart_swir_2','nbart_swir_3']

# fmask classes blanked out in this option: 2 = cloud, 3 = cloud shadow.
# Snow (4) and water (5) are NOT masked here.
fmask_classes_to_mask = [2, 3]

# One 2D LFMC array per acquisition, in the same order as ds.time
list_lfmc_arrays = list()


for d in ds.time.data:
    print('Current date: ', d, '... Ends at: ', ds.time.data[-1])

    ds_2d = ds.sel(time=d)

    # 2D boolean mask, True where fmask flags cloud or cloud shadow
    masked_2d = np.isin(ds_2d.fmask.data, fmask_classes_to_mask)

    # Stack the predictors into one array whose first axis is the feature axis
    refl = ds_2d[rf_features].to_array().values

    # Blank out cloud / shadow pixels in every predictor. The 2D mask broadcasts over the
    # feature axis, so the mask does not have to be copied once per feature.
    refl = np.where(masked_2d, np.nan, refl)

    # The RF model accepts neither NaN nor +/-inf (the indices can be inf or NaN where the
    # denominator is 0). Remember where they are, replace them with placeholders so the
    # model can run, and restore NaN in the LFMC map afterwards.
    nan_mask = np.isnan(refl)
    neg_inf_mask = np.isneginf(refl)
    pos_inf_mask = np.isposinf(refl)

    # True where at least one predictor of the pixel was NaN, -inf or inf
    invalid_2d = (nan_mask | neg_inf_mask | pos_inf_mask).any(axis=0)

    refl = np.where(nan_mask, 0, refl)          # replace nan values with 0 so that the model can run
    refl = np.where(neg_inf_mask, -9999, refl)  # replace -inf values with -9999 so that the model can run
    refl = np.where(pos_inf_mask, 9999, refl)

    # Reshape to (n_pixels, n_features), the layout expected by rf.predict
    refl_rf = refl.reshape((len(rf_features), -1)).swapaxes(0, 1)

    # Estimate FMC values using the RF model, then fold the result back into a 2D map
    rf_lfmc = rf.predict(refl_rf)
    lfmc = rf_lfmc.reshape(refl.shape[1:])

    # Predictions made from placeholder values are meaningless: set them back to NaN
    lfmc = np.where(invalid_2d, np.nan, lfmc)

    list_lfmc_arrays.append(lfmc)
  


Current date:  2022-04-01T00:46:48.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-04T00:56:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-06T00:46:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-09T00:56:37.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-11T00:46:47.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-14T00:56:43.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-16T00:46:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-19T00:56:36.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-21T00:46:50.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-24T00:56:45.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-26T00:46:39.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-0

## Create new netCDF for final output

In [110]:
import netCDF4
import pandas as pd


def _write_degrees_axis(nc_file, axis_name, axis_values):
    """Create the 1-D float64 coordinate variable `axis_name` in `nc_file` and fill it with `axis_values`."""
    axis_var = nc_file.createVariable(axis_name, "f8", (axis_name,))
    axis_var.units = "degrees"
    axis_var.long_name = axis_name
    axis_var.standard_name = axis_name
    axis_var[:] = axis_values


# Time encoding, used both as the variable attribute and for the conversion itself
time_units = "seconds since 1970-01-01 00:00:00.0"

with netCDF4.Dataset('s2_lfmc_noclouds_noshadows.nc', 'w', format='NETCDF4_CLASSIC') as new_ds:

    # Global attribute recording the coordinate reference system
    new_ds.crs = 'EPSG:4326'

    # Dimensions sized after the reflectance dataset (created in the order time, longitude, latitude)
    for dim_name in ('time', 'longitude', 'latitude'):
        new_ds.createDimension(dim_name, ds[dim_name].size)

    # Time axis, stored as seconds since the unix epoch
    time_var = new_ds.createVariable("time", "f8", ("time",))
    time_var.units = time_units
    time_var.calendar = "standard"
    time_var.long_name = "Time, unix time-stamp"
    time_var.standard_name = "time"
    time_var[:] = [netCDF4.date2num([pd.to_datetime(stamp)], units=time_units, calendar="standard") for stamp in ds.time.data]

    # Spatial axes copied from the reflectance dataset
    _write_degrees_axis(new_ds, "longitude", ds.longitude.data)
    _write_degrees_axis(new_ds, "latitude", ds.latitude.data)

    # LFMC maps, one 2D slice per time step, taken from list_lfmc_arrays in order
    lfmc_var = new_ds.createVariable("lfmc", 'f4', ("time", "latitude", "longitude"), fill_value=-9999.9)
    lfmc_var.units = '%'
    lfmc_var.long_name = "Live Fuel Moisture Content estimated with Random Forest model"
    for i in range(ds.time.size):
        lfmc_var[i,:,:] = list_lfmc_arrays[i]


# Option B: Export LFMC not masked; R G B bands and fmask included in output netCDF

## Estimate LFMC

In [111]:
# Predictor variables, in the order they are stacked and handed to the RF model
predictors = ['ndvi','ndii','nbart_red','nbart_green','nbart_blue','nbart_nir_1','nbart_nir_2','nbart_swir_2','nbart_swir_3']

# One 2D LFMC array per acquisition, in the same order as ds.time (no cloud masking in this option)
list_lfmc_arrays = []


for d in ds.time.data:
    print('Current date: ', d, '... Ends at: ', ds.time.data[-1])

    scene = ds.sel(time=d)

    # Stack the predictors; the first axis of `refl` is the feature axis
    refl = scene[predictors].to_array().values

    # Values the RF model cannot digest: NaN and +/-inf (inf can show up in the indices)
    is_nan = np.isnan(refl)
    is_neg_inf = refl == -inf
    is_pos_inf = refl == inf

    # True for a pixel as soon as one of its predictors is NaN, -inf or inf
    bad_pixel = (is_nan | is_neg_inf | is_pos_inf).any(axis=0)

    # Placeholders so that the model can run; the affected pixels are reset to NaN below
    refl = np.where(is_nan, 0, refl)
    refl = np.where(is_neg_inf, -9999, refl)
    refl = np.where(is_pos_inf, 9999, refl)

    # (n_features, rows, cols) -> (n_pixels, n_features)
    samples = refl.reshape((len(predictors), -1)).swapaxes(0, 1)

    # Estimate FMC with the RF model and fold the flat prediction back into a 2D map
    lfmc = rf.predict(samples).reshape(refl.shape[1:])

    # Restore NaN wherever the prediction was made from placeholder values
    lfmc = np.where(bad_pixel, np.nan, lfmc)

    list_lfmc_arrays.append(lfmc)
  


Current date:  2022-04-01T00:46:48.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-04T00:56:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-06T00:46:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-09T00:56:37.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-11T00:46:47.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-14T00:56:43.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-16T00:46:42.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-19T00:56:36.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-21T00:46:50.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-24T00:56:45.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-04-26T00:46:39.000000000 ... Ends at:  2022-05-24T00:56:47.000000000
Current date:  2022-0

## Create new netCDF for final output

In [112]:
import netCDF4
import pandas as pd


def _write_degrees_axis(nc_file, axis_name, axis_values):
    """Create the 1-D float64 coordinate variable `axis_name` in `nc_file` and fill it with `axis_values`."""
    axis_var = nc_file.createVariable(axis_name, "f8", (axis_name,))
    axis_var.units = "degrees"
    axis_var.long_name = axis_name
    axis_var.standard_name = axis_name
    axis_var[:] = axis_values


# Time encoding, used both as the variable attribute and for the conversion itself
time_units = "seconds since 1970-01-01 00:00:00.0"

# Reflectance bands copied to the output next to LFMC: (variable name, long_name)
rgb_bands = [("nbart_red", "Red band"), ("nbart_green", "Green band"), ("nbart_blue", "Blue band")]

with netCDF4.Dataset('s2_lfmc.nc', 'w', format='NETCDF4_CLASSIC') as new_ds:

    # Global attributes: coordinate reference system and meaning of the fmask classes
    new_ds.crs = 'EPSG:4326'
    new_ds.fmask_legend = '0:nodata, 1:valid, 2:cloud, 3:shadow, 4:snow, 5:water'

    # Dimensions sized after the reflectance dataset (created in the order time, longitude, latitude)
    for dim_name in ('time', 'longitude', 'latitude'):
        new_ds.createDimension(dim_name, ds[dim_name].size)

    # Time axis, stored as seconds since the unix epoch
    var = new_ds.createVariable("time", "f8", ("time",))
    var.units = time_units
    var.calendar = "standard"
    var.long_name = "Time, unix time-stamp"
    var.standard_name = "time"
    var[:] = [netCDF4.date2num([pd.to_datetime(i)], units=time_units, calendar="standard") for i in ds.time.data]

    # Spatial axes copied from the reflectance dataset
    _write_degrees_axis(new_ds, "longitude", ds.longitude.data)
    _write_degrees_axis(new_ds, "latitude", ds.latitude.data)

    # Quality mask, stored as integers.
    # NOTE(review): xr.open_dataset may have decoded the fmask fill value to NaN (making the
    # array floating point) — confirm. NaN cannot be represented in an integer variable, so
    # any NaN is written as 0, the 'nodata' class of fmask_legend. Integer input passes
    # through unchanged.
    var = new_ds.createVariable("fmask", 'i4', ("time", "latitude", "longitude"))
    var.units = ''
    var.long_name = "Quality mask"
    for i in range(ds.time.size):
        fmask_2d = ds.fmask.data[i]
        var[i,:,:] = np.where(np.isnan(fmask_2d), 0, fmask_2d)

    # Red, green and blue reflectance, so RGB composites can be drawn from the output file alone
    for band_name, band_long_name in rgb_bands:
        var = new_ds.createVariable(band_name, 'f8', ("time", "latitude", "longitude"))
        var.units = ''
        var.long_name = band_long_name
        var[:] = ds[band_name].data

    # LFMC maps, one 2D slice per time step, taken from list_lfmc_arrays in order
    var = new_ds.createVariable("lfmc", 'f4', ("time", "latitude", "longitude"), fill_value=-9999.9)
    var.units = '%'
    var.long_name = "Live Fuel Moisture Content estimated with Random Forest model"
    for i in range(ds.time.size):
        var[i,:,:] = list_lfmc_arrays[i]


## Create colormap consistent with the current Australian Fuel Monitoring System

In [2]:
# Open the unmasked LFMC file written by Option B (it also holds fmask and the R, G, B bands).
lfmc_final = xr.open_dataset('s2_lfmc.nc') ### change file name as needed
lfmc_final

In [10]:
# Disabled cell: the plotting code is wrapped in a string literal, so running the cell only
# echoes the text. Remove the surrounding triple quotes to plot every LFMC time step of
# `lfmc_final` with a red -> pale yellow -> blue colormap, values clipped to 0-136.
# Note that the loop length comes from `ds`, so the reflectance dataset must be open too.
'''
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np


for i in range(ds.time.data.size):
    fmc_array = lfmc_final.lfmc.data[i,...]

    colors = [(0.87, 0, 0), (1, 1, 0.73), (0.165, 0.615, 0.957)]  # R -> G -> B
    cmap = LinearSegmentedColormap.from_list('fmc', colors, N=256)
    plt.figure(figsize=(10,10))
    plt.imshow(np.clip(fmc_array, 0, 136), cmap=cmap)
    plt.colorbar()
'''

"\n%matplotlib inline\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import LinearSegmentedColormap\nimport numpy as np\n\n\nfor i in range(ds.time.data.size):\n    fmc_array = lfmc_final.lfmc.data[i,...]\n\n    colors = [(0.87, 0, 0), (1, 1, 0.73), (0.165, 0.615, 0.957)]  # R -> G -> B\n    cmap = LinearSegmentedColormap.from_list('fmc', colors, N=256)\n    plt.figure(figsize=(10,10))\n    plt.imshow(np.clip(fmc_array, 0, 136), cmap=cmap)\n    plt.colorbar()\n"

In [8]:
# Disabled cell (code kept as a string literal): remove the triple quotes to draw an RGB
# composite for every time step from the bands stored in `lfmc_final`.
'''
for i in range(lfmc_final.time.data.size):
    ds_ = lfmc_final.isel(time=i)
    ds_[['nbart_red', 'nbart_green', 'nbart_blue']].to_array().plot.imshow(robust=True, figsize=(8,8))
'''

"for i in range(lfmc_final.time.data.size):\n    ds_ = lfmc_final.isel(time=i)\n    ds_[['nbart_red', 'nbart_green', 'nbart_blue']].to_array().plot.imshow(robust=True, figsize=(8,8))"

In [11]:
# Disabled cell (code kept as a string literal): remove the triple quotes to plot every time
# step of the cloud/shadow-masked LFMC file written by Option A, using the same colormap and
# 0-136 clipping as the unmasked plot.
'''
lfmc_final_masked = xr.open_dataset('s2_lfmc_noclouds_noshadows.nc') ### change file name as needed

%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np


for i in range(lfmc_final_masked.time.data.size):
    fmc_array = lfmc_final_masked.lfmc.data[i,...]

    colors = [(0.87, 0, 0), (1, 1, 0.73), (0.165, 0.615, 0.957)]  # R -> G -> B
    cmap = LinearSegmentedColormap.from_list('fmc', colors, N=256)
    plt.figure(figsize=(10,10))
    plt.imshow(np.clip(fmc_array, 0, 136), cmap=cmap)
    plt.colorbar()
'''

"lfmc_final_masked = xr.open_dataset('s2_lfmc_noclouds_noshadows.nc') ### change file name as needed\n\n%matplotlib inline\nimport matplotlib.pyplot as plt\nfrom matplotlib.colors import LinearSegmentedColormap\nimport numpy as np\n\n\nfor i in range(lfmc_final_masked.time.data.size):\n    fmc_array = lfmc_final_masked.lfmc.data[i,...]\n\n    colors = [(0.87, 0, 0), (1, 1, 0.73), (0.165, 0.615, 0.957)]  # R -> G -> B\n    cmap = LinearSegmentedColormap.from_list('fmc', colors, N=256)\n    plt.figure(figsize=(10,10))\n    plt.imshow(np.clip(fmc_array, 0, 136), cmap=cmap)\n    plt.colorbar()"