# Example of recipe computation, model fit, predict* and conversion to raster using the antares3 API and [kale](https://github.com/kubeflow-kale/kale) functionality

*Prediction is pixel-wise.

**This example uses Landsat 8 data that has already been ingested and processed via antares3.**

## Some imports

In [1]:
import sys

import os
import json
from datetime import datetime

import matplotlib
from matplotlib.patches import Patch
from matplotlib import pyplot as plt
import numpy as np
import xarray as xr
from shapely.geometry import Point
import rasterio
import dill
import geopandas as gpd
import fiona
from affine import Affine
from rasterio.features import rasterize


In [2]:
# Keep a reference to the notebook's current stdout in sys.__stdout__ so that it
# can be put back once the madmex imports are done.
# NOTE(review): the "madmex.wrappers - loggerwriter write" lines in the output
# below suggest madmex replaces sys.stdout on import — confirm.
sys.__stdout__ = sys.stdout

In [3]:
import datacube
from datacube.api import GridWorkflow
from datacube.storage import masking
from datacube.drivers.netcdf import write_dataset_to_netcdf

from madmex.util.db import get_cmap_from_scheme
from madmex.models import Tag
from madmex.overlay.extractions import zonal_stats_xarray
from madmex.io.vector_db import VectorDb
from madmex.wrappers import gwf_query
from madmex.modeling.supervised.xgb import Model
from madmex.models import Tag
from madmex.overlay.extractions import zonal_stats_xarray
from madmex.util import randomword, mid_date, join_dicts
from madmex.util.xarray import to_float, to_int
from django.contrib.gis.geos.geometry import GEOSGeometry
from madmex.models import PredictObject

  return func(*args, **kwargs)
2020-09-07 18:48:41,526 - madmex.wrappers - loggerwriter write:   return func(*args, **kwargs)
  ret = f(ret)
2020-09-07 18:48:42,226 - madmex.wrappers - loggerwriter write:   ret = f(ret)
  out = yaml.load(src)
2020-09-07 19:07:37,665 - madmex.wrappers - loggerwriter write:   out = yaml.load(src)
  description_dict = yaml.load(src)
2020-09-07 19:07:37,679 - madmex.wrappers - loggerwriter write:   description_dict = yaml.load(src)
  return _prepare_from_string(" ".join(pjargs))
2020-09-07 19:07:40,812 - madmex.wrappers - loggerwriter write:   return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
2020-09-07 19:07:40,814 - madmex.wrappers - loggerwriter write:   projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
  out = yaml.load(src)
2020-09-07 19:08:40,043 - madmex.wrappers - loggerwriter write:   out = yaml.load(src)
  description_dict = yaml.load(src)
2020-09-07 19:08:40,

In [4]:
# Point sys.stdout back at the stream that was saved in sys.__stdout__ before
# the madmex imports.
sys.stdout = sys.__stdout__

## Recipe computation

In [None]:
# Name of the recipe product: used as the output sub-directory name and as the
# product name passed to add_product_from_yaml further down.
name_of_recipe_product = 'recipe_chiapas_L8_17'

In [16]:
# Output directory for this recipe: <recipes root>/<recipe product name>
path_recipe = os.path.join('/shared_volume/datacube_ingest/recipes/', name_of_recipe_product)

In [None]:
# Show the recipe output directory (the original referenced the undefined
# name `pat_recipe`, which raised NameError)
print(path_recipe)

In [None]:
# Create the recipe output directory (and any missing parents).
# exist_ok=True makes re-running the cell harmless and avoids the race between
# checking for the directory and creating it.
os.makedirs(path_recipe, exist_ok=True)

The recipe computation below follows [landsat_madmex_003.py](https://github.com/CONABIO/antares3/blob/develop/madmex/recipes/landsat_madmex_003.py).

In [5]:
# NOTE(review): Django reads DJANGO_ALLOW_ASYNC_UNSAFE to allow synchronous ORM
# calls from an async context (such as a Jupyter kernel) — confirm that this is
# the reason it is set here. setdefault keeps any value already in the environment.
os.environ.setdefault("DJANGO_ALLOW_ASYNC_UNSAFE", "true")
# Store the current stdout in sys.__stdout__ and assign it straight back:
# sys.stdout itself ends up unchanged, only sys.__stdout__ is refreshed.
sys.__stdout__ = sys.stdout
sys.stdout = sys.__stdout__


In [9]:
# Query parameters: region of interest, source product(s) and time window
region = 'Chiapas'
products = ['ls8_espa_mexico']
begin, end = '2017-01-01', '2017-12-31'
# Keyword arguments shared by every gwf_query call
gwf_kwargs = dict(region=region, begin=begin, end=end)


In [10]:
# Query the datacube once per product and collect the per-product results

dict_list = []
for prod in products:
    gwf_kwargs.update(product=prod)
    try:
        dict_list.append(gwf_query(**gwf_kwargs, view=False))
    except Exception as exc:
        # Best effort: one of the products may not have been registered in the
        # datacube. Report the failure instead of dropping it silently, so an
        # unexpectedly empty result can be diagnosed.
        print('Skipping product %s: %r' % (prod, exc))
# Merge the per-product dictionaries (join='full') into (key, value) pairs
iterable = join_dicts(*dict_list, join='full').items()


list_iter = list(iterable)

# Order the entries by the first two components of their key
list_iter_sorted = sorted(list_iter, key=lambda x: (x[0][0], x[0][1]))

In [12]:
# Pick the entry whose datacube tile index is (54, -38)

matching_tiles = [entry for entry in list_iter_sorted if entry[0] == (54, -38)]
tile = matching_tiles[0]
# Mid-point of the [begin, end] period
period_mid = mid_date(datetime.strptime(begin, '%Y-%m-%d'),
                      datetime.strptime(end, '%Y-%m-%d'))
# CRS taken from the geobox of the first element of the tile's value
crs = tile[1][0].geobox.crs
# center_dt is kept as a 'YYYY-MM-DD' string; it is parsed again when indexing
center_dt = period_mid.strftime("%Y-%m-%d")
# Output file name: madmex_003_<tile x>_<tile y>_<center date>.nc
nc_filename = os.path.join(
    path_recipe,
    'madmex_003_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))


In [None]:
# Show the path of the NetCDF file that the recipe computation writes
print(nc_filename)

In [31]:
# Recipe computation for the selected tile: load, mask, derive vegetation
# indices, reduce over time, add terrain data and write the result to NetCDF.

# Load every element of the tile's value via the Grid Workflow API as
# dask-backed arrays (chunks of 1200 x 1200 along x/y) and combine them by
# coordinates into a single dataset
ds = xr.combine_by_coords([GridWorkflow.load(x, dask_chunks={'x': 1200, 'y': 1200})
                             for x in tile[1]], data_vars='minimal', coords='minimal')
# Keep the geobox of the first element as a dataset attribute
ds.attrs['geobox'] = tile[1][0].geobox

# Build a mask from pixel_qa requiring the cloud, cloud_shadow and snow flags
# to be False, apply it, and drop the qa layer
clear = masking.make_mask(ds.pixel_qa, cloud=False, cloud_shadow=False,
                          snow=False)
ds_1 = ds.where(clear)
ds_1 = ds_1.drop('pixel_qa')
# Apply madmex's to_float to every data variable, keeping attributes
# (presumably a float cast so masked pixels can hold NaN — TODO confirm)
ds_1 = ds_1.apply(func=to_float, keep_attrs=True)
# Compute vegetation indices as normalised differences scaled by 10000,
# with the nodata attribute set to -9999
ds_1['ndvi'] = ((ds_1.nir - ds_1.red) / (ds_1.nir + ds_1.red)) * 10000
ds_1['ndvi'].attrs['nodata'] = -9999
ds_1['ndmi'] = ((ds_1.nir - ds_1.swir1) / (ds_1.nir + ds_1.swir1)) * 10000
ds_1['ndmi'].attrs['nodata'] = -9999
# Run the temporal mean over 'time' (skipping NaN) and rename every variable
# with a '_mean' suffix
ds_mean = ds_1.mean('time', keep_attrs=True, skipna=True)
ds_mean = ds_mean.rename({'blue': 'blue_mean',
                          'green': 'green_mean',
                          'red': 'red_mean',
                          'nir': 'nir_mean',
                          'swir1': 'swir1_mean',
                          'swir2': 'swir2_mean',
                          'ndmi': 'ndmi_mean',
                          'ndvi': 'ndvi_mean'})
# Compute temporal min/max only for the vegetation indices
# (no standard deviation is computed in this cell)
# ndvi
ndvi_max = ds_1.ndvi.max('time', keep_attrs=True, skipna=True)
ndvi_max = ndvi_max.rename('ndvi_max')
ndvi_max.attrs['nodata'] = -9999
ndvi_min = ds_1.ndvi.min('time', keep_attrs=True, skipna=True)
ndvi_min = ndvi_min.rename('ndvi_min')
ndvi_min.attrs['nodata'] = -9999
# ndmi
ndmi_max = ds_1.ndmi.max('time', keep_attrs=True, skipna=True)
ndmi_max = ndmi_max.rename('ndmi_max')
ndmi_max.attrs['nodata'] = -9999
ndmi_min = ds_1.ndmi.min('time', keep_attrs=True, skipna=True)
ndmi_min = ndmi_min.rename('ndmi_min')
ndmi_min.attrs['nodata'] = -9999
# Load terrain metrics from 'srtm_cgiar_mexico' using the same spatial
# parameters as the surface reflectance dataset (like=ds)
# NOTE(review): the wide time range presumably just guarantees that the terrain
# product is matched regardless of its timestamp — confirm
dc = datacube.Datacube(app = 'landsat_madmex_003_%s' % randomword(5))
terrain = dc.load(product='srtm_cgiar_mexico', like=ds,
                  time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                  dask_chunks={'x': 1200, 'y': 1200})
# The datacube handle is closed here; terrain was requested with dask_chunks
# and is only materialised by the compute() call at the end of this cell
dc.close()
# Merge dataarrays: the means, the index extrema (each passed through madmex's
# to_int — presumably an integer cast that fills NaN with nodata, TODO confirm)
# and the terrain layers
combined = xr.merge([ds_mean.apply(to_int),
                     to_int(ndvi_max),
                     to_int(ndvi_min),
                     to_int(ndmi_max),
                     to_int(ndmi_min),
                     terrain])
combined.attrs['crs'] = crs
# Evaluate the dask graph with the threaded scheduler and write to nc_filename
# NOTE(review): datacube's writer may refuse to overwrite an existing file —
# confirm before re-running this cell with the same nc_filename
write_dataset_to_netcdf(combined.compute(scheduler='threads'), nc_filename)

**The next steps follow:**

[ingest_recipe_products](https://github.com/CONABIO/antares3-sandbox/blob/master/notebooks/ingest_recipe_products/ingest_recipe_products.ipynb)



**First create `/shared_volume/.config/madmex/indexing/` and copy the product description file into it:**

```
mkdir -p /shared_volume/.config/madmex/indexing/
cp ~/.config/madmex/indexing/landsat_madmex_003.yaml /shared_volume/.config/madmex/indexing/
```

In [33]:
from madmex.indexing import add_product_from_yaml, add_dataset, metadict_from_netcdf
from madmex.util import yaml_to_dict

In [34]:
# Recipe (algorithm) name recorded with the dataset metadata
recipe = 'landsat_madmex_003'
# Product description file copied into the shared volume beforehand
yaml_file = '/shared_volume/.config/madmex/indexing/landsat_madmex_003.yaml'

In [38]:
# Read the product description and assemble the keyword arguments that
# metadict_from_netcdf receives alongside the NetCDF path
product_description = yaml_to_dict(yaml_file)
args = dict(
    description=product_description,
    center_dt=datetime.strptime(center_dt, '%Y-%m-%d'),
    from_dt=datetime.strptime(begin, '%Y-%m-%d'),
    to_dt=datetime.strptime(end, '%Y-%m-%d'),
    algorithm=recipe,
)

# Register the product under the recipe product name
pr, dt = add_product_from_yaml(yaml_file, name_of_recipe_product)

# result[0] is passed on as the file and result[1] as the metadata dictionary
result = metadict_from_netcdf(nc_filename, **args)

print("Adding %s to datacube database" % result[0])
r_add_dataset = add_dataset(pr=pr, dt=dt, metadict=result[1], file=result[0])

# Show whatever add_dataset returned
print(r_add_dataset)

Adding /shared_volume/datacube_ingest/recipes/recipe_chiapas_L8_17/madmex_003_54_-38_2017-07-02.nc to datacube database
None


## Fit model