# Create EOPatch and fill with L1C imagery and SANLC 2018 reference land cover

---

The `EOPatch` is created by filling it with Sentinel-2 satellite imagery and reference data. We'll use Sentinel Hub services to download the imagery and `eo-learn` to create the data for training.

We will only use one image, but that is still enough to visualize and understand how this process works.

An `EOPatch` is created and manipulated using `EOTasks` chained in an `EOWorkflow`. In this example the final `workflow` is a sequence of the following tasks:
1. Create `EOPatch` by filling it with Sentinel-2 L1C band data
2. Add cloud mask
3. Calculate the normalized difference indices (NDVI, NDWI, NDBI)
4. Validate pixels
5. Count number of valid observations per pixel using valid data mask
6. Add reference SANLC to `EOPatch`
7. Save EOPatch to disk
8. Visualize the result.

In [None]:
#from sentinelhub import SHConfig
from sentinelhub import BBoxSplitter, CRS, MimeType, BBox, CustomUrlParam, SHConfig

# Only needed when the credentials are not stored in the Sentinel Hub
# configuration file: paste the instance id between the quotes.
INSTANCE_ID = ''

# `config` stays None unless an instance id was supplied above.
config = None
if INSTANCE_ID:
    config = SHConfig()
    config.instance_id = INSTANCE_ID

In [None]:
# more credentials...

# In case you haven't put the OAuth credentials into the configuration file...
CLIENT_ID = ''
CLIENT_SECRET = ''

# Start from the config prepared by the INSTANCE_ID cell, if that cell has run.
# Unconditionally resetting `config` here would throw away an instance id that
# was set there whenever the OAuth credentials are left empty.
config = globals().get('config')

if CLIENT_ID and CLIENT_SECRET:
    if config is None:
        config = SHConfig()
    config.sh_client_id = CLIENT_ID
    config.sh_client_secret = CLIENT_SECRET

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [1]:
# Built-in and third-party modules
import pickle
import sys
import os
import datetime
import itertools
from aenum import MultiValueEnum
import geopandas as gpd
import matplotlib.pyplot as plt
from IPython.display import Image
from matplotlib.colors import ListedColormap, BoundaryNorm
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np

from tqdm.auto import tqdm
#tqdm.notebook.tqdm

In [None]:
from eolearn.core import EOTask, EOPatch, LinearWorkflow, FeatureType, OverwritePermission, LoadTask, SaveTask, EOExecutor, ExtractBandsTask
from eolearn.io import ExportToTiff, SentinelHubInputTask, S2L1CWCSInput
from eolearn.mask import AddMultiCloudMaskTask, AddValidDataMaskTask, AddCloudMaskTask, get_s2_pixel_cloud_detector
from eolearn.geometry import VectorToRaster
from eolearn.features import LinearInterpolation, SimpleFilterTask, NormalizedDifferenceIndexTask
from sentinelhub import UtmZoneSplitter, BBox, CRS, DataSource
import eolearn.visualization

In [None]:
import logging

# Only show warnings and errors from the HTTP libraries used for the downloads.
for noisy_logger in ("requests", "urllib3"):
    logging.getLogger(noisy_logger).setLevel(logging.WARNING)

In [None]:
import pickle

In [None]:
from pathlib import Path

In [None]:
# Base directory (the current working directory) used to build the `path/...` locations
path = Path('./')

In [None]:
# Add the local helper folder to the import path and pull in the project-specific helpers.
# NOTE(review): SentinelHubValidData and CountValid are defined again in a later cell of
# this notebook; those definitions replace the ones imported here.
sys.path.append('./utils')
from utils.get_s2_sentinelhub import SentinelHubValidData, CountValid, NormalizedDifferenceIndex,ConcatenateData, ValidDataFractionPredicate

In [None]:
# Create the output folders used by this notebook if they are missing
for output_dir in (path/'data'/'valid_count', path/'figs'):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

### Get smaller area (25 - 500x500 tiles)

In [None]:
# Get the tile definitions of the area of interest from the previous notebook.
# NOTE: unpickling can execute arbitrary code -- only load files you created yourself.
with open(path/'data'/'tile-def'/'CoCT_500x500.pickle','rb') as fp:
    bbox_splitter = pickle.load(fp)

bbox_list = bbox_splitter.get_bbox_list()
info_list = bbox_splitter.get_info_list()

# Within the area of interest select a smaller 5x5 area
# id of center patch
ID = 86

# Obtain the surrounding 5x5 patches: every tile whose grid indices are at most
# 2 steps away from the center tile in both x and y
center_x = info_list[ID]['index_x']
center_y = info_list[ID]['index_y']
patchIDs = [
    idx for idx, info in enumerate(info_list)
    if abs(info['index_x'] - center_x) <= 2 and abs(info['index_y'] - center_y) <= 2
]

# A center tile on the border of the area yields fewer than 25 tiles, and the
# reshape below cannot work in that case. Fail here with an explanatory message
# instead of printing a warning and then crashing inside numpy.
if len(patchIDs) != 5*5:
    raise ValueError(
        'Use a different central patch ID, this one is on the border '
        f'(found {len(patchIDs)} surrounding patches instead of 25).'
    )

# Change the order of the patches
patchIDs = np.transpose(np.fliplr(np.array(patchIDs).reshape(5, 5))).ravel()

In [None]:
# Sanity check: print the shape and the number of dimensions of the flattened id array
print(patchIDs.shape, patchIDs.ndim)

In [None]:
# Export patchIDs with np.tofile - we'll need this in the next notebook.
# NOTE(review): ndarray.tofile with its default sep="" writes raw binary, so despite the
# ".csv" extension this is not a text file, and neither dtype nor shape is stored.
# Presumably the next notebook reads it back with np.fromfile and a matching dtype -- confirm.
import numpy as np
patchIDs.tofile('./data/tile-def/patchIDs.csv')

In [None]:
# Show the selected tile ids in their final (reordered) order
print(patchIDs)

# eo-learn Workflow to create patches

### Define what makes a pixel valid

To create `EOPatches`, we fill them with Sentinel-2 data using Sentinel Hub services. We will add the following data to each `EOPatch`:

    L1C custom list of bands [B02, B03, B04, B08, B11, B12], which corresponds to [B, G, R, NIR, SWIR1, SWIR2] wavelengths.

    SentinelHub's cloud mask

Additionally, we will add:

    Calculated NDVI, NDWI, and NDBI information

    A mask of validity, based on acquired data from Sentinel and cloud coverage. A pixel is valid if:
        IS_DATA == True
        CLOUD_MASK == 0 (1 indicates cloudy pixels and 255 indicates NO_DATA)

In [None]:
class SentinelHubValidData:
    """
    Combine SentinelHub's cloud mask with `IS_DATA` to define a valid-data mask.

    A pixel is valid when `IS_DATA` is set and the cloud mask is not set.
    The SentinelHub's cloud mask is assumed to be found in eopatch.mask['CLM'].
    """
    def __call__(self, eopatch):
        """
        :param eopatch: object whose `mask` mapping holds the 'IS_DATA' and 'CLM' arrays
        :return: boolean array that is True where data is present and no cloud is flagged
        """
        # The builtin `bool` is used as dtype: the `np.bool` alias was deprecated
        # in NumPy 1.20 and removed in NumPy 1.24.
        has_data = eopatch.mask['IS_DATA'].astype(bool)
        is_cloudy = eopatch.mask['CLM'].astype(bool)
        return np.logical_and(has_data, np.logical_not(is_cloudy))
    
class CountValid(EOTask):
    """
    Task that counts the non-zero entries of a mask along its first axis
    (the number of valid observations in the time-series) and stores the
    result as a timeless mask.
    """
    def __init__(self, count_what, feature_name):
        """
        :param count_what: name of the mask feature whose non-zero entries are counted
        :param feature_name: name of the timeless mask feature that receives the counts
        """
        self.what, self.name = count_what, feature_name

    def execute(self, eopatch):
        """Add the count feature to `eopatch` and return the same patch."""
        valid_count = np.count_nonzero(eopatch.mask[self.what], axis=0)
        eopatch.add_feature(FeatureType.MASK_TIMELESS, self.name, valid_count)
        return eopatch

### Define custom tasks

## Initialise tasks

In [None]:
# TASK FOR BAND DATA
# add a request for B(B02), G(B03), R(B04), NIR (B08), SWIR1(B11), SWIR2(B12)
# from layer 'BANDS-S2-L1C' at 10m resolution
# Here we also do a simple filter of cloudy scenes (maxcc). A detailed cloud cover
# detection is performed in the next step
custom_script = 'return [B02, B03, B04, B08, B11, B12];'
add_data = S2L1CWCSInput(
    layer='BANDS-S2-L1C',
    feature=(FeatureType.DATA, 'BANDS'), # save under name 'BANDS'
    custom_url_params={CustomUrlParam.EVALSCRIPT: custom_script}, # evalscript returning the 6 specific bands
    resx='10m', # resolution x
    resy='10m', # resolution y
    maxcc=0.8, # maximum allowed cloud cover of original ESA tiles
)
# Band order in 'BANDS' follows custom_script:
# [B02, B03, B04, B08, B11, B12], which corresponds to [B, G, R, NIR, SWIR1, SWIR2]

# TASK FOR CLOUD INFO
# cloud detection is performed at 80m resolution and the resulting cloud probability map and mask
# are scaled to EOPatch's resolution
cloud_classifier = get_s2_pixel_cloud_detector(average_over=2, dilation_size=1, all_bands=False)
add_clm = AddCloudMaskTask(cloud_classifier, 'BANDS-S2CLOUDLESS', cm_size_y='80m', cm_size_x='80m',
                           cmask_feature='CLM', # cloud mask name
                           cprobs_feature='CLP' # cloud prob. map name
                          )

# TASKS FOR CALCULATING NEW FEATURES
# 'BANDS/<i>' selects band i of 'BANDS' (0=B02, 1=B03, 2=B04, 3=B08, 4=B11, 5=B12)
# NDVI: (B08 - B04)/(B08 + B04)
# NDWI: (B03 - B08)/(B03 + B08)
# NDBI: (B11 - B08)/(B11 + B08) -- assuming the same (first - second)/(first + second)
#       convention as the two indices above; confirm in utils.get_s2_sentinelhub
ndvi = NormalizedDifferenceIndex('NDVI', 'BANDS/3', 'BANDS/2')
ndwi = NormalizedDifferenceIndex('NDWI', 'BANDS/1', 'BANDS/3')
ndbi = NormalizedDifferenceIndex('NDBI', 'BANDS/4', 'BANDS/3')
# TASK FOR VALID MASK
# validate pixels using SentinelHub's cloud detection mask and region of acquisition
add_sh_valmask = AddValidDataMaskTask(SentinelHubValidData(),
                                      'IS_VALID' # name of output mask
                                     )

# TASK FOR COUNTING VALID PIXELS
# count number of valid observations per pixel using valid data mask
count_val_sh = CountValid('IS_VALID', # name of existing mask
                          'VALID_COUNT' # name of the output timeless mask
                         )

# TASK FOR SAVING TO OUTPUT (if needed)
# the output folder is created first so the save task has somewhere to write to
path_out = './data/eopatches/'
if not os.path.isdir(path_out):
    os.makedirs(path_out)
save = SaveTask(path_out, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)

#### Reference map task

For this example, the [South African National Land Cover (2018)](https://www.environment.gov.za/projectsprogrammes/egis_landcover_datasets) will serve as reference.

We define a land cover enum class for each type.

The point of this task is to create a raster mask from the vector polygons and add it to the `EOPatch`. With this procedure, any kind of labeled shapefile can be transformed into a raster reference map. This result is achieved with the existing task `VectorToRaster` from the `eolearn.geometry` package. All polygons belonging to each of the classes are separately burned to the raster mask.

In [None]:
class LndC(MultiValueEnum):
    """
    Enum class containing LandCover types.

    Every member is declared with three values: a human readable label,
    the integer class id and the display color in hexadecimal notation.
    """
    Woodland_and_Forest             = 'Woodland and Forest',  1, '#008000'
    # The label used to carry a stray closing parenthesis ('Shrub and Grassland)')
    Shrub_and_Grassland             = 'Shrub and Grassland',  2, '#9370DB'
    Water                           = 'Water',                3, '#000080'
    Mines                           = 'Mines',                4, '#8B0000'
    Wetlands                        = 'Wetlands',             5, '#00CED1'
    Bare_Non_Vegetated              = 'Bare Non-Vegetated',   6, '#FFFACD'
    Cultivated_Commercial           = 'Cultivated_Commercial',7, '#DC143C'
    Fallow_land                     = 'Fallow land',          8, '#F08080'
    Formal_Residential              = 'Formal Residential',   9, '#FFA500'
    Informal_Residential            = 'Informal Residential', 10, '#FF69B4'
    Village                         = 'Village',              11, '#FF8C00'
    Smallholding                    = 'Smallholding',         12, '#DDA0DD'
    Urban_Recreation                = 'Urban Recreation',     13, '#7FFF00'
    Commercial                      = 'Commercial',           14, '#DAA520'
    Industrial                      = 'Industrial',           15, '#B8860B'
    Major_Road_and_Rail             = 'Major Road and Rail',  16, '#FFD700'

    @property
    def id(self):
        """ Returns an ID of an enum type
        :return: An ID
        :rtype: int
        """
        # second declared value of the member
        return self.values[1]

    @property
    def color(self):
        """ Returns class color
        :return: A color in hexadecimal representation
        :rtype: str
        """
        # third declared value of the member
        return self.values[2]

def get_bounds_from_ids(ids):
    """
    Build bin boundaries around a sequence of ids.

    Boundaries lie halfway between neighbouring ids; the outermost boundaries
    are placed the same half step beyond the first and the last id, so n ids
    (n >= 2) give n + 1 boundaries.

    :param ids: sequence of numbers
    :return: list of boundaries; empty for empty input, a single zero-width
        boundary for a single id
    """
    count = len(ids)
    if count == 0:
        return []
    if count == 1:
        # No neighbour to measure a step against: the half step is zero
        return [ids[0] + (ids[0] - ids[-1]) / 2]

    half_steps = [(upper - lower) / 2 for lower, upper in zip(ids, ids[1:])]

    bounds = [ids[0] - half_steps[0]]
    bounds.extend(lower + half for lower, half in zip(ids, half_steps))
    bounds.append(ids[-1] + half_steps[-1])
    return bounds

# Reference colormap things
# Boundaries are placed around the class ids so every id falls into its own bin,
# the colors are listed in enum definition order, and the norm maps raster values to them.
lulc_bounds = get_bounds_from_ids([x.id for x in LndC])
lulc_cmap = ListedColormap([x.color for x in LndC], name="lulc_cmap")
lulc_norm = BoundaryNorm(lulc_bounds, lulc_cmap.N)

In [None]:
# Reading the reference data can take some time, depending on its size
reference_dir = path/'data/'
land_use_ref_path = os.path.join(reference_dir, 'SANLC_2018_UTM32734.shp')
land_use_ref = gpd.read_file(land_use_ref_path)

# Burn the reference polygons into the timeless mask 'LndC': the pixel values come
# from the 'DN' column and the raster shape is taken from the 'IS_DATA' mask
lulc_feature = (FeatureType.MASK_TIMELESS, 'LndC')
shape_feature = (FeatureType.MASK,'IS_DATA')
rasterization_task = VectorToRaster(
    land_use_ref,
    lulc_feature,
    values_column='DN',
    raster_shape=shape_feature,
    raster_dtype=np.uint8,
)

## Define workflow

All the tasks thus far create and fill an ```EOPatch```. The tasks need to be put in some order and executed one by one. This can be achieved by manually executing the tasks, or more conveniently, defining an ```EOWorkflow```.

The following workflow is created and executed:

    Create EOPatch with band and cloud data
    Calculate and add NDVI, NDWI, NDBI
    Add mask of valid pixels
    Add a timeless mask holding the count of valid observations per pixel
    Add the rasterized SANLC reference map
    Save eopatches

Here the workflow is linear and is available as LinearWorkflow

In [None]:
# Define the workflow: the tasks are chained in exactly this order
workflow_tasks = [
    add_data,
    add_clm,
    ndvi,
    ndwi,
    ndbi,
    add_sh_valmask,
    count_val_sh,
    rasterization_task,
    save,
]
workflow = LinearWorkflow(*workflow_tasks)

# Let's visualize it
workflow.dependency_graph()

#### Execute the workflow on ONE tile

In [None]:
%%time

# Change the range as resources permit
time_interval = ['2020-02-28','2020-03-02']

# The ID is set to the center of the selected area
bbox, info = bbox_splitter.bbox_list[ID], bbox_splitter.info_list[ID]

patch_name = f'eopatch_{ID}'

# Per-task execution arguments: what to download and the folder to save it in
execution_args = {
    add_data: {'bbox': bbox, 'time_interval': time_interval},
    save: {'eopatch_folder': patch_name},
}
results = workflow.execute(execution_args)

In [None]:
# What did it save/do? List the entries of the output folder
# (`x` is reused by the next cell)
x = os.listdir("./data/eopatches/")
print(x)

In [None]:
# What is in this folder? Load the first listed entry as an EOPatch.
# NOTE(review): os.listdir returns entries in arbitrary order, so x[0] is not
# necessarily the patch saved above (and may not be an EOPatch folder at all).
EOPatch.load(os.path.join('./data/eopatches/', x[0]))

#### Check the content of the first EOPatch

In [None]:
# Take the last entry of the workflow results
# (presumably the output of the final `save` task -- confirm against eo-learn's results object)
patch = list(results.values())[-1]

In [None]:
# What is in this patch? Display the features it contains
patch.get_features()

In [None]:
# Check the date(s) stored in the patch
patch.timestamp

#### Plot RGB, built-up index, vegetation index, cloud probability, valid pixels (observations) and land cover

In [None]:
def plot_frame(patch, idx, save_fig=True):
    """
    Plot an overview of one time frame of an EOPatch on a 3x2 grid.

    Panels: RGB image, built-up index (NDBI), vegetation index (NDVI),
    cloud probability with its colorbar, valid-pixel mask and the land cover
    reference.

    :param patch: EOPatch with data features 'BANDS', 'NDBI', 'NDVI' and 'CLP',
        the mask 'IS_VALID' and the timeless mask 'LndC'
    :param idx: index of the time frame to plot
    :param save_fig: when True, the figure is written to figs/patch_<idx>.png
        and cleared afterwards
    """
    fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(25,25))
    timestamp = patch.timestamp[idx]

    # Bands 2, 1, 0 are picked as R, G, B; the values are scaled by 3.5 and
    # clipped into [0, 1] for display
    axs[0,0].imshow(np.clip(patch.data['BANDS'][idx][..., [2, 1, 0]] * 3.5, 0, 1))
    axs[0,0].set_title(f'RGB {timestamp}')
    axs[0,1].imshow(patch.data['NDBI'][idx,...,0], cmap=plt.cm.inferno)
    axs[0,1].set_title(f'Built-up Index {timestamp}')
    axs[1,0].imshow(patch.data['NDVI'][idx].squeeze(),cmap=plt.get_cmap('YlGn'))
    axs[1,0].set_title(f'Vegetation Index {timestamp}')

    # The cloud probability is drawn once and the colorbar is built from that
    # very image. Drawing it twice (as before) left the colorbar describing a
    # hidden image with a different colormap and value range.
    divider = make_axes_locatable(axs[1,1])
    cax = divider.append_axes('right', size='5%', pad=0.05)
    im = axs[1,1].imshow(patch.data['CLP'][idx,...,0],cmap=plt.cm.inferno)
    fig.colorbar(im, cax=cax, orientation='vertical')
    axs[1,1].set_title(f'Cloud Probability {timestamp}')

    # Select the requested frame before squeezing, so the panel also works
    # when the patch holds more than one time frame
    axs[2,0].imshow(patch.mask['IS_VALID'][idx].squeeze(),cmap=plt.cm.inferno)
    axs[2,0].set_title(f'Valid Pixels {timestamp}')
    axs[2,1].imshow(patch.mask_timeless['LndC'].squeeze(), cmap=lulc_cmap, norm=lulc_norm)
    axs[2,1].set_title(f'Land Cover {timestamp}')

    plt.show()

    if save_fig:
        fig.savefig(f'figs/patch_{idx}.png', bbox_inches='tight')
        fig.clf()

In [None]:
# Plot the satellite imagery, indices, cloud probability, valid pixels and land cover
# of the first time frame
plot_frame(patch, 0)

![title](figs/patch_0.png)

## Run workflow on all tiles

In [None]:
# Same chain of tasks as for the single tile, in execution order
workflow_tasks = [
    add_data,
    add_clm,
    ndvi,
    ndwi,
    ndbi,
    add_sh_valmask,
    count_val_sh,
    rasterization_task,
    save,
]
workflow = LinearWorkflow(*workflow_tasks)

In [None]:
def execute_workflow(tile_idx):
    """
    Run the workflow for one tile; the patch is saved in folder `eopatch_<tile_idx>`.

    Relies on the notebook-level `bbox_splitter`, `workflow`, `add_data`, `save`
    and `time_interval`.

    :param tile_idx: index of the tile in `bbox_splitter.bbox_list`
    :return: None
    """
    execution_args = {
        add_data: {'bbox': bbox_splitter.bbox_list[tile_idx], 'time_interval': time_interval},
        save: {'eopatch_folder': f'eopatch_{tile_idx}'},
    }
    # The result is deliberately neither kept nor returned; the workflow's
    # save task is what keeps the patch.
    workflow.execute(execution_args)

In [None]:
%%time

# Run the workflow tile by tile. The context manager closes the progress bar
# when the loop finishes or fails, instead of leaving it open.
with tqdm(total=len(patchIDs)) as pbar:
    for i in patchIDs:
        execute_workflow(i)
        pbar.update(1)

## Let's have a look at what we have.

#### RGB Color Image

In [None]:
# Draw the RGB image of every tile, using the acquisition closest to `date`
path_out = './data/eopatches'
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15, 15))

date = datetime.datetime(2020,3,1)

for i, ax in zip(patchIDs, axes.flatten()):
    eopatch = EOPatch.load(f'{path_out}/eopatch_{i}', lazy_loading=True)
    dates = np.array(eopatch.timestamp)
    time_gaps = abs(date-dates)
    closest_date_id = np.argsort(time_gaps)[0]
    # Bands 2, 1, 0 are picked as R, G, B; scale by 3.5 and clip into [0, 1] for display
    rgb = eopatch.data['BANDS'][closest_date_id][..., [2, 1, 0]]
    ax.imshow(np.clip(rgb * 3.5, 0, 1))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect("auto")

fig.subplots_adjust(wspace=0, hspace=0)
fig.savefig('figs/RGB_area.png', bbox_inches='tight')

![title](figs/RGB_area.png)

#### Map of Valid Pixels

In [None]:
# Draw the number of valid observations per pixel for every tile
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15, 15))

for i, ax in zip(patchIDs, axes.flatten()):
    eopatch = EOPatch.load(f'{path_out}/eopatch_{i}', lazy_loading=True)
    valid_count = eopatch.mask_timeless['VALID_COUNT']
    # Every tile is color-scaled from 0 to its own maximum count
    ax.imshow(valid_count.squeeze(), cmap=plt.cm.inferno, vmin=0, vmax=np.max(valid_count))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect("auto")

fig.subplots_adjust(wspace=0, hspace=0)
fig.savefig('figs/ValidPixels.png', bbox_inches='tight')

![title](figs/ValidPixels.png)

#### Built-Up Index

In [None]:
# Draw the Built-Up Index (NDBI) of every tile on a common 0 - 0.85 color scale
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15, 15))

for i, ax in zip(patchIDs, axes.flatten()):
    eopatch = EOPatch.load(f'{path_out}/eopatch_{i}', lazy_loading=True)
    built_up = eopatch.data['NDBI'].squeeze()
    ax.imshow(built_up, vmin=0, vmax=0.85, cmap=plt.cm.viridis)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect("auto")

fig.subplots_adjust(wspace=0, hspace=0)
fig.savefig('figs/NDBI.png', bbox_inches='tight')

![title](figs/NDBI.png)

#### Vegetation Indices

In [None]:
# Draw the vegetation index (NDVI) of every tile with one shared colorbar
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15, 15))

path_out = './data/eopatches'

for i, ax in zip(patchIDs, axes.flatten()):
    eopatch = EOPatch.load(f'{path_out}/eopatch_{i}', lazy_loading=True)
    im = ax.imshow(eopatch.data['NDVI'].squeeze(),cmap=plt.get_cmap('YlGn'))
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect("auto")

fig.subplots_adjust(wspace=0, hspace=0)

# The colorbar spans the whole grid. The grid created in this cell is named
# `axes`; the previously used `axs` is not defined here.
# `im` is the image of the last tile drawn in the loop above.
cb = fig.colorbar(im, ax=axes.ravel().tolist(), orientation='horizontal', pad=0.01, aspect=100)
cb.ax.tick_params(labelsize=20)
fig.savefig('figs/NDVI.png', bbox_inches='tight')

![title](figs/NDVI.png)

#### South African National Land Cover - 2018

In [None]:
# Draw the SANLC reference land cover of every tile with one shared colorbar
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(20, 20))

for i, ax in zip(patchIDs, axes.flatten()):
    eopatch = EOPatch.load(f'{path_out}/eopatch_{i}', lazy_loading=True)
    im = ax.imshow(eopatch.mask_timeless['LndC'].squeeze(), cmap=lulc_cmap, norm=lulc_norm)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_aspect("auto")

fig.subplots_adjust(wspace=0, hspace=0)

cb = fig.colorbar(im, ax=axes.ravel().tolist(), orientation='vertical', pad=0.02, aspect=90)
cb.ax.tick_params(labelsize=15)
cb.set_ticks([entry.id for entry in LndC])
# A vertical colorbar carries its ticks on the y axis, so the class names have
# to be set there; set_xticklabels left the colorbar showing the numeric ids.
cb.ax.set_yticklabels([entry.name for entry in LndC], fontsize=15)
fig.savefig('figs/LandCover.png', bbox_inches='tight')

![title](figs/LandCover.png)

#### Overlay Land Cover and RGB

In [None]:

# Overlay the land cover reference on the RGB image of the center tile.
# The tile is addressed through `ID` (the center patch chosen earlier in the
# notebook) instead of repeating its number here.
eopatch = EOPatch.load(f'{path_out}/eopatch_{ID}')

fig, axs = plt.subplots(figsize=(15,15))

# RGB of the first time frame underneath, semi-transparent land cover on top
axs.imshow(np.clip(eopatch.data['BANDS'][0][..., [2, 1, 0]] * 3.5, 0, 1))
im = axs.imshow(eopatch.mask_timeless['LndC'].squeeze(), cmap=lulc_cmap, norm=lulc_norm, alpha = 0.4)

# Horizontal colorbar: the ticks sit on the x axis, one per land cover class
cb = fig.colorbar(im, orientation='horizontal', pad=0.01, aspect=100)
cb.ax.tick_params(labelsize=11)
cb.set_ticks([entry.id for entry in LndC])
cb.ax.set_xticklabels([entry.name for entry in LndC], rotation=30, fontsize=11)
plt.show()

fig.savefig('figs/LCover_over_RGB.png', bbox_inches='tight')

![title](figs/LCover_over_RGB.png)