### Detecting boundaries for a given AOI

In [1]:
import os
import shutil
import cv2
import rasterio
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio.mask
import tempfile

from tqdm import tqdm
from os.path import join, basename, split
from skimage import measure
from scipy.ndimage import rotate
from rasterio.features import rasterize, shapes
from shapely.geometry import Polygon, shape
from pathlib import Path

from sentinel2download.downloader import Sentinel2Downloader
from preprocessing import preprocess_sentinel_raw_data, read_raster


import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')
import logging
# Take the root logger and set its threshold to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
def get_tiles(aoi_path, sentinel_tiles_path, date):
    '''
    Greedily pick the Sentinel-2 tiles that cover the given AoI.

    On every pass the tile whose intersection with the still-uncovered part
    of the AoI has the largest area is selected, and that tile's footprint is
    subtracted from the uncovered part. The loop stops once the uncovered
    area is zero.

        Parameters:
            aoi_path (str): Path to geojson/shp file with AoI to process.
            sentinel_tiles_path (str): Path to geojson/shp file with all
                Sentinel-2 tiles; it must provide a "Name" column.
            date (str): Date label, stored unchanged in the "img_date" column.

        Returns:
            date_tile_info (GeoDataFrame): Selected tiles (tileID, geometry,
                img_date), where geometry is the part of the AoI assigned to
                the tile. The CRS is copied from the AoI file.
    '''
    aoi = gpd.read_file(aoi_path)
    # Index the grid by tile name so that a tile can be looked up with .loc.
    tile_grid = gpd.read_file(sentinel_tiles_path).set_index("Name", drop=False)

    selected_ids = []
    selected_geoms = []
    uncovered = aoi.copy()

    while uncovered.area.sum() > 0:
        overlaps = gpd.overlay(uncovered, tile_grid, how="intersection")
        best_row = overlaps.area.argmax()
        best_tile = overlaps.loc[best_row, "Name"]

        selected_ids.append(best_tile)
        selected_geoms.append(overlaps.loc[best_row, "geometry"])

        # Drop the footprint of the chosen tile from what is left to cover.
        uncovered = gpd.overlay(uncovered, tile_grid.loc[[best_tile]], how="difference")
        # Only tiles that intersected on this pass are candidates for the next one.
        tile_grid = tile_grid.loc[overlaps["Name"]]

    date_tile_info = gpd.GeoDataFrame({"tileID": selected_ids, "geometry": selected_geoms})
    date_tile_info["img_date"] = date
    date_tile_info.crs = aoi.crs

    return date_tile_info


In [3]:
def find_segmentation_mask_with_multiple_th(image_path, thresholds, min_edge_size, min_obj_size, th_size=(11, 3)):
    '''
    Segment a raster by combining single-threshold segmentations.

    find_segmentation_mask is run once per threshold, the resulting binary
    images are averaged, and the average is binarised with
    cv2.adaptiveThreshold. The thresholded image is then labelled, cleared
    where the source bands sum to zero, and converted back to a 0/255 image.

        Parameters:
            image_path (str): Path to raster which will be processed.
            thresholds (:obj:`list` of :obj:`float`): Edge thresholds, one
                segmentation run per value.
            min_edge_size (int): Passed to find_segmentation_mask.
            min_obj_size (int): Passed to find_segmentation_mask.
            th_size (:obj:`tuple` of :obj:`int`): Unpacked as the trailing
                positional arguments of cv2.adaptiveThreshold (block size
                and constant).

        Returns:
            img (numpy.array): Binary (0/255, uint8) image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    # One binary mask per threshold; the metadata of each run is discarded.
    per_threshold_masks = [
        find_segmentation_mask(image_path, th, min_edge_size, min_obj_size)[0]
        for th in thresholds
    ]
    averaged = np.mean(per_threshold_masks, 0).astype(np.uint8)

    thresholded = cv2.adaptiveThreshold(
        averaged, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, *th_size
    )

    instances = remove_background(label_detected_instances(thresholded), bands)
    # Pixels rejected by the adaptive threshold never belong to an instance.
    instances[thresholded == 0] = 0

    return convert_to_binary(instances), meta


def find_segmentation_mask(image_path, edge_combination_th, min_edge_size, min_obj_size):
    '''
    Segment a raster into instances using a single edge threshold.

    Pipeline: local standard deviation of the bands -> directional edge
    filters -> per-direction edge masks -> short edges dropped -> union of
    all directions -> labelling -> small objects and background removed ->
    0/255 image.

        Parameters:
            image_path (str): Path to raster which will be processed.
            edge_combination_th (float): Threshold used to filter weak edges.
            min_edge_size (int): Edges that are not larger than this are dropped.
            min_obj_size (int): Instances smaller than this are dropped.

        Returns:
            binary_img (numpy.array): Binary (0/255, uint8) image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    std_map = compute_image_standard_deviation(bands)
    responses = apply_edging_filters(std_map, create_edging_filters())

    directional_edges = remove_short_edges(
        combine_edges_layers(std_map, responses, edge_combination_th),
        min_edge_size,
    )

    instances = label_detected_instances(edges_union(directional_edges))
    instances = remove_small_objects(instances, min_obj_size)
    instances = remove_background(instances, bands)

    return convert_to_binary(instances), meta


def compute_image_standard_deviation(bands, kernel_size=(5, 5)):
    '''
    Average the local standard deviation maps of all bands.

        Parameters:
            bands (iterable of numpy.array): Raster bands, one array per band.
            kernel_size (:obj:`tuple` of :obj:`int`): Window passed to
                find_edges_with_standard_deviation.

        Returns:
            numpy.array: Mean over bands of the per-band standard deviation maps.
    '''
    per_band_std = []
    for band in bands:
        per_band_std.append(find_edges_with_standard_deviation(band, kernel_size))
    return np.mean(per_band_std, axis=0)


def find_edges_with_standard_deviation(sample, filter_size=(3, 3)):
    '''
    Compute the local standard deviation of an image with box filters.

    Uses the identity Var[x] = E[x^2] - E[x]^2, where both expectations are
    taken with cv2.blur over a window of `filter_size`.

        Parameters:
            sample (numpy.array): Single image band.
                NOTE(review): presumably a floating point array, since the
                result is passed to cv2.sqrt - confirm against read_raster.
            filter_size (:obj:`tuple` of :obj:`int`): Box filter window.

        Returns:
            std (numpy.array): Local standard deviation, same shape as `sample`.
    '''
    mean = cv2.blur(sample, filter_size)
    mean_sqr = cv2.blur(sample * sample, filter_size)
    # In flat regions rounding can make E[x^2] - E[x]^2 slightly negative,
    # which would make the square root invalid (NaN in practice) and poison
    # every later filter response that touches the pixel. Clamp at zero.
    variance = np.maximum(mean_sqr - mean * mean, 0)
    std = cv2.sqrt(variance)
    return std


def create_edging_filters(length=13, count=16):
    '''
    Build a bank of rotated step-edge kernels.

    The base kernel has three rows of `length` values: -1, +1 and 0. It is
    rotated by multiples of 180 / count degrees for the first count // 2
    steps (covering [0, 90) degrees); every rotated kernel is followed by its
    three 90-degree rotations. The result therefore holds 2 * count kernels,
    stored in groups of four: [k, rot90(k, 1), rot90(k, 2), rot90(k, 3)].

        Parameters:
            length (int): Width of the base kernel.
            count (int): Number of distinct orientations per half turn.
                Must be a positive even number.

        Returns:
            filter_list (:obj:`list` of numpy.array): 2 * count kernels.

        Raises:
            ValueError: If `count` is not a positive even number. An odd
                count would otherwise leave unfilled (None) slots at the end
                of the list, which cannot be used as convolution kernels.
    '''
    if count <= 0 or count % 2 != 0:
        raise ValueError(f"count must be a positive even number, got {count!r}")

    base_filter = np.array([
        [-1] * length,
        [1] * length,
        [0] * length,
    ])
    step = 180 / count

    filter_list = []
    for i in range(count // 2):
        # order=0 (nearest neighbour) keeps the kernel values in {-1, 0, 1}.
        rotated = rotate(base_filter, i * step, order=0)
        filter_list.append(rotated)
        filter_list.extend(
            np.rot90(rotated, quarter_turns) for quarter_turns in range(1, 4)
        )

    return filter_list


def apply_edging_filters(sample, filter_list):
    '''
    Filter `sample` with every kernel of `filter_list`.

        Parameters:
            sample (numpy.array): Image to filter.
            filter_list (:obj:`list` of numpy.array): Kernels to apply.

        Returns:
            :obj:`list` of numpy.array: One cv2.filter2D response per kernel,
                in the order of `filter_list` (ddepth=-1 keeps the source depth).
    '''
    responses = []
    for kernel in filter_list:
        responses.append(cv2.filter2D(sample, -1, kernel))
    return responses


def combine_edges_layers(avg_std, edges, th):
    '''
    Combine pairs of filter responses into per-direction binary edge masks.

    `edges` is consumed in groups of four: responses 4*i and 4*i+1 go to the
    "left" stack and responses 4*i+2 and 4*i+3 to the "right" stack, so that
    left[k] and right[k] are two positions apart inside the same group.
    NOTE(review): with the ordering produced by create_edging_filters this
    pairs each kernel with its 180-degree rotation - confirm if another
    filter bank is ever passed in.

        Parameters:
            avg_std (numpy.array): Standard deviation map the filters were applied to.
            edges (:obj:`list` of numpy.array): Filter responses; the length is
                expected to be a multiple of 4, otherwise some slots stay None.
            th (float): Minimum value of left + right for a pixel to be kept.

        Returns:
            combined_result (numpy.array): uint8 array of 0/1 with one layer
                per left/right pair (len(edges) // 2 layers, assuming all
                responses share one shape).
    '''
    filter_count = len(edges) // 2
    local_max_left = [None] * filter_count
    local_max_right = [None] * filter_count

    for i in range(filter_count // 2):
        # NOTE(review): cv2.bitwise_and works on the raw bit patterns of its
        # inputs; for floating point arrays this is not an arithmetic
        # operation - the intent should be confirmed with the author.
        local_max_left[2*i] = cv2.bitwise_and(avg_std, edges[4*i])
        local_max_left[2*i+1] = cv2.bitwise_and(avg_std, edges[4*i+1])
        local_max_right[2*i] = cv2.bitwise_and(avg_std, edges[4*i+2])
        local_max_right[2*i+1] = cv2.bitwise_and(avg_std, edges[4*i+3])

    local_max_left = np.asanyarray(local_max_left)
    local_max_right = np.asanyarray(local_max_right)

    # A pixel is an edge in a layer when both sides respond and their sum
    # exceeds the threshold.
    combined_result = np.zeros_like(local_max_left, np.uint8)
    combined_result[
        (local_max_left > 0) &
        (local_max_right > 0) &
        (local_max_left + local_max_right > th)
    ] = 1

    return combined_result


def remove_short_edges(edge_direction_list, min_edge_size):
    '''
    Drop connected edge segments that are too small, layer by layer.

        Parameters:
            edge_direction_list (sequence of numpy.array): One binary edge
                mask per direction.
            min_edge_size (int): Components with at most this many pixels are removed.

        Returns:
            :obj:`list` of numpy.array: uint8 0/1 masks, one per input layer.
    '''
    filtered = []
    for direction_mask in edge_direction_list:
        # Label with 0 as background and 8-connectivity (connectivity=2).
        components = label_detected_instances(direction_mask, 0, 2)
        component_ids, sizes = np.unique(components, return_counts=True)
        large_ids = component_ids[sizes > min_edge_size]

        # The background label may itself be "large", hence the extra
        # restriction to pixels that were edges in the first place.
        kept = np.isin(components, large_ids) & (direction_mask > 0)
        filtered.append(kept.astype(np.uint8))

    return filtered


def edges_union(edges):
    '''
    Merge all edge layers into one mask with a bitwise OR.

        Parameters:
            edges (sequence of numpy.array): Non-empty sequence of edge masks.

        Returns:
            numpy.array: Bitwise OR of every layer.
    '''
    merged = edges[0]
    for idx in range(1, len(edges)):
        merged = cv2.bitwise_or(merged, edges[idx])

    return merged


def label_detected_instances(binary_mask, background=1, connectivity=2):
    '''
    Label connected regions of `binary_mask` with skimage.measure.label.

        Parameters:
            binary_mask (numpy.array): Image to label.
            background (int): Pixel value treated as background. The default
                of 1 means edge pixels are background and the areas between
                them become instances.
            connectivity (int): Neighbourhood passed to measure.label.

        Returns:
            numpy.array: Label image as returned by measure.label.
    '''
    labeled = measure.label(binary_mask, background=background, connectivity=connectivity)
    return labeled


def remove_small_objects(labeled_image, min_obj_size):
    '''
    Zero out every label that covers fewer than `min_obj_size` pixels.

    The input array is modified in place and also returned.

        Parameters:
            labeled_image (numpy.array): Label image.
            min_obj_size (int): Minimum number of pixels for a label to survive.

        Returns:
            numpy.array: The same array object, with small labels set to 0.
    '''
    label_ids, pixel_counts = np.unique(labeled_image, return_counts=True)
    undersized = label_ids[pixel_counts < min_obj_size]
    drop_mask = np.isin(labeled_image, undersized)
    labeled_image[drop_mask] = 0
    return labeled_image


def remove_background(prediction, bands):
    '''
    Clear the prediction wherever the bands sum to zero.

    The prediction array is modified in place and also returned.

        Parameters:
            prediction (numpy.array): Image to clean.
            bands (array-like): Stack of bands; summed along axis 0.

        Returns:
            numpy.array: The same `prediction` object with background set to 0.
    '''
    no_data = np.sum(bands, axis=0) == 0
    prediction[no_data] = 0
    return prediction


def convert_to_binary(img):
    '''
    Turn a label image into a 0/255 uint8 image.

    Positive pixels of `img` are overwritten with 255 in place; the returned
    array is a uint8 copy.

        Parameters:
            img (numpy.array): Label image.

        Returns:
            numpy.array: uint8 image.
    '''
    foreground = img > 0
    img[foreground] = 255
    return img.astype(np.uint8)


def polygonize(binary_img, meta, transform=True):
    '''
    Vectorise the non-zero regions of a binary image.

    The image is used both as the source and as the mask of
    rasterio.features.shapes, with 8-connectivity and the affine transform
    from `meta`.

        Parameters:
            binary_img (numpy.array): Binary image to vectorise.
            meta (dict): Raster metadata; only meta["transform"] is read.
            transform (bool): Currently unused.

        Returns:
            :obj:`list` of shapely geometries, one per extracted shape.
    '''
    features = shapes(
        binary_img,
        mask=binary_img,
        transform=meta["transform"],
        connectivity=8
    )
    geometries = []
    for geometry_mapping, _value in features:
        geometries.append(shape(geometry_mapping))
    return geometries


def save_polygons(polygons, crs, save_path, dst_crs="EPSG:4326"):
    '''
    Reproject polygons and write them to a GeoJSON file.

    The file is first written under a temporary name and then moved to
    `save_path`, so a partially written file never appears under the final
    name.

        Parameters:
            polygons (DataFrame): Table to save; passed to gpd.GeoDataFrame.
            crs: CRS the geometries are currently expressed in.
            save_path (str): Destination file path.
            dst_crs (str): CRS to reproject to before saving.

        Returns:
            gdf (GeoDataFrame): The reprojected data, or None when there is
                nothing to save.
    '''
    if len(polygons) == 0:
        print('No polygons detected.')
        return None

    # dirname is empty for a bare file name; makedirs('') would fail.
    directory = os.path.dirname(save_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    gdf = gpd.GeoDataFrame(polygons)
    # Use the CRS supplied by the caller rather than a notebook-level global.
    gdf.crs = crs
    gdf.to_crs(dst_crs, inplace=True)

    temp_path = save_path + ".temp"
    gdf.to_file(temp_path, driver='GeoJSON')
    os.replace(temp_path, save_path)

    return gdf

### Find tile indexes

In [4]:
# Root working directory, built from the NB_USER environment variable.
BASE = f"/home/{os.getenv('NB_USER')}/work"

# Credentials file handed to Sentinel2Downloader.
API_KEY = os.path.join(BASE, ".secret/sentinel2_google_api_key.json")
# Directory the imagery is downloaded into.
LOAD_DIR = os.path.join(BASE, "satellite_imagery")
# Directory the resulting GeoJSON is written to.
RESULTS_DIR = os.path.join(BASE, "results/pbd")

# Notebook directory; temporary preprocessing output is created under it.
PBD_DIR = os.path.join(BASE, "notebooks/pbd")

# Bands requested from the downloader.
BANDS = {'TCI', 'B08', }

# Looser alternative that was used before:
# CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 15.0, 'CLOUDY_PIXEL_PERCENTAGE': 40.0, }
# Download constraints passed to the downloader together with BANDS.
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 10.0, 'CLOUDY_PIXEL_PERCENTAGE': 5.0, }

# Date used both as start and end date of the download (alternative: "2019-06-04").
DATE = "2020-09-23" #"2019-06-04"

In [5]:
# AoI file: the AOI environment variable, or Pechenihy.geojson under BASE
# when the variable is not set.
aoi_path = os.getenv("AOI", os.path.join(BASE, "notebooks/pbd/Pechenihy.geojson")) 
# Alternative AoI: "notebooks/pbd/plot_boundaries_20190604.geojson"
# Because of the default above, this only triggers when AOI is set to an
# empty string.
if not aoi_path:
    raise RuntimeError("Add AOI env var for calculations")
    
aoi_path

'/home/jovyan/work/notebooks/pbd/Pechenihy.geojson'

In [6]:
# Sentinel-2 tiling grid file; a relative path, so it is resolved against the
# current working directory.
sentinel_tiles_path = "sentinel2grid.geojson"

#### Find overlapping tiles

In [7]:
# Tiles covering the AoI, each tagged with DATE in the img_date column.
date_tile_info = get_tiles(aoi_path, sentinel_tiles_path, DATE)
date_tile_info

Unnamed: 0,tileID,geometry,img_date
0,36UYA,"POLYGON Z ((36.78360 50.04479 0.00000, 36.7822...",2020-09-23


### Download data

In [8]:
def load_images(api_key, tiles, date, output_dir):
    '''
    Download Sentinel-2 L2A products for every tile on a single date.

    Uses the module-level BANDS and CONSTRAINTS settings.

        Parameters:
            api_key (str): Passed to Sentinel2Downloader.
            tiles (iterable of str): Tile identifiers to download.
            date (str): Used as both start and end date of the query.
            output_dir (str): Directory the files are downloaded to.

        Returns:
            dict: Maps every requested tile to whatever the downloader
                returned for it (an entry exists even if nothing was loaded).
    '''
    downloader = Sentinel2Downloader(api_key)
    downloaded = {}
    for tile in tiles:
        print(f"Loading images for tile: {tile}...")
        tile_files = downloader.download(
            'L2A',
            [tile],
            start_date=date,
            end_date=date,
            output_dir=output_dir,
            bands=BANDS,
            constraints=CONSTRAINTS,
        )
        print(f"Loading images for tile {tile} finished")
        downloaded[tile] = tile_files
    return downloaded

In [9]:
loadings = load_images(API_KEY, date_tile_info.tileID.values, DATE, LOAD_DIR)

# load_images stores an entry for every requested tile, even when nothing was
# downloaded for it, so the dict itself is never empty for a non-empty tile
# list. Check the per-tile results instead.
if not any(loadings.values()):
    raise ValueError("Images not loaded. Change date or constraints")

Loading images for tile: 36UYA...
Loading images for tile 36UYA finished


In [10]:
# Display the downloaded files per tile.
loadings

{'36UYA': [('/home/jovyan/work/satellite_imagery/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337/T36UYA_20200923T083659_TCI_10m.jp2',
   'L2/tiles/36/U/YA/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337.SAFE/GRANULE/L2A_T36UYA_A018537_20200923T083655/IMG_DATA/R10m/T36UYA_20200923T083659_TCI_10m.jp2'),
  ('/home/jovyan/work/satellite_imagery/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337/T36UYA_20200923T083659_B08_10m.jp2',
   'L2/tiles/36/U/YA/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337.SAFE/GRANULE/L2A_T36UYA_A018537_20200923T083655/IMG_DATA/R10m/T36UYA_20200923T083659_B08_10m.jp2'),
  ('/home/jovyan/work/satellite_imagery/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337/MTD_TL.xml',
   'L2/tiles/36/U/YA/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337.SAFE/GRANULE/L2A_T36UYA_A018537_20200923T083655/MTD_TL.xml')]}

### Process data

In [11]:
# Edge thresholds; with more than one value the per-threshold segmentations
# are combined by find_segmentation_mask_with_multiple_th.
th = [10., 13., 16., 19., 21.]
# Minimum size (in pixels) of an edge segment / of a detected instance.
min_edge_size = 200
min_obj_size = 2000

# AoI file name without its extension (works for .geojson and .shp alike);
# used as prefix for polygon ids and for the output file name.
origin_name = os.path.splitext(os.path.basename(aoi_path))[0]

In [13]:
# Per-tile polygon tables; concatenated once after the loop.
tile_frames = []
# Running number of polygons, so that ids stay unique across tiles.
polygon_count = 0

with tempfile.TemporaryDirectory(dir=PBD_DIR) as tmpdirname:
    for i, tile in tqdm(date_tile_info.iterrows(), total=date_tile_info.shape[0]):
        try:
            # First downloaded file of the tile -> its product folder.
            tile_folder = Path(loadings[tile.tileID][0][0]).parent
        except (KeyError, IndexError, TypeError) as ex:
            # Nothing was downloaded for this tile; skip it.
            print(f"Error for {tile.tileID}: {str(ex)}")
            continue

        print(tile_folder)
        print(f"Processing {tile.tileID}...")

        raster_path = preprocess_sentinel_raw_data(
            save_path=tmpdirname,
            tile_folder=tile_folder,
            aoi_mask=date_tile_info.loc[[i]]
        )

        if len(th) == 1:
            img, meta = find_segmentation_mask(
                raster_path, th[0],
                min_edge_size, min_obj_size
            )
        else:
            img, meta = find_segmentation_mask_with_multiple_th(
                raster_path, th,
                min_edge_size, min_obj_size
            )

        # Build the polygon table for BOTH branches above (it used to be
        # created only in the multi-threshold branch).
        df = pd.DataFrame({"geometry": polygonize(img, meta)})
        df["id"] = [f"{origin_name}_{polygon_count + k}" for k in range(len(df))]
        df["tileID"] = tile.tileID
        df["start_date"] = tile.img_date
        df["end_date"] = tile.img_date

        polygon_count += len(df)
        tile_frames.append(df)

        print(f"Finished processing {tile.tileID}")

if not tile_frames:
    raise RuntimeError("No tile was processed successfully; nothing to save")

result_df = pd.concat(tile_frames, ignore_index=True)

# NOTE(review): `meta` is the metadata of the last processed tile. If tiles
# can come in different CRSs, polygons of the other tiles would be tagged
# with the wrong CRS - confirm against preprocess_sentinel_raw_data.
save_path = os.path.join(RESULTS_DIR, f"{origin_name}_prediction.geojson")
save_polygons(result_df, meta['crs'], save_path)

  0%|          | 0/1 [00:00<?, ?it/s]

/home/jovyan/work/satellite_imagery/S2B_MSIL2A_20200923T083659_N0214_R064_T36UYA_20200925T161337
Processing 36UYA...


100%|██████████| 1/1 [06:49<00:00, 409.40s/it]

Finished processing 36UYA





Unnamed: 0,geometry,id,tileID,start_date,end_date
0,"POLYGON ((36.78341 50.04470, 36.78339 50.04452...",Pechenihy_0,36UYA,2020-09-23,2020-09-23
1,"POLYGON ((36.78444 50.04007, 36.78444 50.03998...",Pechenihy_1,36UYA,2020-09-23,2020-09-23
2,"POLYGON ((36.79111 50.03959, 36.79110 50.03950...",Pechenihy_2,36UYA,2020-09-23,2020-09-23
3,"POLYGON ((36.78399 50.03964, 36.78398 50.03955...",Pechenihy_3,36UYA,2020-09-23,2020-09-23
4,"POLYGON ((36.79429 50.03912, 36.79427 50.03894...",Pechenihy_4,36UYA,2020-09-23,2020-09-23
...,...,...,...,...,...
2272,"POLYGON ((36.68955 49.85554, 36.68954 49.85545...",Pechenihy_2272,36UYA,2020-09-23,2020-09-23
2273,"POLYGON ((36.70047 49.85456, 36.70046 49.85447...",Pechenihy_2273,36UYA,2020-09-23,2020-09-23
2274,"POLYGON ((36.68298 49.85512, 36.68297 49.85503...",Pechenihy_2274,36UYA,2020-09-23,2020-09-23
2275,"POLYGON ((36.78288 50.03977, 36.78287 50.03968...",Pechenihy_2275,36UYA,2020-09-23,2020-09-23
