### Generates a .geojson file with boundaries for the given AOI

In [1]:
import os
from os.path import join, basename, split
import shutil

import cv2
# import imageio
import rasterio
import pandas as pd
import numpy as np
from tqdm import tqdm
import rasterio.mask
import geopandas as gpd
from skimage import measure
from scipy.ndimage import rotate
from rasterio.features import rasterize, shapes
from shapely.geometry import Polygon, shape

from sentinel2download.downloader import Sentinel2Downloader
from sentinel2download.overlap import Sentinel2Overlap

In [2]:
import warnings
# Suppress every warning for the rest of the session (no category or module filter is given).
warnings.filterwarnings('ignore')
import logging
# Configure the root logger so that INFO-level records and above pass its level check.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
from sentinel_download import SentinelDownload
from settings import DOWNLOADED_IMAGES_DIR
from preprocessing import preprocess_sentinel_raw_data, read_raster

In [12]:
# Load the AOI file into a GeoDataFrame and display it as the cell output.
# NOTE(review): aoi_path is assigned in a different cell, which must be run before this one.
aoi_file = gpd.read_file(aoi_path)
aoi_file

Unnamed: 0,id,geometry
0,tmp,"POLYGON ((-83.05615 39.18668, -83.39372 39.204..."


In [2]:
# Input locations and acquisition date used by the notebook.
# AOI polygon file (GeoJSON).
aoi_path = "data/plot_boundaries_20190604.geojson"
# Shapefile with Sentinel-2 tile footprints — presumably the world tiling grid; verify.
sentinel_tiles_path = "data/sentinel2_tiles_world.shp"
# Date string in YYYYMMDD form (same value as the date embedded in the AOI file name).
date = "20190604"

In [10]:
# Display the tile table; date_tile_info must already have been assigned in another cell.
date_tile_info

Unnamed: 0,tileID,geometry,img_date
0,17SLD,"POLYGON Z ((-83.05615 39.18668 0.00000, -83.31...",20190604
1,17SKD,"POLYGON Z ((-83.31631 39.20046 0.00000, -83.39...",20190604


In [4]:
# Band names, scene-quality limits and output layer names.
# NOTE(review): none of these are referenced by the visible processing code; presumably they
# are meant for the sentinel2download API — confirm before relying on them.
BANDS = {'TCI', 'B04', 'B08', }
# Thresholds keyed by what look like Sentinel-2 metadata fields (percentages) — TODO confirm.
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 10.0, 'CLOUDY_PIXEL_PERCENTAGE': 5.0, }

LAYERS = ['TCI', 'NDVI', ]

#### Find overlapping tiles

In [5]:
%%time
# Build the overlap helper from the AOI file and ask it for the overlapping tile IDs.
overlap = Sentinel2Overlap(aoi_path)
# The printed output below shows the result as a list of tile ID strings.
tiles = overlap.overlap()
print(f"Overlap tiles: {tiles}")

Overlap tiles: ['17SKD', '17SLD']
CPU times: user 4.99 s, sys: 41.5 ms, total: 5.03 s
Wall time: 5.02 s


In [14]:
# Display the tile table again; date_tile_info must already have been assigned in another cell.
date_tile_info

Unnamed: 0,tileID,geometry,img_date
0,17SLD,"POLYGON Z ((-83.05615 39.18668 0.00000, -83.31...",20190604
1,17SKD,"POLYGON Z ((-83.31631 39.20046 0.00000, -83.39...",20190604


In [15]:
def find_segmentation_mask_with_multiple_th(image_path, thresholds, min_edge_size, min_obj_size, th_size=(11, 3)):
    '''
    Segment the raster once per threshold and fuse the results into a single binary mask.

    find_segmentation_mask is run for every value in `thresholds`; the resulting masks are
    averaged pixel-wise, cast to uint8 and binarised with cv2.adaptiveThreshold. The binarised
    image is then labeled, cleared wherever all bands are zero, and converted back to 0/255.

        Parameters:
            image_path (str): Path to raster which will be processed.
            thresholds (:obj:`list` of :obj:`float`): Edge thresholds, one segmentation run per value.
            min_edge_size (int): Passed to find_segmentation_mask; edges with lower size are filtered.
            min_obj_size (int): Passed to find_segmentation_mask; instances with lower size are filtered.
            th_size (:obj:`tuple` of :obj:`int`): Extra positional arguments forwarded to
                cv2.adaptiveThreshold after the threshold type.

        Returns:
            img (numpy.array): Binary image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    # Only the mask of each run is kept; the metadata returned by each run is discarded.
    per_threshold_masks = [
        find_segmentation_mask(image_path, threshold, min_edge_size, min_obj_size)[0]
        for threshold in thresholds
    ]
    averaged = np.mean(per_threshold_masks, 0).astype(np.uint8)

    binarised = cv2.adaptiveThreshold(
        averaged, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, *th_size
    )

    instances = remove_background(label_detected_instances(binarised), bands)
    # Pixels that the adaptive threshold set to 0 never belong to an instance.
    instances[binarised == 0] = 0

    return convert_to_binary(instances), meta


def find_segmentation_mask(image_path, edge_combination_th, min_edge_size, min_obj_size):
    '''
    Run the single-threshold edge segmentation pipeline on the given raster.

    Steps, in order: local standard deviation of the bands, directional edge filters,
    thresholded combination of the filter responses, removal of short edges, union of all
    edge layers, instance labeling, removal of small instances and of all-zero pixels.

        Parameters:
            image_path (str): Path to raster which will be processed.
            edge_combination_th (float): Threshold that is used to filter weak edges.
            min_edge_size (int): Edges with lower size will be filtered.
            min_obj_size (int): Instances with lower size will be filtered.

        Returns:
            binary_img (numpy.array): Binary image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    # Edge detection on the averaged standard-deviation map.
    std_map = compute_image_standard_deviation(bands)
    responses = apply_edging_filters(std_map, create_edging_filters())
    directional = combine_edges_layers(std_map, responses, edge_combination_th)
    directional = remove_short_edges(directional, min_edge_size)

    # Turn the merged edge mask into labeled instances and clean them up.
    instances = label_detected_instances(edges_union(directional))
    instances = remove_small_objects(instances, min_obj_size)
    instances = remove_background(instances, bands)

    return convert_to_binary(instances), meta


def compute_image_standard_deviation(bands, kernel_size=(5, 5)):
    '''
    Average the per-band local standard deviation maps.

        Parameters:
            bands: Iterable of single-band arrays.
            kernel_size (:obj:`tuple` of :obj:`int`): Window size passed to
                find_edges_with_standard_deviation for every band.

        Returns:
            (numpy.array): Element-wise mean of the per-band maps (mean over the first axis).
    '''
    per_band_std = []
    for band in bands:
        per_band_std.append(find_edges_with_standard_deviation(band, kernel_size))
    return np.mean(per_band_std, axis=0)


def find_edges_with_standard_deviation(sample, filter_size=(3, 3)):
    '''
    Compute a local standard deviation map of one band using box filters.

    The variance is obtained as blur(x * x) - blur(x) ** 2 and then square-rooted.

        Parameters:
            sample (numpy.array): Single band; presumably floating point, since the result
                is passed to cv2.sqrt — confirm against read_raster.
            filter_size (:obj:`tuple` of :obj:`int`): Kernel size passed to cv2.blur.

        Returns:
            std (numpy.array): Local standard deviation, same shape as `sample`.
    '''
    mean = cv2.blur(sample, filter_size)
    mean_sqr = cv2.blur(sample * sample, filter_size)
    # Floating-point rounding can push the variance of flat regions slightly below
    # zero; clamp it so the square root yields 0 there and not NaN.
    variance = np.maximum(mean_sqr - mean * mean, 0)
    std = cv2.sqrt(variance)
    return std


def create_edging_filters(length=13, count=16):
    '''
    Build the list of rotated line-edge kernels.

    The base kernel has three rows of `length` columns (-1, +1, 0). It is rotated by
    multiples of 180 / count degrees, and every rotated kernel is followed by its three
    np.rot90 variants, so the list is made of consecutive groups of four kernels.

        Parameters:
            length (int): Number of columns of the base kernel.
            count (int): Angular resolution; count // 2 base rotations are generated.

        Returns:
            filter_list (:obj:`list` of numpy.array): 4 * (count // 2) kernels. For an even
                `count` this is 2 * count kernels.

        Raises:
            ValueError: If `count` is smaller than 2 (no kernel could be generated).
    '''
    if count < 2:
        raise ValueError("count must be at least 2")

    base_filter = np.array([
        [-1] * length,
        [1] * length,
        [0] * length,
    ])
    step = 180 / count

    # Append instead of writing into a pre-sized list: with an odd `count` a list of
    # size 2 * count would keep trailing None entries that break the filtering step.
    filter_list = []
    for i in range(count // 2):
        rotated = rotate(base_filter, i * step, order=0)
        filter_list.append(rotated)
        filter_list.extend(np.rot90(rotated, quarter_turns) for quarter_turns in range(1, 4))

    return filter_list


def apply_edging_filters(sample, filter_list):
    '''
    Apply every kernel in `filter_list` to `sample` with cv2.filter2D.

        Parameters:
            sample (numpy.array): Image to filter.
            filter_list: Iterable of kernels.

        Returns:
            (:obj:`list` of numpy.array): One filter response per kernel, in kernel order.
    '''
    responses = []
    for kernel in filter_list:
        responses.append(cv2.filter2D(sample, -1, kernel))
    return responses


def combine_edges_layers(avg_std, edges, th):
    '''
    Combine filter responses into per-direction binary edge layers.

    `edges` is consumed in groups of four: within each group the first two responses go to
    the "left" stack and the last two to the "right" stack, each after cv2.bitwise_and with
    `avg_std`. An output element is 1 where both stacks are positive and their sum exceeds `th`.

        Parameters:
            avg_std (numpy.array): Averaged standard-deviation map.
            edges (:obj:`list` of numpy.array): Filter responses; length is presumably a
                multiple of four — verify against create_edging_filters.
            th (float): Threshold on the sum of the left and right values.

        Returns:
            (numpy.array): uint8 array of 0/1 values with len(edges) // 2 layers along the first axis.
    '''
    # NOTE(review): cv2.bitwise_and is applied to what look like floating-point maps —
    # confirm this is intended.
    filter_count = len(edges) // 2
    left_layers = [None] * filter_count
    right_layers = [None] * filter_count

    for group in range(filter_count // 2):
        first = 4 * group
        even_slot = 2 * group
        odd_slot = even_slot + 1
        left_layers[even_slot] = cv2.bitwise_and(avg_std, edges[first])
        left_layers[odd_slot] = cv2.bitwise_and(avg_std, edges[first + 1])
        right_layers[even_slot] = cv2.bitwise_and(avg_std, edges[first + 2])
        right_layers[odd_slot] = cv2.bitwise_and(avg_std, edges[first + 3])

    left = np.asanyarray(left_layers)
    right = np.asanyarray(right_layers)

    is_edge = (left > 0) & (right > 0) & (left + right > th)
    combined = np.zeros_like(left, np.uint8)
    combined[is_edge] = 1

    return combined


def remove_short_edges(edge_direction_list, min_edge_size):
    '''
    Drop connected edge components that are not larger than `min_edge_size` pixels.

    Every layer is labeled separately (background value 0, connectivity 2); a pixel is kept
    when its component has more than `min_edge_size` pixels and the pixel itself is positive.

        Parameters:
            edge_direction_list: Iterable of edge layers.
            min_edge_size (int): Components with this many pixels or fewer are removed.

        Returns:
            (:obj:`list` of numpy.array): uint8 layers of 0/1 values, in input order.
    '''
    filtered_layers = []
    for layer in edge_direction_list:
        components = label_detected_instances(layer, 0, 2)
        component_ids, pixel_counts = np.unique(components, return_counts=True)
        long_enough = component_ids[pixel_counts > min_edge_size]
        # The positivity check also discards the background component if it passed the size test.
        kept = np.isin(components, long_enough) & (layer > 0)
        filtered_layers.append(kept.astype(np.uint8))

    return filtered_layers


def edges_union(edges):
    '''
    Merge all edge layers into one with cv2.bitwise_or.

        Parameters:
            edges: Non-empty indexable sequence of edge layers.

        Returns:
            (numpy.array): Bitwise OR of all layers; the first layer itself when there is only one.
    '''
    merged = edges[0]
    for position in range(1, len(edges)):
        merged = cv2.bitwise_or(merged, edges[position])

    return merged


def label_detected_instances(binary_mask, background=1, connectivity=2):
    '''
    Label connected regions of `binary_mask` with skimage.measure.label.

        Parameters:
            binary_mask (numpy.array): Image to label.
            background (int): Forwarded as the `background` argument of measure.label (default 1).
            connectivity (int): Forwarded as the `connectivity` argument of measure.label.

        Returns:
            Whatever measure.label returns for these arguments (the labeled image).
    '''
    labeling_options = {"background": background, "connectivity": connectivity}
    return measure.label(binary_mask, **labeling_options)


def remove_small_objects(labeled_image, min_obj_size):
    '''
    Zero out every label that covers fewer than `min_obj_size` pixels.

    The input array is modified in place and also returned.

        Parameters:
            labeled_image (numpy.array): Label image.
            min_obj_size (int): Labels with a smaller pixel count are set to 0.

        Returns:
            labeled_image (numpy.array): The same array object after filtering.
    '''
    label_ids, pixel_counts = np.unique(labeled_image, return_counts=True)
    too_small = label_ids[pixel_counts < min_obj_size]
    labeled_image[np.isin(labeled_image, too_small)] = 0
    return labeled_image


def remove_background(prediction, bands):
    '''
    Clear the prediction wherever the bands sum to zero.

    The input array is modified in place and also returned.

        Parameters:
            prediction (numpy.array): Array to clean.
            bands: Stack of bands; summed over the first axis.

        Returns:
            prediction (numpy.array): The same array object with those pixels set to 0.
    '''
    band_total = np.sum(bands, axis=0)
    prediction[band_total == 0] = 0
    return prediction


def convert_to_binary(img):
    '''
    Turn every positive value into 255 and return the image as uint8.

    The positive values of the input array are overwritten in place; the returned array is
    a uint8 copy produced by astype.

        Parameters:
            img (numpy.array): Image to binarise.

        Returns:
            (numpy.array): uint8 image.
    '''
    positive = img > 0
    img[positive] = 255
    binary = img.astype(np.uint8)
    return binary


def polygonize(binary_img, meta, transform=True):
    '''
    Vectorise a binary image into shapely geometries.

    The image is passed to rasterio.features.shapes both as the source and as the second
    positional argument, together with the affine transform from `meta` and connectivity 8.

        Parameters:
            binary_img (numpy.array): Image to vectorise.
            meta (dict): Raster metadata; only meta["transform"] is read.
            transform (bool): Currently unused; meta["transform"] is always applied.

        Returns:
            (:obj:`list`): One shapely geometry per shape yielded by rasterio.
    '''
    features = shapes(
        binary_img,
        binary_img,
        transform=meta["transform"],
        connectivity=8
    )
    geometries = []
    for geometry, _value in features:
        geometries.append(shape(geometry))
    return geometries


def save_polygons(polygons, meta, save_path):
    '''
    Write the detected polygons to a GeoJSON file.

        Parameters:
            polygons: Table of polygons (e.g. a pandas.DataFrame with a geometry column)
                that geopandas.GeoDataFrame accepts; must support len().
            meta (dict): Raster metadata; only meta['crs'] is read and assigned to the output.
            save_path (str): Destination file. Missing parent directories are created.

        Returns:
            gdf (geopandas.GeoDataFrame): The saved frame, or None when `polygons` is empty
                (nothing is written in that case).
    '''
    # len() is used on purpose: the truth value of a DataFrame is ambiguous.
    if len(polygons) == 0:
        print('No polygons detected.')
        return None

    directory = os.path.dirname(save_path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Use the argument, not a same-purpose global from the calling notebook cell.
    gdf = gpd.GeoDataFrame(polygons)
    gdf.crs = meta['crs']

    gdf.to_file(save_path, driver='GeoJSON')
    return gdf


### Find tile indexes

In [3]:
# Input locations and acquisition date used to look up the tiles.
# AOI polygon file (GeoJSON).
aoi_path = "data/plot_boundaries_20190604.geojson"
# Shapefile with Sentinel-2 tile footprints — presumably the world tiling grid; verify.
sentinel_tiles_path = "data/sentinel2_tiles_world.shp"
# Date string in YYYYMMDD form (same value as the date embedded in the AOI file name).
date = "20190604"

In [4]:
# Work directory under the notebook user's home.
# NOTE(review): if NB_USER is not set, os.getenv returns None and this becomes "/home/None/work".
BASE = f"/home/{os.getenv('NB_USER')}/work"

In [6]:
# Take the AOI path from the AOI environment variable, falling back to a file under BASE.
aoi_path = os.getenv("AOI", os.path.join(BASE, "notebooks/pbd/plot_boundaries_20190604.geojson")) 
# Because a default is supplied, this only triggers when AOI is set to an empty string.
if not aoi_path:
    raise RuntimeError("Add AOI env var for calculations")
    
# Display the resolved path as the cell output.
aoi_path

'/home/jovyan/work/notebooks/pbd/plot_boundaries_20190604.geojson'

In [None]:
# Locations derived from BASE: API key file, imagery directory and results directory.
API_KEY = os.path.join(BASE, ".secret/sentinel2_google_api_key.json")
LOAD_DIR = os.path.join(BASE, "satellite_imagery")
RESULTS_DIR = os.path.join(BASE, "results/example")

SIP_DIR = os.path.join(BASE, "notebooks/example")
COLORMAP_BRBG = os.path.join(SIP_DIR, "brbg.npy") 
# NOTE(review): prepare_colors is neither defined nor imported in the visible cells — confirm
# where it comes from before running this cell.
COLORS = prepare_colors(COLORMAP_BRBG)


# Band names, scene-quality limits and output layer names.
# NOTE(review): these rebind BANDS/CONSTRAINTS/LAYERS with values that differ from the
# earlier cell defining the same names; the later-executed cell wins.
BANDS = {'TCI', 'B08', }
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 15.0, 'CLOUDY_PIXEL_PERCENTAGE': 40.0, }

LAYERS = ['TCI', 'NDVI', ]

In [10]:
# Build the table of tiles for the AOI and date; the output below shows the columns
# tileID, geometry and img_date.
# NOTE(review): get_tiles is neither defined nor imported in the visible cells — confirm its source.
date_tile_info = get_tiles(aoi_path, sentinel_tiles_path, date)
date_tile_info

Unnamed: 0,tileID,geometry,img_date
0,17SLD,"POLYGON Z ((-83.05615 39.18668 0.00000, -83.31...",20190604
1,17SKD,"POLYGON Z ((-83.31631 39.20046 0.00000, -83.39...",20190604


### Download data
TODO: add credentials

In [None]:
# Create the downloader; its product_level attribute is read later when building tile folder names.
s2_downloader = SentinelDownload()
# The download itself is disabled (see the credentials TODO for this section).
# s2_downloader.download(date_tile_info)

### Process data

In [17]:
# Output folder for the prediction file and intermediate data.
save_folder = "../../results/pbd"
# Edge thresholds: a single value selects find_segmentation_mask, several values select
# find_segmentation_mask_with_multiple_th in the processing cell.
th = [10., 13., 16., 19., 21.]
# Size limits forwarded to the segmentation functions (edges / instances below them are filtered).
min_edge_size = 200
min_obj_size = 2000

In [22]:
# Output names are derived from the AOI file name.
origin_name = os.path.basename(aoi_path).replace(".geojson", "")
# Folder handed to the preprocessing step; it is deleted again after all tiles are processed.
save_path = os.path.join(save_folder, origin_name)

tile_frames = []
# Metadata of the last processed raster; save_polygons reads the CRS from it.
meta = None
for i, tile in tqdm(date_tile_info.iterrows(), total=date_tile_info.shape[0]):
    tile_name = f"{s2_downloader.product_level}_{tile.tileID}_{tile.img_date}"
    tile_folder = os.path.join(DOWNLOADED_IMAGES_DIR, tile_name)
    raster_path = preprocess_sentinel_raw_data(
        save_path=save_path,
        tile_folder=tile_folder,
        aoi_mask=date_tile_info.loc[[i]]
    )
    if len(th) == 1:
        img, meta = find_segmentation_mask(
            raster_path, th[0],
            min_edge_size, min_obj_size
        )
    else:
        img, meta = find_segmentation_mask_with_multiple_th(
            raster_path, th,
            min_edge_size, min_obj_size
        )

    # Build the per-tile table for BOTH branches; doing it only in the multi-threshold
    # branch left `df` undefined (or stale) when a single threshold was configured.
    df = pd.DataFrame({"geometry": polygonize(img, meta)})
    df["id"] = [f"{origin_name}_{x}" for x in df.index.values]
    df["tileID"] = tile.tileID
    df["img_date"] = tile.img_date
    tile_frames.append(df)

# Concatenate once after the loop; an empty frame is used when no tile was processed.
poly = pd.concat(tile_frames) if tile_frames else pd.DataFrame([])

# Best-effort cleanup of the intermediate folder; a missing folder is not an error.
shutil.rmtree(save_path, ignore_errors=True)
save_path = os.path.join(save_folder, f"{origin_name}_prediction.geojson")
save_polygons(poly, meta, save_path)

100%|██████████| 2/2 [04:09<00:00, 124.52s/it]


Unnamed: 0,geometry,id,tileID,img_date
0,"POLYGON ((321800.000 4361410.000, 321800.000 4...",plot_boundaries_20190604_0,17SLD,20190604
1,"POLYGON ((321520.000 4361460.000, 321520.000 4...",plot_boundaries_20190604_1,17SLD,20190604
2,"POLYGON ((320920.000 4361420.000, 320920.000 4...",plot_boundaries_20190604_2,17SLD,20190604
3,"POLYGON ((320050.000 4361270.000, 320050.000 4...",plot_boundaries_20190604_3,17SLD,20190604
4,"POLYGON ((319300.000 4361140.000, 319300.000 4...",plot_boundaries_20190604_4,17SLD,20190604
...,...,...,...,...
1520,"POLYGON ((298330.000 4341740.000, 298330.000 4...",plot_boundaries_20190604_1520,17SKD,20190604
1521,"POLYGON ((298480.000 4341740.000, 298480.000 4...",plot_boundaries_20190604_1521,17SKD,20190604
1522,"POLYGON ((298860.000 4341690.000, 298860.000 4...",plot_boundaries_20190604_1522,17SKD,20190604
1523,"POLYGON ((299330.000 4341840.000, 299330.000 4...",plot_boundaries_20190604_1523,17SKD,20190604
