### Detecting boundaries for a given AOI

In [1]:
import os
import shutil
import cv2
import rasterio
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio.mask
import tempfile
import shapely
import re

from tqdm import tqdm
from os.path import join, basename, split
from skimage import measure
from scipy.ndimage import rotate
from rasterio.features import rasterize, shapes
from shapely.geometry import Polygon, shape
from pathlib import Path
from datetime import datetime

from sentinel2download.downloader import Sentinel2Downloader
from preprocessing import preprocess_sentinel_raw_data, read_raster


import warnings
warnings.filterwarnings('ignore')
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
def get_tiles(aoi_path, sentinel_tiles_path):
    '''
    Returns Sentinel-2 tiles that intersect the specified AoI.

    Tiles are chosen greedily: on every pass the tile with the largest
    intersection with the not-yet-covered part of the AoI is selected, and
    that tile's footprint is subtracted from the remaining AoI.

        Parameters:
            aoi_path (str): Path to geojson/shp file with AoI to process.
            sentinel_tiles_path (str): Path to geojson/shp file with all Sentinel-2 tiles.

        Returns:
            date_tile_info (GeoDataFrame): Selected tiles (tileID, geometry),
                in the CRS of the AoI file.
    '''
    aoi_file = gpd.read_file(aoi_path)
    sentinel_tiles = gpd.read_file(sentinel_tiles_path)
    sentinel_tiles.set_index("Name", drop=False, inplace=True)

    best_intersection = {"tileID": [], "geometry": []}
    rest_aoi = aoi_file.copy()

    while rest_aoi.area.sum() > 0:
        res_intersection = gpd.overlay(rest_aoi, sentinel_tiles, how="intersection")
        if res_intersection.empty:
            # The remaining AoI is not covered by any tile; argmax on an
            # empty result would raise, so stop with what was found so far.
            print("Part of the AoI is not covered by any Sentinel-2 tile.")
            break

        # argmax is positional, so the row is taken with iloc rather than loc.
        best_row = res_intersection.iloc[res_intersection.area.argmax()]
        tileID = best_row["Name"]

        best_intersection["tileID"].append(tileID)
        best_intersection["geometry"].append(best_row["geometry"])

        biggest_intersection = sentinel_tiles.loc[[tileID]]
        rest_aoi = gpd.overlay(rest_aoi, biggest_intersection, how="difference")
        # Only tiles that touched the AoI can matter on later passes; unique()
        # avoids duplicating tile rows when several AoI parts hit the same tile.
        sentinel_tiles = sentinel_tiles.loc[res_intersection["Name"].unique()]

    date_tile_info = gpd.GeoDataFrame(best_intersection)
    date_tile_info.crs = aoi_file.crs

    return date_tile_info


In [3]:
def find_segmentation_mask_with_multiple_th(image_path, thresholds, min_edge_size, min_obj_size, th_size=(11, 3)):
    '''
    Segment the raster once per threshold and merge the results.

    The per-threshold binary masks from find_segmentation_mask are averaged,
    cast to uint8 and binarised with cv2.adaptiveThreshold (mean method).
    The thresholded image is then labeled, small objects are removed using
    min_obj_size / 10, and pixels where the bands sum to zero are cleared.

        Parameters:
            image_path (str): Path to raster which will be processed.
            thresholds (:obj:`list` of :obj:`float`): Thresholds used to filter weak edges.
            min_edge_size (int): Edges with lower size will be filtered.
            min_obj_size (int): Instances with lower size will be filtered.
            th_size (:obj:`tuple` of :obj:`int`): Trailing positional arguments
                unpacked into cv2.adaptiveThreshold.

        Returns:
            img (numpy.array): Binary image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    per_threshold_masks = [
        find_segmentation_mask(image_path, th, min_edge_size, min_obj_size)[0]
        for th in thresholds
    ]
    averaged = np.mean(per_threshold_masks, 0).astype(np.uint8)

    binarized = cv2.adaptiveThreshold(
        averaged, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, *th_size
    )

    instances = label_detected_instances(binarized, connectivity=1)
    instances = remove_small_objects(instances, min_obj_size / 10)
    instances = remove_background(instances, bands)
    # Pixels rejected by the adaptive threshold never belong to an instance.
    instances[binarized == 0] = 0

    return convert_to_binary(instances), meta


def find_segmentation_mask(image_path, edge_combination_th, min_edge_size, min_obj_size):
    '''
    Segment the given raster into instances separated by detected edges.

    Steps: read the raster, average the per-band local standard deviation,
    apply the bank of edging filters, combine the filter responses using the
    threshold, drop short edges, merge all edge layers, label the regions,
    drop small objects and background, and convert the labels to 0/255.

        Parameters:
            image_path (str): Path to raster which will be processed.
            edge_combination_th (float): Threshold that used to filter weak edges.
            min_edge_size (int): Edges with lower size will be filtered.
            min_obj_size (int): Instances with lower size will be filtered.

        Returns:
            binary_img (numpy.array): Binary image with segmented instances.
            meta (dict): Source raster metadata
    '''
    bands, meta = read_raster(image_path)

    # Edge detection
    avg_std = compute_image_standard_deviation(bands)
    responses = apply_edging_filters(avg_std, create_edging_filters())
    edge_layers = remove_short_edges(
        combine_edges_layers(avg_std, responses, edge_combination_th),
        min_edge_size,
    )

    # Instance extraction
    instances = label_detected_instances(edges_union(edge_layers))
    instances = remove_small_objects(instances, min_obj_size)
    instances = remove_background(instances, bands)

    return convert_to_binary(instances), meta


def compute_image_standard_deviation(bands, kernel_size=(5, 5)):
    '''
    Average the local standard deviation maps of all bands.

        Parameters:
            bands (iterable): Bands, each passed to find_edges_with_standard_deviation.
            kernel_size (:obj:`tuple` of :obj:`int`): Window size forwarded to that helper.

        Returns:
            Element-wise mean (along axis 0) of the per-band maps.
    '''
    per_band_std = []
    for band in bands:
        per_band_std.append(find_edges_with_standard_deviation(band, kernel_size))
    return np.mean(per_band_std, 0)


def find_edges_with_standard_deviation(sample, filter_size=(3, 3)):
    '''
    Compute the local standard deviation of an image.

    Uses the identity Var[x] = E[x^2] - E[x]^2, with both expectations
    estimated by a box blur of size filter_size.

        Parameters:
            sample (numpy.array): Single band image.
                NOTE(review): assumed to be a floating-point array, since
                the result is passed to cv2.sqrt - confirm against read_raster.
            filter_size (:obj:`tuple` of :obj:`int`): Size of the blur window.

        Returns:
            std (numpy.array): Local standard deviation, same shape as sample.
    '''
    mean = cv2.blur(sample, filter_size)
    mean_sqr = cv2.blur(sample * sample, filter_size)
    # Rounding can make the variance slightly negative in flat regions;
    # clamp at zero so the square root stays a valid number there.
    variance = np.maximum(mean_sqr - mean * mean, 0)
    std = cv2.sqrt(variance)
    return std


def create_edging_filters(length=13, count=16):
    '''
    Build a bank of rotated edge filters.

    The base kernel has three rows of `length` values: -1, +1 and 0. It is
    rotated in steps of 180 / count degrees (nearest-neighbour, order=0) for
    count // 2 angles, and each rotated kernel is stored together with its
    three 90-degree rotations.

        Parameters:
            length (int): Width of the base kernel.
            count (int): Controls the angular step and the bank size (count * 2).

        Returns:
            list: count * 2 entries; four consecutive entries per rotation angle.
    '''
    base_filter = np.array([[-1] * length, [1] * length, [0] * length])
    angle_step = 180 / count

    bank = [None] * (count * 2)
    bank[0] = base_filter

    for group in range(count // 2):
        rotated = rotate(base_filter, group * angle_step, order=0)
        start = 4 * group
        quarter_turns = [np.rot90(rotated, turns) for turns in (1, 2, 3)]
        bank[start:start + 4] = [rotated] + quarter_turns

    return bank


def apply_edging_filters(sample, filter_list):
    '''
    Apply every kernel in filter_list to sample with cv2.filter2D.

    The output depth argument is -1, i.e. the same depth as the source.

        Returns:
            list: One filtered image per kernel, in the order of filter_list.
    '''
    responses = []
    for kernel in filter_list:
        responses.append(cv2.filter2D(sample, -1, kernel))
    return responses


def combine_edges_layers(avg_std, edges, th):
    '''
    Combine filter responses into binary edge layers.

    The responses are consumed in groups of four: the first two of each group
    go to the "left" stack and the last two to the "right" stack, each after
    cv2.bitwise_and with avg_std. A pixel is marked as an edge where both
    stacks are positive and their sum exceeds th.

        Parameters:
            avg_std (numpy.array): Averaged standard deviation image.
            edges (list): Filter responses, as produced by apply_edging_filters.
            th (float): Threshold applied to the sum of the left and right values.

        Returns:
            numpy.array: uint8 array of 0/1 with len(edges) // 2 layers.
    '''
    half = len(edges) // 2
    left = [None] * half
    right = [None] * half

    for group in range(half // 2):
        first = 4 * group
        l_even, l_odd, r_even, r_odd = (
            cv2.bitwise_and(avg_std, edges[first + offset]) for offset in range(4)
        )
        left[2 * group], left[2 * group + 1] = l_even, l_odd
        right[2 * group], right[2 * group + 1] = r_even, r_odd

    left = np.asanyarray(left)
    right = np.asanyarray(right)

    is_edge = (left > 0) & (right > 0) & (left + right > th)
    combined = np.zeros_like(left, np.uint8)
    combined[is_edge] = 1

    return combined


def remove_short_edges(edge_direction_list, min_edge_size):
    '''
    Drop connected edge segments that are not larger than min_edge_size.

    Each layer is labeled (background 0, connectivity 2); only labels whose
    pixel count exceeds min_edge_size are kept, and only where the layer
    itself is positive.

        Returns:
            list: One uint8 0/1 array per input layer.
    '''
    cleaned = []
    for layer in edge_direction_list:
        components = label_detected_instances(layer, 0, 2)
        ids, sizes = np.unique(components, return_counts=True)
        long_enough = ids[sizes > min_edge_size]
        # The background label is usually large too, hence the extra (layer > 0).
        kept = np.isin(components, long_enough) & (layer > 0)
        cleaned.append(kept.astype(np.uint8))

    return cleaned


def edges_union(edges):
    '''
    Merge all edge layers into one mask with cv2.bitwise_or.

    The first layer is the starting value; a single-layer input is returned
    as is.
    '''
    merged = edges[0]
    remaining = edges[1:]
    for layer in remaining:
        merged = cv2.bitwise_or(merged, layer)

    return merged


def label_detected_instances(binary_mask, background=1, connectivity=2):
    '''
    Label connected regions of binary_mask with skimage.measure.label.

        Parameters:
            binary_mask (numpy.array): Image to label.
            background (int): Pixel value treated as background (default 1).
            connectivity (int): Neighbourhood connectivity passed to measure.label.

        Returns:
            Labeled image as returned by measure.label.
    '''
    label_options = {"background": background, "connectivity": connectivity}
    return measure.label(binary_mask, **label_options)


def remove_small_objects(labeled_image, min_obj_size):
    '''
    Zero out every label whose pixel count is below min_obj_size.

    The input array is modified in place and also returned.
    '''
    label_ids, pixel_counts = np.unique(labeled_image, return_counts=True)
    too_small = label_ids[pixel_counts < min_obj_size]
    small_pixels = np.isin(labeled_image, too_small)
    labeled_image[small_pixels] = 0
    return labeled_image


def remove_background(prediction, bands):
    '''
    Clear prediction wherever the bands sum to zero along axis 0.

    The prediction array is modified in place and also returned.
    '''
    empty_pixels = np.sum(bands, axis=0) == 0
    prediction[empty_pixels] = 0
    return prediction


def convert_to_binary(img):
    '''
    Set every positive pixel to 255 and return the image as uint8.

    The input array is modified in place before the uint8 copy is made.
    '''
    foreground = img > 0
    img[foreground] = 255
    return img.astype(np.uint8)


def polygonize(binary_img, meta, transform=True):
    '''
    Vectorize the binary image into shapely geometries.

    binary_img is passed to rasterio.features.shapes both as the source and
    as the second positional argument, together with meta["transform"] and
    8-connectivity.

        Parameters:
            binary_img (numpy.array): Binary image to vectorize.
            meta (dict): Raster metadata; only the "transform" entry is read.
            transform (bool): Currently unused; kept for interface compatibility.

        Returns:
            list: One shapely geometry per extracted shape.
    '''
    extracted = shapes(
        binary_img,
        binary_img,
        transform=meta["transform"],
        connectivity=8
    )
    geometries = []
    for geom, _value in extracted:
        geometries.append(shape(geom))
    return geometries


def process_polygons(result_df, current_crs, limit=500, dst_crs="EPSG:4326"):
    """
    Wrap the result DataFrame in a GeoDataFrame and reproject it.

        Parameters:
            result_df (pd.DataFrame): Result DataFrame
            current_crs: CRS the geometries are currently expressed in.
            limit (int): min area for polygon in m2. Currently unused because
                the area filter below is disabled.
            dst_crs (str): CRS to reproject to.
        Returns:
            GeoDataFrame: GeoDataFrame ready for saving
    """
    polygons_gdf = gpd.GeoDataFrame(result_df)
    polygons_gdf.crs = current_crs

    # Optional steps, currently disabled:
    #   fix invalid polygons:   gdf["geometry"] = gdf.apply(lambda row: row.geometry.buffer(0), axis=1)
    #   keep only valid ones:   gdf = gdf.loc[gdf.is_valid]
    # TODO: can be uncomment if needed
    #   drop small polygons:    gdf = gdf.loc[gdf.area >= limit]

    polygons_gdf.to_crs(dst_crs, inplace=True)

    # Optional, currently disabled - expand each polygon:
    #   gdf.geometry = gdf.geometry.buffer(0.0001, 1)
    return polygons_gdf


def save_polygons(gdf, save_path):
    """
    Write the polygons to save_path as GeoJSON.

    The data is first written to "<save_path>.temp" and then moved over the
    final name, so a partially written file never appears under save_path.

        Parameters:
            gdf (GeoDataFrame): Polygons to save.
            save_path (str): Destination file path; missing parent
                directories are created.
        Returns:
            The input gdf, or None when it is empty and nothing was written.
    """
    if len(gdf) == 0:
        print('No polygons detected.')
        return None

    directory = os.path.dirname(save_path)
    # A bare filename has no directory part, and makedirs('') would fail.
    if directory:
        os.makedirs(directory, exist_ok=True)

    temp_path = save_path + ".temp"
    gdf.to_file(temp_path, driver='GeoJSON')
    # os.replace also overwrites an existing target on every platform.
    os.replace(temp_path, save_path)

    return gdf

### Find tile indexes

In [26]:
# Working directory of the notebook user. NB_USER falls back to "jovyan" so
# the path never becomes "/home/None/work" when the variable is unset.
BASE = f"/home/{os.getenv('NB_USER', 'jovyan')}/work"

# Input / output locations, all relative to BASE.
API_KEY = os.path.join(BASE, ".secret/sentinel2_google_api_key.json")
LOAD_DIR = os.path.join(BASE, "satellite_imagery")
RESULTS_DIR = os.path.join(BASE, "results/pbd")

PBD_DIR = os.path.join(BASE, "notebooks/pbd")

# Bands requested from the downloader.
BANDS = {'TCI', 'B08', }

# Upper limits passed to the downloader as constraints.
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 15.0, 'CLOUDY_PIXEL_PERCENTAGE': 40.0, }
# CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 10.0, 'CLOUDY_PIXEL_PERCENTAGE': 5.0, }

PRODUCT_TYPE = 'L1C'

# Date range passed to the downloader.
START_DATE = "2017-09-12"
END_DATE = "2017-09-12"

In [27]:
# AoI file to process: taken from the AOI environment variable when it is
# set, otherwise the sample AoI under BASE is used.
aoi_path = os.getenv("AOI", os.path.join(BASE, "notebooks/pbd/data/20170912_075604_1032_aoi.geojson"))     
aoi_path

'/home/jovyan/work/notebooks/pbd/data/20170912_075604_1032_aoi.geojson'

In [28]:
# File with the Sentinel-2 tile grid, passed to get_tiles (relative path).
sentinel_tiles_path = "sentinel2grid.geojson"

#### Find overlapping tiles

In [29]:
# Tiles selected for the AoI, one row per tile (tileID, geometry).
date_tile_info = get_tiles(aoi_path, sentinel_tiles_path)
date_tile_info

Unnamed: 0,tileID,geometry
0,36UXA,"POLYGON Z ((35.11965 49.98928 0.00000, 35.1208..."


### Download data

In [30]:
def _check_folder(tile_folder, file, limit, nodata):
    """
    Check that the share of NoData pixels in one raster is within limit.

        Parameters:
            tile_folder: Folder containing the raster.
            file (str): Raster file name inside tile_folder.
            limit (float): Maximum allowed NoData percentage.
            nodata: Pixel value counted as NoData.
        Returns:
            bool: True when the NoData percentage of band 1 is <= limit.
    """
    raster_file = os.path.join(tile_folder, file)
    with rasterio.open(raster_file) as src:
        # Only the first band is inspected.
        first_band = src.read(1)
        empty_pixels = np.count_nonzero(first_band == nodata)
        nodata_percentage = round(empty_pixels / first_band.size * 100, 2)
        print(f"NODATA_PIXEL_PERCENTAGE for {tile_folder} images: {nodata_percentage}%")
        return bool(nodata_percentage <= limit)

In [31]:
def check_nodata(loadings, product_type, limit=15.0, nodata=0):
    """
    Keep, per tile, the folder of loaded images that passes the NoData check.

    NOTE: a later cell of this notebook redefines check_nodata for the
    {tile: set of folders} mapping returned by load_images; this version
    expects each entry of a tile to be a sequence of file paths.

        Parameters:
            loadings (dict): tile -> iterable of file path sequences; the
                first path of each sequence identifies the folder and the
                file to inspect.
            product_type (str): The NoData check is only run for 'L1C'.
            limit (float): Maximum NoData percentage; a falsy value disables the check.
            nodata: Pixel value counted as NoData.
        Returns:
            dict: tile -> accepted folder (pathlib.Path).
    """
    filtered = dict()

    print(f"Checking NODATA_PIXEL_PERCENTAGE for {product_type}")

    for tile, tile_paths in loadings.items():
        try:
            for tile_path in tile_paths:
                first_file = Path(tile_path[0])
                tile_folder = first_file.parent
                print(tile_folder)
                if product_type == 'L1C' and limit:
                    # _check_folder needs the file name as well as the folder.
                    if _check_folder(tile_folder, first_file.name, limit, nodata):
                        filtered[tile] = tile_folder
                else:
                    filtered[tile] = tile_folder
        except Exception as ex:
            # Best effort: a failing tile is reported and skipped.
            print(f"Error for {tile}: {str(ex)}")
    return filtered

In [32]:
def load_images(api_key, tiles, start_date, end_date, output_dir, product_type="L2A"):
    """
    Download imagery for every tile and report the folders it landed in.

    The module-level BANDS and CONSTRAINTS are passed to the downloader.

        Parameters:
            api_key: Credentials passed to Sentinel2Downloader.
            tiles (iterable): Tile identifiers; each is downloaded separately.
            start_date, end_date: Date range passed to the downloader.
            output_dir: Download destination passed to the downloader.
            product_type (str): Product type passed to the downloader.
        Returns:
            dict: tile -> set of folder paths (str), taken from the parent of
                the first element of every downloaded entry.
    """
    loader = Sentinel2Downloader(api_key)

    downloaded = {}
    for tile in tiles:
        print(f"Loading images for tile: {tile}...")
        result = loader.download(
            product_type,
            [tile],
            start_date=start_date,
            end_date=end_date,
            output_dir=output_dir,
            bands=BANDS,
            constraints=CONSTRAINTS,
        )
        print(f"Loading images for tile {tile} finished")
        downloaded[tile] = result

    return {
        tile: {str(Path(entry[0]).parent) for entry in entries}
        for tile, entries in downloaded.items()
    }

In [33]:
# Download the imagery for every selected tile; maps tile -> set of folders.
loadings = load_images(API_KEY, date_tile_info.tileID.values, START_DATE, END_DATE, LOAD_DIR, PRODUCT_TYPE)

Loading images for tile: 36UXA...
Loading images for tile 36UXA finished


In [34]:
# Show the downloaded folders per tile.
loadings

{'36UXA': {'/home/jovyan/work/satellite_imagery/S2B_MSIL1C_20170912T084549_N0205_R107_T36UXA_20170912T085508'}}

#### If the product type is L1C, check the images' NODATA_PIXEL_PERCENTAGE

In [35]:
def check_nodata(loadings, product_type, limit=15.0, nodata=0):
    """
    Filter the downloaded folders of every tile by their NoData share.

    Only ".jp2" files whose name does not contain "OPER" are considered.
    For 'L1C' products with a truthy limit, a folder is kept as soon as one
    of its files passes _check_folder; otherwise a folder is kept when it
    contains at least one such file.

        Parameters:
            loadings (dict): tile -> iterable of folder paths.
            product_type (str): Product type; the check is only run for 'L1C'.
            limit (float): Maximum NoData percentage; a falsy value disables the check.
            nodata: Pixel value counted as NoData.
        Returns:
            dict: tile -> set of accepted folders (possibly empty).
    """
    print(f"Checking NODATA_PIXEL_PERCENTAGE for {product_type}")

    must_verify = product_type == 'L1C' and limit

    accepted = {}
    for tile, folders in loadings.items():
        kept = set()
        for folder in folders:
            candidates = (
                name for name in os.listdir(folder)
                if name.endswith(".jp2") and "OPER" not in name
            )
            for file in candidates:
                if not must_verify:
                    kept.add(folder)
                    continue
                if _check_folder(folder, file, limit, nodata):
                    kept.add(folder)
                    break
        accepted[tile] = kept
    return accepted

In [36]:
# Folders per tile that pass the NoData check (default limit of check_nodata).
checked = check_nodata(loadings, PRODUCT_TYPE)
checked

Checking NODATA_PIXEL_PERCENTAGE for L1C
NODATA_PIXEL_PERCENTAGE for /home/jovyan/work/satellite_imagery/S2B_MSIL1C_20170912T084549_N0205_R107_T36UXA_20170912T085508 images: 5.45%


{'36UXA': {'/home/jovyan/work/satellite_imagery/S2B_MSIL1C_20170912T084549_N0205_R107_T36UXA_20170912T085508'}}

#### Select the folder with the latest date

In [37]:
def filter_date(loadings):
    """
    Pick, for every tile, the folder with the latest date in its name.

    The date is the first "_<digits>T<digits>_" group in the folder path,
    parsed as %Y%m%d. Tiles with no folders, or whose folders carry no
    parsable date, are left out of the result instead of raising. Ties on
    the date are resolved by the folder path so the choice is deterministic.

        Parameters:
            loadings (dict): tile -> iterable of folder paths.
        Returns:
            dict: tile -> selected folder.
    """
    date_pattern = re.compile(r"_(\d+)T\d+_")

    def _folder_date(folder):
        # Returns the parsed date, or None when the name carries no valid date.
        match = date_pattern.search(str(folder))
        if match is None:
            return None
        try:
            return datetime.strptime(match.group(1), '%Y%m%d')
        except ValueError:
            return None

    filtered = dict()
    for tile, folders in loadings.items():
        dated = [(_folder_date(folder), folder) for folder in folders]
        dated = [(date, folder) for date, folder in dated if date is not None]
        if not dated:
            print(f"No dated image folders for {tile}, skipping")
            continue
        _, latest_folder = max(dated, key=lambda item: (item[0], str(item[1])))
        filtered[tile] = latest_folder
    return filtered

In [38]:
# One folder per tile: the one with the latest date among the checked folders.
filtered = filter_date(checked)
filtered

{'36UXA': '/home/jovyan/work/satellite_imagery/S2B_MSIL1C_20170912T084549_N0205_R107_T36UXA_20170912T085508'}

In [39]:
# Stop here when no tile has a usable image folder after filtering.
if not filtered:
    raise ValueError("Images not loaded. Change date or constraints")

### Process data

In [40]:
# Edge thresholds; more than one value selects the multi-threshold segmentation.
th = [5.0, 6.0, 7.0, 8.0, 9.0]
# Minimum sizes passed to the segmentation (edges / instances).
min_edge_size = 200
min_obj_size = 2000

# AoI file name without its extension (works for .geojson and .shp alike);
# used as the prefix of polygon ids and of the output file name.
origin_name = os.path.splitext(os.path.basename(aoi_path))[0]

In [41]:
# Run the boundary detection for every selected tile and collect the polygons.
result_df = pd.DataFrame([])
meta = None  # raster metadata of the last processed tile; stays None if none was processed

with tempfile.TemporaryDirectory(dir=PBD_DIR) as tmpdirname:
    for i, tile in tqdm(date_tile_info.iterrows(), total=date_tile_info.shape[0]):
        try:
            tile_folder = Path(filtered[tile.tileID])
            print(tile_folder)
        except Exception as ex:
            # No usable imagery for this tile: report it and move on.
            print(f"Error for {tile.tileID}: {str(ex)}")
            continue

        print(f"Processing {tile.tileID}...")

        raster_path = preprocess_sentinel_raw_data(
            save_path=tmpdirname,
            tile_folder=tile_folder,
            aoi_mask=date_tile_info.loc[[i]]
        )

        if len(th) == 1:
            img, meta = find_segmentation_mask(
                raster_path, th[0],
                min_edge_size, min_obj_size
            )
        else:
            img, meta = find_segmentation_mask_with_multiple_th(
                raster_path, th,
                min_edge_size, min_obj_size
            )

        # Built for both branches above, so a single threshold also yields a df.
        df = pd.DataFrame({"geometry": polygonize(img, meta)})
        df["id"] = [f"{origin_name}_{x}" for x in df.index]
        df["tileID"] = tile.tileID
        df["start_date"] = START_DATE
        df["end_date"] = END_DATE

        result_df = pd.concat([result_df, df])

        print(f"Finished processing {tile.tileID}")

if meta is None:
    raise ValueError("No tiles were processed. Change date or constraints")

# NOTE(review): the CRS of the last processed tile is applied to all polygons;
# this assumes every tile raster shares one CRS - confirm for multi-tile AoIs.
gdf = process_polygons(result_df, meta['crs'])
save_path = os.path.join(RESULTS_DIR, f"{origin_name}_prediction_vy1.geojson")
print(save_path)
save_polygons(gdf, save_path)

  0%|          | 0/1 [00:00<?, ?it/s]

/home/jovyan/work/satellite_imagery/S2B_MSIL1C_20170912T084549_N0205_R107_T36UXA_20170912T085508
Processing 36UXA...


100%|██████████| 1/1 [00:43<00:00, 43.93s/it]

Finished processing 36UXA
/home/jovyan/work/notebooks/pbd/20170912_075604_1032_aoi_prediction_vy1.geojson





Unnamed: 0,geometry,id,tileID,start_date,end_date
0,"POLYGON ((35.22546 50.01661, 35.22545 50.01652...",20170912_075604_1032_aoi_0,36UXA,2017-09-12,2017-09-12
1,"POLYGON ((35.23219 50.01432, 35.23216 50.01378...",20170912_075604_1032_aoi_1,36UXA,2017-09-12,2017-09-12
2,"POLYGON ((35.17476 50.01307, 35.17476 50.01298...",20170912_075604_1032_aoi_2,36UXA,2017-09-12,2017-09-12
3,"POLYGON ((35.20050 50.01115, 35.20049 50.01106...",20170912_075604_1032_aoi_3,36UXA,2017-09-12,2017-09-12
4,"POLYGON ((35.24603 50.00883, 35.24603 50.00874...",20170912_075604_1032_aoi_4,36UXA,2017-09-12,2017-09-12
...,...,...,...,...,...
60,"POLYGON ((35.22109 49.98548, 35.22108 49.98539...",20170912_075604_1032_aoi_60,36UXA,2017-09-12,2017-09-12
61,"POLYGON ((35.23761 49.98372, 35.23760 49.98363...",20170912_075604_1032_aoi_61,36UXA,2017-09-12,2017-09-12
62,"POLYGON ((35.23303 49.97229, 35.23303 49.97211...",20170912_075604_1032_aoi_62,36UXA,2017-09-12,2017-09-12
63,"POLYGON ((35.23709 49.96943, 35.23708 49.96934...",20170912_075604_1032_aoi_63,36UXA,2017-09-12,2017-09-12
