In [None]:
# Area of interest as a WKT polygon; it is parsed further down with shapely.wkt.loads.
AOI = 'POLYGON ((30.2759366834769 48.51505083641415, 30.61727452548106 48.45927941996875, 30.64285389693346 48.5330821966183, 30.30277405680401 48.58843427910546, 30.2759366834769 48.51505083641415))'
# Bounds of the acquisition window, formatted as YYYY-MM-DD.
START_DATE = "2020-05-01"
END_DATE = "2020-06-30"

# Request identifier; it becomes the prefix of the saved result file name.
REQUEST_ID = '6'

### Detecting boundaries for given AOI

In [None]:
import os
import json
import time
import cv2
import rasterio
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio.mask
import tempfile
import shapely
import re

from tqdm import tqdm
from os.path import join, basename, split
from skimage import measure
from scipy.ndimage import rotate
from rasterio.features import rasterize, shapes
from rasterio.merge import merge
from shapely.geometry import Polygon, shape, LinearRing
import shapely.wkt
from pathlib import Path
from datetime import datetime
import yaml
import torch

from sentinel2download.downloader import Sentinel2Downloader
from sip_plot_boundary_detection_nn.code.preprocessing import (
    preprocess_sentinel_raw_data, read_raster, extract_tci)
from sip_plot_boundary_detection_nn.code.engine import load_model, val_tfs
from sip_plot_boundary_detection_nn.code.dataset import BoundaryDetector
from sip_plot_boundary_detection_nn.code.filter_polygons import filter_polygons

import warnings
# Suppress every warning for the rest of the notebook run.
warnings.filterwarnings('ignore')
import logging
# Set the root logger threshold to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Ask PyTorch to release its cached, currently unused GPU memory.
torch.cuda.empty_cache()

In [None]:
# CRS attached to the AOI file; presumably the AOI WKT is lon/lat in WGS 84 — confirm.
default_crs = 'EPSG:4326'

polygon = shapely.wkt.loads(AOI)
# Timestamped name so that repeated runs do not overwrite each other's helper file.
aoi_filename = f"{time.time()}_aoi.geojson"
# Attach the CRS explicitly instead of relying on whatever the GeoJSON reader assumes.
gpd.GeoDataFrame(geometry=[polygon], crs=default_crs).to_file(aoi_filename, driver="GeoJSON")
# Parsing also validates the date format before any download starts.
start_date = datetime.strptime(START_DATE, '%Y-%m-%d')
end_date = datetime.strptime(END_DATE, '%Y-%m-%d')

In [None]:
def get_tiles(aoi_path, sentinel_tiles_path):
    '''
    Greedily selects the Sentinel-2 tiles that cover the specified AoI.

    On every iteration the tile with the largest intersection with the
    still-uncovered part of the AoI is picked, and that tile's footprint is
    subtracted from the remaining AoI.

        Parameters:
            aoi_path (str): Path to geojson/shp file with AoI to process.
            sentinel_tiles_path (str): Path to geojson/shp file with all Sentinel-2 tiles.
                The file must provide a "Name" column, which is used as the tile ID.

        Returns:
            date_tile_info (GeoDataFrame): Selected tiles (tileID, geometry), where
                geometry is the part of the AoI assigned to that tile. The CRS is
                copied from the AoI file.
    '''
    aoi_file = gpd.read_file(aoi_path)
    sentinel_tiles = gpd.read_file(sentinel_tiles_path).set_index("Name", drop=False)

    best_intersection = {"tileID": [], "geometry": []}
    rest_aoi = aoi_file.copy()

    while rest_aoi.area.sum() > 0:
        res_intersection = gpd.overlay(rest_aoi, sentinel_tiles, how="intersection")
        if res_intersection.empty:
            # No tile touches what is left of the AoI; argmax on an empty
            # series would raise, so stop with the tiles found so far.
            print('WARNING: part of the AOI is not covered by any Sentinel-2 tile')
            break

        # argmax returns a position, so use positional access.
        biggest_area_pos = res_intersection.area.argmax()
        tileID = res_intersection["Name"].iloc[biggest_area_pos]
        this_aoi = res_intersection.geometry.iloc[biggest_area_pos]

        best_intersection["tileID"].append(tileID)
        best_intersection["geometry"].append(this_aoi)

        biggest_intersection = sentinel_tiles.loc[[tileID]]
        rest_aoi = gpd.overlay(rest_aoi, biggest_intersection, how="difference")
        # Only tiles that intersected this round can intersect later rounds;
        # unique() keeps a tile from being listed once per AoI feature.
        sentinel_tiles = sentinel_tiles.loc[res_intersection["Name"].unique()]

    date_tile_info = gpd.GeoDataFrame(best_intersection, geometry="geometry", crs=aoi_file.crs)

    return date_tile_info


In [None]:
def process_polygons(result_df, current_crs, limit=500, dst_crs="EPSG:4326"):
    """
    Tag the result polygons with their CRS and reproject them for saving.

        Parameters:
            result_df (pd.DataFrame): Result DataFrame (expected to carry a "geometry" column)
            current_crs: CRS the geometries are currently expressed in. It is assigned
                as-is, without reprojection, replacing any CRS already attached.
            limit (int): min area for polygon in m2. NOTE: kept for interface
                compatibility only; no area filtering is applied in this function.
            dst_crs: CRS of the returned frame.
        Returns:
            GeoDataFrame: GeoDataFrame reprojected to dst_crs, ready for saving
    """
    # Work on a copy so the CRS override never leaks into the caller's frame.
    gdf = gpd.GeoDataFrame(result_df.copy())
    gdf.crs = current_crs

    return gdf.to_crs(dst_crs)


def save_polygons(gdf, save_path):
    """
    Write polygons to a GeoJSON file, creating the target directory if needed.

        Parameters:
            gdf (GeoDataFrame): Polygons to save.
            save_path (str): Destination file path.
        Returns:
            GeoDataFrame: The input frame, or None when it is empty (nothing is written).
    """
    if len(gdf) == 0:
        return None

    directory = os.path.dirname(save_path)
    # dirname is "" for a bare file name, and os.makedirs("") would raise.
    if directory:
        os.makedirs(directory, exist_ok=True)

    gdf.to_file(save_path, driver='GeoJSON')

    return gdf

### Find tile indexes

In [None]:
# Notebook user name from the environment; os.getenv returns None when NB_USER
# is unset, in which case BASE becomes "/home/None/work".
NB_USER = os.getenv('NB_USER')
BASE = f"/home/{NB_USER}/work"

# Default locations below BASE. API_KEY and PBD_DIR are reassigned in the next
# cell when the "data/notebooks/pbdnn" layout is found on disk.
API_KEY = os.path.join(BASE, ".secret/sentinel2_google_api_key.json")
LOAD_DIR = os.path.join(BASE, "satellite_imagery")
RESULTS_DIR = os.path.join(BASE, "results/pbdnn")
PBD_DIR = os.path.join(BASE, "notebooks/pbdnn")

# Download settings; BANDS and CONSTRAINTS are read as globals by load_images().
BANDS = {'TCI'}
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 15.0, 'CLOUDY_PIXEL_PERCENTAGE': 5.0, }
PRODUCT_TYPE = 'L2A'

In [None]:
# Detect which directory layout is present. `local` is True when the resources
# live under BASE/data/notebooks/pbdnn and False when they live under BASE/notebooks/pbdnn.
local = False
ukr_shapefile = os.path.join(BASE, "data/notebooks/pbdnn/sip_plot_boundary_detection_nn/ukr_shapes/custom.geo.json")
if os.path.exists(ukr_shapefile):
    usa_shapefile = os.path.join(BASE, "data/notebooks/pbdnn/sip_plot_boundary_detection_nn/usa_shapes/custom.geo.json")
    config_file = os.path.join(BASE, "data/notebooks/pbdnn/sip_plot_boundary_detection_nn/code/config.yaml")
    # Only this layout relocates the API key and the PBD directory.
    API_KEY = os.path.join(BASE, "data/notebooks/pbdnn/sentinel2_google_api_key.json")
    PBD_DIR = os.path.join(BASE, "data/notebooks/pbdnn")
    sentinel_tiles_path = os.path.join(BASE, "data/notebooks/pbdnn/sentinel2grid.geojson")
    local = True
else:
    ukr_shapefile = os.path.join(BASE, "notebooks/pbdnn/sip_plot_boundary_detection_nn/ukr_shapes/custom.geo.json")
    usa_shapefile = os.path.join(BASE, "notebooks/pbdnn/sip_plot_boundary_detection_nn/usa_shapes/custom.geo.json")
    config_file = os.path.join(BASE, "notebooks/pbdnn/sip_plot_boundary_detection_nn/code/config.yaml")
    sentinel_tiles_path = os.path.join(BASE, "notebooks/pbdnn/sentinel2grid.geojson")
    local = False

# Load the project configuration; the 'model' entry is read here and 'device' is set later in the notebook.
with open(config_file) as f:
    config = yaml.safe_load(f)

### Check location before filtering non-agricultural lands

In [None]:
ukraine = gpd.read_file(ukr_shapefile)
usa = gpd.read_file(usa_shapefile)
aoi = gpd.read_file(aoi_filename)

# PBD_DIR already points at the layout detected above, so one root serves both layouts.
shapes_root = os.path.join(PBD_DIR, "sip_plot_boundary_detection_nn")
aoi_geometry = aoi.geometry.values[0]

# Compare the AOI against every feature of the country layer. A GeoSeries-to-GeoSeries
# intersects() is aligned by index and would only ever test the first features.
if ukraine.intersects(aoi_geometry).any():
    filter_path = os.path.join(shapes_root, "ukr_shapes/ukr_non_agriculture.geojson")
elif usa.intersects(aoi_geometry).any():
    filter_path = os.path.join(shapes_root, "usa_shapes/us_shape.geojson")
else:
    # No non-agricultural layer is available for this location.
    filter_path = None

### Download data

In [None]:
def _check_folder(tile_folder, file, limit, nodata):
    """
    Tell whether the first band of a raster stays within the NoData budget.

        Parameters:
            tile_folder (str): Folder containing the raster.
            file (str): Raster file name inside tile_folder.
            limit (float): Maximum accepted share of NoData pixels, in percent.
            nodata: Pixel value counted as NoData.
        Returns:
            bool: True when the NoData share (rounded to 2 decimals) is <= limit.
    """
    raster_path = os.path.join(tile_folder, file)
    with rasterio.open(raster_path) as src:
        band = src.read(1)

    missing_pixels = np.count_nonzero(band == nodata)
    missing_share = round(missing_pixels / band.size * 100, 2)
    return bool(missing_share <= limit)

In [None]:
def check_nodata(loadings, product_type, limit=15.0, nodata=0):
    """
    Keep, per tile, the folders that contain a usable .jp2 raster.

    A file is a candidate when its name ends with ".jp2" and does not contain
    "OPER". For product type 'L1C' with a truthy limit, a folder is kept as soon
    as one candidate passes _check_folder; otherwise a folder is kept when it
    has at least one candidate.

        Parameters:
            loadings (dict): tile ID -> iterable of folder paths.
            product_type (str): Product type, e.g. 'L1C' or 'L2A'.
            limit (float): NoData percentage limit passed to _check_folder.
            nodata: NoData pixel value passed to _check_folder.
        Returns:
            dict: tile ID -> set of kept folder paths.
    """
    inspect_pixels = bool(product_type == 'L1C' and limit)
    kept_by_tile = {}

    for tile_id, tile_folders in loadings.items():
        kept = set()
        for tile_folder in tile_folders:
            candidates = [
                name for name in os.listdir(tile_folder)
                if name.endswith(".jp2") and "OPER" not in name
            ]
            if inspect_pixels:
                # any() stops at the first raster that passes, like the original break.
                if any(_check_folder(tile_folder, name, limit, nodata) for name in candidates):
                    kept.add(tile_folder)
            elif candidates:
                kept.add(tile_folder)
        kept_by_tile[tile_id] = kept

    return kept_by_tile

In [None]:
regex = r'\<CLOUDY_PIXEL_PERCENTAGE\>[0-9]*\.?[0-9]*</CLOUDY_PIXEL_PERCENTAGE>'

def get_min_clouds(loadings, max_ptc=5):
    """
    Pick, per tile, the folder whose metadata reports the lowest cloud percentage.

    Every file whose name contains "MTD_TL.xml" is searched for a
    CLOUDY_PIXEL_PERCENTAGE element. Percentages are compared as numbers, and
    ties are broken by folder path.

        Parameters:
            loadings (dict): tile ID -> iterable of folder paths.
            max_ptc (float): Currently unused; kept so existing callers keep working.
        Returns:
            dict: tile ID -> path of the least cloudy folder. Tiles without any
                readable cloud percentage are omitted.
    """
    filtered = dict()

    for tile, folders in loadings.items():
        candidates = []
        for folder in folders:
            for file in os.listdir(folder):
                if "MTD_TL.xml" not in file:
                    continue

                with open(os.path.join(folder, file)) as f:
                    match = re.search(regex, f.read())
                if match is None:
                    continue

                # Strip the surrounding tags, keeping only the numeric text.
                digits = ''.join(ch for ch in match.group(0) if ch.isdigit() or ch == '.')
                try:
                    # Compare as numbers: as strings "10.5" would sort before "5.0".
                    candidates.append((float(digits), folder))
                except ValueError:
                    # The pattern also matches an empty or "."-only value; skip those.
                    continue

        # A tile with no usable metadata is left out instead of raising IndexError.
        if candidates:
            filtered[tile] = min(candidates)[1]

    return filtered

In [None]:
def load_images(api_key, tiles, start_date, end_date, output_dir, product_type="L2A"):
    """
    Download imagery for each tile and report the folders the files landed in.

    The bands and constraints come from the module-level BANDS and CONSTRAINTS.

        Parameters:
            api_key: Passed to Sentinel2Downloader.
            tiles (iterable): Tile IDs; each one is downloaded with its own request.
            start_date, end_date: Bounds of the acquisition window, forwarded to the downloader.
            output_dir (str): Directory the downloader writes into.
            product_type (str): Product type forwarded to the downloader.
        Returns:
            dict: tile ID -> set of parent folders (as strings) of the first
                element of every entry the downloader returned for that tile.
    """
    downloader = Sentinel2Downloader(api_key)

    downloaded = {}
    for tile_id in tiles:
        downloaded[tile_id] = downloader.download(
            product_type,
            [tile_id],
            start_date=start_date,
            end_date=end_date,
            output_dir=output_dir,
            bands=BANDS,
            constraints=CONSTRAINTS,
        )
        print(f'{tile_id} loaded')

    # Folders are derived only after every download has finished.
    return {
        tile_id: {str(Path(entry[0]).parent) for entry in entries}
        for tile_id, entries in downloaded.items()
    }

In [None]:
# Credit for baseline: work/notebooks/pw/raster_predict.ipynb
def create_style():
    """Return the polygon styling attributes as the string form of a dict."""
    # NOTE(review): 'stroke' has no leading '#', unlike 'color' — confirm the consumer accepts that.
    attributes = {
        'color': '#C0C0C0',
        'stroke': 'e80e27',
        'stroke-width': 2,
    }
    return str(attributes)

In [None]:
# Run inference on the GPU when PyTorch can see one, otherwise on the CPU.
config['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

# PBD_DIR already reflects the layout detected earlier, so one join covers both layouts.
model_path = os.path.join(
    PBD_DIR, 'sip_plot_boundary_detection_nn/models/UnetPlusPlus_imagenet_30_denmark_filtered.pth')

model = load_model(config['model'].lower(), model_path, config['device'])

print('Model loaded successfully')

date_tile_info = get_tiles(aoi_filename, sentinel_tiles_path)
loadings = load_images(API_KEY, date_tile_info.tileID.values, START_DATE, END_DATE, LOAD_DIR, PRODUCT_TYPE)
# First drop folders without a usable raster, then keep one folder per tile.
nodata_checked = check_nodata(loadings, PRODUCT_TYPE)
checked = get_min_clouds(nodata_checked)

origin_name = os.path.basename(aoi_filename).replace(".geojson", "")

print(f'total tiles: {len(loadings)}')

# LOAD_DIR is the directory the imagery was downloaded into above.
detector = BoundaryDetector(model, tiles_dir=LOAD_DIR)
print('Detector is ready')

In [None]:
result_df = pd.DataFrame([])

if filter_path is not None:
    filters = gpd.read_file(filter_path)
    aoi = gpd.read_file(aoi_filename).to_crs(filters.crs)

    if 'ukr_non_agriculture' in filter_path:
        # Zero-width buffer; the same call is used as the "fixing geometries" step later in the notebook.
        filters['geometry'] = filters.buffer(0)

    # Keep only the filter features that touch the AOI.
    filters = filters[filters.intersects(aoi.geometry.values[0])]
    print('filters are loaded')
else:
    # Later cells read len(filters) and filters.crs unconditionally, so define an
    # empty layer in the AOI's CRS instead of leaving the name unbound.
    filters = gpd.GeoDataFrame(geometry=[], crs=aoi.crs)

In [None]:
def stitch_tiles(paths, out_raster_path='test.tif'):
    """
    Merge several rasters into one GeoTIFF.

    The output name is derived from out_raster_path by replacing '.jp2' (or,
    failing that, '.tif') with '_merged.tif'. Metadata and CRS are taken from
    the last raster in paths.

        Parameters:
            paths (iterable): Paths of the rasters to merge; must not be empty.
            out_raster_path (str): Name the output path is derived from.
        Returns:
            str: Path of the written merged raster.
        Raises:
            ValueError: If paths is empty.
    """
    paths = list(paths)
    if not paths:
        raise ValueError('stitch_tiles needs at least one raster path')

    tiles = []
    try:
        for path in paths:
            tiles.append(rasterio.open(path))

        tile_arr, transform = merge(tiles, method='last')
        # Read metadata while the dataset is still open.
        meta, crs = tiles[-1].meta, tiles[-1].crs
    finally:
        # Close every dataset that was opened, even if opening or merging failed.
        for tile in tiles:
            tile.close()

    meta.update({"driver": "GTiff",
                 "height": tile_arr.shape[1],
                 "width": tile_arr.shape[2],
                 "transform": transform,
                 "crs": crs})

    if '.jp2' in out_raster_path:
        out_raster_path = out_raster_path.replace('.jp2', '_merged.tif')
    else:
        out_raster_path = out_raster_path.replace('.tif', '_merged.tif')

    with rasterio.open(out_raster_path, "w", **meta) as dst:
        dst.write(tile_arr)
    # Report only once the file has actually been written.
    print(f'saved raster {out_raster_path}')

    return out_raster_path

### Running boundary detection and filtration of predictions

In [None]:
rasters = []
# NOTE: `tile` stays bound after the loop and is read again when polygon ids are built.
for _, tile in date_tile_info.iterrows():
    try:
        tile_folder = Path(checked[tile.tileID])
        print(f'checked: {tile_folder}')
    except Exception as ex:
        # Best effort: a tile without a selected folder is reported and skipped.
        print(ex)
        continue

    # Sorted so the pick is deterministic if several files match.
    full_tile = [os.path.join(tile_folder, filename)
                 for filename in sorted(os.listdir(tile_folder)) if 'TCI_10m.jp2' in filename]
    if not full_tile:
        # Indexing an empty list would abort the whole run; skip this tile instead.
        print(f'WARNING: no TCI_10m.jp2 raster found in {tile_folder}, skipping tile {tile.tileID}')
        continue
    rasters.append(full_tile[0])

print(f'rasters to be processed: {rasters}')

In [None]:
if len(rasters) > 1:
    # Several tiles cover the AOI: merge them into a single raster first.
    raster_path = stitch_tiles(rasters)
elif len(rasters) == 1:
    raster_path = rasters[0]
else:
    # Without a raster the following cells cannot run; fail here with a clear
    # message instead of a NameError on raster_path in the next cell.
    raise RuntimeError('WARNING: no rasters were found! Nothing to process for the given AOI and dates.')

In [None]:
# Derive the prediction raster name from the input raster name.
source_suffix = '.jp2' if '.jp2' in raster_path else '.tif'
out_raster = raster_path.replace(source_suffix, '_prediction.tif')

# The vector output sits next to the AOI helper file.
out_geom = aoi_filename.replace('_aoi.', '_prediction.')

prediction_options = dict(
    in_raster_path=raster_path,
    out_raster_path=out_raster,
    aoi_path=aoi_filename,
    conf_thresh=0.25,
)
pred_tif_path = detector.raster_prediction(**prediction_options)

In [None]:
# Turn the prediction raster into polygons, using the same threshold as the prediction step.
raw_polygons = detector.process_raster_predictions(
    pred_tif_path,
    shapes_path=out_geom,
    aoi_path=aoi_filename,
    conf_thresh=0.25,
)
polygons = gpd.GeoDataFrame(raw_polygons)

In [None]:
# `filters` is only bound when a filter layer was selected, so check filter_path
# first to avoid a NameError for AOIs without one.
if filter_path is not None and len(filters) != 0:
    try:
        polys = filter_polygons(polygons, filters)
    except Exception as e:
        # Retry once after rebuilding the filter geometries with a zero-width buffer.
        print(e, '\n', 'fixing geometries...')
        filters['geometry'] = filters.buffer(0)
        polys = filter_polygons(polygons, filters)
else:
    print('Non-agricultural data is not found for a given AOI, proceeding without filtering')
    polys = polygons

In [None]:
# Flatten the filtered polygons into a plain frame; reset_index() moves the
# previous index into a regular column and gives the frame a fresh 0..n-1 index.
df = pd.DataFrame({"geometry": polys.geometry}).reset_index()
# NOTE(review): `tile` is whatever the raster-collection loop left bound last, so when
# several tiles were stitched every polygon carries the last tile's ID — confirm intended.
df["id"] = pd.Series(map(lambda x: f"{origin_name}_{tile.tileID}_{x}", df.index.values))
df["tileID"] = tile.tileID

In [None]:
result_df = pd.concat([result_df, df])
# `filters` is only bound when a filter layer was selected. Without one, fall back
# to the CRS carried by the predictions, and finally to the notebook default.
if filter_path is not None:
    current_crs = filters.crs
else:
    current_crs = getattr(polys, 'crs', None)
    if current_crs is None:
        current_crs = default_crs
gdf = process_polygons(result_df, current_crs)
gdf.head(3)

### Saving polygons into results folder and adding metadata

In [None]:
tmp_suffix = ".temp"
gdf['style'] = create_style()
final_path = os.path.join(RESULTS_DIR, f"{REQUEST_ID}_{START_DATE}_{END_DATE}.geojson")
# Write under a temporary name first; the file is renamed once it is complete.
save_path = final_path + tmp_suffix

# The metadata file below is written even when there are no polygons, so the
# results directory must exist whether or not save_polygons created it.
os.makedirs(RESULTS_DIR, exist_ok=True)
save_polygons(gdf, save_path)

try:
    with open(save_path) as file:
        geoms = json.load(file)
except (OSError, ValueError):
    # save_polygons writes nothing for an empty frame (OSError: missing file);
    # ValueError covers a file that is not valid JSON. Start from empty metadata.
    geoms = {}

geoms['end_date'] = END_DATE
geoms['start_date'] = START_DATE
geoms['name'] = "Fields' boundaries"
geoms['request_id'] = REQUEST_ID

with open(save_path, 'w') as file:
    json.dump(geoms, file)
# os.replace also overwrites an existing result from an earlier run on every platform.
os.replace(save_path, final_path)

In [None]:
try:
    # Remove the temporary AOI GeoJSON written at the start of the notebook.
    os.remove(aoi_filename)
except FileNotFoundError:
    # Best-effort cleanup: a missing file is only reported.
    print('No helping geojson files were generated')

In [None]:
try:
    # Remove the prediction raster path that was requested from detector.raster_prediction.
    os.remove(out_raster)
except FileNotFoundError:
    # Best-effort cleanup: a missing file is only reported.
    print('No helping raster files were generated')

In [None]:
try:
    # Remove the path returned by detector.raster_prediction.
    # NOTE(review): presumably the same file as out_raster, in which case it is already gone — confirm.
    os.remove(pred_tif_path)
except FileNotFoundError:
    # Best-effort cleanup: a missing file is only reported.
    print('No helping prediction raster found, skipping')