### Prepare Planet and Sentinel-2 images for a selected clearcut AOI, show the clearcuts on the AOI, and compare image quality

In [None]:
import os
import rasterio
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio.mask
import tempfile
import shapely
import re

from rasterio.features import rasterize, shapes
from rasterio.merge import merge
from shapely.geometry import Polygon, shape, box
from pathlib import Path
from datetime import datetime, timedelta

from sentinel2download.downloader import Sentinel2Downloader

In [None]:
# Root of the working tree. NB_USER is read from the environment; if it is unset,
# os.getenv returns None and the path becomes "/home/None/work".
BASE = f"/home/{os.getenv('NB_USER')}/work"

# Notebook working directory, local Planet imagery folder and output folder for results.
WORKDIR = os.path.join(BASE, "notebooks/planet/planet_vs_sentinel")
IMGDIR = os.path.join(WORKDIR, "planet_imagery")
RESULTS_DIR = os.path.join(BASE, "results/planet/planet_vs_sentinel")

### Read AOI

In [None]:
def read_aoi(path, crs="epsg:4326"):
    """
    Read an AOI vector file and make sure it is expressed in the requested CRS
    :param path: str, path to a vector file readable by geopandas
    :param crs: str, target CRS, e.g. "epsg:4326"
    :return: GeoDataFrame in the requested CRS
    """
    df = gpd.read_file(path)
    # pyproj renders the authority code in upper case ("EPSG:4326"), so a
    # case-sensitive comparison with the default "epsg:4326" never matches.
    # Compare case-insensitively and reproject only when the CRS really differs.
    if str(df.crs).lower() != str(crs).lower():
        print(f"{path}: {df.crs}. Transform to {crs}")
        df = df.to_crs(crs)
    return df

In [None]:
# AOI file: taken from the AOI environment variable, otherwise the Horodnie file under WORKDIR/aoi.
aoi_path = os.getenv("AOI", os.path.join(WORKDIR, "aoi/Horodnie_20191001_20191101.geojson")) 
# AOI file names (presumably the alternatives to switch between — confirm they exist under WORKDIR/aoi):
# Pervukhynka_20190801_20190831.geojson 
# Horodnie_20191001_20191101.geojson
# Harkavets_20190501_20190518.geojson

aoi = read_aoi(aoi_path)

In [None]:
aoi

In [None]:
# Date window, kept as strings: it is compared directly against the `acquired` column of the
# Planet grid and the `img_date` column of the clearcuts in later cells.
start_date = '2019-10-01'
end_date = '2019-11-01'

In [None]:
def filter_planet(grid, start_date, end_date):
    """
    Keep only the Planet scenes that pass every quality and date criterion
    :param grid: DataFrame with columns ground_control, item_type, quality_category,
                 cloud_cover, acquired, snow_ice_percent, clear_percent
    :param start_date: lower bound for `acquired` (inclusive, compared as-is)
    :param end_date: upper bound for `acquired` (inclusive, compared as-is)
    :return: filtered frame with a fresh 0..n-1 index
    """
    # `acquired` is compared without any datetime conversion, exactly as it is stored.
    criteria = (
        grid.ground_control == "1",
        grid.item_type == "PSScene4Band",
        grid.quality_category == "standard",
        grid.cloud_cover == 0,
        grid.acquired >= start_date,
        grid.acquired <= end_date,
        grid.snow_ice_percent == 0,
        grid.clear_percent >= 95,
    )

    keep = criteria[0]
    for criterion in criteria[1:]:
        keep = keep & criterion

    return grid[keep].reset_index(drop=True)

In [None]:
def epsg_code(longitude, latitude):
    """
    Generates the EPSG code of the WGS84 / UTM zone containing lon, lat
    :param longitude: float, degrees in [-180, 180]
    :param latitude: float, degrees
    :return: int, EPSG code (326xx for latitude > 0, 327xx otherwise)
    """

    def _zone_number(lat, lon):
        # Special zone: southern Norway is widened to zone 32
        if 56 <= lat < 64 and 3 <= lon < 12:
            return 32
        # Special zones around Svalbard (even zones 32, 34, 36 are not used there)
        if 72 <= lat <= 84 and lon >= 0:
            if lon < 9:
                return 31
            elif lon < 21:
                return 33
            elif lon < 33:
                return 35
            elif lon < 42:
                return 37

        # Regular 6-degree zones. The modulo wraps lon == 180 onto zone 1
        # (the same meridian as lon == -180) instead of the non-existent zone 61.
        return int((lon + 180) / 6) % 60 + 1

    zone = _zone_number(latitude, longitude)

    if latitude > 0:
        return 32600 + zone
    else:
        return 32700 + zone

In [None]:
def _intersect(aoi, grid, limit=1):
    """
    Find all tiles of `grid` that intersect the first geometry of `aoi` and
    reproject them to the UTM zone of that geometry.

    NOTE: `aoi` is reprojected to the UTM zone IN PLACE (callers see the change).
    NOTE: `limit` is kept for interface compatibility only; no minimum-area
    filtering is applied.
    Assumes `aoi` and `grid` share a geographic lon/lat CRS, since the UTM zone
    is derived from the raw centroid coordinates — TODO confirm against callers.

    :param aoi: GeoDataFrame, only its first geometry is used
    :param grid: GeoDataFrame of tiles
    :param limit: float, min intersection area in km2 (currently unused)
    :return: (GeoDataFrame, epsg), precised intersected tiles in UTM and UTM zone code
    """
    geometry = aoi.geometry.iloc[0]

    # The spatial index yields POSITIONS of candidate tiles (bounding-box test),
    # so they must be looked up with iloc, not with the label-based loc.
    candidate_positions = list(grid.sindex.intersection(geometry.bounds))
    candidates = grid.iloc[candidate_positions]

    # Precise test, evaluated on the candidates only so the mask matches their index
    intersected_grid = candidates[candidates.intersects(geometry)]

    epsg = epsg_code(geometry.centroid.x, geometry.centroid.y)

    # To UTM projection in meters. The grid subset is reprojected into a new
    # frame (an in-place change on a slice of `grid` triggers pandas warnings).
    aoi.to_crs(epsg=epsg, inplace=True)
    intersected_grid = intersected_grid.to_crs(epsg=epsg)

    return intersected_grid, epsg

In [None]:
def _overlap_aoi_row(aoi, bbox_intersected, crs):
    """
    Greedily split one AOI row between the tiles that cover it.

    On every step the tile with the largest intersection with the still
    uncovered part of the AOI is chosen, the intersection is stored and the
    tile is subtracted from the AOI. The loop stops when the AOI is fully
    covered or no tile is left. Processing is best-effort: errors are printed
    and the pieces collected so far are returned.

    :param aoi: one AOI record (a row produced by `GeoDataFrame.itertuples()`)
    :param bbox_intersected: GeoDataFrame of candidate tiles with a "Name" column,
                             in the same CRS as `crs`
    :param crs: CRS of the AOI row
    :return: list of dicts {"Name": tile name, "geometry": AOI part covered by the tile}
    """
    result = list()

    intersected = bbox_intersected.copy()
    rest_aoi = gpd.GeoDataFrame([aoi], crs=crs)

    try:
        while rest_aoi.area.sum() > 0:
            if intersected.empty:
                print("Error: AOI is not fully covered, no tiles left")
                break

            # intersection of the uncovered AOI with the remaining tiles
            res_intersection = gpd.overlay(rest_aoi, intersected, how="intersection")
            if res_intersection.empty:
                print("Error: AOI is not fully covered by the remaining tiles")
                break

            # idxmax returns an index LABEL, which is what .loc expects
            biggest_area_id = res_intersection.area.idxmax()

            tile_id = res_intersection.loc[biggest_area_id, "Name"]
            intersected_aoi = res_intersection.loc[biggest_area_id, "geometry"]

            # store the part of the AOI covered by the chosen tile
            result.append({"Name": tile_id, "geometry": intersected_aoi})
            biggest_tile = intersected.loc[intersected.Name == tile_id]

            rest_aoi = gpd.overlay(rest_aoi, biggest_tile, how="difference")  # aoi minus biggest tile
            intersected = intersected[intersected.Name != tile_id]  # remove used tile
    except Exception as ex:
        # deliberate best-effort: keep what was collected before the failure
        print(f"Error: {str(ex)}")

    return result

In [None]:
def overlap(aoi, grid):
    """
    Split every AOI geometry between the tiles of `grid` that cover it
    :param aoi: GeoDataFrame with the area(s) of interest (not modified)
    :param grid: GeoDataFrame of tiles with a "Name" column, in the same CRS as `aoi`
    :return: GeoDataFrame with columns "Name" and "geometry" in the UTM zone of the
             AOI bounding box; empty (but with both columns) when nothing intersects
    """
    # first, find the tiles intersecting the bounding box of the whole AOI
    bbox = gpd.GeoDataFrame(geometry=[box(*aoi.total_bounds)], crs=aoi.crs)
    bbox_intersected, epsg = _intersect(bbox, grid)

    # precise intersection, done in the same UTM projection as the tiles;
    # to_crs returns a new frame, so the caller's AOI stays untouched
    projected_aoi = aoi.to_crs(epsg=epsg)

    results = list()
    for row in projected_aoi.itertuples():
        results.extend(_overlap_aoi_row(row, bbox_intersected, projected_aoi.crs))

    # Explicit columns keep the frame well-formed (and mergeable on "Name")
    # even when no tile was found.
    return gpd.GeoDataFrame(results,
                            columns=["Name", "geometry"],
                            geometry="geometry",
                            crs=projected_aoi.crs)

### 1. Planet images

In [None]:
# Load the Planet scene footprints from GeoJSON and show the first record.
planet_grid = gpd.read_file(os.path.join(BASE, "notebooks/planet/planet_grid.geojson"), driver="GeoJSON")
planet_grid.head(1)

In [None]:
# Keep only the scenes that pass the filter_planet criteria within the date window.
planet_grid = filter_planet(planet_grid, start_date, end_date)

In [None]:
# Keep only the needed columns; "id" becomes "Name" because the overlap helpers look tiles up by "Name".
planet_grid = planet_grid[['id', 'acquired', 'geometry']].copy()
planet_grid.rename(columns={"id": "Name"}, inplace=True)

In [None]:
planet_grid.tail(3)

In [None]:
# Split the AOI between the Planet scenes that cover it (result is in a UTM projection).
planet_tiles = overlap(aoi, planet_grid)

In [None]:
planet_tiles

In [None]:
# Attach the acquisition timestamp of every selected scene (joined on "Name").
planet_tiles = planet_tiles.merge(planet_grid[['Name', 'acquired']], on='Name')

In [None]:
planet_tiles

In [None]:
# Derive a YYYY-MM-DD string from the acquisition timestamp.
# strptime raises ValueError unless `acquired` has exactly the '%Y-%m-%dT%H:%M:%S' layout.
planet_tiles['date'] = planet_tiles.acquired.apply(lambda date:
                                                   datetime.strptime(date, '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d'))

In [None]:
planet_tiles

In [None]:
def crop(input_path, output_path, polygon, name=None, date=None):
    """
    Cut `polygon` out of a raster and save the result as a GeoTIFF
    :param input_path: str, raster to read
    :param output_path: str, GeoTIFF to write (parent folders are created)
    :param polygon: geometry used as the crop mask
    :param name: optional value stored in the "name" tag of the output
    :param date: optional value stored in both "start_date" and "end_date" tags
    """
    with rasterio.open(input_path) as source:
        cropped, cropped_transform = rasterio.mask.mask(source, [polygon], crop=True)
        profile = source.meta

    # Describe the cropped array: new size and transform, pixels equal to 0 are nodata
    profile.update(
        driver="GTiff",
        height=cropped.shape[1],
        width=cropped.shape[2],
        transform=cropped_transform,
        nodata=0,
    )

    # Collect the optional tags up front so they are written in one go
    tags = dict()
    if name:
        tags["name"] = name
    if date:
        tags["start_date"] = date
        tags["end_date"] = date

    target_dir = os.path.dirname(output_path)
    os.makedirs(target_dir, exist_ok=True)

    with rasterio.open(output_path, "w", **profile) as target:
        if tags:
            target.update_tags(**tags)
        target.write(cropped)

In [None]:
def create_mosaic(files_to_mosaic, results_dir, name, start_date=None, end_date=None):
    """
    Merge several rasters into one GeoTIFF "<results_dir>/<name>_mosaic.tif"

    The mosaic is written under a ".temp" suffix first and renamed when complete,
    so a partially written file never appears under the final name.

    :param files_to_mosaic: iterable of raster paths; metadata of the first one is used
    :param results_dir: str, output folder (created if missing)
    :param name: str, prefix of the output file, also stored in the "name" tag
    :param start_date: optional value for the "start_date" tag
    :param end_date: optional value for the "end_date" tag
    :raises ValueError: if `files_to_mosaic` is empty
    """
    files_to_mosaic = list(files_to_mosaic)
    if not files_to_mosaic:
        raise ValueError("No files to mosaic")

    sources = list()
    try:
        for fp in files_to_mosaic:
            sources.append(rasterio.open(fp))

        mosaic, out_trans = merge(sources)
        # merge takes its output properties from the first input,
        # so take the metadata from the same dataset
        out_meta = sources[0].meta.copy()
    finally:
        # always release the file handles, even if merge fails
        for src in sources:
            src.close()

    # Update the metadata
    out_meta.update({"driver": "GTiff",
                     "height": mosaic.shape[1],
                     "width": mosaic.shape[2],
                     "transform": out_trans,
                     })

    final_fp = os.path.join(results_dir, f"{name}_mosaic.tif")
    temp_fp = f"{final_fp}.temp"
    os.makedirs(os.path.dirname(temp_fp), exist_ok=True)

    with rasterio.open(temp_fp, "w", **out_meta) as dest:
        if name:
            dest.update_tags(name=name)
        if start_date:
            dest.update_tags(start_date=start_date)
        if end_date:
            dest.update_tags(end_date=end_date)
        dest.write(mosaic)

    # os.replace also overwrites an existing mosaic on every platform
    os.replace(temp_fp, final_fp)

### Prepare Planet images for AOI

In [None]:
# Crop every selected Planet scene to the part of the AOI assigned to it.
print("Start calculations...")
aoi_name = Path(aoi_path).stem
for row in planet_tiles.itertuples():
        
    name = f"{aoi_name}_{row.Name}"
        
    # Visual product of the scene inside the local Planet imagery folder
    input_path = os.path.join(IMGDIR, f"PSScene/{row.Name}/visual/{row.Name}_3B_Visual.tif")
    print(input_path)
    # Written under a ".temp" suffix first; renamed below once crop has finished
    output_path = os.path.join(RESULTS_DIR, f"{name}_planet_cropped.tif.temp")
    print(output_path)
        
    # NOTE(review): row.geometry comes from overlap() in a UTM projection and crop() does not
    # reproject it — presumably the Planet rasters use the same CRS; confirm.
    crop(input_path, output_path, row.geometry, name, row.date)
    # Strip the 5-character ".temp" suffix
    os.rename(output_path, output_path[:-5])
    
print(f"Calculations finished")

### 2. Sentinel2 images

In [None]:
# GeoJSON file with the Sentinel-2 tile grid.
sentinel_grid_path = os.path.join(BASE, "notebooks/planet/sentinel2grid.geojson")

In [None]:
# Load the Sentinel-2 tile grid and show its first records.
sentinel_grid = gpd.read_file(sentinel_grid_path)
sentinel_grid.head()

In [None]:
# Split the AOI between the Sentinel-2 tiles that cover it (result is in a UTM projection).
sentinel_tiles = overlap(aoi, sentinel_grid)
sentinel_tiles 

In [None]:
def shift_dates(start_date, end_date, delta=5, format='%Y-%m-%d'):
    """
    Widen a date interval by `delta` days on both sides
    :param start_date: str, interval start in `format`
    :param end_date: str, interval end in `format`
    :param delta: int, number of days to move each bound outwards
    :param format: str, strptime/strftime layout of the input and output dates
    :return: (str, str), shifted start and end in the same `format`
    """
    margin = timedelta(days=delta)

    widened_start = datetime.strptime(start_date, format) - margin
    widened_end = datetime.strptime(end_date, format) + margin

    return widened_start.strftime(format), widened_end.strftime(format)

In [None]:
# Sentinel-2 search window: the range of Planet acquisition dates widened by
# the default margin of shift_dates on both sides.
start, end = shift_dates(planet_tiles.date.min(), planet_tiles.date.max()) 
start, end

#### Load Sentinel2 images

In [None]:
# Path to the key file (not the key itself) and the folder passed to the downloader as output_dir.
API_KEY = os.path.join(BASE, ".secret/sentinel2_google_api_key.json")
LOAD_DIR = os.path.join(BASE, "satellite_imagery")


# Download settings; BANDS and CONSTRAINTS are read by load_images as module globals.
BANDS = {'TCI', }
# presumably upper limits in percent — confirm against the sentinel2download documentation
CONSTRAINTS = {'NODATA_PIXEL_PERCENTAGE': 10.0, 'CLOUDY_PIXEL_PERCENTAGE': 5.0, }
PRODUCT_TYPE = 'L2A'

In [None]:
def load_images(api_key, tiles, start_date, end_date, output_dir, product_type="L2A"):
    """
    Download Sentinel-2 images tile by tile
    :param api_key: credentials passed to Sentinel2Downloader
    :param tiles: iterable of tile names
    :param start_date: str, start of the search interval
    :param end_date: str, end of the search interval
    :param output_dir: str, folder passed to the downloader as output_dir
    :param product_type: str, product type passed to the downloader
    :return: dict, tile name -> whatever the downloader returned for that tile

    Bands and constraints are taken from the module-level BANDS and CONSTRAINTS.
    """
    downloader = Sentinel2Downloader(api_key)
    downloaded_by_tile = {}

    # One request per tile so that the results can be stored per tile
    for tile in tiles:
        print(f"Loading images for tile: {tile}...")
        tile_result = downloader.download(
            product_type,
            [tile],
            start_date=start_date,
            end_date=end_date,
            output_dir=output_dir,
            bands=BANDS,
            constraints=CONSTRAINTS,
        )
        print(f"Loading images for tile {tile} finished")
        downloaded_by_tile[tile] = tile_result

    return downloaded_by_tile

In [None]:
# Download images for every Sentinel-2 tile selected for the AOI, within the widened date window.
loadings = load_images(API_KEY, sentinel_tiles.Name.values, start, end, LOAD_DIR, PRODUCT_TYPE)

In [None]:
loadings

In [None]:
def filter_date(loadings):
    """
    For every tile pick the most recent downloaded "TCI_10m.jp2" image

    The date is read from the "_<YYYYMMDD>T<time>_" fragment of the file path.
    Entries that are not TCI images or carry no valid date are skipped one by
    one, so a single odd entry no longer discards the whole tile. When several
    images share the latest date, the last one listed wins.

    :param loadings: dict, tile name -> iterable of downloaded items,
                     where the first element of every item is the file path
    :return: dict, tile name -> path of the latest TCI image;
             tiles without a usable image are reported and omitted
    """
    date_pattern = re.compile(r"_(\d{8})T\d+_")

    def _acquisition_date(file):
        """Return the date encoded in the path or None if there is no valid one."""
        found = date_pattern.search(str(file))
        if found is None:
            return None
        try:
            return datetime.strptime(found.group(1), '%Y%m%d')
        except ValueError:
            return None

    filtered = dict()
    for tile, items in loadings.items():
        latest_file, latest_date = None, None

        # `items or ()` tolerates a tile for which nothing was returned
        for file, *_ in items or ():
            if "TCI_10m.jp2" not in str(file):
                continue
            date = _acquisition_date(file)
            if date is None:
                continue
            # ">=" keeps the last listed file among equal dates
            if latest_date is None or date >= latest_date:
                latest_file, latest_date = file, date

        if latest_file is None:
            print(f"Error for {tile}: no dated TCI_10m.jp2 image found")
        else:
            filtered[tile] = latest_file
    return filtered

In [None]:
# Map every tile to the single image selected by filter_date, and display the mapping.
filtered = filter_date(loadings)
filtered

In [None]:
# Stop the notebook early when no image was selected for any tile.
if not filtered:
    raise ValueError("Images not loaded. Change dates or constraints")

### Prepare Sentinel2 images for AOI

In [None]:
# Crop every Sentinel-2 tile to its part of the AOI inside a temporary folder, then merge
# the crops into one mosaic in RESULTS_DIR. The temporary folder is removed on leaving `with`.
with tempfile.TemporaryDirectory(dir=WORKDIR) as tmpdirname:        
    print(f"Сreated temporary directory for calculations: {tmpdirname}")    
    files_to_mosaic = list()
    
    aoi_name = Path(aoi_path).stem
    for row in sentinel_tiles.itertuples():
        
        name = f"{aoi_name}_{row.Name}_sentinel"
        
        # Raises KeyError if filter_date selected no image for this tile
        input_path = filtered[row.Name]        
        output_path = os.path.join(tmpdirname, f"{name}_cropped.tif")
        
        # No name/date is passed, so the intermediate crops carry no tags
        crop(input_path, output_path, row.geometry)
        files_to_mosaic.append(output_path)
    
    # NOTE(review): `name` still holds the value from the LAST loop iteration, so the mosaic
    # file is named after the last tile — confirm this is intended.
    create_mosaic(files_to_mosaic, RESULTS_DIR, name)
    print(f"Calculations finished")

#### Select (if needed) clearcuts from original file provided by V. Kharmtsov

In [None]:
aoi_name = Path(aoi_path).stem
# Relative path: the file is looked up in the current working directory of the notebook.
clearcuts = gpd.read_file("original_clearcuts.geojson")

In [None]:
clearcuts.tail(2)

In [None]:
aoi

In [None]:
# Keep the clearcuts whose image date falls inside the date window (both bounds inclusive).
# NOTE(review): start_date/end_date are strings, so this presumably expects img_date to hold
# ISO-formatted date strings — confirm.
clearcuts = clearcuts.loc[(clearcuts.img_date >= start_date) & 
                          (clearcuts.img_date <= end_date)]
clearcuts.tail(2)

In [None]:
# Clip the clearcuts to the AOI and show the last two results.
aoi_clearcuts = gpd.overlay(clearcuts, aoi, how="intersection") 
aoi_clearcuts.tail(2)

In [None]:
# Save the clipped clearcuts next to the notebook; nothing is written when the result is empty.
if not aoi_clearcuts.empty:
    aoi_clearcuts.to_file(os.path.join(WORKDIR, f"{aoi_name}_clearcuts.geojson"), driver='GeoJSON')