In [None]:
import pathlib
import math
import datetime

from ipyfilechooser import FileChooser
import rasterio
from rasterio.mask import mask
from shapely import geometry
from osgeo import gdal
import pandas as pd
import numpy as np

import opensarlab_lib as asfn

### Some Prerequisites

In [None]:
# Get working directory of notebook
CWD = pathlib.Path().absolute()

# Set some paths to be used later. Each pipeline stage writes into its own
# sub-directory of work/ so the intermediate rasters can be inspected.
ORIGINAL_DIR = CWD / "work/original"
SUPERSET_DIR = CWD / "work/superset"
FLATTEN_DIR = CWD / "work/flatten"
TILES_DIR = CWD / "work/tiles"
RESULTS_DIR = CWD / "work/correlation"

# Create the directories with pathlib instead of shelling out to `mkdir -p`,
# so the cell does not depend on a POSIX shell. parents=True also creates
# work/ itself; exist_ok=True keeps the cell safe to re-run.
for stage_dir in (ORIGINAL_DIR, SUPERSET_DIR, FLATTEN_DIR, TILES_DIR, RESULTS_DIR):
    stage_dir.mkdir(parents=True, exist_ok=True)

# Only the last expression of a cell is displayed, so show CWD here.
CWD

#### Clear out work directory

In [None]:
clear_out = input("Clear out work directory? (YES, NO)")

if clear_out == 'YES':
    !rm -rf "{CWD}/work/*"

### 1. Select VVs

Choose the parent directory whose child directories contain the stack of VVs.

In [None]:
# Build a file chooser widget rooted at the notebook's data/ directory.
fc = FileChooser(f'{CWD}/data/')
# Restrict the widget to directories only.
fc.show_only_dirs = True
# Render the widget; the selection is read from `fc` in the next cell.
display(fc)

In [None]:
parent_directory = pathlib.Path(fc.selected_path).absolute()

# Find all VVs within
all_vv_paths = parent_directory.glob(f"**/*_VV.tif")
print(list(all_vv_paths))

# Move desired products to work directory.
for source_path in all_vv_paths:
    print(f"Copying {source_path} to {CWD}/work/original/{source_path.name}")
    !cp "{source_path}" "{CWD}/work/original/{source_path.name}"

### 2. Superset VVs

Scene frames have a tendency to move over time. This means that the extent coverage for the whole scene is always different per frame. For the cross-correlation to properly work and for more accurate comparison, all the scenes need to be "normalized" by increasing/decreasing the size of the square extent.

From extent metadata, get the full superset coordinates for all stack scenes.

In [None]:
# Open all the tiffs and get overall coords.
# Start from an "empty" box so the first raster always widens it.
superset = {
    'left': math.inf,
    'bottom': math.inf,
    'right': -math.inf,
    'top': -math.inf
}

# The SRS is set to the first raster. It is assumed that the SRSs are the same (or close enough) for all.
output_srs = None

# Materialise the glob so an empty stack can be detected, and sort it so the
# raster that supplies the SRS is the same on every run.
vv_original_paths = sorted(ORIGINAL_DIR.glob("*_VV.tif"))
if not vv_original_paths:
    raise FileNotFoundError(f"No *_VV.tif files found in {ORIGINAL_DIR}")

for original_path in vv_original_paths:

    # Only the metadata is needed; the context manager closes the dataset
    # again instead of leaving one open handle per scene.
    with rasterio.open(original_path) as raster:
        raster_bounds = raster.bounds
        raster_crs = raster.crs
    print(raster_bounds)

    if output_srs is None:
        output_srs = raster_crs

    # Grow the box to the union of itself and this raster's bounds.
    superset = {
        'left': min(superset['left'], raster_bounds.left),
        'bottom': min(superset['bottom'], raster_bounds.bottom),
        'right': max(superset['right'], raster_bounds.right),
        'top': max(superset['top'], raster_bounds.top)
    }

print(f"Superset box coords: {superset}")
print(f"Output SRS: {output_srs}")

In [None]:
# Bounds in the (left, bottom, right, top) order passed to gdal.Warp below.
output_bounds = (
    superset['left'],
    superset['bottom'],
    superset['right'],
    superset['top'],
)

print(f"Output bounds (superset) set to '{output_bounds}'")
print(f"Output SRS set to '{output_srs}'")

# Superset and save VVs
vv_original_paths = sorted(ORIGINAL_DIR.glob("*_VV.tif"))
for original_path in vv_original_paths:

    # Build the target from the file name only. A textual replace of
    # 'original' on the full path would also rewrite any other occurrence
    # of that word (in CWD or in the file name itself).
    superset_path = SUPERSET_DIR / original_path.name
    print(f"Taking {original_path} and supersetting to {superset_path}")

    warped = gdal.Warp(
        str(superset_path),
        str(original_path),
        outputBounds=output_bounds,
        outputBoundsSRS=output_srs
    )
    # When GDAL exceptions are not enabled, a failed warp is reported by
    # returning None; surface that instead of continuing silently.
    if warped is None:
        raise RuntimeError(f"gdal.Warp failed for {original_path}")
    # Drop the reference so GDAL closes the dataset and flushes it to disk.
    warped = None
    

### 3. Flatten and Save VVs

Often the VVs have extraneous high and low values that make matching difficult. So we need to get rid of these and save the intermediate results.

In [None]:
def flatten(df: pd.DataFrame) -> pd.DataFrame:
    """
    Truncated values become NaNs

    Values below the 1st percentile or above the 99th percentile of ``df``
    are replaced by NaN. Both percentiles are computed over all non-NaN
    values of the input before anything is masked, so removing the low
    tail does not shift the upper threshold.

    Parameters
    ----------
    df : pd.DataFrame
        Numeric data; existing NaNs are ignored when computing percentiles.

    Returns
    -------
    pd.DataFrame
        A new frame of the same shape. The input frame is left untouched.
    """
    lower, upper = np.nanpercentile(df, [1, 99])
    # where() keeps entries for which the condition holds and fills the rest
    # with NaN; comparisons against NaN are False, so input NaNs stay NaN.
    return df.where((df >= lower) & (df <= upper))

# Flatten and save VVs
superset_vv_paths = sorted(SUPERSET_DIR.glob("*_VV.tif"))

for superset_path in superset_vv_paths:
    print(f"Flattening {superset_path}")

    # Convert raster to dataframe. Everything needed is read inside the
    # context manager so the source dataset is closed before writing.
    with rasterio.open(superset_path) as raster:
        raster_metadata = raster.meta
        df_superset = pd.DataFrame(raster.read(1))

    # Flatten raster data
    df_flatten = flatten(df_superset)

    # Build the target from the file name only; a textual replace of
    # 'superset' on the full path would also rewrite any other occurrence.
    flatten_path = FLATTEN_DIR / superset_path.name

    with rasterio.open(flatten_path, 'w', **raster_metadata) as out:
        # Hand rasterio a plain 2-D array rather than a DataFrame.
        out.write(df_flatten.to_numpy(), 1)

### 4. Tile and Save VVs

In [None]:
# Number of tiles per scene along x (columns) and y (rows); passed to
# splitImageIntoCells as x_num / y_num in the tiling cell.
X_NUM = 8
Y_NUM = 8

# https://gis.stackexchange.com/a/306862
def splitImageIntoCells(input_file: str, output_dir: str, x_num=1, y_num=1):
    """
    Split a raster into a grid of x_num * y_num tiles and write each as a GeoTIFF.

    Tiles are named ``<input stem>_<row>_<col>.tif`` and written to
    ``output_dir``. The tile size is the raster size floor-divided by the
    grid counts, so leftover pixels on the right/bottom edges that do not
    fill a whole tile are not written.

    Parameters
    ----------
    input_file : path of the raster to split.
    output_dir : existing directory that receives the tiles.
    x_num : number of tiles along the x (column) direction.
    y_num : number of tiles along the y (row) direction.
    """
    input_filestem = pathlib.Path(input_file).stem

    # Keep the source open only for the duration of the tiling.
    with rasterio.open(input_file) as raster:

        x_dim = raster.shape[1] // x_num
        y_dim = raster.shape[0] // y_num

        for y_iter in range(y_num):
            y = y_iter * y_dim
            for x_iter in range(x_num):
                x = x_iter * x_dim

                output_file = f'{input_filestem}_{y_iter}_{x_iter}.tif'
                print(f"Creating tile {output_file}...")

                # Get tile geometry: map the pixel corners of the tile to
                # map coordinates through the raster's affine transform.
                corner1 = raster.transform * (x, y)
                corner2 = raster.transform * (x + x_dim, y + y_dim)
                geom = geometry.box(corner1[0], corner1[1], corner2[0], corner2[1])

                # Get cell
                crop, cropTransform = mask(raster, [geom], crop=True)

                # `raster.meta` builds a new dict on every access, so calling
                # .update() on it directly is lost. Keep our own copy and
                # write the tile with that.
                tile_meta = raster.meta.copy()
                tile_meta.update(
                    {
                        "driver": "GTiff",
                        "height": crop.shape[1],
                        "width": crop.shape[2],
                        "transform": cropTransform,
                        "crs": raster.crs
                    }
                )

                # If crop tile is more than 10% NANs, make whole tile NANs
                # ??

                output_filepath = f"{output_dir}/{output_file}"
                with rasterio.open(output_filepath, "w", **tile_meta) as out:
                    out.write(crop)

In [None]:
flatten_vv_paths = pathlib.Path(f"{CWD}/work/flatten").glob(f"*_VV.tif")

# `datetime` is imported as a module at the top of the notebook, so the
# class has to be qualified: datetime.now() raises AttributeError.
start_time = datetime.datetime.now()

# Split every flattened scene into an X_NUM x Y_NUM grid of tiles.
for flatten_path in flatten_vv_paths:
    print(f"Tileing {flatten_path}")
    splitImageIntoCells(flatten_path, f"{CWD}/work/tiles", x_num=X_NUM, y_num=Y_NUM)

end_time = datetime.datetime.now()
print(f"\nEnd time is {end_time}")
print(f"Time elapsed is {end_time - start_time}\n")

### 5. Correlate Tiles and Save Results

In [None]:
# Tiles are written to work/tiles (not work/tiled) and are named
# <scene>_VV_<row>_<col>.tif, so the scene-level pattern "*_VV.tif" never
# matches them. Sorted so the iteration order is the same on every run.
tiled_vv_paths = sorted(pathlib.Path(f"{CWD}/work/tiles").glob("*_VV_*_*.tif"))

start_time = datetime.datetime.now()

def order_tiles():
    """Placeholder for tile ordering; not implemented yet, returns None."""
    return None

for tiled_path in tiled_vv_paths:
    print(f"Correlating {tiled_path}")

    # TODO: correlate each tile against its reference, e.g.
    #shift, error, phase = phase_cross_correlation(
    #    df_ref.replace(np.nan, 0),
    #    df_sec.replace(np.nan, 0),
    #    normalization=None
    #)

    # Put results into files to read for analysis

end_time = datetime.datetime.now()
print(f"\nEnd time is {end_time}")
print(f"Time elapsed is {end_time - start_time}\n")

### 6. Do Analysis on Tiles

In [None]:
# Put results into 3D Pandas dataset

# Do analysis:
# 1. Slice mean and stdev
# 2. Temporal trend mean and stdev
# 3. Global mean and stdev