# **OPERA RTC CalVal Mosaics**

Author: Alex Lewandowski

**This notebook prepares RTC-ADT CalVal v0.4 release mosaics from sample bursts located in `s3://opera-pst-rs-pop1/products/RTC_S1/`**

1. Input a Sentinel-1 scene ID
1. Identify burst IDs using ASF burst extractor
1. Download bursts
1. Reproject bursts to predominant UTM
1. Mosaic bursts

In [None]:
from collections import Counter
from datetime import datetime
from pathlib import Path
import re
import time
from typing import Union, List, Dict

import boto3
from ipyfilechooser import FileChooser
from ipywidgets import Layout
import numpy as np
from osgeo import gdal
import requests
from tqdm.auto import tqdm

import asf_search as disco

gdal.UseExceptions()

### **1. Input S1 scene name and identify bursts**

As of 08-16-2023, v0.4 processed sample burst data are available for the scenes listed in this spreadsheet: https://docs.google.com/spreadsheets/d/1tdj59dZpyx_BFO1NDc46CnW1Bt9UhNs2OK-hTUdhLh0/edit?usp=sharing

In [None]:
session = requests.Session()

# Strip surrounding whitespace so a granule ID pasted with a trailing space or
# newline does not end up inside the request URLs built below.
scene = input("Enter a Sentinel-1 granule ID").strip()
polarizations = ['VV', 'VH']
beam_modes = ['IW1', 'IW2', 'IW3']
bursts = range(15)  # burst indices 0-14 are probed for every swath/polarization pair
poll_interval_seconds = 5  # pause between passes while any request still returns 202

# Zero-padded path number prefixed with "T" (e.g. "T064"); it is the first
# component of every OPERA burst name assembled below.
# NOTE(review): the second search result ([1]) is used here — confirm that this
# is the intended product for every scene.
orbital_path = f"T{disco.granule_search(scene)[1].properties['pathNumber']:03d}"

# Query the burst extractor for every swath/polarization/burst combination.
# A 200 response yields a burst name; a 202 response schedules another full
# pass (presumably the service is still preparing that burst — confirm against
# the ASF documentation); 404 responses are ignored silently.
run = True
while run:
    # The list is rebuilt from scratch on every pass so that repeated passes
    # do not append the same burst twice.
    opera_bursts = []
    run = False
    for beam_mode in beam_modes:
        for polarization in polarizations:
            for burst in bursts:
                url = f'https://sentinel1-burst.asf.alaska.edu/{scene}/{beam_mode}/{polarization}/{burst}.xml'
                response = session.get(url)
                status = response.status_code
                if status != 404:
                    print(f'{url} {status}')
                    if status == 200:
                        print(response.headers['content-disposition'])
                        # Fields 1 and 2 of the "_"-separated file name are joined
                        # with "-" to form the "<burst number>-<swath>" part of the name.
                        burst_swath = '-'.join(response.headers['content-disposition'].split("filename=")[-1].split('_')[1:3])
                        opera_bursts.append(f"OPERA_L2_RTC-S1_{orbital_path}-{burst_swath}")
                    elif status == 202:
                        run = True
    if run:
        # Wait before the next pass instead of immediately re-sending every request.
        time.sleep(poll_interval_seconds)

In [None]:
# Number of burst names collected above. One name is appended per HTTP 200
# response, so a burst found for both VV and VH is counted twice.
len(opera_bursts)

### **2. Create data directories**

In [None]:
# Interactive directory picker, rooted at the current working directory.
chooser_layout = Layout(width='1000px')
fc = FileChooser(Path.cwd(), layout=chooser_layout)

print("Select the directory in which to store your OPERA data")
display(fc)

In [None]:
# Root chosen in the file chooser; everything for this scene lives in one sub-directory.
opera_dir = Path(fc.selected)
scene_dir = opera_dir / f"OPERA_RTC_{scene}"

# One sub-directory per dataset, plus one for the finished mosaics.
vh_dir, vv_dir, inc_angle_dir, local_inc_angle_dir, ls_mask_dir, mosaic_dir = (
    scene_dir / folder_name
    for folder_name in (
        "vh",
        "vv",
        "inc_angle",
        "local_inc_angle_dir",
        "ls_mask_dir",
        f"OPERA_RTC_mosaic_{scene}",
    )
)

# The scene directory comes first so that it exists before its children are created.
for directory in (scene_dir, vh_dir, vv_dir, inc_angle_dir, local_inc_angle_dir, ls_mask_dir, mosaic_dir):
    directory.mkdir(exist_ok=True)

### **3. Build dictionary of burst level backscatter and static file S3 URIs**

In [None]:
# Pull the 8-digit acquisition date (YYYYMMDD) out of the scene name: the first
# date that directly follows "_" and is followed by "T<6 digits>_<8 digits>T<6 digits>".
# Raw string so "\d" reaches the regex engine without Python treating it as an
# (invalid) string escape sequence.
s1_date_regex = r'(?<=_)\d{8}(?=T\d{6}_\d{8}T\d{6})'
s1_date_match = re.search(s1_date_regex, scene)
if s1_date_match is None:
    raise Exception("date string not found in Sentinel-1 scene name")
s1_date_str = s1_date_match.group(0)
s1_date_str

In [None]:
bucket_name = 'opera-pst-rs-pop1'
s3 = boto3.resource('s3')
my_bucket = s3.Bucket(bucket_name)

# Per-dataset bookkeeping: the local destination directory and the S3 URIs found for it.
opera_ds = {
    ds_key: {'dst_dir': dst_dir, 's3_uri': []}
    for ds_key, dst_dir in (
        ('vv', vv_dir),
        ('vh', vh_dir),
        ('inc_angle', inc_angle_dir),
        ('local_inc_angle', local_inc_angle_dir),
        ('ls_mask', ls_mask_dir),
    )
}

vh_regex = '.*_VH.tif$'
vv_regex = '.*_VV.tif$'
ls_regex = '.*_static_layers/.*v0.4_static_layover_shadow_mask.tif$'
inc_angle_regex = '.*_static_layers/.*v0.4_static_incidence_angle.tif$'
local_inc_angle_regex = '.*_static_layers/.*v0.4_static_local_incidence_angle.tif$'

# Ordered classification rules: (dataset key, key pattern, must the key contain the scene date?).
# The first rule that is satisfied claims the object; objects matching no rule are ignored.
classification_rules = (
    ('vh', vh_regex, True),
    ('vv', vv_regex, True),
    ('ls_mask', ls_regex, False),
    ('inc_angle', inc_angle_regex, False),
    ('local_inc_angle', local_inc_angle_regex, False),
)

for burst in tqdm(opera_bursts):
    for object_summary in my_bucket.objects.filter(Prefix=f"products/RTC_S1/{burst}"):
        object_key = object_summary.key
        for ds_key, key_pattern, needs_scene_date in classification_rules:
            if needs_scene_date and s1_date_str not in object_key:
                continue
            if re.search(key_pattern, object_key):
                opera_ds[ds_key]['s3_uri'].append(f"s3://{bucket_name}/{object_key}")
                break

### **4. Filter duplicated bursts for those most recently processed**

The bucket contains bursts from multiple processing runs, some of which used different orbit data. The August 3, 2023 processing run used precise orbits.

In [None]:
def burst_id_from_path(pth):
    """
    Takes: a string path or URI containing an OPERA burst ID directly after "S1_"

    Returns: the first burst ID found, formatted like "T064-135524-IW1"

    Raises: Exception if the path contains no burst ID
    """
    # Raw string so "\d" is passed to the regex engine untouched instead of
    # being parsed as an (invalid) Python string escape sequence.
    burst_id_regex = r'(?<=S1_)T\d{3}-\d{6}-IW(1|2|3)'
    results = re.search(burst_id_regex, pth)
    if results:
        return results.group(0)
    else:
        raise Exception(f"No burst ID found in path: {pth}")
        
def acquisition_timestamp_from_path(pth):
    """
    Takes: a string path or URI containing two consecutive "_"-separated timestamps

    Returns: the first timestamp (formatted like "20230516T140035Z") that follows
             "_" and is itself followed by "_" and a second timestamp

    Raises: Exception if no such timestamp is found
    """
    # Raw string so "\d" is passed to the regex engine untouched instead of
    # being parsed as an (invalid) Python string escape sequence.
    s1_date_regex = r'(?<=_)\d{8}T\d{6}Z(?=_\d{8}T\d{6})'
    timestamp_str = re.search(s1_date_regex, pth)
    if timestamp_str:
        return timestamp_str.group(0)
    else:
        raise Exception(f"acquisition timestamp string not found in Sentinel-1 path: {pth}")
        
def burst_id_acquisition_timestamp_from_path(pth):
    """
    Takes: a string path or URI containing "<burst ID>_<timestamp>_<timestamp>" after "S1_"

    Returns: the burst ID and the first timestamp joined by "_",
             formatted like "T064-135524-IW1_20230516T140035Z"

    Raises: Exception if no such "burst-ID_timestamp" string is found
    """
    # Raw string so "\d" is passed to the regex engine untouched instead of
    # being parsed as an (invalid) Python string escape sequence.
    burst_id_regex = r'(?<=S1_)T\d{3}-\d{6}-IW(1|2|3)_\d{8}T\d{6}Z(?=_\d{8}T\d{6})'
    results = re.search(burst_id_regex, pth)
    if results:
        return results.group(0)
    else:
        raise Exception(f'No "burst-ID_timestamp"  found in path: {pth}')
        


def processing_timestamp_from_path(pth):
    """
    Takes: a string path or URI containing "_<timestamp>_<timestamp>_S1"

    Returns: the second timestamp (formatted like "20230803T181201Z"), i.e. the
             one preceded by "_<timestamp>_" and followed by "_S1"

    Raises: Exception if no such timestamp is found
    """
    # Raw string so "\d" is passed to the regex engine untouched instead of
    # being parsed as an (invalid) Python string escape sequence.
    s1_date_regex = r'(?<=_\d{8}T\d{6}Z_)\d{8}T\d{6}Z(?=_S1)'
    timestamp_str = re.search(s1_date_regex, pth)
    if timestamp_str:
        return timestamp_str.group(0)
    else:
        raise Exception(f"processing timestamp string not found in Sentinel-1 path: {pth}")

In [None]:
def most_recently_processed_burst_filter(path_list):
    """
    Takes: a list of string paths or URIs to burst products

    Returns: a list holding one path per "burst-ID_acquisition-timestamp", namely
             the one with the latest processing timestamp. When processing
             timestamps tie, the path seen first is kept. Entries appear in the
             order in which each burst was first encountered.
    """
    timestamp_format = "%Y%m%dT%H%M%SZ"
    newest_by_burst = {}
    for candidate in path_list:
        burst_key = burst_id_acquisition_timestamp_from_path(candidate)
        candidate_ts = processing_timestamp_from_path(candidate)

        # First time this burst/acquisition is seen: nothing to compare against.
        if burst_key not in newest_by_burst:
            newest_by_burst[burst_key] = candidate
            continue

        kept_ts = processing_timestamp_from_path(newest_by_burst[burst_key])
        kept_dt = datetime.strptime(kept_ts, timestamp_format)
        candidate_dt = datetime.strptime(candidate_ts, timestamp_format)
        if kept_dt < candidate_dt:
            newest_by_burst[burst_key] = candidate
    return list(newest_by_burst.values())
        

In [None]:
%%capture
# Replace each dataset's URI list with its de-duplicated version (newest processing run wins).
for ds, ds_entry in opera_ds.items():
    ds_entry['s3_uri'] = most_recently_processed_burst_filter(ds_entry['s3_uri'])

In [None]:
# Report how many URIs remain per dataset after filtering.
for ds, ds_entry in opera_ds.items():
    uri_count = len(ds_entry['s3_uri'])
    print(f"{ds} length: {uri_count}")

### **5. Download the data**

In [None]:
# Copy every selected S3 object into its dataset's local directory with the AWS CLI,
# keeping the object's file name (the last "/"-separated component of the URI).
# NOTE(review): the shell command is built by plain interpolation, so a destination
# path containing spaces would presumably be split by the shell — confirm before
# choosing such a directory.
for ds in tqdm(opera_ds):
    for burst in opera_ds[ds]['s3_uri']:
        !aws s3 cp {burst} {opera_ds[ds]['dst_dir']}/{burst.split('/')[-1]}

### **6. Reproject all bursts (backscatter and static files) to predominant UTM**

In [None]:
def get_projection(img_path: Union[Path, str]) -> Union[str, None]:
    """
    Takes: a string or posix path to a product in a UTM projection

    Returns: the EPSG code (as a string of 4-5 digits) taken from the trailing
             ID["EPSG",<code>]] element of the product's WKT, or None if the
             metadata has no coordinate system / WKT or the WKT does not end
             with such an element

    Raises: FileNotFoundError if GDAL returns metadata that cannot be indexed
            (e.g. None instead of a dictionary)
    """
    img_path = str(img_path)
    try:
        info = gdal.Info(img_path, format='json')['coordinateSystem']['wkt']
    except KeyError:
        return None
    except TypeError:
        # NOTE(review): presumably reached when gdal.Info returns None for an
        # unreadable path — confirm how this interacts with gdal.UseExceptions().
        raise FileNotFoundError(f"GDAL returned no metadata for: {img_path}")

    # Raw string so "\[" and "\]" reach the regex engine untouched instead of
    # being parsed as (invalid) Python string escape sequences. The capture
    # group isolates the code, replacing the previous split/slice parsing.
    regex = r'ID\["EPSG",([0-9]{4,5})\]\]$'
    results = re.search(regex, info)
    if results:
        return results.group(1)
    else:
        return None
    
def get_projections(tiff_paths: List[Union[Path, str]]) -> Dict:
    """
    Takes: List of string or posix paths to geotiffs

    Returns: Dictionary mapping each projection reported by get_projection
             (an EPSG string, or None) to the number of tiffs that use it,
             in order of first appearance
    """
    projection_counts = Counter(get_projection(tiff) for tiff in tiff_paths)
    return dict(projection_counts)

In [None]:
# Downloaded rasters, grouped by dataset.
vv_paths = [*vv_dir.glob("*VV.tif")]
vh_paths = [*vh_dir.glob("*VH.tif")]
local_inc_angle_paths = [*local_inc_angle_dir.glob("*static_local_incidence_angle.tif")]
inc_angle_paths = [*inc_angle_dir.glob("*static_incidence_angle.tif")]
ls_mask_paths = [*ls_mask_dir.glob("*static_layover_shadow_mask.tif")]

# The VH bursts decide the target projection. When they all share a single
# projection, nothing needs reprojecting and predominant_epsg stays None.
epsgs = get_projections(vh_paths)
if len(epsgs) == 1:
    predominant_epsg = None
else:
    predominant_epsg = max(epsgs, key=lambda epsg_code: epsgs.get(epsg_code))

In [None]:
# Projection of the VH bursts -> number of VH bursts in that projection.
epsgs

In [None]:
# Projection shared by the most VH bursts, or None when all VH bursts already share one projection.
predominant_epsg

In [None]:
def get_res(tiff):
    """
    Takes: a string or posix path to a raster that GDAL can open

    Returns: element 1 of the raster's geotransform
             (presumably the pixel width — confirm against the GDAL docs)
    """
    dataset = gdal.Open(str(tiff))
    geotransform = dataset.GetGeoTransform()
    return geotransform[1]

def get_no_data_val(pth):
    """
    Takes: a string or posix path to a raster that GDAL can open

    Returns: the no-data value reported for band 1, or np.nan when band 1
             reports None (no value declared)
    """
    f = gdal.Open(str(pth))
    no_data_val = f.GetRasterBand(1).GetNoDataValue()
    # Compare against None explicitly: a truthiness test would also discard a
    # legitimate no-data value of 0 and wrongly substitute NaN for it.
    return np.nan if no_data_val is None else no_data_val

In [None]:
# Reproject, in place, every raster whose projection differs from the predominant one.
# Skipped entirely when predominant_epsg is None (or otherwise falsy).
if predominant_epsg:
    for path_group in (vh_paths, vv_paths, local_inc_angle_paths, inc_angle_paths, ls_mask_paths):
        for raster_path in path_group:
            src_SRS = get_projection(str(raster_path))
            if src_SRS == predominant_epsg:
                continue

            # Read resolution and no-data value while the file still has its original name.
            pixel_size = get_res(raster_path)
            no_data_val = get_no_data_val(raster_path)

            # Move the source aside so the warped output can take over the original file name.
            temp = raster_path.parent / f"temp_{raster_path.stem}.tif"
            raster_path.rename(temp)

            gdal.Warp(
                str(raster_path),
                str(temp),
                dstSRS=f"EPSG:{predominant_epsg}",
                srcSRS=f"EPSG:{src_SRS}",
                targetAlignedPixels=True,
                xRes=pixel_size,
                yRes=pixel_size,
                dstNodata=no_data_val,
            )
            temp.unlink()

### **7. Merge bursts to create mosaics**

In [None]:
ds_pth_dict = {
    'vh': vh_paths,
    'vv': vv_paths,
    'inc_angle': inc_angle_paths,
    'local_inc_angle': local_inc_angle_paths,
    'ls_mask': ls_mask_paths
}

for ds in tqdm(ds_pth_dict):
    merge_str = ''
    for pth in ds_pth_dict[ds]:
        merge_str = f"{merge_str} {str(pth)}"

    no_data_val = get_no_data_val(ds_pth_dict[ds][0])


    output = mosaic_dir/f"OPERA_RTC_v0.4_{ds}_{scene}_mosaic.tif"
    merge_command = f"gdal_merge.py -n {no_data_val} -a_nodata {no_data_val} -o {output} {merge_str}"
    print(merge_command)
    !{merge_command}

# If your burst was improperly named by ESA, you can find the correct ID using the cell below

Note: the cell below is for demonstration purposes and would require editing for a different burst

In [None]:
# Imports kept local to this optional demonstration cell.
import s1reader as s1
from datetime import datetime as dt

# Burst ID object built from hard-coded demonstration values.
# NOTE(review): the first two arguments are passed as strings — confirm the
# types S1BurstId expects.
b = s1.s1_burst_id.S1BurstId('1', '002167', 'IW1')

# Hard-coded example timestamps (microsecond precision) that are passed to
# from_burst_params in the next cell; edit them for a different burst.
sensing_time = dt.strptime("2021-04-19T18:01:02.204673", "%Y-%m-%dT%H:%M:%S.%f")
ascending_node_dt = dt.strptime("2021-04-19T16:21:50.499478", "%Y-%m-%dT%H:%M:%S.%f")

In [None]:
# Build a burst ID from the timing parameters above; the result is shown as the cell output.
# NOTE(review): the arguments 1, 1 and 'IW1' are demonstration values — presumably
# start track, end track and subswath; confirm against the s1reader documentation.
b.from_burst_params(sensing_time, ascending_node_dt, 1, 1, 'IW1')