# OPERA-RTC

In [None]:
import os
from pathlib import Path
import warnings
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import xarray as xr
import asf_search as asf

#aws-related imports
from getpass import getpass
import json
import urllib.request

from vegmapper import s1
from vegmapper import pathurl

# Default matplotlib font size and figure size for the plots in this notebook
plt.rcParams['font.size'] = 18
plt.rcParams['figure.figsize'] = [16, 12]

## User Inputs
In the cell below, make sure to provide the following inputs
- **Define** site name.
- **Locate** or create a project directory.
- **Define** the path of the site boundary layer.
  - Currently expected to be inside the project directory
- **Define** the path to the boundary tile if needed (should be automatically created inside the project directory)
- **Define** the start and end times of interest
- **Provide** the location of the file that contains your NASA Earthdata token

In [None]:
# Site name
sitename = 'ucayali'

# Project directory (local path or cloud bucket URL)
# NOTE(review): the checks further down use pathlib.Path and os.path.exists on
# paths built from this value -- confirm that a cloud bucket URL really works.
proj_dir = './ucayali'

# AOI file
# NOTE(review): the file name hard-codes 'ucayali' instead of using `sitename`;
# update it by hand when changing the site.
aoifile = f'{proj_dir}/ucayali_boundary.geojson'

# Reference tiles
tiles = f'{proj_dir}/{sitename}_tiles.geojson'

# Start and end dates of interest (YYYY-MM-DD)
start_date = '2024-06-01' # June-September window; TODO: time the averaging for a single burst first, then for the whole Ucayali site
end_date = '2024-10-01'

# Sentinel-1 platform: S1A or S1B, S1 for both
platform = 'S1'

# RTC directory
rtc_dir = f'{proj_dir}/opera_rtc'

# NASA Earthdata token file (plain text file containing the Bearer Token)
ned_token = f'{proj_dir}/ned.txt'

In [None]:
# Check that the project directory exists (it is only reported, never created here)
proj_dir_path = Path(proj_dir)
if not proj_dir_path.exists():
    print(f"The project diretory: {proj_dir_path}, was not found. Please make sure its correct")
else:
    print(f"Current project directory: {proj_dir_path}")

# Verify that the AOI boundary file and the reference tiles file exist.
# Each file is checked on its own so a missing file is never reported as found
# just because the other one is present.
warnings.simplefilter("always", UserWarning)
aoifile_path = Path(aoifile)
tiles_path = Path(tiles)
for input_label, input_path in (("Boundary", aoifile_path), ("Tiles", tiles_path)):
    if input_path.is_file():
        print(f"{input_label} file found: {input_path}")
    else:
        # Warn (rather than raise) so the user can still fix the path and re-run
        warnings.warn(
            f"{input_label} file {input_path} was not found or does not exist, "
            "please make sure the file paths are correct before continuing",
            UserWarning
        )

## Processing steps 
1. [Granule Search](#1.-Granule-Search)
2. [Access RTC data and form temporal average](#2.-Access-RTC-data-and-form-temporal-average)
3. [Run RTC load + temp average](#3.-Run-RTC-load-+-temp-average)
4. [Create VRT tiles](#4.-Create-VRT-tiles)
5. [Export tiles to an S3 bucket](#5.-Export-tiles-to-an-S3-bucket)

In [None]:
# Plot AOI
# Read the AOI vector file into a GeoDataFrame and draw it; `gdf_aoi` is reused
# as the base layer of the later plots.
gdf_aoi = gpd.read_file(aoifile)
gdf_aoi.plot(figsize=(10, 10))

## 1. Granule Search

ASF DAAC uses *granules* and *scenes* interchangeably to refer to a Sentinel-1 product temporally and geographically, whereas *frames* are used to refer to the geolocation only for a Sentinel-1 product. The naming convention for a Sentinel-1 granule can be found [here](https://asf.alaska.edu/data-sets/sar-data-sets/sentinel-1/sentinel-1-data-and-imagery/). Each *frame* can be uniquely identified by a pair of *path* and *frame* numbers. In this section, we will search for Sentinel-1 granules that intersect with AOI and were acquired between the start and end dates.

### `s1.search_granules`

```
s1.search_granules(sitename, aoifile, start_date, end_date, skim=True, **search_opts)
```

Parameters:

|Parameters|Description|Required|Default|
|----|----|----|----|
|sitename|Site name|Yes||
|aoifile|AOI file in vector-based spatial data format (shapefile, GeoJSON, ...)|Yes||
|start_date|Start date (YYYY-MM-DD)|Yes||
|end_date|End date (YYYY-MM-DD)|Yes||
|skim|Skim the search results so only the frames that just cover the AOI are retained|No|True|
|search_opts|Additional keyword search options passed to the ASF Python module (asf_search). See [here](https://docs.asf.alaska.edu/asf_search/searching/).|No||

Returns:

|Returns|Description|
|----|----|
|gdf_granules|A GeoDataFrame containing all searched granules along with their detailed properties|
|gdf_frames|A GeoDataFrame of `gdf_granules` grouped by frames.|

In [None]:
# Reuse a previously saved granule query when one exists; otherwise run the
# search and save the result so later runs can skip it.
granule_query_file = f"{proj_dir}/{sitename}_granules.shp"
if not os.path.exists(granule_query_file):
    print(f"The file {granule_query_file} does not exist. Running query")
    # Restrict the search to the OPERA Sentinel-1 dataset
    search_opts = dict(dataset=asf.DATASET.OPERA_S1)
    gdf_granules = s1.search_granules(sitename, aoifile, start_date, end_date, skim=True, **search_opts)
    # Save the search result next to the other project files
    gdf_granules.to_file(granule_query_file, driver="ESRI Shapefile")
else:
    print(f"The file {granule_query_file} exists. Using existing file for current run.")
    print("- To run a new query please delete the current one in the project directory")
    gdf_granules = gpd.read_file(granule_query_file)

In [None]:
# Display the granule table as the cell output
gdf_granules

## Granules found over study area 

In [None]:
# Load the reference tiles and reproject them to EPSG:4326
gdf_tiles = gpd.read_file(tiles).to_crs(epsg=4326)

# Draw the AOI, then overlay granule outlines (red) and tile outlines (black)
ax = gdf_aoi.plot(figsize=(10, 10))
gdf_granules.boundary.plot(ax=ax, color='red')
gdf_tiles.boundary.plot(ax=ax, color='black')

# Fill in gray every tile whose 'mask' value is 0, if there are any
masked_out_tiles = gdf_tiles[gdf_tiles['mask'] == 0]
if len(masked_out_tiles) > 0:
    ax = masked_out_tiles.plot(ax=ax, color='gray')

## Burst count overview

In [None]:
# Build a per-burst summary GeoDataFrame: acquisition count, merged footprint
# and flight path number, then export it as GeoJSON.

# The RTC burst ID is the first 31 characters of the file ID,
# e.g. 'OPERA_L2_RTC-S1_T025-052685-IW1'
gdf_granules['burst_id'] = gdf_granules['fileID'].str[:31]


def _most_common_path(path_numbers):
    """Return the most frequent value of ``path_numbers`` as a single scalar.

    ``Series.mode()`` returns a Series (one entry per tied mode), which is not a
    valid aggregation result, so the first mode is taken. Falls back to the
    first element when ``mode()`` is empty (e.g. all values missing).
    """
    modes = path_numbers.mode()
    return modes.iloc[0] if not modes.empty else path_numbers.iloc[0]


# Number of acquisitions per burst ID
burst_counts = gdf_granules.groupby('burst_id').size().reset_index(name='count')

# Per burst ID: union of all footprints and the most common path number
grouped_bursts = gdf_granules.groupby('burst_id').agg({
    'geometry': lambda x: x.union_all(),
    'pathNumber': _most_common_path
})

# Turn the 'burst_id' index back into a regular column
grouped_bursts = grouped_bursts.reset_index()

# Attach geometry and path number to the per-burst counts
burst_summary = pd.merge(burst_counts, grouped_bursts, on='burst_id', how='left')

# Convert the DataFrame to a GeoDataFrame
burst_summary_gdf = gpd.GeoDataFrame(burst_summary, geometry='geometry')

# The aggregation can drop the CRS; restore it from the granules before exporting
if burst_summary_gdf.crs is None:
    burst_summary_gdf.set_crs(gdf_granules.crs, allow_override=True, inplace=True)

# Export the summary and show it as the cell output
burst_summary_gdf.to_file(f"{proj_dir}/{sitename}_burst_summary.geojson", driver="GeoJSON")
burst_summary_gdf

In [None]:
# Bar chart of the number of acquisitions for each burst in the summary
bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
bar_ax.bar(burst_summary_gdf.index, burst_summary_gdf['count'])
bar_ax.set_xlabel('Burst index number (grouped by burst ID)')
bar_ax.set_ylabel('Acquisitions per burst')
bar_ax.set_title('Burst acquisition summary')
bar_fig.tight_layout()
plt.show()

In [None]:
# Distinct acquisition-count values present in the burst summary
count_list = burst_summary_gdf['count'].unique()

# Select the rows of a (Geo)DataFrame whose 'count' column matches a given value
def filter_count_gdf(in_df, value):
    """Return the rows of ``in_df`` whose ``'count'`` column equals ``value``.

    The original index labels of the selected rows are preserved.
    """
    has_requested_count = in_df['count'] == value
    return in_df.loc[has_requested_count]

# One GeoDataFrame per distinct count value, used to draw each group separately
burst_count_gdfs = [
    gpd.GeoDataFrame(filter_count_gdf(burst_summary_gdf, count_value), geometry='geometry')
    for count_value in count_list
]

In [None]:
# Plot burst outlines over the AOI, one colour per distinct acquisition count
cmap = plt.colormaps.get_cmap('Set2')
ax = gdf_aoi.plot(figsize=(10, 10))
for i, count_gdf in enumerate(burst_count_gdfs):
    # Wrap the index: an integer past the colormap's last entry would otherwise
    # map every remaining group to the same (last) colour.
    count_gdf.boundary.plot(ax=ax, color=cmap(i % cmap.N))
gdf_tiles.boundary.plot(ax=ax, color='black')
# Fill in gray every tile whose 'mask' value is 0, if there are any
if (gdf_tiles['mask'] == 0).any():
    ax = gdf_tiles[gdf_tiles['mask'] == 0].plot(ax=ax, color='gray')

## 2. Access RTC data and form temporal average

- The following cells generate temporary credentials to access the ASF data bucket.
- These credentials allow us to retrieve the desired OPERA-RTC products.
- A free Earthdata Login account is required.
- If you do not have one yet, create it using the link below:
    - [Create Earthdata account](https://www.earthdata.nasa.gov/eosdis/science-system-description/eosdis-components/earthdata-login)
- Once you have an account, log in to Earthdata using your credentials.
- Next, generate a Bearer Token to access the data bucket by following the instructions below:
    - [Instructions for creating an EDL Bearer Token](https://urs.earthdata.nasa.gov/documentation/for_users/user_token)
- Copy your Bearer Token and save it in the token file defined in the User Inputs section (`ned_token`); the cell below reads the token from that file.

## Request S3 credentials
**Enter your Earthdata Login Bearer Token**

The cells below will create temporary credentials to access the OPERA-RTC data. 

In [None]:
# Load the Earthdata Bearer Token from the token file, dropping surrounding whitespace
token = Path(ned_token).read_text().strip()
print("Token successfully loaded from file.")

In [None]:
# Product prefix; used to fill the "Prefix" and "StaticPrefix" entries of `event`
prefix = "OPERA_L2_RTC-S1" 

In [None]:
# Settings for the credential request and the RTC data access.
# "CredentialsEndpoint" and "BearerToken" are read when requesting the temporary
# credentials; the whole dict is also passed to s1.run_rtc_temp_mean.
# NOTE(review): how "Bucket", "Prefix" and "StaticPrefix" are consumed is decided
# inside vegmapper.s1 -- confirm there.
event = {
    "CredentialsEndpoint": "https://cumulus.asf.alaska.edu/s3credentials",
    "BearerToken": token,
    "Bucket": "asf-cumulus-prod-opera-products",
    "Prefix": prefix,
    "StaticPrefix": f"{prefix}_STATIC"
}

In [None]:
# Get temporary download credentials from the credentials endpoint,
# authenticating with the Earthdata Bearer Token.
import urllib.error

tea_url = event["CredentialsEndpoint"]
bearer_token = event["BearerToken"]
req = urllib.request.Request(
    url=tea_url,
    headers={"Authorization": f"Bearer {bearer_token}"}
)
try:
    # The timeout keeps the notebook from hanging forever on a stalled connection
    with urllib.request.urlopen(req, timeout=60) as f:
        # The response body is JSON; parse it into a dict of credentials
        creds = json.loads(f.read().decode())
except urllib.error.HTTPError as err:
    # Add a hint, then re-raise the original error unchanged
    print(f"Credential request to {tea_url} failed with HTTP status {err.code}. "
          "Please check that the Bearer Token in the token file is valid and has not expired.")
    raise

## 3. Run RTC load + temp average 

In this section we will estimate the temporal mean for each available burst.
  
**Required inputs:**
- List of burst id's 
- Geodataframe with granule data
- Earthdata temporary credentials

`Outputs will be stored as:`
- opera_rtc/burstID_tmean_polarization.tif

`For example:` 
- opera_rtc/OPERA_L2_RTC-S1_T025-052685-IW1_tmean_VV.tif

In [None]:
# Unique burst IDs to process, taken from the burst summary
burst_id_list = burst_summary_gdf['burst_id'].unique().tolist()

# Dates without dashes (YYYYMMDD), used in the output file names
s_date, e_date = (date_str.replace('-', '') for date_str in (start_date, end_date))

# Create the temporal mean for all available bursts
s1.run_rtc_temp_mean(burst_id_list, gdf_granules, creds, event, rtc_dir, s_date, e_date)

#### Display a sample

In [None]:
# Read band 1 of the VV temporal mean of the first burst in the list
sample_tmean_path = f'{rtc_dir}/{burst_id_list[0]}_tmean_{s_date}_{e_date}_VV.tif'
with rasterio.open(sample_tmean_path) as dset:
    VV1 = dset.read(1)

In [None]:
# Show the sample temporal mean in grayscale, colour scale limited to 0-0.5
plt.imshow(VV1, vmin=0, vmax=0.5, cmap='Greys')

## 4. Create VRT tiles
In this section we will generate virtual raster tiles using the predefined reference tiles.

In [None]:
# Map the bursts onto the reference tiles
burst_tile_gdf = s1.map_burst2tile(tiles, burst_summary_gdf, rtc_dir)

# Build the tiles unless the check reports that all of them already exist
tile_info = s1.check_tiles_exist(burst_tile_gdf, f"{rtc_dir}/tile_vrts", sitename, s_date, e_date)
tiles_missing = tile_info != 'All tiles exist'
if tiles_missing:
    print('---> Creating tiles')
    s1.build_opera_vrt(burst_tile_gdf, rtc_dir, sitename, s_date, e_date)

### Display a sample tile

In [None]:
# Read band 1 of the VV tile h1_v1
sample_tile_path = f'{rtc_dir}/tile_vrts/s1_tile_{sitename}_{s_date}_{e_date}_h1_v1_VV.tif'
with rasterio.open(sample_tile_path) as dset:
    VV = dset.read(1)
# Replace zero-valued pixels with NaN
VV[VV == 0] = np.nan

In [None]:
# Plot VV of tile h1v1 in grayscale, colour scale limited to 0-0.5
plt.imshow(VV, vmin=0, vmax=0.5, cmap='Greys')

### Create RVI for each tile
The Radar Vegetation Index (RVI) is defined as:

$RVI = \frac{4CX}{CO + CX}$

where $CO$ and $CX$ are the co-polarized (VV or HH) and cross-polarized (VH or HV) radar backscatter in linear scale, respectively.

In [None]:
# Compute the RVI tiles for the tile_vrts directory
# NOTE(review): output file names and locations are decided inside vegmapper.s1 -- confirm there
s1.compute_rvi_tiles(f"{rtc_dir}/tile_vrts", sitename, s_date, e_date)

### Verify that all tiles have been created

In [None]:
# Check that all tiles exist
# The return value is discarded here.
# NOTE(review): presumably the function reports its findings itself -- confirm
_ = s1.check_tiles_exist(burst_tile_gdf, f"{rtc_dir}/tile_vrts", sitename, s_date, e_date)

In [None]:
# Optional cleanup: delete the temporal averages if they are not needed anymore.
# Uncomment the lines below to use it; `glob` is not imported at the top of this
# notebook, so the import is included here.
# import glob
# for rtc_path in glob.glob(f"{rtc_dir}/*_tmean_*.tif"):
#     os.remove(rtc_path)
#     print(f"Deleted: {rtc_path}")

In [None]:
# Create vrt mosaics
# NOTE(review): the following cell opens
# s1_tile_mosaic_{sitename}_{s_date}_{e_date}_VV.vrt in this directory, presumably
# written by this call -- confirm in vegmapper.s1
s1.create_vrt_mosaic(f"{rtc_dir}/tile_vrts", sitename, s_date, e_date)

In [None]:
# Read band 1 of the VV mosaic
mosaic_vv_path = f"{rtc_dir}/tile_vrts/s1_tile_mosaic_{sitename}_{s_date}_{e_date}_VV.vrt"
with rasterio.open(mosaic_vv_path) as dset:
    mosaic = dset.read(1)
# Replace zero-valued pixels with NaN
mosaic[mosaic == 0] = np.nan

In [None]:
# Plot VV of the mosaic in grayscale, colour scale limited to 0-0.5
plt.imshow(mosaic, vmin=0, vmax=0.5, cmap='Greys')

## 5. Export tiles to an S3 bucket

In [None]:
# Upload the tile files found under a local folder to an S3 bucket.
import boto3

# S3 bucket name
bucket_name = "your-s3-bucket-name"

# Local folder containing files to upload
local_folder = "/path/to/your/files"

# Directory name in the S3 bucket where files will be uploaded
s3_directory = "your-target-directory"

# File-name prefixes selected for upload. The tile and mosaic files opened
# earlier in this notebook are named 's1_tile_...', so that prefix is accepted
# in addition to the original 'tile' prefix (which alone would skip them).
tile_prefixes = ("tile", "s1_tile")

# Initialize S3 client
s3_client = boto3.client('s3')

# os.walk silently yields nothing for a missing folder, so report it explicitly
if not os.path.isdir(local_folder):
    print(f"Local folder not found: {local_folder}")

# Walk the folder (including sub-folders) and upload every matching file
uploaded_count = 0
for root, dirs, files in os.walk(local_folder):
    for file in files:
        # Skip anything that is not a tile file
        if not file.startswith(tile_prefixes):
            continue

        # Full local file path
        file_path = os.path.join(root, file)

        # S3 key: target directory + file name (sub-folder structure is not kept)
        s3_key = f"{s3_directory}/{file}"

        # Upload file
        s3_client.upload_file(file_path, bucket_name, s3_key)
        print(f"Uploaded {file_path} to s3://{bucket_name}/{s3_key}")
        uploaded_count += 1

# Make an empty run visible instead of finishing silently
if uploaded_count == 0:
    print(f"No files starting with {tile_prefixes} were found under {local_folder}")

In [None]:
# List contents in the specified S3 directory.
# A single list_objects_v2 call returns a limited number of keys, so a paginator
# is used to walk through every page of results.
paginator = s3_client.get_paginator('list_objects_v2')

found_any = False
for page in paginator.paginate(Bucket=bucket_name, Prefix=f"{s3_directory}/"):
    # Pages without matching objects carry no 'Contents' entry
    for obj in page.get('Contents', []):
        if not found_any:
            # Print the header once, just before the first key
            print(f"Files in s3://{bucket_name}/{s3_directory}/:")
            found_any = True
        print(obj['Key'])  # Print the full S3 key of each object

if not found_any:
    print(f"No files found in s3://{bucket_name}/{s3_directory}/")