In [None]:
# Main notebook task
# Read the RTC data and collect a 3x3 window of values centered on each site.
# Store the result in a CSV file and plot it.

In [None]:
%matplotlib inline
import os
import ast
import geopandas as gpd
import pandas as pd
import numpy as np
from val_io import get_burst_time_series_around_point
import matplotlib.pyplot as plt

In [None]:
class _NonFiniteFloatNames(ast.NodeTransformer):
    """Rewrite the bare names ``nan`` and ``inf`` into float constants."""

    def visit_Name(self, node):
        if node.id in ('nan', 'inf'):
            return ast.copy_location(ast.Constant(value=float(node.id)), node)
        return node


def convert_to_list(v):
    """Turn the string form of a Python literal (e.g. a list cell read from a CSV) back into an object.

    Parameters
    ----------
    v : any
        Value to convert. Only ``str`` inputs are parsed.

    Returns
    -------
    The parsed literal when ``v`` is a string; otherwise ``v`` unchanged.

    Raises
    ------
    ValueError
        If the string contains anything other than Python literals and the
        float spellings ``nan`` / ``inf`` / ``-inf``.
    SyntaxError
        If the string is not valid Python syntax.
    """
    if not isinstance(v, str):
        return v
    try:
        return ast.literal_eval(v)
    except ValueError:
        # str() of a list of floats spells non-finite values as the bare names
        # nan / inf, which literal_eval rejects. Swap those names for float
        # constants and evaluate the patched tree; any other name still fails.
        tree = _NonFiniteFloatNames().visit(ast.parse(v.strip(), mode='eval'))
        return ast.literal_eval(tree)

### Load necessary tables

In [None]:
# Site table: GeoJSON fetched directly from the GitHub repository
df_sites = gpd.read_file(
    'https://github.com/taliboliver/dist-s1-validation-harness/blob/dev/data/val_sites_subset.geojson?raw=true'
)

# Burst table: read from a local parquet file
df_val_bursts = gpd.read_parquet('./validation_bursts_v1_coverage_updated.parquet')

# Distinct site IDs; print how many there are, then the IDs themselves
unique_site_ids = df_sites.site_id.unique().tolist()
print(len(unique_site_ids), unique_site_ids, sep='\n')

### Run for all sites
- The cell below will iterate over all sites and their respective overlapping bursts. 
- It will generate a csv table using a 3x3 window centered at the particular site latitude and longitude.
- The script will read the output csv and create a png plot.
- The outputs are directed to the /tables and /plots directories inside the predefined out_dir. Please ensure these two directories exist.

In [None]:
# Define run parameters
# out_dir: base output folder. The processing loop writes CSV tables to
#          <out_dir>/tables and PNG figures to <out_dir>/plots.
#          NOTE(review): this is a machine-specific absolute path; point it at
#          your own working directory before running.
out_dir = '/Users/cabrera/Documents/jpl_projects/opera_dist/dist-s1-research/oliver/rtc_analysis'
# create_plot: a figure is produced for each newly written table only when
#              this is exactly the string 'yes'.
create_plot = 'yes'

In [None]:
# Fill value written to the vv/vh column wherever the ratio is undefined (NaN or +/-inf)
RATIO_NODATA = -9999


def _window_mean(values, nodata=None):
    """Average one row's list of window values, skipping entries equal to `nodata`.

    Non-list inputs are returned unchanged. A list with no usable entries
    yields NaN instead of raising ZeroDivisionError.
    """
    if not isinstance(values, list):
        return values
    usable = [x for x in values if x != nodata]
    return sum(usable) / len(usable) if usable else float('nan')


# Make sure the output folders exist before anything is written into them
os.makedirs(f'{out_dir}/tables', exist_ok=True)
if create_plot == 'yes':
    os.makedirs(f'{out_dir}/plots', exist_ok=True)

# The RTC table is identical for every site/burst, so it is read at most once,
# and only when some burst actually still needs processing.
df_rtc = None

# Loop over sites
for SITE_ID in unique_site_ids:
    # Rows of the site table for this site
    df_site = df_sites[df_sites.site_id == SITE_ID].reset_index(drop=True)

    # Site location taken from the x / y of the first geometry
    geo = df_site.geometry.iloc[0]
    lon, lat = geo.x, geo.y

    # Bursts whose ID is listed for this site
    df_bursts_for_site = df_val_bursts[df_val_bursts.jpl_burst_id.isin(df_site.jpl_burst_id)].reset_index(drop=True)

    # Loop over bursts and read data
    for BURST_ID in df_bursts_for_site.jpl_burst_id.tolist():
        # Skip bursts whose table already exists (no plot is made for them either)
        out_csv = f'{out_dir}/tables/rtc_summary_site_{SITE_ID}_burst_{BURST_ID}.csv'
        if os.path.exists(out_csv):
            print(f"File '{out_csv}' exists! Skipping...")
            continue

        if df_rtc is None:
            df_rtc = pd.read_json('data/rtc_s1_table.json.zip')

        # RTC acquisitions of this burst, with parsed acquisition datetimes
        df_rtc_ts = (
            df_rtc[df_rtc.jpl_burst_id == BURST_ID]
            .reset_index(drop=True)
            .assign(acq_datetime=lambda d: pd.to_datetime(d['acq_datetime']))
        )

        # Load arrays. The same window size is passed explicitly for both
        # polarizations so the VV and VH windows are guaranteed to match.
        vv_arrs = get_burst_time_series_around_point(df_rtc_ts.rtc_s1_vv_url.tolist(), lon, lat, window_size=3)
        vh_arrs = get_burst_time_series_around_point(df_rtc_ts.rtc_s1_vh_url.tolist(), lon, lat, window_size=3)

        # Flatten each acquisition's window into a plain list, one list per date
        vv_flattened = []
        vh_flattened = []
        vv_vh_flattened = []
        dates = df_rtc_ts['acq_datetime']

        # dates takes part in the zip only to cap the iteration at the number of acquisitions
        for vv_array, vh_array, _ in zip(vv_arrs, vh_arrs, dates):
            # vv/vh with division by zero silenced; undefined results become the fill value
            with np.errstate(divide='ignore', invalid='ignore'):
                ratio = np.divide(vv_array, vh_array)
            ratio[~np.isfinite(ratio)] = RATIO_NODATA

            vv_flattened.append(vv_array.flatten().tolist())
            vh_flattened.append(vh_array.flatten().tolist())
            vv_vh_flattened.append(ratio.flatten().tolist())

        # Create a DataFrame with the flattened lists and dates
        df = pd.DataFrame({
            'datetime': dates,
            'vv': vv_flattened,
            'vh': vh_flattened,
            'vv/vh': vv_vh_flattened,
        })
        df['burst_id'] = BURST_ID
        df['site_id'] = SITE_ID
        df['site_lon'] = lon
        df['site_lat'] = lat

        desired_order = ['datetime', 'burst_id', 'site_id', 'site_lon', 'site_lat', 'vv', 'vh', 'vv/vh']
        df = df[desired_order]

        # Export table
        df.to_csv(out_csv)

        if create_plot == 'yes':
            # Load the CSV that was just written and plot from it
            burst_df = pd.read_csv(out_csv)

            # List columns come back from the CSV as strings; parse them into lists
            for col in ('vv', 'vh', 'vv/vh'):
                burst_df[col] = burst_df[col].apply(convert_to_list)

            # Per-acquisition window averages. Fill values are left out of the
            # ratio average so they do not drag the mean towards -9999.
            burst_df['vv_avg'] = burst_df['vv'].apply(_window_mean)
            burst_df['vh_avg'] = burst_df['vh'].apply(_window_mean)
            burst_df['vv/vh_avg'] = burst_df['vv/vh'].apply(lambda x: _window_mean(x, nodata=RATIO_NODATA))

            # Convert datetime
            burst_df['datetime'] = pd.to_datetime(burst_df['datetime'])

            # Plot data: three series sharing the x axis, each with its own y axis
            fig, ax1 = plt.subplots(figsize=(20, 5))
            ax1.plot(burst_df['datetime'], burst_df['vv_avg'], marker='o', color='tab:blue', label='vv_avg')
            ax1.set_xlabel('Datetime')
            ax1.set_ylabel('vv_avg', color='tab:blue')
            ax1.tick_params(axis='y', labelcolor='tab:blue')
            ax2 = ax1.twinx()
            ax2.plot(burst_df['datetime'], burst_df['vh_avg'], marker='v', color='tab:brown', label='vh_avg')
            ax2.set_ylabel('vh_avg', color='tab:brown')
            ax2.tick_params(axis='y', labelcolor='tab:brown')
            ax3 = ax1.twinx()
            # Shift the third y axis outward so it does not sit on top of the second one
            ax3.spines['right'].set_position(('outward', 60))
            ax3.plot(burst_df['datetime'], burst_df['vv/vh_avg'], marker='P', color='tab:purple', label='vv/vh_avg')
            ax3.set_ylabel('vv/vh_avg', color='tab:purple')
            ax3.tick_params(axis='y', labelcolor='tab:purple')

            change_type = df_site.change_type.iloc[0]
            plt.title(f'Change type {change_type}; {BURST_ID=}; {SITE_ID=}')

            # One tick per acquisition date
            ax1.set_xticks(df_rtc_ts['acq_datetime'].tolist())
            ax1.set_xticklabels(burst_df['datetime'], rotation=90)

            ax1.grid(True)

            # Vertical markers are drawn only when the site row has a value for them;
            # pd.notna also rejects None and NaN, not just NaT.
            last_observed_time = df_site['last_observation_time'].iloc[0]
            if pd.notna(last_observed_time):
                ax1.axvline(x=last_observed_time, color='b', linestyle='--', label=f'Last observation time ({last_observed_time})')
            ax2.legend(loc='upper left')

            change_time = df_site['change_time'].iloc[0]
            if pd.notna(change_time):
                ax1.axvline(x=change_time, color='r', linestyle='--', label=f'Change time ({change_time})')
            ax1.legend()

            fig.tight_layout()

            # Save the figure, then close it so figures do not pile up in memory across iterations
            out_png = f'{out_dir}/plots/rtc_summary_site_{SITE_ID}_burst_{BURST_ID}.png'
            fig.savefig(out_png, bbox_inches='tight', transparent=False, dpi=150)
            plt.close(fig)
            # Report the path that was actually written
            print(f"Saved figure {out_png}")
        
        