In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from es_db import get_rtc_urls, get_rtc_docs, get_search_client, get_static_rtc_docs
from tqdm import tqdm
import concurrent.futures

In [3]:
df_data = pd.read_csv('../OPERA-RTC_CalVal_S1_Scene_IDs.csv')
df_data['S1_Scene_IDs'] = df_data.S1_Scene_IDs.map(lambda s: s.split())
df_data = df_data.explode('S1_Scene_IDs').reset_index(drop=True)
df_data.head()

Unnamed: 0,Site,Orbit_Direction,Orbital_Path,CalVal_Module,S1_Scene_IDs
0,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200324T015024_20200324T0150...
1,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20201107T015034_20201107T0151...
2,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20220501T015035_20220501T0151...
3,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200604T015028_20200604T0150...
4,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20210130T015031_20210130T0150...


In [4]:
sample_slc_id = df_data.S1_Scene_IDs[1]
sample_slc_id

'S1A_IW_SLC__1SDV_20201107T015034_20201107T015102_035136_0419CE_F4EF,'

In [5]:
slc_ids = df_data.S1_Scene_IDs.tolist()

In [6]:
rtc_docs = get_rtc_docs(slc_ids[0])

In [7]:
len(rtc_docs)

28

In [8]:
with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
    rtc_docs_lst = list(tqdm(executor.map(get_rtc_docs, slc_ids[:]), total=len(slc_ids)))

100%|███████████| 1623/1623 [02:24<00:00, 11.25it/s]


In [9]:
def get_rtc_urls(rtc_docs_lst: list[dict]) -> dict:
    urls = [{rtc_doc['id']: rtc_doc['metadata']['product_urls']
             for rtc_doc in rtc_docs} for rtc_docs in rtc_docs_lst]
    return urls

In [10]:
out_data = get_rtc_urls(rtc_docs_lst)

In [11]:
opera_rtc_ids = [' '.join(list(d.keys())) for d in out_data]
n_prods = [len(list(d.keys())) for d in out_data]
urls_split = {}
for k in range(5):
    urls_split[k] = [' '.join([val[k] for _, val in d.items()]) for d in out_data]

In [12]:
df_data['opera_rtc_ids'] = opera_rtc_ids
df_data['h5_url'] = urls_split[0]
df_data['browse_url'] = urls_split[1]
df_data['vh_url'] = urls_split[2]
df_data['vv_url'] = urls_split[3]
df_data['mask_url'] = urls_split[4]
df_data['n_bursts'] = n_prods

In [13]:
df_data.head()

Unnamed: 0,Site,Orbit_Direction,Orbital_Path,CalVal_Module,S1_Scene_IDs,opera_rtc_ids,h5_url,browse_url,vh_url,vv_url,mask_url,n_bursts
0,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200324T015024_20200324T0150...,OPERA_L2_RTC-S1_T064-135519-IW2_20200324T01502...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
1,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20201107T015034_20201107T0151...,OPERA_L2_RTC-S1_T064-135527-IW2_20201107T01505...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
2,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20220501T015035_20220501T0151...,OPERA_L2_RTC-S1_T064-135520-IW1_20220501T01504...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
3,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200604T015028_20200604T0150...,OPERA_L2_RTC-S1_T064-135523-IW1_20200604T01503...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
4,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20210130T015031_20210130T0150...,OPERA_L2_RTC-S1_T064-135523-IW2_20210130T01504...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28


In [14]:
df_data.to_csv('opera_rtc_table.csv.zip')

# Static urls

In [15]:
rtc_ids = df_data['opera_rtc_ids'].map(lambda ids: ids.split())
rtc_ids = [id_ for group in rtc_ids for id_ in group]
rtc_ids[:2]

['OPERA_L2_RTC-S1_T064-135519-IW2_20200324T015025Z_20230928T223445Z_S1A_30_v1.0',
 'OPERA_L2_RTC-S1_T064-135524-IW1_20200324T015038Z_20230928T223445Z_S1A_30_v1.0']

In [16]:
burst_ids = list(set([id_.split('_')[3] for id_ in rtc_ids]))
len(burst_ids)

353

In [17]:
get_static_rtc_docs(burst_ids[19])

[{'burst_id': 'T039-082704-IW2',
  'sas_version': '1.0.1',
  'product_urls': 'https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0.h5 https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0_BROWSE.png https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0_incidence_angle.tif https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v1.0/OPERA_L2_RTC-S1-STATIC_T039-082704-IW2_20140403_20230929T223122Z_S1A_30_v

In [18]:
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    static_rtc_docs_lst = list(tqdm(executor.map(get_static_rtc_docs, burst_ids[:]), total=len(burst_ids)))

  0%|               | 1/353 [00:00<01:26,  4.09it/s]

T034-071102-IW3 does not have a entry in ES
T107-227899-IW3 does not have a entry in ES
T107-227897-IW3 does not have a entry in ES


  1%|▏              | 4/353 [00:00<00:58,  6.01it/s]

T107-227898-IW1 does not have a entry in ES


  7%|█             | 26/353 [00:00<00:07, 43.25it/s]

T107-227900-IW3 does not have a entry in ES
T107-227900-IW1 does not have a entry in ES
T034-071101-IW1 does not have a entry in ES
T107-227896-IW3 does not have a entry in ES
T107-227895-IW2 does not have a entry in ES


 16%|██▏           | 56/353 [00:01<00:04, 61.67it/s]

T107-227901-IW1 does not have a entry in ES


 26%|███▋          | 92/353 [00:01<00:03, 67.77it/s]

T034-071101-IW3 does not have a entry in ES


 39%|█████        | 136/353 [00:02<00:02, 80.97it/s]

T107-227899-IW1 does not have a entry in ES
T034-071081-IW2 does not have a entry in ES
T107-227895-IW3 does not have a entry in ES


 46%|█████▉       | 161/353 [00:02<00:02, 77.65it/s]

T034-071101-IW2 does not have a entry in ES
T107-227898-IW2 does not have a entry in ES
T034-071103-IW1 does not have a entry in ES
T107-227897-IW1 does not have a entry in ES
T034-071080-IW2 does not have a entry in ES
T034-071080-IW1 does not have a entry in ES


 60%|███████▊     | 211/353 [00:03<00:01, 81.27it/s]

T034-071081-IW1 does not have a entry in ES


 66%|████████▌    | 233/353 [00:03<00:01, 86.32it/s]

T107-227898-IW3 does not have a entry in ES
T034-071081-IW3 does not have a entry in ES
T107-227900-IW2 does not have a entry in ES
T034-071080-IW3 does not have a entry in ES


 74%|█████████▋   | 262/353 [00:04<00:01, 81.84it/s]

T107-227897-IW2 does not have a entry in ES
T107-227899-IW2 does not have a entry in ES
T107-227896-IW2 does not have a entry in ES


 87%|███████████▎ | 307/353 [00:04<00:00, 79.74it/s]

T034-071102-IW1 does not have a entry in ES
T107-227895-IW1 does not have a entry in ES


 96%|████████████▌| 340/353 [00:05<00:00, 78.72it/s]

T034-071102-IW2 does not have a entry in ES
T107-227896-IW1 does not have a entry in ES


100%|█████████████| 353/353 [00:05<00:00, 70.02it/s]


In [19]:
static_data = [group[0] for group in static_rtc_docs_lst]
len(static_data)

353

In [20]:
df_static = pd.DataFrame(static_data[:])
df_static.head()

Unnamed: 0,burst_id,sas_version,product_urls
0,T160-342211-IW1,1.0.1,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
1,T039-082698-IW1,1.0.1,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
2,T135-288080-IW3,1.0.1,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
3,T170-364150-IW1,1.0.1,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
4,T135-288086-IW3,1.0.1,https://opera-pst-rs-pop1.s3.us-west-2.amazona...


In [21]:
df_static.shape

(353, 3)

In [22]:
df_static.to_csv('opera_rtc_static_table.csv.zip')

In [24]:
(df_static.product_urls == '').sum()

32