In [1]:
import pandas as pd
from es_db import get_rtc_urls
from tqdm import tqdm
import concurrent.futures

In [2]:
df_data = pd.read_csv('../OPERA-RTC_CalVal_S1_Scene_IDs.csv')
df_data['S1_Scene_IDs'] = df_data.S1_Scene_IDs.map(lambda s: s.split())
df_data = df_data.explode('S1_Scene_IDs').reset_index(drop=True)
df_data.head()

Unnamed: 0,Site,Orbit_Direction,Orbital_Path,CalVal_Module,S1_Scene_IDs
0,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200324T015024_20200324T0150...
1,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20201107T015034_20201107T0151...
2,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20220501T015035_20220501T0151...
3,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200604T015028_20200604T0150...
4,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20210130T015031_20210130T0150...


In [3]:
sample_slc_id = df_data.S1_Scene_IDs[1]
sample_slc_id

'S1A_IW_SLC__1SDV_20201107T015034_20201107T015102_035136_0419CE_F4EF,'

In [4]:
slc_ids = df_data.S1_Scene_IDs.tolist()

In [5]:
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    out_data = list(tqdm(executor.map(get_rtc_urls, slc_ids), total=len(slc_ids)))

100%|█████████████████████████████| 1623/1623 [02:03<00:00, 13.11it/s]


In [6]:
opera_rtc_ids = [' '.join(list(d.keys())) for d in out_data]
n_prods = [len(list(d.keys())) for d in out_data]
urls_split = {}
for k in range(5):
    urls_split[k] = [' '.join([val[k] for _, val in d.items()]) for d in out_data]

In [7]:
urls_split[4][0]

'https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1_T064-135519-IW2_20200324T015025Z_20230928T223445Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T064-135519-IW2_20200324T015025Z_20230928T223445Z_S1A_30_v1.0_mask.tif https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1_T064-135524-IW1_20200324T015038Z_20230928T223445Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T064-135524-IW1_20200324T015038Z_20230928T223445Z_S1A_30_v1.0_mask.tif https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1_T064-135527-IW3_20200324T015048Z_20230928T223445Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T064-135527-IW3_20200324T015048Z_20230928T223445Z_S1A_30_v1.0_mask.tif https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/RTC_S1/OPERA_L2_RTC-S1_T064-135528-IW1_20200324T015049Z_20230928T223445Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T064-135528-IW1_20200324T015049Z_20230928T223445Z_S1A_30_v1.0_mask.tif https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/R

In [8]:
df_data['opera_rtc_ids'] = opera_rtc_ids
df_data['h5_url'] = urls_split[0]
df_data['browse_url'] = urls_split[1]
df_data['vh_url'] = urls_split[2]
df_data['vv_url'] = urls_split[3]
df_data['mask_url'] = urls_split[4]
df_data['n_bursts'] = n_prods

In [9]:
df_data.head()

Unnamed: 0,Site,Orbit_Direction,Orbital_Path,CalVal_Module,S1_Scene_IDs,opera_rtc_ids,h5_url,browse_url,vh_url,vv_url,mask_url,n_bursts
0,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200324T015024_20200324T0150...,OPERA_L2_RTC-S1_T064-135519-IW2_20200324T01502...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
1,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20201107T015034_20201107T0151...,OPERA_L2_RTC-S1_T064-135527-IW2_20201107T01505...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
2,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20220501T015035_20220501T0151...,OPERA_L2_RTC-S1_T064-135520-IW1_20220501T01504...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
3,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20200604T015028_20200604T0150...,OPERA_L2_RTC-S1_T064-135523-IW1_20200604T01503...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28
4,California,Ascending,64,Absolute Geolocation Evaluation,S1A_IW_SLC__1SDV_20210130T015031_20210130T0150...,OPERA_L2_RTC-S1_T064-135523-IW2_20210130T01504...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,28


In [10]:
df_data.to_csv('linked_data.csv')

In [11]:
df_data.n_bursts.sum()

43687