In [1]:
import geopandas as gpd
from dem_stitcher.geojson_io import read_geojson_gzip
from tqdm import tqdm
import pandas as pd

In [2]:
# Load the MGRS tiling grid from a gzip-compressed GeoJSON file; the path is
# relative, so the file must be in the notebook's working directory.
df_mgrs = read_geojson_gzip('s2_mgrs_grid.geojson.gzip')
# Preview the first rows only instead of rendering the whole grid.
df_mgrs.head()

Unnamed: 0,geometry,type,identifier
0,"MULTIPOLYGON Z (((180.00000 -73.05974 0.00000,...",S2,01CCV
1,"MULTIPOLYGON Z (((180.00000 -83.80855 0.00000,...",S2,01CDH
2,"MULTIPOLYGON Z (((180.00000 -82.91344 0.00000,...",S2,01CDJ
3,"MULTIPOLYGON Z (((180.00000 -82.01866 0.00000,...",S2,01CDK
4,"MULTIPOLYGON Z (((180.00000 -81.12317 0.00000,...",S2,01CDL


In [3]:
import warnings
from functools import lru_cache
from pathlib import Path

import pandas as pd
import rasterio.mask
from rasterio.warp import transform_bounds
from rasterio.crs import CRS
import urllib3
from dotenv import dotenv_values
from elasticsearch import Elasticsearch, client
from elasticsearch_dsl import Q, Search
from shapely.geometry import box

# Silence urllib3 warnings for the whole kernel session (the Elasticsearch
# client below is created with verify_certs=False).
urllib3.disable_warnings()

# Product key -> index pattern passed to the Search object.
INDICES = {
    "dswx_hls": "grq_*_hls-2023.09",
    "dswx_s1": "grq_v0.1_l3_dswx_s1-*",
    "rtc": "grq_v1.0_l2_rtc_s1-*",
}


@lru_cache
def get_es_search_client(prod: str = "dswx_hls") -> Search:
    """Return an ``elasticsearch_dsl`` ``Search`` bound to the index for ``prod``.

    Credentials are taken from the ``ES_USERNAME`` and ``ES_PASSWORD`` entries
    returned by ``dotenv_values()``. The result is memoised per ``prod`` by
    ``lru_cache``, so the client is built and pinged only on the first
    successful call for a given product.

    Parameters
    ----------
    prod : str
        One of the keys of ``INDICES``.

    Returns
    -------
    Search
        A search object using the authenticated client and the index pattern
        registered for ``prod``.

    Raises
    ------
    ValueError
        If ``prod`` is not a key of ``INDICES``, or if the server does not
        answer the ping.
    KeyError
        If ``ES_USERNAME`` or ``ES_PASSWORD`` is missing from the loaded
        dotenv configuration.
    """
    if prod not in INDICES:
        raise ValueError(f'prod must be {", ".join(INDICES.keys())}')
    index = INDICES[prod]

    config = dotenv_values()
    ES_USERNAME = config["ES_USERNAME"]
    ES_PASSWORD = config["ES_PASSWORD"]
    GRQ_URL = "https://100.104.62.10/grq_es/"
    # NOTE(review): `read_timeout` and `terminate_after` are forwarded to the
    # client constructor unchanged. They may not be options the client acts on
    # (`timeout` is the usual request-timeout keyword and `terminate_after` is
    # normally a per-search parameter) -- confirm against the installed
    # elasticsearch-py version before relying on them.
    grq_client = Elasticsearch(
        GRQ_URL,
        http_auth=(ES_USERNAME, ES_PASSWORD),
        verify_certs=False,  # certificate verification is turned off for this host
        read_timeout=50000,
        terminate_after=2500,
    )

    # Check connectivity first so a bad login / missing VPN surfaces here
    # rather than on the first query.
    if not grq_client.ping():
        raise ValueError("Either JPL username/password is wrong or not connected to VPN")

    return Search(using=grq_client, index=index)



def get_dswx_s1_doc(mgrs_tile: str) -> list:
    """Return every document in the ``dswx_s1`` index whose ``id`` matches ``mgrs_tile``.

    The tile string is sent as a ``simple_query_string`` query restricted to
    the ``id`` field with ``default_operator="and"``. The number of matches is
    counted first and then used as the slice size, so all matches come back in
    a single response.

    Parameters
    ----------
    mgrs_tile : str
        Tile token to search for (the notebook passes values such as
        ``'T22JDM'`` and ``'22JDM'``).

    Returns
    -------
    list
        One plain ``dict`` per hit (``hit.to_dict()``); empty when nothing
        matches.
    """
    search = get_es_search_client(prod="dswx_s1")

    query = Q("simple_query_string", query=f"{mgrs_tile}", fields=["id"], default_operator="and")
    query_ob = search.query(query)

    total = query_ob.count()
    if total == 0:
        # Nothing matched: skip the second round trip for an empty page.
        return []

    # Slice to the exact match count so the response is not cut at the
    # default page size.
    resp = query_ob[:total].execute()
    return [hit.to_dict() for hit in resp.hits]

def format_hits_for_rtc_inputs(hit: dict):
    """Pair a DSWx-S1 document id with each ``.h5`` input it lists.

    Reads ``hit['metadata']['accountability']['L3_DSWx_S1']['inputs']``, keeps
    the entries ending in ``.h5`` and strips that extension to form ``rtc_id``.

    Returns a list of ``{'dswx_s1_id': ..., 'rtc_id': ...}`` dicts, one per
    ``.h5`` input, in the order the inputs are listed.
    """
    input_names = hit['metadata']['accountability']['L3_DSWx_S1']['inputs']

    records = []
    for name in input_names:
        if name[-3:] != '.h5':
            continue
        # Drop the 3-character '.h5' extension to get the id.
        records.append({'dswx_s1_id': hit['id'], 'rtc_id': name[:-3]})
    return records
def format_hits_for_dswx_s1(hit: dict):
    """Extract the WTR and CONF GeoTIFF URLs of one DSWx-S1 document.

    The ``.tif`` entries of ``hit['metadata']['product_urls']`` are sorted and
    the layers are then selected by file-name suffix (``_WTR.tif`` and
    ``_CONF.tif``) rather than by position, so a product with a different set
    of ``.tif`` layers cannot yield the wrong file. When several URLs match a
    layer, the first one in sorted order is used.

    Parameters
    ----------
    hit : dict
        Document with an ``'id'`` key and a ``'metadata'`` dict holding a
        ``'product_urls'`` list of strings.

    Returns
    -------
    dict
        ``{'dswx_s1_id': ..., 'dswx_s1_wtr': ..., 'dswx_s1_conf': ...}``.

    Raises
    ------
    IndexError
        If no ``.tif`` URL exists for one of the two layers.
    """
    tif_urls = [url for url in sorted(hit['metadata']['product_urls']) if url.endswith('.tif')]

    def _layer_url(layer: str) -> str:
        # The leading underscore keeps '_WTR.tif' from matching '_BWTR.tif'.
        suffix = f'_{layer}.tif'
        for url in tif_urls:
            if url.endswith(suffix):
                return url
        raise IndexError(f"no '*{suffix}' entry in product_urls")

    return dict(dswx_s1_id=hit['id'],
                dswx_s1_wtr=_layer_url('WTR'),
                dswx_s1_conf=_layer_url('CONF'),
                )

In [4]:
# Spot-check a single tile: fetch its DSWx-S1 documents and format the first
# hit to confirm the query and the URL extraction work end to end.
hits = get_dswx_s1_doc('T22JDM')
# NOTE: hits[0] raises IndexError if the query returns no documents.
format_hits_for_dswx_s1(hits[0])



{'dswx_s1_id': 'OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240524T005858Z_S1A_30_v0.1',
 'dswx_s1_wtr': 'https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/DSWx_S1/OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240524T005858Z_S1A_30_v0.1/OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240524T005858Z_S1A_30_v0.1_B01_WTR.tif',
 'dswx_s1_conf': 'https://opera-pst-rs-pop1.s3.us-west-2.amazonaws.com/products/DSWx_S1/OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240524T005858Z_S1A_30_v0.1/OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240524T005858Z_S1A_30_v0.1_B03_CONF.tif'}

# DSWx-S1 Data

In [8]:
# Tiles to query for DSWx-S1 products.
tiles = ['22JDM', '22JDN', '22JCN', '22JBN', '22JBM', '22JCM', '22JEM', '22JEN', '22JDP', '22JEP']

# Collect the documents of every tile into one flat list (one query per tile,
# with a progress bar).
hits = []
for tile in tqdm(tiles):
    hits.extend(get_dswx_s1_doc(tile))

# One record (id + WTR/CONF URLs) per document, then a table built in one go.
dswx_s1_data = list(map(format_hits_for_dswx_s1, hits))
df_dswx_s1 = pd.DataFrame(dswx_s1_data)
df_dswx_s1.head()

100%|████████████████████████████████████████████████████████████████████| 10/10 [00:02<00:00,  4.99it/s]


Unnamed: 0,dswx_s1_id,dswx_s1_wtr,dswx_s1_conf
0,OPERA_L3_DSWx-S1_T22JDM_20240508T220706Z_20240...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
1,OPERA_L3_DSWx-S1_T22JDM_20231125T084942Z_20240...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
2,OPERA_L3_DSWx-S1_T22JDN_20231125T084929Z_20240...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
3,OPERA_L3_DSWx-S1_T22JDN_20240508T220719Z_20240...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...
4,OPERA_L3_DSWx-S1_T22JCN_20240508T220722Z_20240...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...,https://opera-pst-rs-pop1.s3.us-west-2.amazona...


In [9]:
# Sanity check: (number of DSWx-S1 documents found, number of columns).
df_dswx_s1.shape

(22, 3)

In [10]:
# Persist the table to a CSV in the working directory. With the default
# index=True the integer row index is written as an unnamed first column;
# readers of this file need to account for it (e.g. index_col=0).
df_dswx_s1.to_csv('dswx_s1_flood.csv')