##### Outputs from this notebook is saved in `./data/files_list` folder

In [1]:
import glob
import os
from itertools import product as itrprod
import asyncio
import json
import requests

In [2]:
SAVE_DIR = "./data/files_list"
os.makedirs(SAVE_DIR, exist_ok=True)

In [3]:
AU_POLYGONS = [
    'POLYGON((154.073685 -26.398506,151.56311 -25.801189,151.075897 -27.40662,153.624039 -28.014174,154.073685 -26.398506))',
    'POLYGON((146.474747 -16.033461,144.151413 -15.494384,143.742279 -17.110708,146.086823 -17.657339,146.474747 -16.033461))',
    'POLYGON((150.214127 -28.315512,152.713028 -27.71974,152.206833 -26.059097,149.74646 -26.644444,150.214127 -28.315512))',
    'POLYGON((116.691261 -27.688679,114.187599 -27.092514,113.688507 -28.695972,116.231606 -29.302624,116.691261 -27.688679))',
]

AN_POLYGONS = [
    'POLYGON((173.982986 -78.4249729999985,172.122543 -76.221466,179.0336 -75.837616,-177.961258 -77.972656,173.982986 -78.4249729999985))',
    'POLYGON((111.136292 -67.7959589999999,115.933327 -66.5219569999998,113.435944 -65.0743559999997,108.809639 -66.2770999999998,111.136292 -67.7959589999999))',
    'POLYGON((172.382263 -78.6327819999986,171.732635 -76.398155,163.953552 -76.383263,163.121597 -78.6150589999986,172.382263 -78.6327819999986))',
    'POLYGON((70.583374 -54.4052539999951,74.212395 -53.5243909999941,73.09771 -51.946536999992,69.587746 -52.7966349999932,70.583374 -54.4052539999951))',
    'POLYGON((178.576126 -71.618423,173.430893 -70.1199569999999,176.938004 -68.7651059999999,-178.032867 -70.1673429999999,178.576126 -71.618423))',
    'POLYGON((79.214729 -70.0986789999999,74.184479 -68.6963419999999,71.050514 -69.9258959999999,76.189308 -71.414604,79.214729 -70.0986789999999))',
    'POLYGON((12.562656 -72.541679,17.732925 -70.976974,13.91748 -69.6723859999999,8.82353100000002 -71.134811,12.562656 -72.541679))',
]

In [4]:
S1_DATA_DIR = "/g/data/fj7/Copernicus/Sentinel-1/C-SAR/"
S1_AOI_LIST_AU = [
    "25S150E-30S155E", 
    "15S145E-20S150E", 
    "25S150E-30S155E",
    "25S110E-30S115E",
] # Australia
S1_AOI_LIST_AN = [
    "75S150W-80S145W",
    "65S110E-70S115E",
    "75S165E-80S170E",
    "70S155E-75S160E",
    "65S070E-70S075E",
] # Antarctica

S1_QUERY_YEARS_AU = ["2023", "2024"]
S1_QUERY_YEARS_AN = ["2023", "2024"]
S1_QUERY_PRODS = ["GRD", "SLC"]
MONTH_RANGE = ["{:02d}".format(m) for m in range(1, 13)]
S1_CASE_LIST_AU = list(itrprod(S1_QUERY_PRODS, S1_QUERY_YEARS_AU, MONTH_RANGE))
S1_CASE_LIST_AN = list(itrprod(S1_QUERY_PRODS, S1_QUERY_YEARS_AN, MONTH_RANGE))

S1_AU_FILENAME = "s1_au.csv"
S1_AN_FILENAME = "s1_an.csv"

In [5]:
S2_DATA_DIR = "/g/data/fj7/Copernicus/Sentinel-2/MSI/"
S2_AOI_LIST_AU = [
    "T56JNQ", 
    "T55KCB", 
    "T56JKQ",
    "T50JKP",
]
S2_AOI_LIST_AN = [
    "T09CWQ",
    "T49DDH",
    "T58CEU",
    "T57DVB",
    "T43DDC",
    "T33DVA",
]
S2_QUERY_YEARS_AU = ["2023", "2024"]
S2_QUERY_YEARS_AN = ["2023", "2024"] #[str(x) for x in list(range(2020,2025))]
S2_QUERY_PRODS = ["L1C", "L2A"]
MONTH_RANGE = ["{:02d}".format(m) for m in range(1, 13)]
S2_CASE_LIST_AU = list(itrprod(S2_QUERY_PRODS, S2_QUERY_YEARS_AU, MONTH_RANGE))
S2_CASE_LIST_AN = list(itrprod(S2_QUERY_PRODS, S2_QUERY_YEARS_AN, MONTH_RANGE))

S2_AU_FILENAME = "s2_au.csv"
S2_AN_FILENAME = "s2_an.csv"

In [34]:
def get_s1_filenames(
    polygon_list:str, 
    years:list[str], 
    output_file_path:str,
    products:list[str]=["GRD", "SLC"],
):
    """
    Get scene names for S1 via direct requet to SARA server
    """
    s1_names = []
    if os.path.isfile(output_file_path):
        with open(output_file_path, "r") as f:
            for l in f:
                s1_names.append(l.strip())
    else:
        start = f"{years[0]}-01-01"
        end = f"{years[1]}-12-12"
        for poly in polygon_list:
            for prod in products:
                page = 1
                query_resp = ["start"]
                while query_resp != []:
                    query = f"https://copernicus.nci.org.au/sara.server/1.0/api/collections/S1/search.json?_pretty=1&geometry={poly}&startDate={start}&completionDate={end}&instrument=C-SAR&sensor=IW&maxRecords=500&productType={prod}&page={page}"
                    response = json.loads(requests.get(query).content)
                    query_resp = [r["properties"]["title"] for r in response["features"]]
                    s1_names.extend(query_resp)
                    page += 1
        with open(output_file_path, "w") as f:
            for n in s1_names:
                f.write(f"{n}\n")
    return s1_names

def save_file_list(file_list:dict, save_path:str) -> None:
    """
    Saves the retrieved data.
    """
    with open(save_path, "w") as f:
        for (k, v) in file_list.items():
            for filename in v:
                f.write(f"{k},{filename}\n")
    return None

async def find_all_files_for_case(query_case:tuple, sat_data_dir:str) -> bool:
    """
    Finds all files for a selected case of product/year/month
    """
    case_path = os.path.join(sat_data_dir, query_case[0], query_case[1], f"{query_case[1]}-{query_case[2]}")
    print(f"Retrieving files for {case_path}", end="\r")
    return glob.glob(case_path + "/*/*.zip")

async def find_aoi_files(aoi:str, all_files:list[str]) -> list[str]:
    """
    Filters all files and finds files for the area of interest.
    """
    print(f"filtering files for {aoi}", end="\r")
    return list(filter(lambda p: aoi in p, all_files))

def flatten(l:list[list]) -> list:
    """
    Flattens the list
    """
    return[x for xs in l for x in xs]

# Not sure if async runs well in notebook
async def find_files_for_aios_async(
        query_cases:list[tuple], 
        sat_data_dir:str, 
        aoi_list:list[str], 
    ) -> dict:
    """
    Asyncronously finds the files for an AOI list given as list of identifiers based on a combination of produt/year/month from NCI Copernicus databse.
    Set `is_s1` to True for Sentinel-1.
    """
    all_files_async = [find_all_files_for_case(c, sat_data_dir) for c in query_cases]
    all_files = await asyncio.gather(*all_files_async)
    all_files = flatten(all_files)
    print("")
    
    aoi_files_async = [find_aoi_files(aoi, all_files) for aoi in aoi_list]
    aoi_files = await asyncio.gather(*aoi_files_async)
    print("")
    return dict(map(lambda k, v: (k, v), aoi_list, aoi_files))

# syncronous function for all cases and AOIs at the same time. Could take long
def find_files_for_aios(
        query_cases:list[tuple], 
        sat_data_dir:str, 
        aoi_list:list[str], 
    ) -> dict:
    """
    Finds the files for an AOI list given as list of identifiers based on a combination of produt/year/month from NCI Copernicus databse.
    Set `is_s1` to True for Sentinel-1.
    """
    all_files = []
    aoi_files = []
    for c in query_cases:
        case_path = os.path.join(sat_data_dir, c[0], c[1], f"{c[1]}-{c[2]}")
        print("\r", f"Retrieving files for {case_path}", end="")
        all_files.extend(glob.glob(case_path + "/*/*.zip"))

    print("")
    aoi_files = {}
    for aoi in aoi_list:
        print("\r", f"filtering files for {aoi}", end="")
        aoi_files[aoi] = list(filter(lambda p: aoi in p, all_files))
    
    print("")
    return aoi_files

def find_files_for_s1_aois(nci_files_dict:dict, s1_file_names:list[str]) -> dict:
    """
    Finds the files found from SARA server inside the AOI files retrieved from NCI
    """
    files_dict = {}
    for k, v in nci_files_dict.items():
        nci_files = [os.path.splitext(os.path.basename(f))[0] for f in v]
        found_idx = [nci_files.index(f) for f in s1_file_names if f in nci_files]
        found = [v[idx] for idx in found_idx]
        files_dict[k] = found
    return files_dict

In [39]:
s1_au_names = get_s1_filenames(AU_POLYGONS, S1_QUERY_YEARS_AU, "data/outputs/temp_s1_au.txt") # this needs internet connection if the output file does not already exist.
s1_aoi_files_au_nci = await find_files_for_aios_async(S1_CASE_LIST_AU, S1_DATA_DIR, S1_AOI_LIST_AU)
s1_aoi_files_au = find_files_for_s1_aois(s1_aoi_files_au_nci, s1_au_names)
for key in s1_aoi_files_au.keys():
    print(f"{key} => {len(s1_aoi_files_au[key])}")

save_file_list(s1_aoi_files_au, os.path.join(SAVE_DIR, S1_AU_FILENAME))

25S150E-30S155E => 1940
15S145E-20S150E => 465
25S110E-30S115E => 80


In [45]:
s1_an_names = get_s1_filenames(AN_POLYGONS, S1_QUERY_YEARS_AN, "data/outputs/temp_s1_an.txt") # this needs internet connection if the output file does not already exist.
s1_aoi_files_an_nci = await find_files_for_aios_async(S1_CASE_LIST_AN, S1_DATA_DIR, S1_AOI_LIST_AN)
s1_aoi_files_an = find_files_for_s1_aois(s1_aoi_files_an_nci, s1_an_names)
for key in s1_aoi_files_an.keys():
    print(f"{key} => {len(s1_aoi_files_an[key])}")

save_file_list(s1_aoi_files_an, os.path.join(SAVE_DIR, S1_AN_FILENAME))

Retrieving files for /g/data/fj7/Copernicus/Sentinel-1/C-SAR/SLC/2024/2024-12
filtering files for 65S070E-70S075E
75S150W-80S145W => 430
65S110E-70S115E => 761
75S165E-80S170E => 300
70S155E-75S160E => 302
65S070E-70S075E => 660


In [47]:
s2_aoi_files_au = await find_files_for_aios_async(S2_CASE_LIST_AU, S2_DATA_DIR, S2_AOI_LIST_AU)
for key in s2_aoi_files_au.keys():
    print(f"{key} => {len(s2_aoi_files_au[key])}")

save_file_list(s2_aoi_files_au, os.path.join(SAVE_DIR, S2_AU_FILENAME))

T56JNQ => 576
T55KCB => 580
T56JKQ => 583
T50JKP => 592


In [48]:
s2_aoi_files_an = await find_files_for_aios_async(S2_CASE_LIST_AN, S2_DATA_DIR, S2_AOI_LIST_AN)
for key in s2_aoi_files_an.keys():
    print(f"{key} => {len(s2_aoi_files_an[key])}")

save_file_list(s2_aoi_files_an, os.path.join(SAVE_DIR, S2_AN_FILENAME))

Retrieving files for /g/data/fj7/Copernicus/Sentinel-2/MSI/L2A/2024/2024-12
filtering files for T33DVA
T09CWQ => 0
T49DDH => 198
T58CEU => 502
T57DVB => 170
T43DDC => 257
T33DVA => 0
