##### Outputs from this notebook are saved in the `./data/files_list` folder

In [26]:
import glob
import os
from itertools import product as itrprod
import asyncio

In [27]:
# Directory that receives the CSV file lists written by this notebook; it is created here if it does not exist yet.
SAVE_DIR = "./data/files_list"
os.makedirs(SAVE_DIR, exist_ok=True)

In [28]:
# Base directory of the Sentinel-1 C-SAR data that is searched.
S1_DATA_DIR = "/g/data/fj7/Copernicus/Sentinel-1/C-SAR/"

# Identifier strings of the areas of interest (AOIs); a file is selected
# when its path contains one of these strings.
S1_AOI_LIST_AU = [  # Australia
    "056919_06FDB0",
    "056934_06FE4F",
    "056992_07009D",
    "056972_06FFD7",
]
S1_AOI_LIST_AN = [  # Antarctica
    "057000_0700EF",
    "056973_06FFE1",
    "056492_06ECCE",
    "056432_06EA69",
    "057004_070121",
    "056890_06FC96",
]

# Years and product types to query for each region.
S1_QUERY_YEARS_AU = ["2023", "2024"]
S1_QUERY_YEARS_AN = ["2020", "2021", "2022", "2023", "2024"]
S1_QUERY_PRODS = ["GRD", "SLC"]

# Zero-padded month numbers "01" .. "12".
MONTH_RANGE = [f"{month:02d}" for month in range(1, 13)]

# Every (product, year, month) combination to search.
S1_CASE_LIST_AU = list(itrprod(S1_QUERY_PRODS, S1_QUERY_YEARS_AU, MONTH_RANGE))
S1_CASE_LIST_AN = list(itrprod(S1_QUERY_PRODS, S1_QUERY_YEARS_AN, MONTH_RANGE))

# Names of the output files (joined with SAVE_DIR when saving).
S1_AU_FILENAME = "s1_au.csv"
S1_AN_FILENAME = "s1_an.csv"

In [29]:
# Base directory of the Sentinel-2 MSI data that is searched.
S2_DATA_DIR = "/g/data/fj7/Copernicus/Sentinel-2/MSI/"

# Identifier strings of the areas of interest (AOIs); a file is selected
# when its path contains one of these strings.
S2_AOI_LIST_AU = [  # Australia
    "N0511_R030_T56JNQ",
    "N0511_R059_T55KCB",
    "N0511_R073_T56JKQ",
    "N0511_R103_T50JKP",
]
S2_AOI_LIST_AN = [  # Antarctica
    "N0511_R125_T09CWQ",
    "N0511_R031_T49DDH",
    "N0511_R124_T58CEU",
    "N0511_R129_T57DVB",
    "N0511_R118_T43DDC",
    "N0511_R101_T33DVA",
]

# Years and product types to query for each region.
S2_QUERY_YEARS_AU = ["2023", "2024"]
S2_QUERY_YEARS_AN = ["2020", "2021", "2022", "2023", "2024"]
S2_QUERY_PRODS = ["L1C", "L2A"]

# Zero-padded month numbers "01" .. "12" (re-defined so this cell runs on its own).
MONTH_RANGE = [f"{month:02d}" for month in range(1, 13)]

# Every (product, year, month) combination to search.
S2_CASE_LIST_AU = list(itrprod(S2_QUERY_PRODS, S2_QUERY_YEARS_AU, MONTH_RANGE))
S2_CASE_LIST_AN = list(itrprod(S2_QUERY_PRODS, S2_QUERY_YEARS_AN, MONTH_RANGE))

# Names of the output files (joined with SAVE_DIR when saving).
S2_AU_FILENAME = "s2_au.csv"
S2_AN_FILENAME = "s2_an.csv"

In [30]:
def save_file_list(file_list:dict, save_path:str) -> None:
    """
    Saves the retrieved file lists as comma-separated text.

    Every file name is written on its own line as ``<key>,<filename>``, following
    the iteration order of ``file_list``. An existing file at ``save_path`` is
    overwritten; keys with no file names produce no lines.

    Args:
        file_list: Mapping of an identifier to an iterable of file names.
        save_path: Path of the file to write.
    """
    # Explicit encoding so the output does not depend on the locale of the
    # machine that runs the notebook.
    with open(save_path, "w", encoding="utf-8") as f:
        # NOTE: values are written unquoted, so a key or file name that
        # contains a comma would produce an ambiguous row.
        f.writelines(
            f"{key},{filename}\n"
            for key, filenames in file_list.items()
            for filename in filenames
        )

async def find_all_files_for_case(query_case:tuple, sat_data_dir:str) -> list[str]:
    """
    Finds all zip files for a selected case of product/year/month.

    The search pattern is
    ``<sat_data_dir>/<product>/<year>/<year>-<month>/*/*.zip``.

    Args:
        query_case: Sequence whose first three items are product, year and month strings.
        sat_data_dir: Base directory to search under.

    Returns:
        The paths matching the pattern; an empty list when nothing matches.
    """
    product, year, month = query_case[0], query_case[1], query_case[2]
    case_path = os.path.join(sat_data_dir, product, year, f"{year}-{month}")
    print(f"Retrieving files for {case_path}", end="\r")
    # glob.glob blocks on file-system access. Running it in a worker thread
    # keeps the event loop free, so cases awaited together (e.g. through
    # asyncio.gather) can overlap instead of running one after another.
    return await asyncio.to_thread(glob.glob, case_path + "/*/*.zip")

async def find_aoi_files(aoi:str, all_files:list[str]) -> list[str]:
    """
    Selects the files that belong to one area of interest.

    A path is kept when it contains ``aoi`` as a substring; the order of
    ``all_files`` is preserved.
    """
    print(f"filtering files for {aoi}", end="\r")
    return [path for path in all_files if aoi in path]

def flatten(l:list[list]) -> list:
    """
    Concatenates the items of every sub-list into a single new list,
    keeping their order.
    """
    flat = []
    for sub_list in l:
        flat.extend(sub_list)
    return flat

# Not sure if async runs well in notebook
async def find_files_for_aios_async(query_cases:list[tuple], sat_data_dir:str, aoi_list:list[str]) -> dict:
    """
    Asynchronously finds the files for a list of AOI identifiers.

    First the files of every product/year/month case under ``sat_data_dir``
    are collected, then that combined list is filtered once per AOI.

    Returns:
        A dict mapping each entry of ``aoi_list`` to the list of its matching files.
    """
    # Step 1: gather the file lists of all cases and merge them into one list.
    files_per_case = await asyncio.gather(
        *(find_all_files_for_case(case, sat_data_dir) for case in query_cases)
    )
    candidates = flatten(files_per_case)

    # Step 2: filter the merged list for every AOI.
    files_per_aoi = await asyncio.gather(
        *(find_aoi_files(aoi, candidates) for aoi in aoi_list)
    )
    print("")
    return dict(zip(aoi_list, files_per_aoi))

# Synchronous function for all cases and AOIs at the same time. Could take a long time.
def find_files_for_aios(query_cases:list[tuple], sat_data_dir:str, aoi_list:list[str]) -> dict[str, list[str]]:
    """
    Finds the files for a list of AOI identifiers.

    For every product/year/month case, the zip files matching
    ``<sat_data_dir>/<product>/<year>/<year>-<month>/*/*.zip`` are collected;
    the combined list is then filtered once per AOI by substring match on the path.

    Args:
        query_cases: Sequences whose first three items are product, year and month strings.
        sat_data_dir: Base directory to search under.
        aoi_list: AOI identifier strings to look for in the file paths.

    Returns:
        A dict mapping each AOI identifier to the list of matching paths
        (an empty list when nothing matches).
    """
    all_files = []
    for c in query_cases:
        case_path = os.path.join(sat_data_dir, c[0], c[1], f"{c[1]}-{c[2]}")
        # "\r" moves the cursor back to the start of the line so progress messages overwrite each other.
        print("\r", f"Retrieving files for {case_path}", end="")
        all_files.extend(glob.glob(case_path + "/*/*.zip"))

    print("")
    aoi_files = {}
    for aoi in aoi_list:
        print("\r", f"filtering files for {aoi}", end="")
        aoi_files[aoi] = [p for p in all_files if aoi in p]

    print("")
    return aoi_files

In [31]:
# s1_aoi_files = find_files_for_aios(S1_CASE_LIST, S1_DATA_DIR, S1_AOI_LIST)
s1_aoi_files_au = await find_files_for_aios_async(S1_CASE_LIST_AU, S1_DATA_DIR, S1_AOI_LIST_AU)
for key in s1_aoi_files_au.keys():
    print(len(s1_aoi_files_au[key]))

save_file_list(s1_aoi_files_au, os.path.join(SAVE_DIR, S1_AU_FILENAME))

filtering files for 056972_06FFD7Copernicus/Sentinel-1/C-SAR/SLC/2024/2024-12
32
22
32
10


In [37]:
s1_aoi_files_an = await find_files_for_aios_async(S1_CASE_LIST_AN, S1_DATA_DIR, S1_AOI_LIST_AN)
for key in s1_aoi_files_an.keys():
    print(len(s1_aoi_files_an[key]))

save_file_list(s1_aoi_files_an, os.path.join(SAVE_DIR, S1_AN_FILENAME))

filtering files for 056890_06FC96Copernicus/Sentinel-1/C-SAR/SLC/2024/2024-12
2
3
6
12
3
0


In [38]:
# Synchronous alternative: s2_aoi_files_au = find_files_for_aios(S2_CASE_LIST_AU, S2_DATA_DIR, S2_AOI_LIST_AU)
s2_aoi_files_au = await find_files_for_aios_async(S2_CASE_LIST_AU, S2_DATA_DIR, S2_AOI_LIST_AU)
for key in s2_aoi_files_au.keys():
    print(len(s2_aoi_files_au[key]))

save_file_list(s2_aoi_files_au, os.path.join(SAVE_DIR, S2_AU_FILENAME))

filtering files for N0511_R103_T50JKPrnicus/Sentinel-2/MSI/L2A/2024/2024-12
58
62
60
58


In [39]:
s2_aoi_files_an = await find_files_for_aios_async(S2_CASE_LIST_AN, S2_DATA_DIR, S2_AOI_LIST_AN)
for key in s2_aoi_files_an.keys():
    print(len(s2_aoi_files_an[key]))

save_file_list(s2_aoi_files_an, os.path.join(SAVE_DIR, S2_AN_FILENAME))

filtering files for N0511_R101_T33DVArnicus/Sentinel-2/MSI/L2A/2024/2024-12
0
24
4
22
24
0
