In [1]:
from tqdm import tqdm
import pathlib
from datetime import datetime, timedelta
import requests
import s3fs


In [2]:
# --- Storage locations -------------------------------------------------------
# Remote root that the download cells list and fetch from (an "s3://" URL).
BASE_URL = "s3://noaa-goes16"
# Local root directory under which downloaded files are written.
BASE_DIR = '../data/goes/goes16'

# --- Time window -------------------------------------------------------------
# The download loop steps through this window one minute at a time and treats
# END_DATE as exclusive, i.e. it covers [START_DATE, END_DATE).
_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'
START_DATE = datetime.strptime('2025-01-01 13:00', _TIMESTAMP_FORMAT)
END_DATE = datetime.strptime('2025-01-01 13:10', _TIMESTAMP_FORMAT)

In [3]:
def download_file(
    url: str,
    dest_path: pathlib.Path,
    fs: "s3fs.S3FileSystem | None" = None,
) -> pathlib.Path:
    """Download one remote object into ``dest_path``, keeping its file name.

    Args:
        url: Remote path of the object; the text after the last ``/`` is used
            as the local file name.
        dest_path: Existing local directory that receives the file.
        fs: Optional filesystem object exposing ``get(remote, local)``. When
            omitted, an anonymous ``s3fs.S3FileSystem`` is created. Passing one
            in lets callers reuse a single client across many downloads.

    Returns:
        The local path the object was (or would have been) written to. The
        function is best-effort: on failure it prints a message instead of
        raising, removes any partially written file, and still returns the
        path, so ``returned_path.exists()`` tells the caller whether the
        download succeeded.
    """
    filename = url.split('/')[-1]
    dest_file = dest_path / filename
    try:
        if fs is None:
            fs = s3fs.S3FileSystem(anon=True)
        fs.get(url, str(dest_file))
    except Exception as e:
        print(f"Failed to download {filename}: {e}")
        # A transfer that dies midway can leave a truncated file behind;
        # delete it so a failed download is never mistaken for a good one.
        try:
            dest_file.unlink(missing_ok=True)
        except OSError:
            pass
    return dest_file

In [5]:
# Build the minute-resolution timestamps covering [START_DATE, END_DATE).
datetimes = []
current_datetime = START_DATE
while current_datetime < END_DATE:
    datetimes.append(current_datetime)
    current_datetime += timedelta(minutes=1)
print(f"Total timestamps to process: {len(datetimes)}")

# One anonymous client serves every listing; it does not depend on the
# timestamp, so there is no reason to rebuild it on each iteration.
fs = s3fs.S3FileSystem(anon=True)

progress_bar = tqdm(datetimes, desc="Processing timestamps")
for dt in progress_bar:
    year = dt.strftime('%Y')
    day_of_year = dt.strftime('%j')
    hour = dt.strftime('%H')
    minute = dt.strftime('%M')

    # Objects are grouped remotely by year / day-of-year / hour.
    url = f"{BASE_URL}/GLM-L2-LCFA/{year}/{day_of_year}/{hour}"
    try:
        listing = fs.ls(url)
    except FileNotFoundError:
        # A missing hour should not abort the remaining timestamps.
        # tqdm.write keeps the message from garbling the progress bar.
        tqdm.write(f"No listing found at {url}; skipping {dt:%Y-%m-%d %H:%M}")
        continue

    # Keep only the objects whose file name starts with this exact minute.
    prefix = f"OR_GLM-L2-LCFA_G16_s{year}{day_of_year}{hour}{minute}"
    files = [path for path in listing if path.split('/')[-1].startswith(prefix)]

    if len(files) > 0:
        # Derive the folder from the timestamp being processed (dt), not from
        # the loop-builder variable current_datetime, which equals END_DATE by
        # now and would misfile data whenever the window spans several days.
        date_dir = pathlib.Path(dt.strftime('%Y/%m/%d'))
        product = pathlib.Path('GLM')
        # The directory is created only when there is something to store, so
        # timestamps without data leave no empty folders behind.
        dest_dir = pathlib.Path(BASE_DIR) / product / date_dir
        dest_dir.mkdir(parents=True, exist_ok=True)
        for file in files:
            download_file(file, dest_dir)


Total timestamps to process: 10


Processing timestamps: 100%|██████████| 10/10 [00:17<00:00,  1.73s/it]
