In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
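batch_hsc_cutouts_retry_v3.py
  · Skip FITS files that already exist
  · Automatically retry failed downloads
  · Each download attempt times out after 15 s (DL_TIMEOUT); after a failure
    the script waits RETRY_WAIT seconds before retrying
  · Uses --max-connections 1 in place of --threads, which the old script does not support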
"""

from __future__ import annotations
import os, shlex, subprocess, time
from pathlib import Path
from typing import List
import shutil

import pandas as pd

# ------------------ Credentials ------------------
# Taken from the environment; "XXX" / "YYY" are the fallbacks used when the
# variables are unset.
HSC_USR = os.getenv("HSC_USR", "XXX")
HSC_PWD = os.getenv("HSC_PWD", "YYY")

# ------------------ Paths and constants ------------------
CSV_PATH   = "/path/to/download/csv"
SCRIPT     = Path("./data-access-tools/pdr3/downloadCutout/downloadCutout.py")
OUT_DIR    = Path("/path_to_save")
# parents=True: create missing intermediate directories instead of failing.
OUT_DIR.mkdir(parents=True, exist_ok=True)

FILTERS: List[str] = ["g", "r", "i"]
RERUN       = "pdr3_wide"
FOV_AS      = 3.0               # cutout side length (presumably arcseconds, per the name)
SIZE_DEG    = FOV_AS / 3600.0   # FOV_AS divided by 3600; passed as --sw / --sh

MAX_RETRY   = 2          # total number of download attempts per file
RETRY_WAIT  = 1          # seconds to sleep between attempts
DL_TIMEOUT  = 15         # per-attempt timeout in seconds

# ------------------ Load targets ------------------
df = pd.read_csv(CSV_PATH, dtype={"TARGETID": "int64"})
# Keep only rows whose image source is HSC, restricted to the ID and the two
# coordinate columns, and drop rows where any of those three values is missing.
targets = (
    df[df["image source"] == "HSC"][["TARGETID", "TARGET_RA", "TARGET_DEC"]]
    .dropna()
    .reset_index(drop=True)
)
print(f"目标共 {len(targets)} 个，每目标需下载 {len(FILTERS)} 张")



目标共 285 个，每目标需下载 3 张


In [None]:
# ------------------ Main loop ------------------
# For every target and every filter, run the cutout script in a subprocess.
# Files that already exist (and are non-empty) are skipped; a failed attempt is
# retried until MAX_RETRY attempts have been made in total.
# idx is the 1-based position of the target and is used only in log messages.
for idx, row in enumerate(targets.itertuples(index=True), start=1):
    tid = int(row.TARGETID)
    ra, dec = row.TARGET_RA, row.TARGET_DEC
    for flt in FILTERS:
        prefix   = OUT_DIR / f"hsc{tid}_{flt}"
        fitsfile = prefix.with_suffix(".fits")

        # -- Skip files that are already present --
        if fitsfile.exists() and fitsfile.stat().st_size > 0:
            print(f"[{idx:3d}] {fitsfile.name} 已存在 → 跳过")
            continue

        # Build argv directly as a list. Formatting one string and re-parsing
        # it with shlex.split() mis-splits (or raises ValueError on) any path
        # or password that contains whitespace, quotes or backslashes.
        # NOTE(review): the password is passed on the command line, where other
        # local users can see it in the process list; confirm that the script
        # really accepts --password and whether it offers a safer alternative.
        cmd_args = [
            "python", str(SCRIPT),
            "--ra", f"{ra:.6f}",
            "--dec", f"{dec:.6f}",
            "--sw", f"{SIZE_DEG:.7f}",
            "--sh", f"{SIZE_DEG:.7f}",
            "--filter", flt,
            "--rerun", RERUN,
            "--max-connections", "1",
            "--image", "true",
            "--mask", "false",
            "--variance", "false",
            "--type", "coadd",
            "--name", str(prefix),
            "--user", HSC_USR,
            "--password", HSC_PWD,
        ]

        # -- Automatic retry --
        for attempt in range(1, MAX_RETRY + 1):
            try:
                subprocess.run(cmd_args, check=True, timeout=DL_TIMEOUT)
            except subprocess.TimeoutExpired:
                err_msg = "timeout"
            except subprocess.CalledProcessError as e:
                err_msg = f"exit {e.returncode}"
            else:
                print(f"[{idx:3d}] {fitsfile.name} 下载完成 ✓")
                break

            # A failed or killed attempt may leave a partial file behind, and
            # the skip check above would treat any non-empty file as finished
            # on the next run. No valid file existed before this attempt
            # (otherwise it would have been skipped), so removing it is safe.
            try:
                fitsfile.unlink()
            except FileNotFoundError:
                pass

            if attempt < MAX_RETRY:
                print(
                    f"[{idx:3d}] {fitsfile.name} 第 {attempt} 次失败（{err_msg}），{RETRY_WAIT}s 后重试…"
                )
                time.sleep(RETRY_WAIT)
            else:
                print(
                    f"[{idx:3d}] {fitsfile.name} 重试 {MAX_RETRY} 次仍失败 ✗ ({err_msg})"
                )
