In [2]:
import os
import io
import pandas as pd
from pathlib import Path
from typing import Iterable, Tuple, Union
import requests

# ========= User configuration =========
CSV_PATH      =  Path("/Users/hezizhao/Downloads/jupython-workspace/FH2/fh2_desils.csv")  # path to the input table
SAVE_DIR      = Path("./data/psfcoadd")           # output directory
LAYER         = "ls-dr10"                    # Legacy Survey layer
ROUND_DIGITS  = 5                                  # decimal places used in file names
TIMEOUT       = 60                                 # request timeout (seconds)
DRY_RUN       = False                              # True = do not download, only print

# If the case of the table's column names is uncertain, give the lowercase
# names here; they are matched against the header case-insensitively.
RA_COL_HINT   = "ra"
DEC_COL_HINT  = "dec"


# ========= Helper: read the table and return unique (ra, dec) pairs =========
def load_unique_coords(csv_path: Path,
                       ra_col_hint: str = RA_COL_HINT,
                       dec_col_hint: str = DEC_COL_HINT) -> Iterable[Tuple[float, float]]:
    """
    Read a CSV file and return its unique (ra, dec) pairs as a list.

    Pairs are returned in order of first appearance. Column names are
    matched case-insensitively (and ignoring surrounding whitespace)
    against the given hints. Rows in which either coordinate is missing
    are dropped, since they cannot be turned into a valid request.

    Args:
        csv_path: Path of the CSV file to read.
        ra_col_hint: Name of the RA column (any case).
        dec_col_hint: Name of the DEC column (any case).

    Returns:
        A list of ``(ra, dec)`` tuples of Python floats.

    Raises:
        KeyError: If no column matches one of the hints.
        ValueError: If a coordinate value cannot be converted to float.
    """
    df = pd.read_csv(csv_path)

    # Normalized header -> actual header. str() guards against non-string
    # headers; strip() tolerates CSV headers written as "ra, dec".
    lower_map = {str(c).strip().lower(): c for c in df.columns}
    ra_key = ra_col_hint.strip().lower()
    dec_key = dec_col_hint.strip().lower()
    if ra_key not in lower_map:
        raise KeyError(f"Cannot find RA column matching '{ra_col_hint}' in {list(df.columns)}")
    if dec_key not in lower_map:
        raise KeyError(f"Cannot find DEC column matching '{dec_col_hint}' in {list(df.columns)}")

    ra_c = lower_map[ra_key]
    dec_c = lower_map[dec_key]

    # Convert to float BEFORE de-duplicating: values that differ only
    # textually (e.g. "1.0" vs "1.00" in an object column) must collapse
    # into a single coordinate.
    coords = df[[ra_c, dec_c]].astype(float)
    # drop_duplicates keeps the first occurrence, preserving table order.
    coords = coords.dropna().drop_duplicates()

    return list(zip(coords[ra_c].tolist(), coords[dec_c].tolist()))


# ========= Main download function =========
def _guess_extension(content_type: str, data: bytes) -> str:
    """Pick a file extension from the Content-Type header, else from magic bytes."""
    is_gzip = data[:2] == b"\x1f\x8b"

    # Header first. The gzip variant must be tested inside the FITS branch,
    # otherwise the plain "fits" match shadows it and it can never be chosen.
    if "fits" in content_type:
        if "gzip" in content_type or is_gzip:
            return ".fits.gz"
        return ".fits"
    if "png" in content_type:
        return ".png"
    if "jpeg" in content_type or "jpg" in content_type:
        return ".jpg"

    # Header was not conclusive: look at the leading bytes of the payload.
    if data.startswith(b"SIMPLE"):
        return ".fits"
    if data.startswith(b"\x89PNG"):
        return ".png"
    if data.startswith(b"\xff\xd8\xff"):
        return ".jpg"
    if is_gzip:
        return ".fits.gz"
    return ".dat"


def download_coadd_psf(ra: float,
                       dec: float,
                       layer: str = LAYER,
                       save_dir: Path = SAVE_DIR,
                       round_digits: int = ROUND_DIGITS,
                       timeout: Union[int, float] = TIMEOUT,
                       dry_run: bool = DRY_RUN) -> Path:
    """
    Download the Legacy Survey Viewer coadd-psf product for one position.

    The file extension is chosen from the response's Content-Type header,
    falling back to the payload's magic bytes. If a file for this position
    already exists in ``save_dir`` the download is skipped.

    Args:
        ra: Right ascension passed to the viewer as the ``ra`` query value.
        dec: Declination passed to the viewer as the ``dec`` query value.
        layer: Viewer layer name.
        save_dir: Directory the file is written to (created when needed).
        round_digits: Number of decimals used for ra/dec in the file name.
        timeout: Request timeout in seconds.
        dry_run: When True, only print the URL; nothing is downloaded and
            nothing is created on disk.

    Returns:
        The path of the saved (or already existing) file. On failure or in
        dry-run mode a placeholder path that is NOT created on disk is
        returned, with suffix ``.dryrun``, ``.error``, ``.http<status>`` or
        ``.empty``.
    """
    base_name = f"ra_{ra:.{round_digits}f}_dec_{dec:.{round_digits}f}"
    # Data is written here first and renamed once complete, so an
    # interrupted write never leaves a truncated file under a final name
    # (which later runs would skip as "already exists").
    tmp_save = save_dir / f"{base_name}.tmp"

    # URL template example (provided by the user):
    # https://www.legacysurvey.org/viewer/coadd-psf/?ra=14.8518&dec=27.0591&layer=ls-dr9-south
    url = (
        "https://www.legacysurvey.org/viewer/coadd-psf/"
        f"?ra={ra}&dec={dec}&layer={layer}"
    )

    # Skip when a finished product with any recognized extension exists.
    for ext in (".fits", ".fits.gz", ".png", ".jpg", ".jpeg", ".psf", ".dat"):
        cand = save_dir / f"{base_name}{ext}"
        if cand.exists():
            print(f"[skip] exists: {cand}")
            return cand

    print(f"GET {url}")
    if dry_run:
        return tmp_save.with_suffix(".dryrun")

    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"[error] request failed for RA={ra}, DEC={dec}: {e}")
        return tmp_save.with_suffix(".error")

    if r.status_code != 200:
        print(f"[warn] HTTP {r.status_code} for RA={ra}, DEC={dec}")
        return tmp_save.with_suffix(f".http{r.status_code}")

    content_type = r.headers.get("Content-Type", "").lower()
    data = r.content

    if not data:
        # Saving a zero-byte file would make every later run skip this target.
        print(f"[warn] empty response for RA={ra}, DEC={dec}")
        return tmp_save.with_suffix(".empty")

    final_path = save_dir / f"{base_name}{_guess_extension(content_type, data)}"

    # Create the directory only when something is actually written, so
    # dry runs and failed requests leave the file system untouched.
    save_dir.mkdir(parents=True, exist_ok=True)
    try:
        with open(tmp_save, "wb") as f:
            f.write(data)
        tmp_save.replace(final_path)
    except OSError:
        # Do not leave a partial temp file behind.
        if tmp_save.exists():
            tmp_save.unlink()
        raise

    print(f"[ok] saved: {final_path}  ({len(data)} bytes; type={content_type})")
    return final_path


# ========= Batch download entry point =========
def batch_download_coadd_psf(csv_path: Path = CSV_PATH,
                             layer: str = LAYER,
                             dry_run: bool = DRY_RUN):
    """
    Download the coadd-psf product for every unique position in a table.

    Prints the table's row count, the number of unique coordinates and the
    layer, then calls ``download_coadd_psf`` once per coordinate, printing
    a progress line before each call.

    Args:
        csv_path: CSV table holding the coordinates.
        layer: Viewer layer name forwarded to ``download_coadd_psf``.
        dry_run: Forwarded to ``download_coadd_psf``.
    """
    targets = load_unique_coords(csv_path)
    n_rows = len(pd.read_csv(csv_path))
    n_targets = len(targets)

    print(f"Total rows in table: {n_rows}")
    print(f"Unique coords: {n_targets}")
    print(f"Layer: {layer}\n")

    for idx, (ra, dec) in enumerate(targets, start=1):
        print(f"[{idx}/{n_targets}] RA={ra} DEC={dec}")
        download_coadd_psf(ra, dec, layer=layer, dry_run=dry_run)

    print("\nDone.")


# To run right away (test with a dry run first), uncomment:
# batch_download_coadd_psf(dry_run=True)   # trial run first
batch_download_coadd_psf(dry_run=False)  # real download

Total rows in table: 68
Unique coords: 30
Layer: ls-dr10

[1/30] RA=179.093353 DEC=21.987776
GET https://www.legacysurvey.org/viewer/coadd-psf/?ra=179.093353&dec=21.987776&layer=ls-dr10
[ok] saved: data/psfcoadd/ra_179.09335_dec_21.98778.fits  (80640 bytes; type=image/fits)
[2/30] RA=244.706042 DEC=6.476774
GET https://www.legacysurvey.org/viewer/coadd-psf/?ra=244.706042&dec=6.476774&layer=ls-dr10
[ok] saved: data/psfcoadd/ra_244.70604_dec_6.47677.fits  (80640 bytes; type=image/fits)
[3/30] RA=100.652938 DEC=56.286423
GET https://www.legacysurvey.org/viewer/coadd-psf/?ra=100.652938&dec=56.286423&layer=ls-dr10
[ok] saved: data/psfcoadd/ra_100.65294_dec_56.28642.fits  (37440 bytes; type=image/fits)
[4/30] RA=4.946581 DEC=1.500982
GET https://www.legacysurvey.org/viewer/coadd-psf/?ra=4.946581&dec=1.500982&layer=ls-dr10
[ok] saved: data/psfcoadd/ra_4.94658_dec_1.50098.fits  (80640 bytes; type=image/fits)
[5/30] RA=65.581298 DEC=-4.79716
GET https://www.legacysurvey.org/viewer/coadd-psf/?ra