In [None]:
import requests
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse

def download_image(image_info, save_dir, delay=1):
    """Download a single image into ``save_dir`` (run as a thread-pool task).

    :param image_info: ``(image_url, filename)`` pair; ``filename`` carries no
        extension, it is taken from the URL path (``.jpg`` when the URL has none).
    :param save_dir: directory the image is written into.
    :param delay: seconds to sleep after a successful download.
    :return: a status string: ``"已存在: ..."`` when the target file is already
        present, ``"下载成功: ..."`` on success, or ``"失败: ... (...)"`` when
        anything inside the download attempt raised.
    """
    image_url, filename = image_info
    # Bound before the try block so the failure message can always be
    # formatted, even when URL parsing itself raises.
    final_filename = filename
    temp_path = None
    try:
        # Derive the file extension from the URL path.
        parsed = urlparse(image_url)
        ext = os.path.splitext(parsed.path)[1] or ".jpg"

        final_filename = f"{filename}{ext}"
        save_path = os.path.join(save_dir, final_filename)

        # Skip files that were already downloaded.
        if os.path.exists(save_path):
            return f"已存在: {final_filename}"

        response = requests.get(image_url, timeout=10)
        response.raise_for_status()

        # Write under a temporary name and rename afterwards: an interrupted
        # write must not leave a truncated file that the existence check
        # above would treat as a finished download on the next run.
        temp_path = f"{save_path}.part"
        with open(temp_path, "wb") as f:
            f.write(response.content)
        os.replace(temp_path, save_path)
        temp_path = None

        time.sleep(delay)  # throttle: pause after each completed download
        return f"下载成功: {final_filename}"
    except Exception as e:
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; the failure is still reported below.
                pass
        return f"失败: {final_filename} ({str(e)})"

def fetch_observation_data(taxon_id, page, per_page, headers, timeout=10):
    """Fetch one page of observations from the iNaturalist API.

    Requests ``quality_grade=research`` observations for ``taxon_id``,
    ordered by ``created_at`` descending.

    :param taxon_id: taxon identifier sent as the ``taxon_id`` query parameter.
    :param page: page number to request.
    :param per_page: number of results per page.
    :param headers: HTTP headers sent with the request.
    :param timeout: seconds to wait for the server before giving up; without
        it a stalled connection would block the caller indefinitely.
    :return: the decoded JSON response body.
    :raises requests.exceptions.RequestException: on connection problems,
        timeouts, or a non-success HTTP status.
    """
    url = "https://api.inaturalist.org/v1/observations"
    params = {
        "taxon_id": taxon_id,
        "quality_grade": "research",
        "per_page": per_page,
        "page": page,
        "order": "desc",
        "order_by": "created_at"
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

# The API answers 403 once page * per_page goes past this many results
# (seen with page 51 at per_page=200), so pagination stops at this window.
_MAX_PAGINATED_RESULTS = 10000


def _iter_photo_jobs(observations, taxon_id):
    """Yield ``(image_url, filename)`` for each photo that has a URL."""
    for obs in observations:
        for photo in obs.get("photos") or []:
            thumb_url = photo.get("url")
            if not thumb_url:
                # A photo without a URL cannot be downloaded; skip it rather
                # than abort the whole run.
                continue
            image_url = thumb_url.replace("square", "original")
            filename = f"taxon_{taxon_id}_obs_{obs['id']}_photo_{photo['id']}"
            yield image_url, filename


def download_inaturalist_images(taxon_ids, save_dir="images", 
                               per_page=200, max_workers=5, 
                               api_delay=1, download_delay=0.5):
    """
    Download photos of research-grade observations using a thread pool.

    Observation pages are fetched sequentially per taxon; every photo found
    is submitted to the pool as a download task. Status lines are printed
    once all pages have been requested.

    :param taxon_ids: iterable of taxon ids to download photos for
    :param save_dir: target directory, created when missing
    :param per_page: observations requested per API page
    :param max_workers: maximum number of concurrent download threads
    :param api_delay: pause between API page requests (seconds)
    :param download_delay: pause after each image download (seconds)
    """
    os.makedirs(save_dir, exist_ok=True)

    headers = {
        "User-Agent": "iNatThreadedDownloader/1.0 (+https://github.com/yourusername)",
        "Accept": "application/json"
    }

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for taxon_id in taxon_ids:
            page = 1
            while True:
                # Stop before requesting a page outside the API's pagination
                # window instead of provoking a 403 response.
                if page * per_page > _MAX_PAGINATED_RESULTS:
                    print(
                        f"Taxon {taxon_id} 已达到 API 分页上限"
                        f"（前 {_MAX_PAGINATED_RESULTS} 条），停止翻页"
                    )
                    break

                try:
                    data = fetch_observation_data(taxon_id, page, per_page, headers)
                except requests.exceptions.RequestException as e:
                    print(f"Taxon {taxon_id} 第 {page} 页失败: {str(e)}")
                    break

                results = data.get("results")
                if not results:
                    break

                # Hand every photo of this page to the thread pool.
                for job in _iter_photo_jobs(results, taxon_id):
                    futures.append(
                        executor.submit(download_image, job, save_dir, download_delay)
                    )

                page += 1
                time.sleep(api_delay)  # throttle API requests

        # Report the outcome of each download as it completes.
        for future in as_completed(futures):
            try:
                result = future.result()
                print(result)
            except Exception as e:
                print(f"线程错误: {str(e)}")

if __name__ == "__main__":
    # Example run: 4 worker threads, 1 second between API requests,
    # 0.3 seconds after each image download.
    example_settings = {
        "taxon_ids": [48715],
        "save_dir": "inat_images2",
        "per_page": 200,
        "max_workers": 4,
        "api_delay": 1,
        "download_delay": 0.3,
    }
    download_inaturalist_images(**example_settings)


Taxon 48715 第 51 页失败: 403 Client Error: Forbidden for url: https://api.inaturalist.org/v1/observations?taxon_id=48715&quality_grade=research&per_page=200&page=51&order=desc&order_by=created_at
下载成功: taxon_48715_obs_270065138_photo_485643126.jpg
下载成功: taxon_48715_obs_270364225_photo_486220647.jpeg
下载成功: taxon_48715_obs_270125237_photo_485760642.jpg
下载成功: taxon_48715_obs_270161305_photo_485831175.jpg
下载成功: taxon_48715_obs_270065138_photo_485643092.jpg
下载成功: taxon_48715_obs_270089035_photo_485688774.jpg
下载成功: taxon_48715_obs_270089035_photo_485688749.jpg
下载成功: taxon_48715_obs_270364225_photo_486220599.jpeg
下载成功: taxon_48715_obs_270125237_photo_485760608.jpg
下载成功: taxon_48715_obs_270177385_photo_485860245.jpg
下载成功: taxon_48715_obs_270065138_photo_485643044.jpg
下载成功: taxon_48715_obs_270089035_photo_485688771.jpg
下载成功: taxon_48715_obs_270365957_photo_486224089.jpeg
下载成功: taxon_48715_obs_270125237_photo_485760571.jpg
下载成功: taxon_48715_obs_270177385_photo_485860222.jpg
下载成功: taxon_48715_obs_27