In [1]:
import os
import time
import pandas as pd
from urllib.parse import urlparse
from ftplib import FTP

def download_file(ftp, remote_filepath, local_filepath):
    """Fetch one remote file over FTP and write it to a local path.

    Args:
        ftp: Object exposing ``retrbinary(command, callback)``; the callers
            in this file pass a logged-in ``ftplib.FTP`` instance.
        remote_filepath: Path of the file on the server, sent in a ``RETR``
            command.
        local_filepath: Destination path; opened in ``'wb'`` mode, so any
            existing file is truncated before the transfer starts.
    """
    with open(local_filepath, 'wb') as sink:
        retr_command = 'RETR {}'.format(remote_filepath)
        # Each received chunk is handed straight to the file's write method.
        ftp.retrbinary(retr_command, sink.write)

def find_gpff_links(ftp, directory):
    """Return the directory entries whose names end in ``protein.gpff.gz``.

    Args:
        ftp: Object exposing ``nlst(directory)``; the callers in this file
            pass a logged-in ``ftplib.FTP`` instance.
        directory: Remote directory to list.

    Returns:
        A list of the matching entries, exactly as ``nlst`` reported them
        and in the same order.
    """
    wanted_suffix = 'protein.gpff.gz'
    matches = []
    for entry in ftp.nlst(directory):
        if entry.endswith(wanted_suffix):
            matches.append(entry)
    return matches

def connect_and_login(url_parts):
    """Open an FTP connection to the host named in a parsed URL and log in.

    The host, port and credentials are taken from the individual URL
    components rather than from ``netloc`` as a whole, so URLs of the form
    ``ftp://user:password@host:2121/path`` work. When the URL carries no
    credentials the login is anonymous, and when it carries no port the
    ftplib default port is used.

    Args:
        url_parts: Result of ``urllib.parse.urlparse`` (anything exposing
            ``hostname``, ``port``, ``username`` and ``password``).

    Returns:
        A connected, logged-in ``ftplib.FTP`` instance. The caller is
        responsible for closing it.

    Raises:
        ValueError: If the URL has no host component (for example, when the
            scheme is missing and the whole string was parsed as a path).
        ftplib.all_errors: Whatever ``connect``/``login`` raise on failure.
    """
    host = url_parts.hostname
    if not host:
        raise ValueError(f'URL has no host component: {url_parts!r}')

    ftp = FTP()
    # A port of 0 tells FTP.connect to keep its default port.
    ftp.connect(host, url_parts.port or 0)
    try:
        # Empty user/password make ftplib fall back to an anonymous login.
        ftp.login(url_parts.username or '', url_parts.password or '')
    except Exception:
        # Do not leak the open socket when authentication fails.
        ftp.close()
        raise
    return ftp

def download_with_retry(ftp, gpff_link, filename, max_retries=3, delay=5):
    """Download a file, retrying on failure, and report whether it succeeded.

    Every attempt reuses the same ``ftp`` object; no reconnect is performed
    between attempts.

    Args:
        ftp: Connection object passed through to ``download_file``.
        gpff_link: Remote path of the file to fetch.
        filename: Local path to write the file to.
        max_retries: Maximum number of attempts. With a value of 0 or less
            nothing is attempted and ``False`` is returned.
        delay: Seconds to sleep between a failed attempt and the next one.

    Returns:
        ``True`` if an attempt completed without raising, ``False`` if every
        attempt failed.
    """
    for attempt in range(max_retries):
        try:
            download_file(ftp, gpff_link, filename)
        except Exception as e:  # best effort: report, then retry or give up
            print(f'下载失败：{filename}，错误：{e}')
            if attempt < max_retries - 1:
                print(f'尝试重新下载，等待{delay}秒...')
                time.sleep(delay)
            else:
                print(f'放弃下载：{filename}')
                # The failed attempt opened the target in 'wb' mode, so what
                # is on disk is an empty or truncated file; remove it so it
                # cannot be mistaken for a complete download.
                try:
                    os.remove(filename)
                except OSError:
                    pass
        else:
            # Kept outside the try so a problem while printing is not
            # treated as a failed download.
            print(f'成功下载文件：{filename}')
            return True
    return False

# Read the CSV file; its 'links' column holds one FTP directory URL per row.
df = pd.read_csv('links.csv')

# Visit every link in the table. Rows with a missing link are skipped,
# because urlparse cannot handle the NaN that pandas stores for empty cells.
for url in df['links'].dropna():
    url_parts = urlparse(url)

    # Connect to the FTP server. Using the connection as a context manager
    # guarantees it is closed even if listing or downloading raises, and
    # tolerates a connection that is already dead at exit.
    with connect_and_login(url_parts) as ftp:
        # Look for the target files in the directory the URL points to.
        gpff_links = find_gpff_links(ftp, url_parts.path)

        # Download each target file into the current working directory.
        for gpff_link in gpff_links:
            filename = os.path.basename(gpff_link)
            download_with_retry(ftp, gpff_link, filename)
            time.sleep(1)  # pause between consecutive downloads


成功下载文件：GCF_000155695.1_ASM15569v1_protein.gpff.gz
成功下载文件：GCF_000171235.2_ASM17123v2_protein.gpff.gz
成功下载文件：GCF_000172555.1_ASM17255v1_protein.gpff.gz
成功下载文件：GCF_000173075.1_ASM17307v1_protein.gpff.gz
成功下载文件：GCF_000242935.2_ASM24293v3_protein.gpff.gz
成功下载文件：GCF_000378105.1_ASM37810v1_protein.gpff.gz
成功下载文件：GCF_000379365.1_ASM37936v1_protein.gpff.gz
成功下载文件：GCF_000436395.1_MGS154_protein.gpff.gz
成功下载文件：GCF_000526255.1_ASM52625v1_protein.gpff.gz
成功下载文件：GCF_000739615.1_ASM73961v1_protein.gpff.gz
成功下载文件：GCF_000953475.1_MfumSolVChr1_protein.gpff.gz
成功下载文件：GCF_001318295.1_ASM131829v1_protein.gpff.gz
成功下载文件：GCF_001578645.1_ASM157864v1_protein.gpff.gz
成功下载文件：GCF_001580015.1_ASM158001v1_protein.gpff.gz
成功下载文件：GCF_001580045.1_ASM158004v1_protein.gpff.gz
成功下载文件：GCF_001613545.1_ASM161354v1_protein.gpff.gz
成功下载文件：GCF_001647615.1_ASM164761v1_protein.gpff.gz
成功下载文件：GCF_001650175.1_ASM165017v1_protein.gpff.gz
成功下载文件：GCF_001683795.1_ASM168379v1_protein.gpff.gz
成功下载文件：GCF_001746835.1_ASM174683v1_protein.g