In [2]:
import ftplib
import os

def get_blastp_database_size(ftp_server, ftp_path, db_prefix='nr'):
    """Report the size of the database files found in an FTP directory.

    Logs in anonymously to *ftp_server*, changes to *ftp_path*, reads the
    machine-readable directory listing (MLSD) and prints the size of every
    entry whose name starts with *db_prefix*, followed by the grand total.

    Args:
        ftp_server: Host name of the FTP server.
        ftp_path: Remote directory that holds the database files.
        db_prefix: Only entries whose name starts with this string are
            counted. Defaults to ``'nr'``.

    Returns:
        A dict mapping each matching file name to its size in bytes. The
        dict is empty when the server refuses the listing request.
    """
    # The context manager closes the connection on every exit path,
    # including a failing login() or cwd().
    with ftplib.FTP(ftp_server) as ftp:
        ftp.login()
        ftp.cwd(ftp_path)

        try:
            # mlsd() is a generator; list() forces the transfer to happen
            # inside the try block so a refusal is caught here.
            entries = list(ftp.mlsd())
        except ftplib.error_perm as e:
            print(f"Could not retrieve directory listing: {e}")
            return {}

    file_sizes = {}
    for name, facts in entries:
        if not name.startswith(db_prefix):
            continue
        try:
            file_sizes[name] = int(facts['size'])
        except (KeyError, ValueError):
            # Entries without a usable size fact are left out of the report.
            continue

    total_size = sum(file_sizes.values())

    # Print the sizes
    for file, size in file_sizes.items():
        print(f'{file}: {size / (1024 ** 2):.2f} MB')
    print(f'Total size: {total_size / (1024 ** 3):.2f} GB')

    return file_sizes

# Parameters
ftp_server = 'ftp.ncbi.nlm.nih.gov'
ftp_path = '/blast/db/'
# Local directory for downloads; leave empty to be asked interactively.
download_path = ''

# Call the function to get the database size
get_blastp_database_size(ftp_server, ftp_path)

# Prompt user for download
download_prompt = input("Do you want to download the database files? (y/n): ").strip().lower()

if download_prompt == 'y':
    if not download_path:
        download_path = input("Enter the directory to save the files (default: './blast_db'): ").strip()
        download_path = download_path or './blast_db'

    os.makedirs(download_path, exist_ok=True)  # Create directory if it doesn't exist
    print(f"Downloading files to {download_path}...")

    # The size report closes its own connection before returning, so the
    # download runs on a fresh connection and re-lists the directory itself
    # instead of relying on names that only exist inside that function.
    with ftplib.FTP(ftp_server) as ftp:
        ftp.login()
        ftp.cwd(ftp_path)

        try:
            remote_files = [
                name
                for name, facts in ftp.mlsd()
                if 'size' in facts and name.startswith('nr')
            ]
        except ftplib.error_perm as e:
            print(f"Could not retrieve directory listing: {e}")
            remote_files = []

        for file in remote_files:
            local_file = os.path.join(download_path, file)
            print(f"Downloading {file} to {local_file}...")
            with open(local_file, 'wb') as f:
                ftp.retrbinary(f"RETR {file}", f.write)
            print(f"{file} downloaded successfully.")
else:
    print("Download skipped.")



nr.000.tar.gz: 36208.40 MB
nr.000.tar.gz.md5: 0.00 MB
nr.001.tar.gz: 2770.25 MB
nr.001.tar.gz.md5: 0.00 MB
nr.002.tar.gz: 2503.15 MB
nr.002.tar.gz.md5: 0.00 MB
nr.003.tar.gz: 2165.59 MB
nr.003.tar.gz.md5: 0.00 MB
nr.004.tar.gz: 2543.58 MB
nr.004.tar.gz.md5: 0.00 MB
nr.005.tar.gz: 2440.53 MB
nr.005.tar.gz.md5: 0.00 MB
nr.006.tar.gz: 2358.61 MB
nr.006.tar.gz.md5: 0.00 MB
nr.007.tar.gz: 2984.81 MB
nr.007.tar.gz.md5: 0.00 MB
nr.008.tar.gz: 30.56 MB
nr.008.tar.gz.md5: 0.00 MB
nr.009.tar.gz: 2550.57 MB
nr.009.tar.gz.md5: 0.00 MB
nr.010.tar.gz: 2985.39 MB
nr.010.tar.gz.md5: 0.00 MB
nr.011.tar.gz: 500.64 MB
nr.011.tar.gz.md5: 0.00 MB
nr.012.tar.gz: 2516.98 MB
nr.012.tar.gz.md5: 0.00 MB
nr.013.tar.gz: 2984.80 MB
nr.013.tar.gz.md5: 0.00 MB
nr.014.tar.gz: 428.30 MB
nr.014.tar.gz.md5: 0.00 MB
nr.015.tar.gz: 2589.09 MB
nr.015.tar.gz.md5: 0.00 MB
nr.016.tar.gz: 2713.99 MB
nr.016.tar.gz.md5: 0.00 MB
nr.017.tar.gz: 2984.50 MB
nr.017.tar.gz.md5: 0.00 MB
nr.018.tar.gz: 584.11 MB
nr.018.tar.gz.md5: 0.00 