In [20]:
from Bio import Entrez, SeqIO
import os
import urllib.request

# NCBI requires a contact email on Entrez requests. Set the NCBI_EMAIL
# environment variable to use your own address without editing this cell;
# when it is unset or empty, the address below is used.
Entrez.email = os.environ.get("NCBI_EMAIL") or "edwardbird@ksu.edu"

# Function to download assembly
def download_assembly(accession, output_dir="assemblies"):
    """Download the genomic FASTA (``*_genomic.fna.gz``) for one assembly.

    The accession is searched in the Entrez ``assembly`` database, the summary
    of the first hit is fetched, and the FASTA file is downloaded from the FTP
    directory named in that summary (GenBank path first, RefSeq path as a
    fallback).

    Errors are reported with ``print`` instead of being raised, so a single
    bad accession does not abort a loop over many accessions.

    Args:
        accession: Search term sent to ``Entrez.esearch`` (db="assembly").
        output_dir: Directory the file is saved into; created if missing.

    Returns:
        The path of the downloaded file, or ``None`` when nothing was found
        or the download failed.
    """
    partial_file = None  # temp download path; set only while a download is in flight
    try:
        # Search for the assembly using the accession number
        handle = Entrez.esearch(db="assembly", term=accession)
        try:
            record = Entrez.read(handle)
        finally:
            # Close the handle even if parsing fails.
            handle.close()

        # Check if any results found
        if not record['IdList']:
            print(f"No assembly found for {accession}")
            return None

        # Fetch the assembly details using the ID of the first hit
        assembly_id = record['IdList'][0]
        handle = Entrez.esummary(db="assembly", id=assembly_id, report="full")
        try:
            summary = Entrez.read(handle)
        finally:
            handle.close()

        # Prefer the GenBank directory; fall back to RefSeq when it is empty.
        doc = summary['DocumentSummarySet']['DocumentSummary'][0]
        ftp_path = doc.get('FtpPath_GenBank') or doc.get('FtpPath_RefSeq') or ""
        # A trailing slash would make basename() return "" below.
        ftp_path = ftp_path.rstrip("/")
        if not ftp_path:
            print(f"No FTP link found for {accession}")
            return None

        # The FASTA file is named after the last component of the directory.
        file_name = os.path.basename(ftp_path) + "_genomic.fna.gz"
        ftp_link = ftp_path + "/" + file_name

        # Fetch over HTTPS instead of FTP. Only rewrite a real ftp:// prefix so
        # that a URL with any other scheme is passed through unmodified.
        if ftp_link.startswith("ftp://"):
            ftp_link = "https://" + ftp_link[len("ftp://"):]

        # Download to a temporary name and rename on success, so an interrupted
        # transfer never leaves a truncated file under the final name.
        os.makedirs(output_dir, exist_ok=True)
        output_file = os.path.join(output_dir, file_name)
        partial_file = output_file + ".part"
        print(f"Downloading {file_name} ...")
        print(ftp_link)
        urllib.request.urlretrieve(ftp_link, partial_file)
        os.replace(partial_file, output_file)
        partial_file = None
        print(f"Downloaded to {output_file}")
        return output_file

    except Exception as e:
        # Deliberately broad: report the problem and let the caller continue.
        print(f"Error downloading {accession}: {e}")
        return None

    finally:
        # Remove a leftover partial download, if any.
        if partial_file is not None and os.path.exists(partial_file):
            try:
                os.remove(partial_file)
            except OSError:
                pass

In [22]:
# Assemblies to fetch; append further identifiers to this list as needed.
assembly_list = [
    "PDT002066735.1",
    "PDT002264259.1",
]

# Fetch each listed assembly in turn.
for assembly in assembly_list:
    download_assembly(assembly)

Downloading GCA_036249645.1_PDT002066735.1_genomic.fna.gz ...
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/036/249/645/GCA_036249645.1_PDT002066735.1/GCA_036249645.1_PDT002066735.1_genomic.fna.gz
Downloaded to assemblies\GCA_036249645.1_PDT002066735.1_genomic.fna.gz
Downloading GCA_040868165.1_PDT002264259.1_genomic.fna.gz ...
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/040/868/165/GCA_040868165.1_PDT002264259.1/GCA_040868165.1_PDT002264259.1_genomic.fna.gz
Downloaded to assemblies\GCA_040868165.1_PDT002264259.1_genomic.fna.gz
