In [None]:
import os
import subprocess

# --- DIRECTORIES (GENERAL) ---
# Replace these paths with your own dataset locations
input_dir = "/path/to/your/dataset/files"   # Input directory containing FASTA files
output_dir = "/path/to/your/output/folder"  # Directory to save Prodigal results

# One output subfolder per type of Prodigal output file
genbank_dir = os.path.join(output_dir, "genbank")
protein_dir = os.path.join(output_dir, "protein")
nucleotide_dir = os.path.join(output_dir, "nucleotide")

for result_dir in (genbank_dir, protein_dir, nucleotide_dir):
    os.makedirs(result_dir, exist_ok=True)

# Get all FASTA files in the input directory.
# Sorted because os.listdir() returns entries in arbitrary order; sorting keeps
# the processing order and the "file i of N" progress output reproducible.
fasta_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".fasta"))
total_files = len(fasta_files)

# Names of the input files that Prodigal did not process successfully
failed_files = []

# Process the FASTA files
for i, fasta_file in enumerate(fasta_files, 1):
    input_file = os.path.join(input_dir, fasta_file)
    file_name = os.path.splitext(fasta_file)[0]

    print(f"Processing file {i} of {total_files}: {fasta_file}")

    # Define output paths for GenBank, protein, and nucleotide files
    genbank_output = os.path.join(genbank_dir, f"{file_name}.gbk")
    protein_output = os.path.join(protein_dir, f"{file_name}.faa")
    nucleotide_output = os.path.join(nucleotide_dir, f"{file_name}.ffn")

    # Prodigal command for gene prediction (single mode by default).
    # Passed as an argument list (no shell), so paths with spaces are safe.
    command = [
        "prodigal", "-i", input_file,
        "-o", genbank_output,
        "-a", protein_output,
        "-d", nucleotide_output,
        "-f", "gbk",
        "-p", "single",  # Running in single mode
        "-m", "-q"       # Mask runs of N and suppress output
    ]

    # Execute Prodigal; a failure on one file must not stop the whole batch
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # The 'prodigal' executable itself could not be launched, so every
        # remaining file would fail the same way: report once and stop.
        print("Error: 'prodigal' executable not found. "
              "Install Prodigal and make sure it is on your PATH.")
        failed_files.extend(fasta_files[i - 1:])
        break
    except subprocess.CalledProcessError as e:
        print(f"Error processing {fasta_file}: {e}")
        failed_files.append(fasta_file)

# Report the real outcome instead of always claiming success
if not fasta_files:
    print(f"No .fasta files found in {input_dir}")
elif failed_files:
    print(f"⚠️ {len(failed_files)} of {total_files} files were not processed: "
          f"{', '.join(failed_files)}")
else:
    print("✅ All plasmids processed successfully!")

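# NOTE: Choose '-p single' or '-p meta' according to the length of the plasmids in your dataset.
#       Single mode trains on the input sequence itself, so it needs enough sequence to learn from
#       (Prodigal rejects inputs with less than 20 kb of total sequence and recommends at least 100 kb).
#       Meta mode uses pre-computed training sets and is the mode intended for metagenomes
#       and for short sequences such as small plasmids, phages, and viruses.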
