<a href="https://colab.research.google.com/github/AhmedAboushanab/Ph.D./blob/main/split_fasta_from_drive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Biopython, which provides the Bio.SeqIO module imported in the next cell.
!pip install biopython

In [9]:
from Bio import SeqIO
import os
import re

def sanitize_filename(name):
    """Return *name* with every filename-unsafe character replaced by ``_``.

    Regex word characters, hyphens, underscores and dots are kept as they
    are; each other character (spaces, slashes, pipes, ...) is swapped for
    a single underscore, so the result has the same length as the input.
    """
    unsafe_chars = re.compile(r'[^\w\-_.]')
    return unsafe_chars.sub('_', name)

def split_all_fastas_in_folder(input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    for fasta_file in os.listdir(input_folder):
        if fasta_file.endswith(".fasta") or fasta_file.endswith(".fa"):
            input_path = os.path.join(input_folder, fasta_file)
            base_name = os.path.splitext(fasta_file)[0]
            output_subfolder = os.path.join(output_folder, base_name)
            os.makedirs(output_subfolder, exist_ok=True)

            print(f"\n🧬 Splitting sequences from: {fasta_file}")

            for record in SeqIO.parse(input_path, "fasta"):
                full_header = record.description.strip()
                safe_name = sanitize_filename(full_header)
                output_path = os.path.join(output_subfolder, f"{safe_name}.fasta")

                with open(output_path, "w") as f:
                    SeqIO.write(record, f, "fasta")

                print(f"  ✅ Saved: {output_path}")

# ✅ Example usage in Google Colab:
# from google.colab import drive
# drive.mount('/content/drive')

# split_all_fastas_in_folder(
#     input_folder="/content/drive/MyDrive/input_fastas",
#     output_folder="/content/drive/MyDrive/split_fastas"
# )


In [None]:
# Split every FASTA file in the NRPS output folder into per-sequence files.
# NOTE(review): the paths assume Google Drive is already mounted at
# /content/drive (see the commented-out mount example in the previous cell).
split_all_fastas_in_folder(
    input_folder="/content/drive/MyDrive/NRPS-Output",
    output_folder="/content/drive/MyDrive/NRPS-Single-Sequences",
)