In [None]:
from Bio import Entrez, SeqIO

def download(email, acc_list, out_prefix):
    """Fetch nucleotide records through Entrez and save each one as a FASTA file.

    Args:
        email: Contact address assigned to ``Entrez.email`` before any request.
        acc_list: Iterable of accession identifiers to fetch, one request each.
        out_prefix: String prepended to every output file name; the file for
            an accession ``acc`` is written to ``f"{out_prefix}{acc}.fasta"``.

    Returns:
        None. One file is written and one progress line is printed per
        accession.
    """
    Entrez.email = email
    for acc in acc_list:
        handle = Entrez.efetch(db="nucleotide", id=acc, rettype="fasta", retmode="text")
        try:
            sequence = handle.read()
        finally:
            # Always release the fetch handle, even if reading fails part-way;
            # otherwise one handle is leaked per accession.
            handle.close()
        outputfile = f"{out_prefix}{acc}.fasta"
        with open(outputfile, "w") as f:
            f.write(sequence)
        print(f"Downloaded {acc} to {outputfile}")

# Contact address that download() assigns to Entrez.email.
email = "akshat22055@iiitd.ac.in"

# Reference-sequence accessions to fetch; one FASTA file is produced for each.
accession_numbers = [
    "NC_001133.9",
    "NC_001134.8",
    "NC_001135.5",
    "NC_001136.10",
    "NC_001137.3",
    "NC_001138.5",
    "NC_001139.9",
    "NC_001140.6",
    "NC_001141.2",
    "NC_001142.9",
    "NC_001143.9",
    "NC_001144.5",
    "NC_001145.3",
    "NC_001146.8",
    "NC_001147.6",
    "NC_001148.4",
    "NC_001224.1",
]

# Output files are named "<output_prefix><accession>.fasta".
output_prefix = "out"

download(email, accession_numbers, output_prefix)

# Sanity check: summarise at most the first five records of every written file.
for acc in accession_numbers:
    output_file = f"{output_prefix}{acc}.fasta"
    records = list(SeqIO.parse(output_file, "fasta"))
    for seq in records[:5]:
        # A single print with sep="\n" emits the same four lines (the last
        # being "\n", i.e. two blank lines) as four separate print calls.
        print(
            f"Sequence ID: {seq.id}",
            f"Sequence: {repr(seq.seq)}",
            f"Length: {len(seq)}",
            "\n",
            sep="\n",
        )


In [None]:
def find_all_positions_in_file(file_path, sub_string):
    """Return every 0-based start index of ``sub_string`` in a sequence file.

    The first line of the file is discarded (it is treated as a header), and
    the remaining text is joined into one continuous string with all
    whitespace removed, so matches that span line breaks are found.
    Overlapping matches are all reported. The comparison is case-sensitive.

    NOTE(review): only the first line is skipped, so this presumably expects
    a single-record FASTA file; in a multi-record file the later header lines
    would be searched as if they were sequence — confirm against the inputs.

    Args:
        file_path: Path of the file to search.
        sub_string: Motif to look for; whitespace inside it is ignored.

    Returns:
        A list of integer offsets into the whitespace-free sequence, in
        ascending order. The list is empty when the motif does not occur,
        when the motif is empty after whitespace removal, or when the file
        does not exist (an error message is printed in that last case).
    """
    # Keep the try body down to the file access that can actually raise
    # FileNotFoundError.
    try:
        with open(file_path, 'r') as file:
            # Skipping the first line as it is not relevant
            next(file, None)
            # split()/join drops *all* whitespace (spaces, newlines, tabs,
            # stray carriage returns) so positions index the bare sequence.
            main_string = "".join(file.read().split())
    except FileNotFoundError:
        # Best-effort behaviour: report and carry on with "no matches".
        print(f"Error: File '{file_path}' not found.")
        return []

    pattern = "".join(sub_string.split())
    if not pattern:
        # str.find("") succeeds at every index, which would report
        # len(main_string) + 1 meaningless positions.
        return []

    positions = []
    index = main_string.find(pattern)
    while index != -1:
        positions.append(index)
        # Advance by one (not by len(pattern)) so overlapping hits are kept.
        index = main_string.find(pattern, index + 1)
    return positions

# FASTA files to scan, one per reference sequence, named "out<accession>.fasta".
file_names = [
    'outNC_001133.9.fasta',
    'outNC_001134.8.fasta',
    'outNC_001135.5.fasta',
    'outNC_001136.10.fasta',
    'outNC_001137.3.fasta',
    'outNC_001138.5.fasta',
    'outNC_001139.9.fasta',
    'outNC_001140.6.fasta',
    'outNC_001141.2.fasta',
    'outNC_001142.9.fasta',
    'outNC_001143.9.fasta',
    'outNC_001144.5.fasta',
    'outNC_001145.3.fasta',
    'outNC_001146.8.fasta',
    'outNC_001147.6.fasta',
    'outNC_001148.4.fasta',
    'outNC_001224.1.fasta',
]

# Candidate ARS motifs to look for in every file.
possible_ars = [
    "ATTTATGTTTA",
    "TTTTATGTTTA",
    "ATTTATATTTA",
    "TTTTATATTTA",
    "ATTTATGTTTT",
    "TTTTATGTTTT",
    "ATTTATATTTT",
    "TTTTATATTTT",
]

# Report, motif by motif, where each one occurs in each file.
for sub_string in possible_ars:
    print(f"\nChecking sub-string: {sub_string}")
    for file_name in file_names:
        positions = find_all_positions_in_file(file_name, sub_string)
        # An empty result list means the motif is absent (or the file is missing).
        print(f"{file_name}: {positions}" if positions else f"{file_name}: Not found")
