a Python program that can identify patterns in DNA or RNA sequences from the GenBank database

In [12]:
from Bio import Entrez
from Bio import SeqIO

In [13]:
# Set your email address for NCBI Entrez
Entrez.email = "l191135@lhr.nu.edu.pk"

In [16]:
def search_and_identify_pattern(query, pattern):
    try:
        # Search GenBank for your query
        handle = Entrez.esearch(db="nucleotide", term=query, retmax=10)
        record = Entrez.read(handle)
        if len(record["IdList"]) == 0:
            print("No matching sequences found.")
            return

        # Fetch and process sequences
        for seq_id in record["IdList"]:
            handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="gb", retmode="text")
            genbank_record = SeqIO.read(handle, "genbank")

            # Check if the GenBank entry contains a sequence
            if "sequence" in genbank_record.annotations:
                sequence = str(genbank_record.seq)

                # Search for the pattern in the sequence
                if pattern in sequence:
                    print(f"Pattern '{pattern}' found in GenBank entry {seq_id}")
                else:
                    print(f"Pattern '{pattern}' not found in GenBank entry {seq_id}")
            else:
                print(f"No sequence found in GenBank entry {seq_id}")

    except Exception as e:
        print("An error occurred:", e)

In [17]:
if __name__ == "__main__":
    # Define your search query (e.g., sequences containing the pattern "AGGTGTT")
    search_query = "AGGTGTT[All Fields]"
    
    # Define the pattern you want to search for (e.g., "AGGTGTT")
    pattern = "AGGTGTT"

    # Call the function to search and identify the pattern
    search_and_identify_pattern(search_query, pattern)

No sequence found in GenBank entry 2413633964
No sequence found in GenBank entry 1511109186
No sequence found in GenBank entry 411115947
No sequence found in GenBank entry 411115910
No sequence found in GenBank entry 411115778
No sequence found in GenBank entry 408541616
No sequence found in GenBank entry 406021469
No sequence found in GenBank entry 402475107
No sequence found in GenBank entry 398373017
No sequence found in GenBank entry 385726818
