In [None]:
from Bio import Entrez, SeqIO, AlignIO
import subprocess # MUSCLE wrapper
import os

In [11]:
# Contact email attached to Entrez requests (required by NCBI).
# Set the NCBI_EMAIL environment variable to use your own address without
# editing this cell; when it is unset or empty, the default below is used.
Entrez.email = os.environ.get("NCBI_EMAIL") or "gunanka.is22@bmsce.ac.in"

def fetch_cox1_sequence(species_name):
    """Fetch a COX1/PTGS1 nucleotide sequence for a species from NCBI.

    Several progressively broader search terms are tried against the NCBI
    ``nucleotide`` database. For the first term that returns any IDs, the
    first ID in the result list is downloaded as FASTA and parsed.

    Args:
        species_name: Organism name used in the ``[Organism]`` search field,
            e.g. ``"Homo sapiens"``.

    Returns:
        The record parsed by ``SeqIO.read`` for the first hit, or ``None``
        when the name is blank, no search term yields a hit, or any error
        occurs while querying or parsing.

    Notes:
        This is a best-effort helper: every exception is caught, reported
        with ``print`` and turned into a ``None`` return value.
    """
    # A blank name would only produce meaningless "[Organism]" queries, so
    # bail out before touching the network.
    if not species_name or not str(species_name).strip():
        print("No species name provided")
        return None

    try:
        # Ordered from most to least specific; the first term with a hit wins.
        search_terms = [
            f"{species_name}[Organism] AND (PTGS1[Gene] OR cyclooxygenase-1[Gene Name])",
            f"{species_name}[Organism] AND (COX1[Gene] OR COX-1[Gene] OR PTGS1[Gene])",
            f"{species_name}[Organism] AND cyclooxygenase 1"
        ]

        for term in search_terms:
            print(f"Trying search term: {term}")
            handle = Entrez.esearch(db="nucleotide", term=term, retmax=5)
            try:
                record = Entrez.read(handle)
            finally:
                # Release the connection even if parsing the reply fails.
                handle.close()

            if not record["IdList"]:
                continue

            # Only the first ID of the result list is used.
            seq_id = record["IdList"][0]
            print(f"Found ID: {seq_id}")

            # Download the record once and parse it straight from the handle.
            handle = Entrez.efetch(db="nucleotide", id=seq_id, rettype="fasta", retmode="text")
            try:
                seq_record = SeqIO.read(handle, "fasta")
            finally:
                handle.close()

            print(f"Sequence length: {len(seq_record.seq)} bp")

            return seq_record

        print(f"No COX1/PTGS1 sequence found for {species_name}")
        return None

    except Exception as e:
        print(f"Error fetching sequence: {e}")
        return None

In [None]:
### testing function fetch_cox1_sequence()
# Smoke test: look up the sequence for two species. Each call prints its
# own progress log (search term, ID found, sequence length).

# This call's return value is not the cell's last expression, so it is
# discarded; only the printed log is visible.
species_name = "Homo sapiens" # human beings
fetch_cox1_sequence(species_name)

# Last expression of the cell: its return value is echoed as the cell output.
species_name = "Pan troglodytes" # chimpanzees
fetch_cox1_sequence(species_name)


Trying search term: Homo sapiens[Organism] AND (PTGS1[Gene] OR cyclooxygenase-1[Gene Name])
Found ID: 1676324839
Sequence length: 4880 bp
Trying search term: Pan troglodytes[Organism] AND (PTGS1[Gene] OR cyclooxygenase-1[Gene Name])
Found ID: 2697699565
Sequence length: 5032 bp


SeqRecord(seq=Seq('TGGAGCTCCGGGCAGTGTGCGAGGCGCACGCACAGGAGACTGCACTCTGCGTCC...CCA'), id='XM_016961598.3', name='XM_016961598.3', description='XM_016961598.3 PREDICTED: Pan troglodytes prostaglandin-endoperoxide synthase 1 (PTGS1), mRNA', dbxrefs=[])