In [2]:
from Bio import Entrez

In [8]:
# Look up NCBI Gene IDs for a list of gene names in one species and build a
# {official gene symbol: Gene ID} dictionary in `gene_mapping`.

# Set your email (required by NCBI)
Entrez.email = "john.doe@example.com"

species = 'arabidopsis_thaliana'

# Step 1: Query with multiple gene names
gene_names = ["TT1", "TT16", "TTG2"]  # List of genes to search
search_term = " OR ".join(f"{gene}[Gene Name]" for gene in gene_names)  # Combine gene names with OR
# 'arabidopsis_thaliana' -> 'Arabidopsis thaliana'
formated_species_name = species.replace('_', ' ').capitalize()
# NOTE(review): the organism filter is appended without parentheses around the
# OR group. NCBI documents Entrez as evaluating Boolean operators left to right,
# which would apply the filter to every gene name -- confirm before relying on it.
search_term += f" AND {formated_species_name}[Organism]"
print(search_term)

# Perform the esearch query; always release the network handle, even if parsing fails.
# NOTE(review): esearch returns only a limited number of IDs unless `retmax` is
# raised -- verify this is enough when `gene_names` grows.
handle = Entrez.esearch(db="gene", term=search_term)
try:
    search_results = Entrez.read(handle)
finally:
    handle.close()

# Step 2: Fetch detailed information about the gene IDs
gene_ids = search_results["IdList"]  # List of gene IDs found
print(f"Found gene IDs: {gene_ids}")

if gene_ids:
    # Fetch the full Entrezgene records for all IDs in a single efetch request
    handle = Entrez.efetch(db="gene", id=",".join(gene_ids), retmode="xml")
    try:
        gene_records = Entrez.read(handle)
    finally:
        handle.close()
else:
    # Nothing matched: skip efetch (an empty id list is not a valid request)
    # and leave the mapping empty instead of failing.
    gene_records = []

# Step 3: Map each record's official gene symbol to its Gene ID.
# The key is the symbol stored in the record, read from 'Gene-ref_locus'.
gene_mapping = {}
for record in gene_records:
    gene_symbol = record['Entrezgene_gene']['Gene-ref']['Gene-ref_locus']
    gene_id = record['Entrezgene_track-info']['Gene-track']['Gene-track_geneid']
    gene_mapping[gene_symbol] = gene_id

TT1[Gene Name] OR TT16[Gene Name] OR TTG2[Gene Name] AND Arabidopsis thaliana[Organism]
Found gene IDs: ['818303', '832390', '840386']


In [9]:
# Display the {gene symbol: Gene ID} dictionary as the cell's output
gene_mapping

{'TTG2': '818303', 'TT16': '832390', 'TT1': '840386'}