In [3]:
import os
import re
import time
import xml.etree.ElementTree as ET
from datetime import datetime

import requests

def search_pubmed(query, retmax=100, retstart=0):
    """Run a PubMed ESearch query and return one page of matching PMIDs.

    Args:
        query: Search term, sent to ESearch as the ``term`` parameter.
        retmax: Maximum number of IDs to return in this page.
        retstart: Offset of the first ID to return.

    Returns:
        A ``(ids, count)`` tuple: the list of PMID strings found in this
        page and the total number of records the query matched.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
        ValueError: If the response has no usable ``<Count>`` element
            (for example, when the service returns an error document).
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    # Let requests build the query string so that spaces, '&', '#', quotes
    # and other special characters in the search term are URL-encoded
    # instead of corrupting the request.
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": retmax,
        "retstart": retstart,
        "retmode": "xml",
    }
    response = requests.get(f"{base_url}esearch.fcgi", params=params, timeout=30)
    # Fail loudly on 4xx/5xx rather than trying to parse an error page.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    count_elem = root.find(".//Count")
    if count_elem is None or not count_elem.text:
        error_elem = root.find(".//ERROR")
        detail = error_elem.text if error_elem is not None else "no <Count> element"
        raise ValueError(f"Unexpected ESearch response: {detail}")

    count = int(count_elem.text)
    ids = [id_elem.text for id_elem in root.findall(".//Id")]

    return ids, count

def fetch_full_text(pmid):
    """Fetch the PubMed record for one PMID via EFetch and return it as XML text.

    Despite the function name, the request targets the ``pubmed`` database,
    so the result is the PubMed record for the article (NOTE(review):
    typically citation metadata and abstract, not the publisher's full
    text -- confirm this is what downstream code expects).

    Args:
        pmid: PubMed identifier of the article to fetch.

    Returns:
        The response body decoded as a string.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    params = {
        "db": "pubmed",
        "id": pmid,
        "rettype": "xml",
        "retmode": "xml",
    }
    response = requests.get(f"{base_url}efetch.fcgi", params=params, timeout=30)
    # Without this check an HTTP error page (e.g. a rate-limit response)
    # would be returned and saved by the caller as if it were an article.
    response.raise_for_status()
    return response.text

def get_all_articles(keyword):
    """Collect the PMIDs of every PubMed record matching ``keyword``.

    Pages through ``search_pubmed`` in chunks of 4000 IDs, printing progress
    after each page and pausing between requests.

    Args:
        keyword: Search term passed unchanged to ``search_pubmed``.

    Returns:
        A list of PMID strings in the order the pages returned them. The
        list may be shorter than the reported total if the server stops
        returning IDs before the total is reached.
    """
    retmax = 4000
    retstart = 0
    all_ids = []

    while True:
        ids, total_count = search_pubmed(keyword, retmax, retstart)
        all_ids.extend(ids)

        print(f"Retrieved {len(all_ids)} of {total_count} articles")

        if len(all_ids) >= total_count:
            break

        if not ids:
            # An empty page means the server has nothing more to hand out
            # at this offset; without this guard the loop would request
            # ever-larger offsets forever.
            print(
                f"Warning: search returned no more IDs after {len(all_ids)} "
                f"of {total_count}; stopping early"
            )
            break

        retstart += retmax
        time.sleep(0.34)  # To respect NCBI's rate limit of 3 requests per second

    return all_ids

def create_folder(keyword):
    """Create an output folder named after the keyword and the current time.

    The folder is created relative to the current working directory and is
    named ``pubmed_<keyword>_<YYYYmmdd_HHMMSS>``. Every character of the
    keyword that is not a word character, dot or hyphen (spaces included)
    is replaced by an underscore so the name is always a single, valid
    path component.

    Args:
        keyword: Search term used to label the folder.

    Returns:
        The name of the created folder.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Search terms often contain quotes, slashes, brackets or colons
    # (e.g. '"assisted dying"[Title] AND 2020/01:2024/01[dp]'). Left as-is,
    # these would create nested directories or an invalid path on some
    # platforms, so map them to underscores just like spaces.
    safe_keyword = re.sub(r"[^\w.-]", "_", keyword)
    folder_name = f"pubmed_{safe_keyword}_{timestamp}"

    os.makedirs(folder_name, exist_ok=True)

    return folder_name

def main():
    """Prompt for a keyword, then download every matching PubMed record.

    Collects the matching PMIDs with ``get_all_articles``, creates an output
    folder with ``create_folder`` and writes the result of
    ``fetch_full_text`` for each PMID to ``article_<pmid>.xml`` in that
    folder. A request that fails for one article is reported and skipped so
    that the rest of the download can continue.
    """
    keyword = input("Enter the keyword to search for: ")
    all_pmids = get_all_articles(keyword)

    print(f"\nRetrieved {len(all_pmids)} PMIDs for the keyword '{keyword}'")

    if not all_pmids:
        # Avoid leaving an empty folder behind when the search found nothing.
        print("\nNo articles to download.")
        return

    save_folder = create_folder(keyword)
    print(f"\nCreated folder: {save_folder}")

    total = len(all_pmids)
    failed_pmids = []

    for i, pmid in enumerate(all_pmids, 1):
        try:
            full_text = fetch_full_text(pmid)
        except requests.RequestException as exc:
            # One bad request out of thousands should not abort the run.
            failed_pmids.append(pmid)
            print(f"Failed to fetch article {i} of {total} (PMID: {pmid}): {exc}")
        else:
            file_path = os.path.join(save_folder, f"article_{pmid}.xml")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(full_text)

            print(f"Saved article {i} of {total} (PMID: {pmid})")

        time.sleep(0.34)  # To respect NCBI's rate limit

    if failed_pmids:
        print(
            f"\nProcess completed with {len(failed_pmids)} failed download(s): "
            f"{', '.join(failed_pmids)}"
        )
        print(f"Files saved in folder: {save_folder}")
    else:
        print(f"\nProcess completed. All files saved in folder: {save_folder}")

# Start the interactive download only when this file is run directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Retrieved 3880 of 3880 articles

Retrieved 3880 PMIDs for the keyword 'assisted dying'

Created folder: pubmed_assisted_dying_20240822_120138
Saved article 1 of 3880 (PMID: 39168589)
Saved article 2 of 3880 (PMID: 39167528)
Saved article 3 of 3880 (PMID: 39160544)
Saved article 4 of 3880 (PMID: 39157533)
Saved article 5 of 3880 (PMID: 39157418)
Saved article 6 of 3880 (PMID: 39152645)
Saved article 7 of 3880 (PMID: 39144136)
Saved article 8 of 3880 (PMID: 39143961)
Saved article 9 of 3880 (PMID: 39126283)
Saved article 10 of 3880 (PMID: 39122437)
Saved article 11 of 3880 (PMID: 39122386)
Saved article 12 of 3880 (PMID: 39121499)
Saved article 13 of 3880 (PMID: 39119216)
Saved article 14 of 3880 (PMID: 39117361)
Saved article 15 of 3880 (PMID: 39095146)
Saved article 16 of 3880 (PMID: 39093520)
Saved article 17 of 3880 (PMID: 39087246)
Saved article 18 of 3880 (PMID: 39083816)
Saved article 19 of 3880 (PMID: 39075491)
Saved article 20 of 3880 (PMID: 39072234)
Saved article 21 of 3880 (P

KeyboardInterrupt: 