In [4]:
import requests
import time
import pandas as pd

In [6]:
def get_pmcids(search_term, no_of_results, timeout=10):
    """Search PubMed for open-access articles and return the matching IDs.

    Sends an ESearch request to the NCBI E-utilities against ``db="pubmed"``,
    restricted by the ``pubmed pmc open access[filter]`` filter.

    NOTE: despite the function name, the search runs against the ``pubmed``
    database, so the returned identifiers are PubMed IDs (PMIDs).

    Args:
        search_term: Entrez query string; it is AND-ed with the open-access
            filter.
        no_of_results: Maximum number of IDs to return (ESearch ``retmax``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list: The entries of ``esearchresult.idlist``. An empty list is
        returned when the request fails, the status is not 200, the body is
        not valid JSON, or the payload carries no ID list.
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        # Earlier variant that queried PMC directly, kept for reference:
        # "db": "pmc",                                   # Source: https://www.ncbi.nlm.nih.gov/books/NBK25497/table/chapter2.T._entrez_unique_identifiers_ui/?report=objectonly
        # "term": "open access[filter]+" + search_term,  # Source: https://pmc.ncbi.nlm.nih.gov/tools/openftlist/
        "db": "pubmed",
        "term": "pubmed pmc open access[filter] AND " + search_term,
        "retmode": "json",        # Response format
        "retmax": no_of_results,  # Maximum number of IDs returned
    }
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(search_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Fehler bei der Anfrage für '{search_term}': {exc}")
        return []
    finally:
        # Throttle after every attempt: NCBI asks for at most 3 requests per
        # second without an API key (E-utilities usage guidelines, NBK25497).
        time.sleep(0.34)

    if response.status_code != 200:
        print(f"Fehler bei der Anfrage für '{search_term}': {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        # The body was not JSON (e.g. an HTML error page served with 200).
        print(f"Fehler bei der Anfrage für '{search_term}': {exc}")
        return []

    # Error payloads may lack "esearchresult" or "idlist"; treat as no hits.
    return data.get("esearchresult", {}).get("idlist", [])

In [14]:
def get_json(search_term, no_of_results, timeout=10):
    """Run a relevance-sorted PubMed ESearch and return the raw JSON payload.

    Uses the same open-access-filtered query against ``db="pubmed"`` as the
    ID-only search, but adds ``sort=relevance`` and returns the complete
    decoded response (header, count, ID list, query translation, ...) so it
    can be inspected.

    Args:
        search_term: Entrez query string; it is AND-ed with the open-access
            filter.
        no_of_results: Maximum number of IDs to return (ESearch ``retmax``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response on success. On any failure (network error,
        non-200 status, body that is not valid JSON) an empty list is
        returned; this falsy sentinel is kept for backward compatibility.
    """
    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        # Earlier variant that queried PMC directly, kept for reference:
        # "db": "pmc",                                   # Source: https://www.ncbi.nlm.nih.gov/books/NBK25497/table/chapter2.T._entrez_unique_identifiers_ui/?report=objectonly
        # "term": "open access[filter]+" + search_term,  # Source: https://pmc.ncbi.nlm.nih.gov/tools/openftlist/
        "db": "pubmed",
        "term": "pubmed pmc open access[filter] AND " + search_term,
        "retmode": "json",        # Response format
        "retmax": no_of_results,  # Maximum number of IDs returned
        "sort": "relevance",
    }
    try:
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(search_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Fehler bei der Anfrage für '{search_term}': {exc}")
        return []
    finally:
        # Throttle after every attempt: NCBI asks for at most 3 requests per
        # second without an API key (E-utilities usage guidelines, NBK25497).
        time.sleep(0.34)

    if response.status_code != 200:
        print(f"Fehler bei der Anfrage für '{search_term}': {response.status_code}")
        return []

    try:
        return response.json()
    except ValueError as exc:
        # The body was not JSON (e.g. an HTML error page served with 200).
        print(f"Fehler bei der Anfrage für '{search_term}': {exc}")
        return []

# Inspect the complete ESearch payload for a MeSH-restricted query (10 hits).
test = get_json(
    search_term='(avian influenza[MeSH Terms]) AND (disease outbreak[MeSH Terms])',
    no_of_results=10,
)
test

{'header': {'type': 'esearch', 'version': '0.3'},
 'esearchresult': {'count': '672',
  'retmax': '10',
  'retstart': '0',
  'idlist': ['28915920',
   '18455796',
   '39535188',
   '40145745',
   '39053575',
   '16566867',
   '16253881',
   '36958010',
   '16277753',
   '32687049'],
  'translationset': [{'from': 'avian influenza[MeSH Terms]',
    'to': '"influenza in birds"[MeSH Terms]'},
   {'from': 'disease outbreak[MeSH Terms]',
    'to': '"disease outbreaks"[MeSH Terms]'}],
  'querytranslation': '"pubmed pmc open access"[Filter] AND "influenza in birds"[MeSH Terms] AND "disease outbreaks"[MeSH Terms]'}}

In [None]:
import requests

def translate_pmid_to_pmcid(pmid, timeout=10):
    """Look up the PMC ID that belongs to a PubMed ID via NCBI ESummary.

    Args:
        pmid: PubMed ID, as a string or an integer.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value of the ``articleids`` entry whose ``idtype`` is ``"pmc"``
        (e.g. ``"PMC12031089"``), or ``None`` when the record is missing or
        has no PMC ID.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
    # ESummary keys its JSON result by the ID as a string, so normalise once
    # and use the same value for the request and the lookup.
    pmid_key = str(pmid)
    params = {
        "db": "pubmed",
        "id": pmid_key,
        "retmode": "json",
    }
    # The query parameters must actually be sent; a timeout keeps a stalled
    # connection from blocking the notebook forever.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()

    # Unknown IDs or error payloads may lack "result" or the per-ID record.
    doc = data.get("result", {}).get(pmid_key, {})
    for id_obj in doc.get("articleids", []):
        if id_obj.get("idtype") == "pmc":
            return id_obj.get("value")
    return None

# Example lookup: resolve a single PubMed ID to its PMC ID.
t = translate_pmid_to_pmcid(pmid="40285026")
t



# pmid = "40285026"  # Example PMID


# response = requests.get(url)
# data = response.json()

# # Access the PMCID (if present)
# doc = data['result'][pmid]
# pmcid = doc.get("articleids", [])

# # Extract the PMCID explicitly
# for id_obj in pmcid:
#     if id_obj["idtype"] == "pmc":
#         print("PMCID:", id_obj["value"])


'PMC12031089'

In [None]:
import requests
from xml.etree import ElementTree as ET

# Abstract: fetch one PubMed record via EFetch and print its abstract text.

pmid = "34816252"  # Example PubMed ID
url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
params = {
    "db": "pubmed",
    "id": pmid,
    "retmode": "xml"
}

# A timeout keeps the cell from hanging indefinitely if the server stalls.
response = requests.get(url, params=params, timeout=10)
# Fail loudly on HTTP errors instead of trying to parse an error page as XML.
response.raise_for_status()
# Parse the raw bytes so the XML parser honours the encoding declared in the
# document rather than the text encoding guessed from the HTTP headers.
root = ET.fromstring(response.content)

# Extract the abstract. itertext() also collects text nested inside inline
# markup (e.g. <i>, <sub>, <sup>); elem.text alone stops at the first child
# element and would truncate such sections.
abstract_texts = root.findall(".//AbstractText")
abstract_sections = ("".join(elem.itertext()).strip() for elem in abstract_texts)
abstract = " ".join(section for section in abstract_sections if section)

print("Abstract:", abstract)


In [7]:
# Query 1: plain free-text search, limited to the first 10 hits.
search1 = "avian influenza outbreak"
pmcids = get_pmcids(search_term=search1, no_of_results=10)
pmcids

['40285026',
 '40285016',
 '40284978',
 '40284893',
 '40279164',
 '40278010',
 '40256930',
 '40255978',
 '40253358',
 '40251629']

In [13]:
# Query 2: exact phrase "avian influenza" AND-ed with the term outbreak (10 hits).
search2 = '"avian influenza" AND (outbreak)'
pmcids = get_pmcids(search_term=search2, no_of_results=10)
pmcids

['12032427',
 '12032236',
 '12031540',
 '12031521',
 '12031341',
 '12031337',
 '12031321',
 '12030940',
 '12030858',
 '12030792']

In [14]:
# Query 3: both terms parenthesised and AND-ed, without phrase quoting (10 hits).
search3 = "(avian influenza) AND (outbreak)"
pmcids = get_pmcids(search_term=search3, no_of_results=10)
pmcids

['12032427',
 '12032236',
 '12031540',
 '12031521',
 '12031491',
 '12031341',
 '12031337',
 '12031321',
 '12031139',
 '12030940']

In [15]:
# Query 4: both concepts restricted to MeSH terms (10 hits).
search4 = "(avian influenza[MeSH Terms]) AND (disease outbreak[MeSH Terms])"
pmcids = get_pmcids(search_term=search4, no_of_results=10)
pmcids

['12032427',
 '12030858',
 '12010114',
 '12031396',
 '11998540',
 '11946100',
 '11944845',
 '11977947',
 '11882949',
 '11881443']