# Resultados publicaciones PubMed API
### Búsqueda "rare disease"

In [2]:
# Librerías
import csv
import re
import time
from xml.etree import ElementTree as ET

import requests

In [None]:
# --- PubMed E-utilities connection settings ---
BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
DB = "pubmed"
# NCBI asks every client to identify itself with a tool name and contact email
TOOL = "PythonScript"
EMAIL = "mariauriburu12@gmail.com"

# --- Search scope ---
TERM = "rare disease[Title]"  # match "rare disease" in the article title only
YEAR_START, YEAR_END = 1900, 2024  # inclusive range of publication years

# --- Request sizing and throttling ---
MAX_IDS_PER_QUERY = 200  # number of PMIDs sent per efetch request
# Pause after each request so the API is not overloaded; 0.34 s keeps the
# script just under three requests per second.
DELAY = 0.34

# Accumulator for the article rows collected across all years
results = []

# Search PubMed for the articles published in a single year
def get_pmids_for_year(year, timeout=30):
    """Return the PMIDs of PubMed articles matching TERM published in *year*.

    Sends one ESearch request restricted to the publication date range
    ``year``..``year`` and collects every ``<Id>`` element of the reply.

    Args:
        year: Publication year used as both ``mindate`` and ``maxdate``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of PMID strings (empty when nothing matches).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status.
        xml.etree.ElementTree.ParseError: If the reply is not valid XML.
        RuntimeError: If ESearch reports an error inside the XML reply.
    """
    params = {
        "db": DB,
        "term": TERM,
        "datetype": "pdat",
        "mindate": year,
        "maxdate": year,
        "retmax": 100000,
        "retmode": "xml",
        "tool": TOOL,
        "email": EMAIL
    }
    try:
        response = requests.get(BASE_URL + "esearch.fcgi", params=params, timeout=timeout)
    finally:
        # Throttle even when the request failed, so retries by the caller
        # cannot exceed the rate limit.
        time.sleep(DELAY)
    # Surface HTTP errors (e.g. 429 rate limiting) instead of letting the
    # error page fail later as an opaque XML parse error.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    # ESearch reports request-level failures in a top-level <ERROR> element;
    # without this check they would look like "no results".
    error = root.findtext("ERROR")
    if error:
        raise RuntimeError(f"ESearch error for year {year}: {error}")

    pmids = [id_elem.text for id_elem in root.findall(".//Id")]

    # The server may cap the number of IDs it returns; warn when the reported
    # total is larger than what was actually received.
    count_text = root.findtext("Count")
    if count_text and count_text.isdigit() and int(count_text) > len(pmids):
        print(f"Warning: year {year} matched {count_text} records but only {len(pmids)} PMIDs were returned")
    return pmids

# Matches the first standalone four-digit number in a free-text MedlineDate
_YEAR_RE = re.compile(r"\b\d{4}\b")


def _element_text(elem, default=""):
    """Return the whole text of *elem*, including text inside nested tags."""
    if elem is None:
        return default
    # itertext() also yields text inside inline markup such as <i> or <sup>,
    # which .text / findtext() would cut off at the first child element.
    return "".join(elem.itertext()).strip() or default


def _publication_year(pubdate_elem):
    """Return the year of a PubDate element, or "Unknown" if none is found."""
    if pubdate_elem is None:
        return "Unknown"
    year = pubdate_elem.findtext("Year")
    if year:
        return year
    # MedlineDate is free text ("1998 Dec-1999 Jan", "Winter 1999"), so look
    # for the first four-digit number rather than slicing the first 4 chars.
    match = _YEAR_RE.search(pubdate_elem.findtext("MedlineDate", default=""))
    return match.group() if match else "Unknown"


def _parse_article(article):
    """Build the (pmid, title, pub_year, pubtype, journal) row for one article."""
    pmid = article.findtext(".//PMID")
    title = _element_text(article.find(".//ArticleTitle"))
    journal = article.findtext(".//Journal/Title", default="Unknown")
    pub_year = _publication_year(article.find(".//PubDate"))
    pubtypes = article.findall(".//PublicationType")
    pubtype = "; ".join(pt.text for pt in pubtypes if pt.text) or "Unknown"
    return pmid, title, pub_year, pubtype, journal


# Fetch the details (PMID, title, year, publication type, journal) of the
# listed articles from the API
def fetch_details(pmids, timeout=30):
    """Download and parse the PubMed records for *pmids*.

    The PMIDs are requested from EFetch in batches of MAX_IDS_PER_QUERY, with
    a pause of DELAY seconds after every request.

    Args:
        pmids: List of PMID strings.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``(pmid, title, pub_year, pubtype, journal)`` tuples, one per
        ``PubmedArticle`` that could be parsed. Articles that fail to parse
        are reported on stdout and skipped.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status for any batch.
        xml.etree.ElementTree.ParseError: If a reply is not valid XML.
    """
    papers = []
    for start in range(0, len(pmids), MAX_IDS_PER_QUERY):
        batch = pmids[start:start + MAX_IDS_PER_QUERY]
        params = {
            "db": DB,
            "id": ",".join(batch),
            "retmode": "xml",
            "rettype": "medline",
            "tool": TOOL,
            "email": EMAIL
        }
        try:
            response = requests.get(BASE_URL + "efetch.fcgi", params=params, timeout=timeout)
        finally:
            # Throttle even after a failed request to stay within the rate limit
            time.sleep(DELAY)
        # Fail with a clear HTTP error instead of an XML parse error on an
        # error page (e.g. 429 Too Many Requests).
        response.raise_for_status()
        root = ET.fromstring(response.content)
        for article in root.findall(".//PubmedArticle"):
            # Best effort: one malformed record must not discard the batch
            try:
                papers.append(_parse_article(article))
            except Exception as e:
                print(f"Failed to parse article: {e}")
    return papers

# Run the search-then-fetch cycle once per publication year; a failure in one
# year is reported and the run moves on to the next year.
for year in range(YEAR_START, YEAR_END + 1):
    try:
        print(f"Processing year {year}...")
        pmids = get_pmids_for_year(year)
        # Only hit the fetch endpoint when the search returned something
        if pmids:
            details = fetch_details(pmids)
            results += details
    except Exception as e:
        print(f"Error in year {year}: {e}")

# Export every collected row to a CSV file, preceded by a header row
csv_filename = r"C:\Users\maria\ITAcademy\Sprint 10 - Pràctica final\Rare Diseases\rare_disease_papers_detailed.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    # Header first, then one row per article, in a single call
    writer.writerows([["pmid", "title", "pub_year", "pubtype", "journal"], *results])

print(f"Finished. Results saved to {csv_filename}")

Processing year 1900...
Processing year 1901...
Processing year 1902...
Processing year 1903...
Processing year 1904...
Processing year 1905...
Processing year 1906...
Processing year 1907...
Processing year 1908...
Processing year 1909...
Processing year 1910...
Processing year 1911...
Processing year 1912...
Processing year 1913...
Processing year 1914...
Processing year 1915...
Processing year 1916...
Processing year 1917...
Processing year 1918...
Processing year 1919...
Processing year 1920...
Processing year 1921...
Processing year 1922...
Processing year 1923...
Processing year 1924...
Processing year 1925...
Processing year 1926...
Processing year 1927...
Processing year 1928...
Processing year 1929...
Processing year 1930...
Processing year 1931...
Processing year 1932...
Processing year 1933...
Processing year 1934...
Processing year 1935...
Processing year 1936...
Processing year 1937...
Processing year 1938...
Processing year 1939...
Processing year 1940...
Processing year 