In [4]:
import json
import os
from datetime import datetime

import requests

# Config
# SECURITY: credentials belong in the environment, not in source control.
# Set ELSEVIER_API_KEY (and optionally ELSEVIER_INST_TOKEN) to override the
# values below. The literal key is kept only as a fallback so existing runs
# keep working; it has been exposed in this file and should be rotated and
# then removed from here.
API_KEY    = os.environ.get("ELSEVIER_API_KEY") or "2666c3270101dc6d670514f592d567a8"
INST_TOKEN = os.environ.get("ELSEVIER_INST_TOKEN") or ""
BASE       = "https://api.elsevier.com"

# Authentication headers shared by every request; the institutional token is
# only sent when one is configured.
_AUTH_HEADERS = {"X-ELS-APIKey": API_KEY}
if INST_TOKEN:
    _AUTH_HEADERS["X-ELS-Insttoken"] = INST_TOKEN

# One header set per response format requested from the API.
HEADERS_JSON = {"Accept": "application/json", **_AUTH_HEADERS}
HEADERS_XML  = {"Accept": "text/xml",          **_AUTH_HEADERS}

# Functions
def _extract_pii(entry):
    """Return the PII of a search entry, or None when it carries none."""
    pii = entry.get("pii")
    if pii:
        return pii
    # `or ""` also covers the key being present with a null value, which
    # would otherwise make the `in` test raise TypeError.
    identifier = entry.get("dc:identifier") or ""
    if "PII:" in identifier:
        return identifier.replace("PII:", "")
    return None


def search_sciencedirect(query, count=25, offset=0):
    """Search ScienceDirect and return a list of dicts with basic article info.

    Args:
        query: Search expression passed as the ``query`` parameter.
        count: Number of results requested (sent as a string).
        offset: Index of the first result requested (sent as a string).

    Returns:
        One dict per entry in the response, with the keys ``title``,
        ``journal``, ``date``, ``doi``, ``pii`` and ``subtype``. Fields the
        entry does not provide are ``None``, except ``subtype`` which is
        always a lower-cased string (possibly empty).

    Raises:
        requests.HTTPError: If the response status is a 4xx/5xx error.
    """
    url = f"{BASE}/content/search/sciencedirect"
    params = {"query": query, "count": str(count), "offset": str(offset), "show": "all"}
    r = requests.get(url, params=params, headers=HEADERS_JSON, timeout=30)

    # DEBUG
    print("HTTP", r.status_code, "-", r.url)
    if r.status_code != 200:
        print("Body snippet:", r.text[:300])
        r.raise_for_status()

    # `or {}` / `or []` tolerate keys that are present but null.
    search_results = r.json().get("search-results") or {}
    print("totalResults:", search_results.get("opensearch:totalResults"))

    return [
        {
            "title":   entry.get("dc:title"),
            "journal": entry.get("prism:publicationName"),
            "date":    entry.get("prism:coverDate"),
            "doi":     entry.get("prism:doi"),
            "pii":     _extract_pii(entry),
            "subtype": (
                entry.get("subtypeDescription")
                or entry.get("prism:aggregationType")
                or ""
            ).lower(),
        }
        for entry in search_results.get("entry") or []
    ]

def get_xml_by_doi(doi):
    """Fetch the XML content of an article by DOI.

    Args:
        doi: The raw (not percent-encoded) DOI, e.g.
            ``"10.1016/j.memsci.2023.122272"``.

    Returns:
        The ``requests`` response object. The status is printed but not
        checked, so callers must inspect ``status_code`` themselves.
    """
    from urllib.parse import quote

    # A DOI may contain characters such as '?', '#', '<' or spaces that would
    # change the meaning of the URL if interpolated verbatim. Escape those,
    # but leave everything RFC 3986 permits unescaped in a path as-is so DOIs
    # that already worked are sent unchanged.
    path_safe = "/:@!$&'()*+,;="
    url = f"{BASE}/content/article/doi/{quote(doi, safe=path_safe)}"
    r = requests.get(url, headers=HEADERS_XML, timeout=60)
    print("  XML HTTP status:", r.status_code)
    # Show at most 200 characters of the body on a single line.
    print("  XML snippet:", r.text[:200].replace("\n", " "), "...")
    return r

def year(date_str):
    """Return the year as int from a date string like '2021-06-15'.

    The year is read from the first four characters. Returns 0 when
    ``date_str`` is empty or ``None``, or when that prefix is not made up
    entirely of decimal digits.
    """
    if not date_str:
        return 0
    prefix = date_str[:4]
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with ValueError.
    return int(prefix) if prefix.isdecimal() else 0

# main 
if __name__ == "__main__":
    # Search expression: open-access articles on carbon-capture membranes.
    query = 'TITLE-ABSTR-KEY("carbon capture") AND membrane AND permeance AND selectivity AND combustion AND polymer AND GPU AND OPENACCESS(1)' # keywords
    hits = search_sciencedirect(query, count=25)

    # Filter hits: published in 2010 or later and not a review.
    # Hits without a usable date get year 0 and are therefore dropped.
    keep = [
        h for h in hits
        if year(h["date"]) >= 2010 and "review" not in h["subtype"]
    ]

    print(f"kept {len(keep)} of {len(hits)}")
    for k in keep[:10]:  # show first 10 kept
        # The title is None when the entry has no dc:title; fall back to ""
        # so the slice below cannot raise TypeError.
        title = k["title"] or ""
        print("-", k["date"], k["journal"], "|", title[:80], "...", k["doi"] or k["pii"])

    # Fetch XML for kept articles that have a DOI
    for k in keep[:3]:  # limit to first 3
        if k["doi"]:
            print(f"\nFetching XML for DOI {k['doi']}")
            get_xml_by_doi(k["doi"])


HTTP 200 - https://api.elsevier.com/content/search/sciencedirect?query=TITLE-ABSTR-KEY%28%22carbon+capture%22%29+AND+membrane+AND+permeance+AND+selectivity+AND+combustion+AND+polymer+AND+GPU+AND+OPENACCESS%281%29&count=25&offset=0&show=all
totalResults: 131
kept 25 of 25
- 2024-02-29 Journal of Membrane Science | Breaking barriers: Unleashing CO<ce:inf loc=post>2</ce:inf> selectivity with ult ... 10.1016/j.memsci.2023.122272
- 2024-12-31 Process Safety and Environmental Protection | Thin film nanocomposite membranes based on renewable polymer Pebax® and zeolitic ... 10.1016/j.psep.2024.10.053
- 2025-03-31 Green Energy & Environment | Facilitated transport membranes in post-combustion carbon capture: Recent advanc ... 10.1016/j.gee.2024.04.010
- 2025-04-30 Journal of Membrane Science | Recent progress in thin film composite membranes based on the polymer of intrins ... 10.1016/j.memsci.2025.123844
- 2023-04-15 Journal of Membrane Science | Hydrogen-sieving zeolitic films by coating zeol