In [7]:
import os
import json

from pyeuropepmc.search import SearchClient

# One SearchClient instance is created here and reused by the cells below.
client = SearchClient()

# Fixtures are written into this directory (currently the notebook's working
# directory); make sure it exists before any cell tries to write into it.
FIXTURE_DIR = "."
os.makedirs(FIXTURE_DIR, exist_ok=True)

In [8]:
def _fixture_case(desc, method, params, filename, fmt="json"):
    """Build one fixture specification.

    Args:
        desc: Human-readable label for the case.
        method: Name of the client method to call.
        params: Keyword arguments for that method.
        filename: Name of the file the response is saved under.
        fmt: Value stored under the "format" key; defaults to "json".

    Returns:
        A dict with the keys "desc", "method", "params", "filename" and
        "format", in that order.
    """
    return {
        "desc": desc,
        "method": method,
        "params": params,
        "filename": filename,
        "format": fmt,
    }


# Every fixture to generate: which client method to call, with which
# parameters, and where to store the result. Cases that do not name a format
# are stored as JSON.
test_cases = [
    _fixture_case(
        "Simple JSON search",
        "search",
        {"query": "cancer"},
        "search_cancer.json",
    ),
    _fixture_case(
        "Simple XML search",
        "search",
        {"query": "cancer", "format": "xml"},
        "search_cancer.xml",
        fmt="xml",
    ),
    _fixture_case(
        "Simple DC XML search",
        "search",
        {"query": "cancer", "format": "dc"},
        "search_cancer_dc.xml",
        fmt="dc",
    ),
    _fixture_case(
        "Search with resultType 'core'",
        "search",
        {"query": "cancer", "resultType": "core"},
        "search_cancer_core.json",
    ),
    _fixture_case(
        "Search with resultType 'idlist'",
        "search",
        {"query": "cancer", "resultType": "idlist"},
        "search_cancer_idlist.json",
    ),
    _fixture_case(
        "POST search with synonym expansion",
        "search_post",
        {"query": "cancer", "synonym": "true"},
        "search_post_cancer.json",
    ),
    _fixture_case(
        "Search with pagination",
        "search",
        {"query": "cancer", "pageSize": 5, "page": 2},
        "search_cancer_page2.json",
    ),
    _fixture_case(
        "Large JSON search",
        "search",
        {"query": "cancer", "pageSize": 1000},
        "search_1000results_cancer.json",
    ),
    _fixture_case(
        "Fetch all pages for 'cancer' query",
        "fetch_all_pages",
        {"query": "cancer", "page_size": 100, "max_results": 1000},
        "fetch_all_1000results_cancer.json",
    ),
    _fixture_case(
        "Search sorted by cited count",
        "search",
        {"query": "cancer", "sort": "CITED asc"},
        "search_cancer_sorted_cited.json",
    ),
    _fixture_case(
        "Search with no results",
        "search",
        {"query": "asdkfjhasdkfjhasdfasdasdasdfgsagsd"},
        "search_no_results.json",
    ),
]

In [9]:
# Run every case against the client and persist what comes back.
for case in test_cases:
    print(f"Running: {case['desc']}")
    # Look the method up by name and pass the case's params as keyword arguments.
    result = getattr(client, case["method"])(**case["params"])
    out_path = os.path.join(FIXTURE_DIR, case["filename"])

    if case["format"] != "json":
        # Any non-JSON format is written out as plain text; unwrap `.text`
        # when the client handed back a response-like object.
        body = result.text if hasattr(result, "text") else result
        with open(out_path, "w", encoding="utf-8") as sink:
            sink.write(body)
    else:
        # A response-like object exposes `.json()`; anything else is assumed
        # to be already-decoded data that json can serialise.
        payload = result.json() if hasattr(result, "json") else result
        with open(out_path, "w", encoding="utf-8") as sink:
            json.dump(payload, sink, indent=2)
    print(f"Saved fixture: {out_path}")

print("All fixtures saved.")

Running: Simple JSON search
Saved fixture: ./search_cancer.json
Running: Simple XML search
Saved fixture: ./search_cancer.xml
Running: Simple DC XML search
Saved fixture: ./search_cancer_dc.xml
Running: Search with resultType 'core'
Saved fixture: ./search_cancer_core.json
Running: Search with resultType 'idlist'
Saved fixture: ./search_cancer_idlist.json
Running: POST search with synonym expansion
Saved fixture: ./search_post_cancer.json
Running: Search with pagination
Saved fixture: ./search_cancer_page2.json
Running: Large JSON search
Saved fixture: ./search_1000results_cancer.json
Running: Fetch all pages for 'cancer' query
Saved fixture: ./fetch_all_1000results_cancer.json
Running: Search sorted by cited count
Saved fixture: ./search_cancer_sorted_cited.json
Running: Search with no results
Saved fixture: ./search_no_results.json
All fixtures saved.


In [10]:
# Settings for the multi-page fixture.
query = "cancer"
page_size = 100
num_pages = 10
filename = "fetch_10pages_cancer.json"

# Collect up to `num_pages` pages, feeding each response's "nextCursorMark"
# into the following request. "*" is the cursorMark sent with the first
# request (presumably the API's start-of-results marker — confirm in the
# Europe PMC docs).
cursor_mark = "*"
pages = []
for page in range(1, num_pages + 1):
    print(f"Fetching page {page} of {num_pages} for query '{query}' ...")
    reply = client.search(query=query, pageSize=page_size, cursorMark=cursor_mark)
    # Unwrap a response-like object; otherwise use the returned value directly.
    payload = reply.json() if hasattr(reply, "json") else reply  # type: ignore
    pages.append(payload)

    upcoming_mark = payload.get("nextCursorMark")  # type: ignore
    if upcoming_mark and upcoming_mark != cursor_mark:
        # The cursor moved forward, so there is another page to request.
        cursor_mark = upcoming_mark
        continue
    # Missing or unchanged cursor: stop here instead of re-requesting the same page.
    print("No more pages or cursorMark did not advance.")
    break

# All collected pages go into one JSON file, as a list with one entry per page.
out_path = os.path.join(FIXTURE_DIR, filename)
with open(out_path, "w", encoding="utf-8") as sink:
    json.dump(pages, sink, indent=2)

print(f"Saved {len(pages)} pages of results to {out_path}")

Fetching page 1 of 10 for query 'cancer' ...
Fetching page 2 of 10 for query 'cancer' ...
Fetching page 3 of 10 for query 'cancer' ...
Fetching page 4 of 10 for query 'cancer' ...
Fetching page 5 of 10 for query 'cancer' ...
Fetching page 6 of 10 for query 'cancer' ...
Fetching page 7 of 10 for query 'cancer' ...
Fetching page 8 of 10 for query 'cancer' ...
Fetching page 9 of 10 for query 'cancer' ...
Fetching page 10 of 10 for query 'cancer' ...
Saved 10 pages of results to ./fetch_10pages_cancer.json
