Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/paperqa/clients/openalex.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import json
import logging
import os
from collections.abc import Collection
Expand Down Expand Up @@ -91,14 +90,23 @@ async def get_doc_details_from_openalex(

if fields:
params["select"] = ",".join(fields)
# On 11/4/2025, with both the client-level timeout and the API request timeout
# set to 15 seconds, requests to OpenAlex repeatedly raised httpx.ConnectTimeout
# for DOIs 10.1046/j.1365-2699.2003.00795 and 10.2147/cia.s3785,
# even with up to 3 retries
response = await client.get(
url, params=params, timeout=OPENALEX_API_REQUEST_TIMEOUT
)
try:
response.raise_for_status()
response_data = response.json()
except (httpx.HTTPStatusError, json.JSONDecodeError) as exc:
raise DOINotFoundError("Could not find paper given DOI/title.") from exc
except httpx.HTTPStatusError as exc:
if response.status_code == httpx.codes.NOT_FOUND:
raise DOINotFoundError(
f"Could not find paper given DOI/title,"
f" response text was {response.text!r}."
) from exc
raise # Can get 429'd by OpenAlex

if response_data.get("status") == "failed":
raise DOINotFoundError("OpenAlex API returned a failed status for the query.")
Expand Down
68 changes: 68 additions & 0 deletions tests/cassettes/test_does_openalex_work[not-in-openalex].yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

67 changes: 67 additions & 0 deletions tests/cassettes/test_does_openalex_work[not-oa-in-openalex].yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 68 additions & 0 deletions tests/cassettes/test_does_openalex_work[oa-in-openalex2].yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 24 additions & 18 deletions tests/test_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -797,27 +797,33 @@ async def test_tricky_journal_quality_results(doi: str, score: int) -> None:

@pytest.mark.vcr
@pytest.mark.parametrize(
("doi", "oa"),
("doi", "in_oa", "is_openaccess"),
[
("10.1021/acs.jctc.5b00178", True),
pytest.param("10.1021/acs.jctc.5b00178", True, True, id="oa-in-openalex1"),
pytest.param("10.1093/nar/gkw1164", True, True, id="oa-in-openalex2"),
pytest.param("10.1002/wrna.1370", True, False, id="not-oa-in-openalex"),
pytest.param(
"10.1046/j.1365-2699.2003.00795", False, None, id="not-in-openalex"
),
],
)
@pytest.mark.asyncio
async def test_does_openalex_work(doi: str, oa: bool) -> None:
async def test_does_openalex_work(
doi: str, in_oa: bool, is_openaccess: bool | None
) -> None:
"""Run a simple test of OpenAlex, which we primarily want for open access checks."""
async with httpx_aiohttp.HttpxAiohttpClient() as http_client:
async with httpx_aiohttp.HttpxAiohttpClient(timeout=10) as http_client:
openalex_client = DocMetadataClient(
http_client,
metadata_clients=[OpenAlexProvider],
)
openalex_details = await openalex_client.query(
doi=doi,
fields=["open_access"],
)
assert openalex_details, "Failed to query OpenAlex"
assert (
openalex_details.other["open_access"]["is_oa"] is oa
), "Open access data should match"
assert (
openalex_details.year is None
), "Year should not be populated because we set fields"
http_client, metadata_clients=[OpenAlexProvider]
)
openalex_details = await openalex_client.query(doi=doi, fields=["open_access"])
if in_oa:
assert openalex_details, "Failed to query OpenAlex"
assert (
openalex_details.other["open_access"]["is_oa"] == is_openaccess
), "Open access data should match"
assert (
openalex_details.year is None
), "Year should not be populated because we set fields"
else:
assert not openalex_details, "Should have failed"