Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ repos:
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.13.2
rev: v0.14.4
hooks:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.9.0
rev: 25.11.0
hooks:
- id: black
- id: black-jupyter
- repo: https://github.com/rbubley/mirrors-prettier
rev: v3.6.2
hooks:
Expand All @@ -48,19 +48,19 @@ repos:
- id: codespell
additional_dependencies: [".[toml]"]
- repo: https://github.com/crate-ci/typos
rev: v1.36.3
rev: v1.39.0
hooks:
- id: typos
- repo: https://github.com/jumanjihouse/pre-commit-hooks
rev: 3.0.0
hooks:
- id: check-mailmap
- repo: https://github.com/henryiii/validate-pyproject-schema-store
rev: 2025.09.26
rev: 2025.11.04
hooks:
- id: validate-pyproject
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.8.22
rev: 0.9.8
hooks:
- id: uv-lock
- repo: https://github.com/adamchainz/blacken-docs
Expand All @@ -74,19 +74,19 @@ repos:
- id: nb-clean
args: [--preserve-cell-outputs, --remove-empty-cells]
- repo: https://github.com/jackdewinter/pymarkdown
rev: v0.9.32
rev: v0.9.33
hooks:
- id: pymarkdown
exclude: docs/tutorials/
- repo: https://github.com/mwouts/jupytext
rev: v1.17.3
rev: v1.18.1
hooks:
- id: jupytext
args: [--to, md, --pipe, black]
additional_dependencies: [black]
additional_dependencies: [black<25.11.0] # Downpin for https://github.com/mwouts/jupytext/issues/1467
files: ^docs/.*\.ipynb$
- repo: https://github.com/jsh9/markdown-toc-creator
rev: 0.0.10
rev: 0.1.3
hooks:
- id: markdown-toc-creator
- repo: local # Use local so we can inspect paperqa.version
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ question answering, summarization, and contradiction detection.

<!--TOC-->

---

**Table of Contents**

- [Quickstart](#quickstart)
- [Example Output](#example-output)
- [What is PaperQA2](#what-is-paperqa2)
Expand Down Expand Up @@ -56,6 +60,8 @@ question answering, summarization, and contradiction detection.
- [Reproduction](#reproduction)
- [Citation](#citation)

---

<!--TOC-->

## Quickstart
Expand Down
8 changes: 5 additions & 3 deletions docs/tutorials/running_on_lfrqa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,9 @@
"os.makedirs(log_results_dir, exist_ok=True)\n",
"\n",
"\n",
"async def log_evaluation_to_json(lfrqa_question_evaluation: dict) -> None: # noqa: RUF029\n",
"async def log_evaluation_to_json( # noqa: RUF029\n",
" lfrqa_question_evaluation: dict,\n",
") -> None:\n",
" json_path = os.path.join(\n",
" log_results_dir, f\"{lfrqa_question_evaluation['qid']}.json\"\n",
" )\n",
Expand Down Expand Up @@ -409,7 +411,7 @@
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" asyncio.run(evaluate())\n"
" asyncio.run(evaluate())"
]
},
{
Expand Down Expand Up @@ -437,7 +439,7 @@
" data.append(json_data)\n",
"\n",
"results_df = pd.DataFrame(data).set_index(\"qid\")\n",
"results_df[\"winner\"].value_counts(normalize=True)\n"
"results_df[\"winner\"].value_counts(normalize=True)"
]
}
],
Expand Down
7 changes: 3 additions & 4 deletions docs/tutorials/running_on_lfrqa.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ jupyter:
extension: .md
format_name: markdown
format_version: '1.3'
jupytext_version: 1.17.3
jupytext_version: 1.18.1
kernelspec:
display_name: .venv
language: python
Expand Down Expand Up @@ -253,9 +253,9 @@ log_results_dir = os.path.join("data", "rag-qa-benchmarking", "results")
os.makedirs(log_results_dir, exist_ok=True)


async def log_evaluation_to_json(
async def log_evaluation_to_json( # noqa: RUF029
lfrqa_question_evaluation: dict,
) -> None: # noqa: RUF029
) -> None:
json_path = os.path.join(
log_results_dir, f"{lfrqa_question_evaluation['qid']}.json"
)
Expand Down Expand Up @@ -306,7 +306,6 @@ if __name__ == "__main__":
asyncio.run(evaluate())
```


After running this, you can find the results in the `data/rag-qa-benchmarking/results` folder. Here is an example of how to read them:

```python
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/settings_tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jupyter:
extension: .md
format_name: markdown
format_version: '1.3'
jupytext_version: 1.17.3
jupytext_version: 1.18.1
kernelspec:
display_name: test
language: python
Expand Down
14 changes: 13 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ skip = [
"tests/stub_data/*",
]

[tool.markdown_toc_creator]
horizontal-rule-style = "prettier"
proactive = false

[tool.mypy]
# Type-checks the interior of functions without type annotations.
check_untyped_defs = true
Expand Down Expand Up @@ -220,7 +224,7 @@ max-line-length = 88 # Match ruff line-length
# Files or directories matching the regular expression patterns are skipped.
# The regex matches against base names, not paths. The default value ignores
ignore-patterns = [
"version.py", # setuptools-scm version files, SEE: https://github.com/pylint-dev/pylint/issues/10479
"^version\\.py$", # Version files made by setuptools_scm, SEE: https://github.com/pylint-dev/pylint/issues/10479
]
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use, and will cap the count on Windows to
Expand Down Expand Up @@ -250,6 +254,7 @@ disable = [
"global-statement", # Rely on ruff PLW0603 for this
"global-variable-not-assigned", # Rely on ruff PLW0602 for this
"import-outside-toplevel", # Rely on ruff PLC0415 for this
"import-private-name", # Rely on ruff PLC2701 for this
"invalid-name", # Don't care to enforce this
"keyword-arg-before-vararg", # Rely on ruff B026 for this
"line-too-long", # Rely on ruff E501 for this
Expand Down Expand Up @@ -289,7 +294,9 @@ disable = [
"unused-argument", # Rely on ruff ARG002 for this
"unused-import", # Rely on ruff F401 for this
"unused-variable", # Rely on ruff F841 for this
"unused-wildcard-import", # Wildcard imports are convenient
"use-sequence-for-iteration", # Rely on ruff PLC0208 for this
"wildcard-import", # Wildcard imports are convenient
"wrong-import-order", # Rely on ruff I001 for this
"wrong-import-position", # Rely on ruff E402 for this
]
Expand Down Expand Up @@ -367,11 +374,14 @@ preview = true
explicit-preview-rules = true
extend-select = [
"ASYNC212",
"ASYNC240",
"ASYNC250",
"B901",
"B903",
"B909",
"B912",
"CPY001",
"DOC102",
"DOC201",
"DOC202",
"DOC402",
Expand Down Expand Up @@ -477,6 +487,7 @@ extend-select = [
"RUF061",
"RUF063",
"RUF064",
"RUF065",
"RUF102",
"TC008",
"UP042",
Expand Down Expand Up @@ -568,6 +579,7 @@ mypy-init-return = true
[tool.ruff.lint.per-file-ignores]
"**/tests/*.py" = [
"N802", # Test function names can match class names
"PLC2701", # Tests can import private names if needed
"PLR2004", # Tests can have magic values
"PLR6301", # Test classes can ignore self
"S101", # Tests can have assertions
Expand Down
3 changes: 2 additions & 1 deletion src/paperqa/agents/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,8 @@ async def process_file(
) -> None:

abs_file_path = (
pathlib.Path(settings.agent.index.paper_directory).absolute() / rel_file_path
pathlib.Path(settings.agent.index.paper_directory).absolute() # noqa: ASYNC240
/ rel_file_path
)
fallback_title = rel_file_path.name
if settings.agent.index.use_absolute_paper_directory:
Expand Down
2 changes: 1 addition & 1 deletion src/paperqa/clients/crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ async def download_retracted_dataset(
async for chunk in response.aiter_bytes(chunk_size=1024):
await f.write(chunk)

if os.path.getsize(str(retraction_data_path)) == 0:
if os.path.getsize(str(retraction_data_path)) == 0: # noqa: ASYNC240
raise RuntimeError("Retraction data is empty")


Expand Down
8 changes: 6 additions & 2 deletions src/paperqa/contrib/openreview_paper_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ async def _get_relevant_papers_chunk(self, question: str, chunk: str) -> list[An

async def download_papers(self, submissions: list[Any]) -> None:
"""Download PDFs for given submissions."""
downloaded_papers = Path(self.settings.paper_directory).rglob("*.pdf")
downloaded_papers = Path(self.settings.paper_directory).rglob( # noqa: ASYNC240
"*.pdf"
)
downloaded_ids = [p.stem for p in downloaded_papers]
logger.info("Downloading PDFs for relevant papers.")
for submission in submissions:
Expand Down Expand Up @@ -151,7 +153,9 @@ async def aadd_docs(
) -> Docs:
if docs is None:
docs = Docs()
for doc_path in Path(self.settings.paper_directory).rglob("*.pdf"):
for doc_path in Path(self.settings.paper_directory).rglob( # noqa: ASYNC240
"*.pdf"
):
sub = subs.get(doc_path.stem) if subs is not None else None
if sub:
await docs.aadd(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
from paperqa.clients.journal_quality import JournalQualityPostProcessor
from paperqa.core import (
LLMContextTimeoutError,
_map_fxn_summary, # noqa: PLC2701
_map_fxn_summary,
llm_parse_json,
map_fxn_summary,
)
Expand Down