diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..80982496 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,29 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen + +**Environment (please complete the following information):** + - OS: [e.g. iOS] + - Python Version [e.g. 22] + - Frameworks [e.g, LangChain, LangFlow, etc.] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..11fc491e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. 
diff --git a/.github/actions/setup-python-env/action.yml b/.github/actions/setup-python-env/action.yml index b850d7ac..3040e62c 100644 --- a/.github/actions/setup-python-env/action.yml +++ b/.github/actions/setup-python-env/action.yml @@ -15,8 +15,4 @@ runs: with: enable-cache: "true" cache-suffix: ${{ matrix.python-version }} - python-version: ${{ matrix.python-version }} - - - name: Install Python dependencies - run: uv sync --frozen --package langchain-graph-retriever - shell: bash \ No newline at end of file + python-version: ${{ matrix.python-version }} \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e7543541..fd0edeef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -27,6 +27,10 @@ jobs: uses: ./.github/actions/setup-python-env id: setup + - name: Install Python dependencies + run: uv sync --frozen --all-packages --all-extras + shell: bash + - name: Run `fmt-check` run: uv run poe fmt-check # Runs this as long as setup succeeds. @@ -47,6 +51,11 @@ jobs: # Runs this as long as setup succeeds. if: ${{ !cancelled() && steps.setup.conclusion == 'success' }} + - name: Run `nbstripout-check` + run: uv run poe nbstripout-check + # Runs this as long as setup succeeds. 
+ if: ${{ !cancelled() && steps.setup.conclusion == 'success' }} + tests-and-type-check: runs-on: ubuntu-latest strategy: @@ -65,17 +74,41 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Test All Stores (3.12 only) + - name: (non-3.13) Install Python dependencies (including all extras) + if: ${{ success() && matrix.python-version != '3.13' }} + run: uv sync --frozen --all-packages --all-extras + shell: bash + + - name: (3.13 only) Install Python dependencies (without extras) + if: ${{ success() && matrix.python-version == '3.13' }} + run: uv sync --frozen --all-packages + shell: bash + + - name: Test graph-retriever + run: + uv run coverage run -m pytest -vs packages/graph-retriever + --junitxml=junit/test-results-gr-${{ matrix.python-version }}.xml + + - name: (3.10, 3.11) Test langchain-graph-retriever (In-Memory Stores) with extras + if: ${{ success() && matrix.python-version == '3.10' || matrix.python-version == '3.11' }} + run: uv run pytest -vs --runextras packages/langchain-graph-retriever + --junitxml=junit/test-results-lgr-${{ matrix.python-version }}.xml + + - name: (3.12) Test langchain-graph-retriever (All Stores) with extras if: ${{ success() && matrix.python-version == '3.12' }} id: test - run: - uv run poe test-all - --junitxml=junit/test-results-${{ matrix.python-version }}.xml + run: uv run coverage run -a -m pytest -vs --runextras packages/langchain-graph-retriever --stores=all + --junitxml=junit/test-results-lgr-${{ matrix.python-version }}.xml env: ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_KEYSPACE: ci_${{ github.run_id }}_${{ strategy.job-index }}_${{ github.run_attempt }} + - name: (3.13) Test langchain-graph-retriever (In-Memory Stores) without extras + if: ${{ success() && matrix.python-version == '3.13' }} + run: uv run pytest -vs packages/langchain-graph-retriever + --junitxml=junit/test-results-lgr-${{ 
matrix.python-version }}.xml + - name: Drop Astra Keyspace # Even though it seems redundant, the `always() &&` is necessary to signal to # GitHub actions that we want this to run even if the job is cancelled. @@ -87,11 +120,13 @@ jobs: ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} ASTRA_DB_KEYSPACE: ci_${{ github.run_id }}_${{ strategy.job-index }}_${{ github.run_attempt }} - - name: Test In-Memory Stores (non-3.12) - if: ${{ success() && matrix.python-version != '3.12' }} - run: - uv run poe test - --junitxml=junit/test-results-${{ matrix.python-version }}.xml + - name: Report Coverage + if: ${{ success() && matrix.python-version == '3.12' }} + run: | + uvx coveralls + uvx coveralls --finish + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Run `type-check` run: uv run poe type-check @@ -101,7 +136,38 @@ jobs: uses: actions/upload-artifact@v4 with: name: pytest-results-${{ matrix.python-version }} - path: packages/*/junit/test-results-${{ matrix.python-version }}.xml + path: junit/test-results-*.xml + + # See https://github.com/EnricoMi/publish-unit-test-result-action#support-fork-repositories-and-dependabot-branches to + # enable uploading from forks. 
+ publish-test-results: + name: "Publish test results" + needs: tests-and-type-check + runs-on: ubuntu-latest + permissions: + checks: write + + # only needed unless run with comment_mode: off + pull-requests: write + + # only needed for private repository + contents: read + + # only needed for private repository + issues: read + if: always() + + steps: + - name: Download Artifacts + uses: actions/download-artifact@v4 + with: + pattern: pytest-results-* + path: artifacts + + - name: Publish Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + with: + files: "artifacts/**/*.xml" build-docs: runs-on: ubuntu-latest @@ -112,42 +178,40 @@ jobs: - name: Set up the environment uses: ./.github/actions/setup-python-env - - name: Set up Quarto - uses: quarto-dev/quarto-actions/setup@v2 - - name: Sync Docs Dependencies - run: uv sync --group=docs + run: uv sync --all-packages --group=docs --all-extras - name: Check if documentation can be built - run: uv run poe docs-build - id: build + run: uv run mkdocs build --strict + + - name: Test Notebooks + run: uv run poe test-nb env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} - - name: Upload docs - uses: actions/upload-pages-artifact@v3 - with: - # Automatically uploads an artifact from the './_site' directory by default - path: ${{ github.workspace }}/docs/_site - docs-deploy: # Deploy docs on push to main. 
if: github.event_name == 'push' && github.ref == 'refs/heads/main' - # Deployment job - environment: - name: github-pages - url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest - # Grant GITHUB_TOKEN the permissions required to make a Pages deployment permissions: - pages: write # to deploy to Pages - id-token: write # to verify the deployment originates from an appropriate source + contents: write # to write to the `gh-pages` branch - runs-on: ubuntu-latest - needs: [build-docs] steps: - - name: Deploy to GitHub Pages - id: deployment - uses: actions/deploy-pages@v4 + - name: Check out + uses: actions/checkout@v4 + + - name: Set up the environment + uses: ./.github/actions/setup-python-env + + - name: Sync Docs Dependencies + run: uv sync --all-packages --group=docs --all-extras + + - name: Deploy Docs to gh-pages + run: uv run mkdocs gh-deploy --force + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }} + ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 79329c4c..996a22f8 100644 --- a/.gitignore +++ b/.gitignore @@ -50,7 +50,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ -junit/test-results.xml +junit/test-results*.xml # Translations *.mo @@ -170,3 +170,7 @@ cython_debug/ # PyPI configuration file .pypirc + +/.luarc.json + +.DS_Store \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f91f8192..8c91f1b0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ Make sure you have > 0.5.0 of UV installed. 
* Use `uv python install` to install the project python version * Use `uv sync` to create a virtual environment and install the dev dependencies -* Use `uv run poe install` to install all the dependencies for all the packages +* Use `uv run poe sync` to install all the dependencies for all the packages ## Helpers @@ -42,7 +42,7 @@ Then you can run `urp lint` instead. ### Installing -* `install`: Install dependencies from all packages and all extras +* `sync`: Install dependencies from all packages and all extras * `lock-check`: Runs `uv lock --locked` to check uv.lock file consistency (fix with `lock-fix`) * `lock-fix`: Runs `uv lock` to fix uv.lock file consistency @@ -58,8 +58,6 @@ Then you can run `urp lint` instead. ### Testing -* `integration` Runs integration tests (against in-memory stores) -* `unit` Runs unit tests * `test` Runs unit and integration tests (against in-memory stores) * `test-all` Runs unit and integration tests (against all stores) diff --git a/README.md b/README.md index 4c13b220..e091cdb1 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ Retrievers providing both **unstructured** (similarity-search on vectors) and *** https://www.markdownguide.org/basic-syntax/#reference-style-links --> [![Status][status-shield]][status-url] +[![Coverage][coverage-shield]][coverage-url] [![Contributors][contributors-shield]][contributors-url] [![Forks][forks-shield]][forks-url] [![Stargazers][stars-shield]][stars-url] @@ -35,7 +36,7 @@ Retrievers providing both **unstructured** (similarity-search on vectors) and <p align="center"> Library providing Graph RAG combining vector search and traversal of metadata relationships. 
<br /> - <a href="https://github.com/datastax/graph-rag"><strong>Explore the docs »</strong></a> --> + <a href="https://datastax.github.io/graph-rag"><strong>Explore the docs »</strong></a> --> <br /> <br /> <a href="https://github.com/datastax/graph-rag/issues">Report Bug</a> @@ -133,6 +134,8 @@ Distributed under the Apache 2 License. See `LICENSE.txt` for more information. <p align="right">(<a href="#readme-top">back to top</a>)</p> +[coverage-shield]: https://img.shields.io/coverallsCoverage/github/datastax/graph-rag?style=for-the-badge +[coverage-url]: https://coveralls.io/github/datastax/graph-rag [status-shield]: https://img.shields.io/github/check-runs/datastax/graph-rag/main?style=for-the-badge [status-url]: https://github.com/datastax/graph-rag/actions/workflows/main.yml?query=branch%3Amain [contributors-shield]: https://img.shields.io/github/contributors/datastax/graph-rag.svg?style=for-the-badge @@ -141,7 +144,7 @@ Distributed under the Apache 2 License. See `LICENSE.txt` for more information. 
[forks-url]: https://github.com/datastax/graph-rag/network/members [stars-shield]: https://img.shields.io/github/stars/datastax/graph-rag.svg?style=for-the-badge [stars-url]: https://github.com/datastax/graph-rag/stargazers -[issues-shield]: https://img.shields.io/github/issues/datastax/graph-rag/repo_name.svg?style=for-the-badge +[issues-shield]: https://img.shields.io/github/issues/datastax/graph-rag.svg?style=for-the-badge [issues-url]: https://github.com/datastax/graph-rag/issues [license-shield]: https://img.shields.io/github/license/datastax/graph-rag.svg?style=for-the-badge -[license-url]: https://github.com/datastax/graph-rag/blob/master/LICENSE.txt \ No newline at end of file +[license-url]: https://github.com/datastax/graph-rag/blob/master/LICENSE.txt diff --git a/data/animals.jsonl b/data/animals.jsonl index c4e95c4a..3d1bcbd2 100644 --- a/data/animals.jsonl +++ b/data/animals.jsonl @@ -1,9 +1,9 @@ -{"id": "aardvark", "text": "the aardvark is a nocturnal mammal known for its burrowing habits and long snout used to sniff out ants.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["burrowing", "nocturnal", "ants", "savanna"], "habitat": "savanna"}} -{"id": "albatross", "text": "the albatross is a large seabird with the longest wingspan of any bird, allowing it to glide effortlessly over oceans.", "metadata": {"type": "bird", "number_of_legs": 2, "keywords": ["seabird", "wingspan", "ocean"], "habitat": "marine"}} -{"id": "alligator", "text": "alligators are large reptiles with powerful jaws and are commonly found in freshwater wetlands.", "metadata": {"type": "reptile", "number_of_legs": 4, "keywords": ["reptile", "jaws", "wetlands"], "diet": "carnivorous"}} -{"id": "alpaca", "text": "alpacas are domesticated mammals valued for their soft wool and friendly demeanor.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["wool", "domesticated", "friendly"], "origin": "south america"}} -{"id": "ant", "text": "ants are social 
insects that live in colonies and are known for their teamwork and strength.", "metadata": {"type": "insect", "number_of_legs": 6, "keywords": ["social", "colonies", "strength", "pollinator"], "diet": "omnivorous"}} -{"id": "anteater", "text": "anteaters use their long tongues to eat thousands of ants and termites each day.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["ants", "tongue", "termites"], "diet": "insectivore"}} +{"id": "aardvark", "text": "the aardvark is a nocturnal mammal known for its burrowing habits and long snout used to sniff out ants.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["burrowing", "nocturnal", "ants", "savanna"], "habitat": "savanna", "tags": [{"a": 5, "b": 7}, {"a": 8, "b": 10}]}} +{"id": "albatross", "text": "the albatross is a large seabird with the longest wingspan of any bird, allowing it to glide effortlessly over oceans.", "metadata": {"type": "bird", "number_of_legs": 2, "keywords": ["seabird", "wingspan", "ocean"], "habitat": "marine", "tags": [{"a": 5, "b": 8}, {"a": 8, "b": 10}]}} +{"id": "alligator", "text": "alligators are large reptiles with powerful jaws and are commonly found in freshwater wetlands.", "metadata": {"type": "reptile", "number_of_legs": 4, "keywords": ["reptile", "jaws", "wetlands"], "diet": "carnivorous", "nested": { "a": 5 }}} +{"id": "alpaca", "text": "alpacas are domesticated mammals valued for their soft wool and friendly demeanor.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["wool", "domesticated", "friendly"], "origin": "south america", "nested": { "a": 5 }}} +{"id": "ant", "text": "ants are social insects that live in colonies and are known for their teamwork and strength.", "metadata": {"type": "insect", "number_of_legs": 6, "keywords": ["social", "colonies", "strength", "pollinator"], "diet": "omnivorous", "nested": { "a": 6 }}} +{"id": "anteater", "text": "anteaters use their long tongues to eat thousands of ants and termites each 
day.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["ants", "tongue", "termites"], "diet": "insectivore", "nested": { "b": 5 }}} {"id": "antelope", "text": "antelopes are graceful herbivorous mammals that are often prey for large predators in the wild.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["graceful", "herbivore", "prey"], "habitat": "grasslands"}} {"id": "armadillo", "text": "armadillos have hard, protective shells and are known for their ability to roll into a ball.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["protective", "shell", "rolling"], "diet": "insectivore"}} {"id": "baboon", "text": "baboons are highly social primates with complex group dynamics and strong bonds.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["social", "primates", "group"], "diet": "omnivorous"}} @@ -87,7 +87,6 @@ {"id": "leopard", "text": "leopards are big cats known for their spotted coats and ability to climb trees.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["big cat", "spotted coat", "climbing"], "habitat": "forest"}} {"id": "lion", "text": "lions are large carnivorous cats known as the 'king of the jungle,' living in prides.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["carnivorous", "pride", "king of the jungle"], "habitat": "savanna"}} {"id": "lizard", "text": "lizards are reptiles known for their ability to regenerate their tails and their diverse species.", "metadata": {"type": "reptile", "number_of_legs": 4, "keywords": ["reptile", "regeneration", "diverse"], "habitat": "varied"}} -{"id": "llama", "text": "llamas are domesticated mammals from south america, valued for their wool and as pack animals.", "metadata": {"type": "mammal", "number_of_legs": 4, "keywords": ["domesticated", "pack animal", "wool"], "diet": "herbivorous"}} {"id": "lobster", "text": "lobsters are marine crustaceans known for their hard shells and pincers, often found on 
the seafloor.", "metadata": {"type": "crustacean", "number_of_legs": 10, "keywords": ["marine", "pincers", "seafloor"], "diet": "carnivorous"}} {"id": "magpie", "text": "magpies are intelligent birds known for their black-and-white plumage and ability to mimic sounds.", "metadata": {"type": "bird", "number_of_legs": 2, "keywords": ["intelligent", "plumage", "mimicry"], "diet": "omnivorous"}} {"id": "manatee", "text": "manatees are large, gentle marine mammals often called 'sea cows' for their grazing habits.", "metadata": {"type": "mammal", "number_of_legs": 0, "keywords": ["marine", "gentle", "sea cow"], "diet": "herbivorous"}} diff --git a/data/astrapy.README.md b/data/astrapy.README.md new file mode 100644 index 00000000..b5db15b3 --- /dev/null +++ b/data/astrapy.README.md @@ -0,0 +1,37 @@ +# AstraPy Documentation + +This data file contains the AstraPy documentation in a specialized format for use +in the GraphRAG `code_generation` example. + +## Generation + +The file was generated using `astrapy` version `1.5.2` via the `convert` method in +`graph_rag_example_helpers.examples.code_generation.converter`. See the help on the +method for more information about how to use it. + +## Structure + +The JSONL file contains one JSON object per line, with the following structure: + +id: the path to the object in the package +text: the description of the object (if any, can be empty) + +metadata: Always includes `name`, `path`, `kind` keys. + The remaining keys below are included when available. + name: the name of the object + path: the path to the object in the package + kind: either `module`, `class`, `function`, or `attribute` + parameters: the parameters for a class or function. Includes type + information, default values, and descriptions + attributes: the attributes on a class or module. 
Includes type + information and descriptions + gathered_types: list of non-standard types in the parameters and attributes + imports: list of non-standard types imported by the class or module + exports: list of non-standard types exported by the module + properties: list of boolean properties about the module + example: any code examples for the class, function, or module + references: list of any non-standard types used in the example code + returns: the return type and description + yields: the yield type and description + bases: list of base types inherited by the class + implemented_by: list of types that implement the a base class diff --git a/data/astrapy.jsonl b/data/astrapy.jsonl new file mode 100644 index 00000000..11ed2a1d --- /dev/null +++ b/data/astrapy.jsonl @@ -0,0 +1,1081 @@ +{"id": "astrapy", "text": "", "metadata": {"kind": "module", "name": "astrapy", "path": "astrapy", "imports": {"annotations": "__future__.annotations", "importlib": "importlib", "os": "os", "toml": "toml", "astrapy": "astrapy", "AstraDBAdmin": "astrapy.admin.AstraDBAdmin", "AstraDBDatabaseAdmin": "astrapy.admin.AstraDBDatabaseAdmin", "DataAPIDatabaseAdmin": "astrapy.admin.DataAPIDatabaseAdmin", "DataAPIClient": "astrapy.client.DataAPIClient", "AsyncCollection": "astrapy.collection.AsyncCollection", "Collection": "astrapy.collection.Collection", "AsyncDatabase": "astrapy.database.AsyncDatabase", "Database": "astrapy.database.Database"}, "exports": ["DataAPIClient", "Collection", "AsyncCollection", "AsyncDatabase", "__version__", "AstraDBAdmin", "DataAPIDatabaseAdmin", "Database", "AstraDBDatabaseAdmin"], "properties": {"is_init_module": true, "is_package": true, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.get_version", "text": "", "metadata": {"kind": "function", "name": "get_version", "path": "astrapy.get_version", "returns": [{"type": "str"}]}} +{"id": "astrapy.api_options", "text": "", "metadata": {"kind": 
"module", "name": "api_options", "path": "astrapy.api_options", "imports": {"annotations": "__future__.annotations", "dataclass": "dataclasses.dataclass", "field": "dataclasses.field", "TypeVar": "typing.TypeVar", "EmbeddingAPIKeyHeaderProvider": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "EmbeddingHeadersProvider": "astrapy.authentication.EmbeddingHeadersProvider"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.api_options.AO", "text": "", "metadata": {"kind": "attribute", "name": "AO", "path": "astrapy.api_options.AO", "value": "AO = TypeVar('AO', bound='BaseAPIOptions')"}} +{"id": "astrapy.api_options.BaseAPIOptions", "text": "A description of the options about how to interact with the Data API.", "metadata": {"kind": "class", "name": "BaseAPIOptions", "path": "astrapy.api_options.BaseAPIOptions", "parameters": [{"name": "max_time_ms", "default": "None", "type": "int | None"}], "attributes": [{"name": "max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. 
Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense."}]}} +{"id": "astrapy.api_options.BaseAPIOptions.max_time_ms", "text": "", "metadata": {"kind": "attribute", "name": "max_time_ms", "path": "astrapy.api_options.BaseAPIOptions.max_time_ms", "value": "max_time_ms: int | None = None"}} +{"id": "astrapy.api_options.BaseAPIOptions.with_default", "text": "Return a new instance created by completing this instance with a default\nAPI options object.\n\nIn other words, `optA.with_default(optB)` will take fields from optA\nwhen possible and draw defaults from optB when optA has them set to anything\nevaluating to False. (This relies on the __bool__ definition of the values,\nsuch as that of the EmbeddingHeadersTokenProvider instances)", "metadata": {"kind": "function", "name": "with_default", "path": "astrapy.api_options.BaseAPIOptions.with_default", "parameters": [{"name": "default", "type": "BaseAPIOptions | None", "description": "an API options instance to draw defaults from.", "default": null}], "returns": [{"type": "AO", "description": "a new instance of this class obtained by merging this one and the default."}], "gathered_types": ["AO", "BaseAPIOptions"]}} +{"id": "astrapy.api_options.BaseAPIOptions.with_override", "text": "Return a new instance created by overriding the members of this instance\nwith those taken from a supplied \"override\" API options object.\n\nIn other words, `optA.with_default(optB)` will take fields from optB\nwhen possible and fall back to optA when optB has them set to anything\nevaluating to False. 
(This relies on the __bool__ definition of the values,\nsuch as that of the EmbeddingHeadersTokenProvider instances)", "metadata": {"kind": "function", "name": "with_override", "path": "astrapy.api_options.BaseAPIOptions.with_override", "parameters": [{"name": "override", "type": "BaseAPIOptions | None", "description": "an API options instance to preferentially draw fields from.", "default": null}], "returns": [{"type": "AO", "description": "a new instance of this class obtained by merging the override and this one."}], "gathered_types": ["AO", "BaseAPIOptions"]}} +{"id": "astrapy.api_options.CollectionAPIOptions", "text": "A description of the options about how to interact with the Data API\nregarding a collection.\nDevelopers should not instantiate this class directly.", "metadata": {"kind": "class", "name": "CollectionAPIOptions", "path": "astrapy.api_options.CollectionAPIOptions", "parameters": [{"name": "max_time_ms", "default": "None", "type": "int | None"}, {"name": "embedding_api_key", "default": "lambda: EmbeddingAPIKeyHeaderProvider(None)()", "type": "EmbeddingHeadersProvider"}], "bases": ["BaseAPIOptions"], "attributes": [{"name": "max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. 
Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense."}, {"name": "embedding_api_key", "type": "EmbeddingHeadersProvider", "description": "an `astrapy.authentication.EmbeddingHeadersProvider`\nobject, encoding embedding-related API keys that will be passed\nas headers when interacting with the collection (on each Data API request).\nThe default value is `EmbeddingAPIKeyHeaderProvider(None)`, i.e.\nno embedding-specific headers, whereas if the collection is configured\nwith an embedding service other choices for this parameter can be\nmeaningfully supplied. is configured for the collection,"}], "gathered_types": ["astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "astrapy.authentication.EmbeddingHeadersProvider", "BaseAPIOptions"]}} +{"id": "astrapy.api_options.CollectionAPIOptions.embedding_api_key", "text": "", "metadata": {"kind": "attribute", "name": "embedding_api_key", "path": "astrapy.api_options.CollectionAPIOptions.embedding_api_key", "value": "embedding_api_key: EmbeddingHeadersProvider = field(default_factory=lambda: EmbeddingAPIKeyHeaderProvider(None))", "gathered_types": ["astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "astrapy.authentication.EmbeddingHeadersProvider"]}} +{"id": "astrapy.results", "text": "", "metadata": {"kind": "module", "name": "results", "path": "astrapy.results", "imports": {"annotations": "__future__.annotations", "ABC": "abc.ABC", "abstractmethod": "abc.abstractmethod", "dataclass": "dataclasses.dataclass", "Any": "typing.Any"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": 
"astrapy.results.OperationResult", "text": "Class that represents the generic result of a single mutation operation.", "metadata": {"kind": "class", "name": "OperationResult", "path": "astrapy.results.OperationResult", "parameters": [{"name": "raw_results", "type": "list[dict[str, Any]]"}], "bases": ["ABC"], "attributes": [{"name": "raw_results", "type": "list[dict[str, Any]]", "description": "response/responses from the Data API call.\nDepending on the exact delete method being used, this\nlist of raw responses can contain exactly one or a number of items."}], "gathered_types": ["ABC"], "implemented_by": ["astrapy.results.DeleteResult", "astrapy.results.InsertManyResult", "astrapy.results.UpdateResult", "astrapy.results.InsertOneResult"]}} +{"id": "astrapy.results.OperationResult.raw_results", "text": "", "metadata": {"kind": "attribute", "name": "raw_results", "path": "astrapy.results.OperationResult.raw_results", "value": "raw_results: list[dict[str, Any]]"}} +{"id": "astrapy.results.OperationResult.to_bulk_write_result", "text": "", "metadata": {"kind": "function", "name": "to_bulk_write_result", "path": "astrapy.results.OperationResult.to_bulk_write_result", "parameters": [{"name": "index_in_bulk_write", "type": "int"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.results.DeleteResult", "text": "Class that represents the result of delete operations.", "metadata": {"kind": "class", "name": "DeleteResult", "path": "astrapy.results.DeleteResult", "parameters": [{"name": "raw_results", "type": "list[dict[str, Any]]"}, {"name": "deleted_count", "type": "int"}], "bases": ["astrapy.results.OperationResult"], "attributes": [{"name": "deleted_count", "type": "int", "description": "number of deleted documents"}, {"name": "raw_results", "type": "list[dict[str, Any]]", "description": "response/responses from the Data API call.\nDepending on the exact delete method being used, this\nlist of raw 
responses can contain exactly one or a number of items."}], "gathered_types": ["astrapy.results.OperationResult"]}} +{"id": "astrapy.results.DeleteResult.deleted_count", "text": "", "metadata": {"kind": "attribute", "name": "deleted_count", "path": "astrapy.results.DeleteResult.deleted_count", "value": "deleted_count: int"}} +{"id": "astrapy.results.DeleteResult.to_bulk_write_result", "text": "", "metadata": {"kind": "function", "name": "to_bulk_write_result", "path": "astrapy.results.DeleteResult.to_bulk_write_result", "parameters": [{"name": "index_in_bulk_write", "type": "int"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.results.InsertOneResult", "text": "Class that represents the result of insert_one operations.", "metadata": {"kind": "class", "name": "InsertOneResult", "path": "astrapy.results.InsertOneResult", "parameters": [{"name": "raw_results", "type": "list[dict[str, Any]]"}, {"name": "inserted_id", "type": "Any"}], "bases": ["astrapy.results.OperationResult"], "attributes": [{"name": "raw_results", "type": "list[dict[str, Any]]", "description": "one-item list with the response from the Data API call"}, {"name": "inserted_id", "type": "Any", "description": "the ID of the inserted document"}], "gathered_types": ["astrapy.results.OperationResult"]}} +{"id": "astrapy.results.InsertOneResult.inserted_id", "text": "", "metadata": {"kind": "attribute", "name": "inserted_id", "path": "astrapy.results.InsertOneResult.inserted_id", "value": "inserted_id: Any"}} +{"id": "astrapy.results.InsertOneResult.to_bulk_write_result", "text": "", "metadata": {"kind": "function", "name": "to_bulk_write_result", "path": "astrapy.results.InsertOneResult.to_bulk_write_result", "parameters": [{"name": "index_in_bulk_write", "type": "int"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.results.InsertManyResult", "text": "Class that 
represents the result of insert_many operations.", "metadata": {"kind": "class", "name": "InsertManyResult", "path": "astrapy.results.InsertManyResult", "parameters": [{"name": "raw_results", "type": "list[dict[str, Any]]"}, {"name": "inserted_ids", "type": "list[Any]"}], "bases": ["astrapy.results.OperationResult"], "attributes": [{"name": "raw_results", "type": "list[dict[str, Any]]", "description": "responses from the Data API calls"}, {"name": "inserted_ids", "type": "list[Any]", "description": "list of the IDs of the inserted documents"}], "gathered_types": ["astrapy.results.OperationResult"]}} +{"id": "astrapy.results.InsertManyResult.inserted_ids", "text": "", "metadata": {"kind": "attribute", "name": "inserted_ids", "path": "astrapy.results.InsertManyResult.inserted_ids", "value": "inserted_ids: list[Any]"}} +{"id": "astrapy.results.InsertManyResult.to_bulk_write_result", "text": "", "metadata": {"kind": "function", "name": "to_bulk_write_result", "path": "astrapy.results.InsertManyResult.to_bulk_write_result", "parameters": [{"name": "index_in_bulk_write", "type": "int"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.results.UpdateResult", "text": "Class that represents the result of any update operation.", "metadata": {"kind": "class", "name": "UpdateResult", "path": "astrapy.results.UpdateResult", "parameters": [{"name": "raw_results", "type": "list[dict[str, Any]]"}, {"name": "update_info", "type": "dict[str, Any]"}], "bases": ["astrapy.results.OperationResult"], "attributes": [{"name": "raw_results", "type": "list[dict[str, Any]]", "description": "responses from the Data API calls"}, {"name": "update_info", "type": "dict[str, Any]", "description": "a dictionary reporting about the update"}], "note": "the \"update_info\" field has the following fields: \"n\" (int),\n\"updatedExisting\" (bool), \"ok\" (float), \"nModified\" (int)\nand optionally \"upserted\" containing the ID of an 
upserted document.", "gathered_types": ["astrapy.results.OperationResult"]}} +{"id": "astrapy.results.UpdateResult.update_info", "text": "", "metadata": {"kind": "attribute", "name": "update_info", "path": "astrapy.results.UpdateResult.update_info", "value": "update_info: dict[str, Any]"}} +{"id": "astrapy.results.UpdateResult.to_bulk_write_result", "text": "", "metadata": {"kind": "function", "name": "to_bulk_write_result", "path": "astrapy.results.UpdateResult.to_bulk_write_result", "parameters": [{"name": "index_in_bulk_write", "type": "int"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.results.BulkWriteResult", "text": "Class that represents the result of a bulk write operations.\n\nIndices in the maps below refer to the position of each write operation\nin the list of operations passed to the bulk_write command.\n\nThe numeric counts refer to the whole of the bulk write.", "metadata": {"kind": "class", "name": "BulkWriteResult", "path": "astrapy.results.BulkWriteResult", "parameters": [{"name": "bulk_api_results", "type": "dict[int, list[dict[str, Any]]]"}, {"name": "deleted_count", "type": "int"}, {"name": "inserted_count", "type": "int"}, {"name": "matched_count", "type": "int"}, {"name": "modified_count", "type": "int"}, {"name": "upserted_count", "type": "int"}, {"name": "upserted_ids", "type": "dict[int, Any]"}], "attributes": [{"name": "bulk_api_results", "type": "dict[int, list[dict[str, Any]]]", "description": "a map from indices to the corresponding raw responses"}, {"name": "deleted_count", "type": "int", "description": "number of deleted documents"}, {"name": "inserted_count", "type": "int", "description": "number of inserted documents"}, {"name": "matched_count", "type": "int", "description": "number of matched documents"}, {"name": "modified_count", "type": "int", "description": "number of modified documents"}, {"name": "upserted_count", "type": "int", "description": "number 
of upserted documents"}, {"name": "upserted_ids", "type": "dict[int, Any]", "description": "a (sparse) map from indices to ID of the upserted document"}]}} +{"id": "astrapy.results.BulkWriteResult.bulk_api_results", "text": "", "metadata": {"kind": "attribute", "name": "bulk_api_results", "path": "astrapy.results.BulkWriteResult.bulk_api_results", "value": "bulk_api_results: dict[int, list[dict[str, Any]]]"}} +{"id": "astrapy.results.BulkWriteResult.deleted_count", "text": "", "metadata": {"kind": "attribute", "name": "deleted_count", "path": "astrapy.results.BulkWriteResult.deleted_count", "value": "deleted_count: int"}} +{"id": "astrapy.results.BulkWriteResult.inserted_count", "text": "", "metadata": {"kind": "attribute", "name": "inserted_count", "path": "astrapy.results.BulkWriteResult.inserted_count", "value": "inserted_count: int"}} +{"id": "astrapy.results.BulkWriteResult.matched_count", "text": "", "metadata": {"kind": "attribute", "name": "matched_count", "path": "astrapy.results.BulkWriteResult.matched_count", "value": "matched_count: int"}} +{"id": "astrapy.results.BulkWriteResult.modified_count", "text": "", "metadata": {"kind": "attribute", "name": "modified_count", "path": "astrapy.results.BulkWriteResult.modified_count", "value": "modified_count: int"}} +{"id": "astrapy.results.BulkWriteResult.upserted_count", "text": "", "metadata": {"kind": "attribute", "name": "upserted_count", "path": "astrapy.results.BulkWriteResult.upserted_count", "value": "upserted_count: int"}} +{"id": "astrapy.results.BulkWriteResult.upserted_ids", "text": "", "metadata": {"kind": "attribute", "name": "upserted_ids", "path": "astrapy.results.BulkWriteResult.upserted_ids", "value": "upserted_ids: dict[int, Any]"}} +{"id": "astrapy.results.BulkWriteResult.zero", "text": "Return an empty BulkWriteResult, for use in no-ops and list reductions.", "metadata": {"kind": "function", "name": "zero", "path": "astrapy.results.BulkWriteResult.zero", "returns": [{"type": 
"BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.database", "text": "", "metadata": {"kind": "module", "name": "database", "path": "astrapy.database", "imports": {"annotations": "__future__.annotations", "logging": "logging", "warnings": "warnings", "TracebackType": "types.TracebackType", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Sequence": "typing.Sequence", "deprecation": "deprecation", "__version__": "astrapy.__version__", "fetch_database_info": "astrapy.admin.fetch_database_info", "parse_api_endpoint": "astrapy.admin.parse_api_endpoint", "APICommander": "astrapy.api_commander.APICommander", "CollectionAPIOptions": "astrapy.api_options.CollectionAPIOptions", "coerce_embedding_headers_provider": "astrapy.authentication.coerce_embedding_headers_provider", "coerce_token_provider": "astrapy.authentication.coerce_token_provider", "redact_secret": "astrapy.authentication.redact_secret", "CallerType": "astrapy.constants.CallerType", "Environment": "astrapy.constants.Environment", "AsyncCommandCursor": "astrapy.cursors.AsyncCommandCursor", "CommandCursor": "astrapy.cursors.CommandCursor", "API_PATH_ENV_MAP": "astrapy.defaults.API_PATH_ENV_MAP", "API_VERSION_ENV_MAP": "astrapy.defaults.API_VERSION_ENV_MAP", "DEFAULT_ASTRA_DB_KEYSPACE": "astrapy.defaults.DEFAULT_ASTRA_DB_KEYSPACE", "DEFAULT_DATA_API_AUTH_HEADER": "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", "NAMESPACE_DEPRECATION_NOTICE_METHOD": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_METHOD", "SET_CALLER_DEPRECATION_NOTICE": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "CollectionAlreadyExistsException": "astrapy.exceptions.CollectionAlreadyExistsException", "DataAPIFaultyResponseException": "astrapy.exceptions.DataAPIFaultyResponseException", "DevOpsAPIException": "astrapy.exceptions.DevOpsAPIException", "MultiCallTimeoutManager": "astrapy.exceptions.MultiCallTimeoutManager", "base_timeout_info": "astrapy.exceptions.base_timeout_info", 
"CollectionDescriptor": "astrapy.info.CollectionDescriptor", "CollectionVectorServiceOptions": "astrapy.info.CollectionVectorServiceOptions", "DatabaseInfo": "astrapy.info.DatabaseInfo", "check_caller_parameters": "astrapy.meta.check_caller_parameters", "check_namespace_keyspace": "astrapy.meta.check_namespace_keyspace", "DatabaseAdmin": "astrapy.admin.DatabaseAdmin", "EmbeddingHeadersProvider": "astrapy.authentication.EmbeddingHeadersProvider", "TokenProvider": "astrapy.authentication.TokenProvider", "AsyncCollection": "astrapy.collection.AsyncCollection", "Collection": "astrapy.collection.Collection"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.database.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.database.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.database.Database", "text": "A Data API database. This is the object for doing database-level\nDML, such as creating/deleting collections, and for obtaining Collection\nobjects themselves. This class has a synchronous interface.\n\nThe usual way of obtaining one Database is through the `get_database`\nmethod of a `DataAPIClient`.\n\nOn Astra DB, a Database comes with an \"API Endpoint\", which implies\na Database object instance reaches a specific region (relevant point in\ncase of multi-region databases).", "metadata": {"kind": "class", "name": "Database", "path": "astrapy.database.Database", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the full \"API Endpoint\" string used to reach the Data API.\nExample: \"https://<database_id>-<region>.apps.astra.datastax.com\"", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. 
Example: \"AstraCS:xyz...\"\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, on Astra DB the name \"default_keyspace\" is set,\nwhile on other environments the keyspace is left unspecified: in this case,\nmost operations are unavailable until a keyspace is set (through an explicit\n`use_keyspace` invocation or equivalent).", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a string representing the target Data API environment.\nIt can be left unspecified for the default value of `Environment.PROD`;\nother values include `Environment.OTHER`, `Environment.DSE`.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. 
In typical usage, this\nshould be left to its default (sensibly chosen based on the environment).", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient\n>>> my_client = astrapy.DataAPIClient(\"AstraCS:...\")\n>>> my_db = my_client.get_database(\n... \"https://01234567-....apps.astra.datastax.com\"\n... )", "note": "creating an instance of Database does not trigger actual creation\nof the database itself, which should exist beforehand. To create databases,\nsee the AstraDBAdmin class.", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.database.Database.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.database.Database.environment", "value": "environment = environment or Environment.PROD.lower()", "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.database.Database.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.database.Database.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.database.Database.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.database.Database.api_endpoint", "value": "api_endpoint = api_endpoint.strip('/')"}} +{"id": "astrapy.database.Database.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.database.Database.api_path", "value": "api_path = _api_path", "gathered_types": ["_api_path"]}} +{"id": "astrapy.database.Database.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": 
"astrapy.database.Database.api_version", "value": "api_version = _api_version", "gathered_types": ["_api_version"]}} +{"id": "astrapy.database.Database.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.database.Database.callers", "value": "callers = callers_param"}} +{"id": "astrapy.database.Database.with_options", "text": "Create a clone of this database with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.database.Database.with_options", "parameters": [{"name": "keyspace", "type": "str | None", "description": "this is the keyspace all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, the name \"default_keyspace\" is set.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "a new `Database` instance."}], "example": ">>> my_db_2 = my_db.with_options(\n... keyspace=\"the_other_keyspace\",\n... callers=[(\"the_caller\", \"0.1.0\")],\n... 
)", "gathered_types": ["astrapy.database.Database", "astrapy.constants.CallerType"]}} +{"id": "astrapy.database.Database.to_async", "text": "Create an AsyncDatabase from this one. Save for the arguments\nexplicitly provided as overrides, everything else is kept identical\nto this database in the copy.", "metadata": {"kind": "function", "name": "to_async", "path": "astrapy.database.Database.to_async", "parameters": [{"name": "api_endpoint", "type": "str | None", "description": "the full \"API Endpoint\" string used to reach the Data API.\nExample: \"https://<database_id>-<region>.apps.astra.datastax.com\"", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. Example: \"AstraCS:xyz...\"\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, the name \"default_keyspace\" is set.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a string representing the target Data API environment.\nValues are, for example, `Environment.PROD`, `Environment.OTHER`,\nor `Environment.DSE`.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "returns": [{"type": "AsyncDatabase", "description": "the new copy, an `AsyncDatabase` instance."}], "example": ">>> my_async_db = my_db.to_async()\n>>> asyncio.run(my_async_db.list_collection_names())", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.database.Database.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe Data API calls are performed (the \"caller\").", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.database.Database.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe Data API calls are performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_db.set_caller(caller_name=\"the_caller\", caller_version=\"0.1.0\")"}} +{"id": "astrapy.database.Database.use_namespace", "text": "Switch to a new working namespace for this database.\nThis method changes (mutates) the Database instance.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"use_keyspace\" method.**\n\nNote that this method does not create the namespace, which should exist\nalready (created for instance with a `DatabaseAdmin.create_namespace` call).", "metadata": {"kind": "function", "name": "use_namespace", "path": "astrapy.database.Database.use_namespace", "parameters": [{"name": "namespace", "type": "str", "description": "the new namespace to use as the database working namespace.", "default": null}], "returns": [{"type": "None", "description": "None."}], "example": ">>> my_db.list_collection_names()\n['coll_1', 'coll_2']\n>>> my_db.use_namespace(\"an_empty_namespace\")\n>>> my_db.list_collection_names()\n[]"}} +{"id": "astrapy.database.Database.use_keyspace", "text": "Switch to a new working keyspace for this database.\nThis method changes (mutates) the Database instance.\n\nNote that this method does not create the keyspace, which should exist\nalready (created for instance with a `DatabaseAdmin.create_keyspace` call).", "metadata": {"kind": "function", "name": "use_keyspace", "path": "astrapy.database.Database.use_keyspace", "parameters": [{"name": "keyspace", "type": "str", "description": "the new keyspace to use as the database working keyspace.", "default": null}], "returns": [{"type": "None", "description": "None."}], "example": ">>> my_db.list_collection_names()\n['coll_1', 'coll_2']\n>>> my_db.use_keyspace(\"an_empty_keyspace\")\n>>> my_db.list_collection_names()\n[]"}} +{"id": 
"astrapy.database.Database.info", "text": "Additional information on the database as a DatabaseInfo instance.\n\nSome of the returned properties are dynamic throughout the lifetime\nof the database (such as raw_info[\"keyspaces\"]). For this reason,\neach invocation of this method triggers a new request to the DevOps API.", "metadata": {"kind": "function", "name": "info", "path": "astrapy.database.Database.info", "returns": [{"type": "DatabaseInfo"}], "example": ">>> my_db.info().region\n'eu-west-1'\n\n>>> my_db.info().raw_info['datacenters'][0]['dateCreated']\n'2023-01-30T12:34:56Z'", "note": "see the DatabaseInfo documentation for a caveat about the difference\nbetween the `region` and the `raw_info[\"region\"]` attributes.", "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.database.Database.id", "text": "The ID of this database.", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.database.Database.id", "value": "id: str", "example": ">>> my_db.id\n'01234567-89ab-cdef-0123-456789abcdef'"}} +{"id": "astrapy.database.Database.name", "text": "The name of this database. Note that this bears no unicity guarantees.\n\nCalling this method the first time involves a request\nto the DevOps API (the resulting database name is then cached).\nSee the `info()` method for more details.", "metadata": {"kind": "function", "name": "name", "path": "astrapy.database.Database.name", "returns": [{"type": "str"}], "example": ">>> my_db.name()\n'the_application_database'"}} +{"id": "astrapy.database.Database.namespace", "text": "The namespace this database uses as target for all commands when\nno method-call-specific namespace is specified.\n\n*DEPRECATED* (removal in 2.0). 
Switch to the \"keyspace\" property.**", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.database.Database.namespace", "value": "namespace: str | None", "returns": [{"type": "str | None", "description": "the working namespace (a string), or None if not set."}], "example": ">>> my_db.namespace\n'the_keyspace'"}} +{"id": "astrapy.database.Database.keyspace", "text": "The keyspace this database uses as target for all commands when\nno method-call-specific keyspace is specified.", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.database.Database.keyspace", "value": "keyspace: str | None", "returns": [{"type": "str | None", "description": "the working keyspace (a string), or None if not set."}], "example": ">>> my_db.keyspace\n'the_keyspace'"}} +{"id": "astrapy.database.Database.get_collection", "text": "Spawn a `Collection` object instance representing a collection\non this database.\n\nCreating a `Collection` instance does not have any effect on the\nactual state of the database: in other words, for the created\n`Collection` instance to be used meaningfully, the collection\nmust exist already (for instance, it should have been created\npreviously by calling the `create_collection` method).", "metadata": {"kind": "function", "name": "get_collection", "path": "astrapy.database.Database.get_collection", "parameters": [{"name": "name", "type": "str", "description": "the name of the collection.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace containing the collection. If no keyspace\nis specified, the general setting for this database is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based\nauthentication, specialized subclasses of\n`astrapy.authentication.EmbeddingHeadersProvider` should be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration\nof each operation on the collection. Individual timeouts can be\nprovided to each collection method call and will take precedence, with\nthis value being an overall default.\nNote that for some methods involving multiple API calls (such as `find`,\n`delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}], "returns": [{"type": "Collection", "description": "a `Collection` instance, representing the desired collection\n(but without any form of validation)."}], "example": ">>> my_col = my_db.get_collection(\"my_collection\")\n>>> my_col.count_documents({}, upper_bound=100)\n41", "note": "The attribute and indexing syntax forms achieve the same effect\nas this method. 
In other words, the following are equivalent:\n my_db.get_collection(\"coll_name\")\n my_db.coll_name\n my_db[\"coll_name\"]", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider"]}} +{"id": "astrapy.database.Database.create_collection", "text": "Creates a collection on the database and return the Collection\ninstance that represents it.\n\nThis is a blocking operation: the method returns when the collection\nis ready to be used. As opposed to the `get_collection` instance,\nthis method triggers causes the collection to be actually created on DB.", "metadata": {"kind": "function", "name": "create_collection", "path": "astrapy.database.Database.create_collection", "parameters": [{"name": "name", "type": "str", "description": "the name of the collection.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace where the collection is to be created.\nIf not specified, the general setting for this database is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "dimension", "type": "int | None", "description": "for vector collections, the dimension of the vectors\n(i.e. the number of their components).", "value": "None", "default": "None"}, {"name": "metric", "type": "str | None", "description": "the similarity metric used for vector searches.\nAllowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`\nor `VectorMetric.COSINE` (default).", "value": "None", "default": "None"}, {"name": "service", "type": "CollectionVectorServiceOptions | dict[str, Any] | None", "description": "a dictionary describing a service for\nembedding computation, e.g. 
`{\"provider\": \"ab\", \"modelName\": \"xy\"}`.\nAlternatively, a CollectionVectorServiceOptions object to the same effect.", "value": "None", "default": "None"}, {"name": "indexing", "type": "dict[str, Any] | None", "description": "optional specification of the indexing options for\nthe collection, in the form of a dictionary such as\n {\"deny\": [...]}\nor\n {\"allow\": [...]}", "value": "None", "default": "None"}, {"name": "default_id_type", "type": "str | None", "description": "this sets what type of IDs the API server will\ngenerate when inserting documents that do not specify their\n`_id` field explicitly. Can be set to any of the values\n`DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,\n`DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,\n`DefaultIdType.DEFAULT`.", "value": "None", "default": "None"}, {"name": "additional_options", "type": "dict[str, Any] | None", "description": "any further set of key-value pairs that will\nbe added to the \"options\" part of the payload when sending\nthe Data API command to create a collection.", "value": "None", "default": "None"}, {"name": "check_exists", "type": "bool | None", "description": "whether to run an existence check for the collection\nname before attempting to create the collection:\nIf check_exists is True, an error is raised when creating\nan existing collection.\nIf it is False, the creation is attempted. 
In this case, for\npreexisting collections, the command will succeed or fail\ndepending on whether the options match or not.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. 
Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}], "returns": [{"type": "Collection", "description": "a (synchronous) `Collection` instance, representing the"}, {"type": "Collection", "description": "newly-created collection."}], "example": ">>> new_col = my_db.create_collection(\"my_v_col\", dimension=3)\n>>> new_col.insert_one({\"name\": \"the_row\", \"$vector\": [0.4, 0.5, 0.7]})\nInsertOneResult(raw_results=..., inserted_id='e22dd65e-...-...-...')", "note": "A collection is considered a vector collection if at least one of\n`dimension` or `service` are provided and not null. In that case,\nand only in that case, is `metric` an accepted parameter.\nNote, moreover, that if passing both these parameters, then\nthe dimension must be compatible with the chosen service.", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider", "astrapy.info.CollectionVectorServiceOptions"]}} +{"id": "astrapy.database.Database.drop_collection", "text": "Drop a collection from the database, along with all documents therein.", "metadata": {"kind": "function", "name": "drop_collection", "path": "astrapy.database.Database.drop_collection", "parameters": [{"name": "name_or_collection", "type": "str | Collection", "description": "either the name of a collection or\na `Collection` instance.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary in the form {\"ok\": 1} if the command succeeds."}], "example": ">>> 
my_db.list_collection_names()\n['a_collection', 'my_v_col', 'another_col']\n>>> my_db.drop_collection(\"my_v_col\")\n{'ok': 1}\n>>> my_db.list_collection_names()\n['a_collection', 'another_col']", "note": "when providing a collection name, it is assumed that the collection\nis to be found in the keyspace that was set at database instance level."}} +{"id": "astrapy.database.Database.list_collections", "text": "List all collections in a given keyspace for this database.", "metadata": {"kind": "function", "name": "list_collections", "path": "astrapy.database.Database.list_collections", "parameters": [{"name": "keyspace", "type": "str | None", "description": "the keyspace to be inspected. If not specified,\nthe general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "CommandCursor[CollectionDescriptor]", "description": "a `CommandCursor` to iterate over CollectionDescriptor instances,"}, {"type": "CommandCursor[CollectionDescriptor]", "description": "each corresponding to a collection."}], "example": ">>> ccur = my_db.list_collections()\n>>> ccur\n<astrapy.cursors.CommandCursor object at ...>\n>>> list(ccur)\n[CollectionDescriptor(name='my_v_col', options=CollectionOptions())]\n>>> for coll_dict in my_db.list_collections():\n... 
print(coll_dict)\n...\nCollectionDescriptor(name='my_v_col', options=CollectionOptions())", "gathered_types": ["astrapy.info.CollectionDescriptor", "astrapy.cursors.CommandCursor"]}} +{"id": "astrapy.database.Database.list_collection_names", "text": "List the names of all collections in a given keyspace of this database.", "metadata": {"kind": "function", "name": "list_collection_names", "path": "astrapy.database.Database.list_collection_names", "parameters": [{"name": "keyspace", "type": "str | None", "description": "the keyspace to be inspected. If not specified,\nthe general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "a list of the collection names as strings, in no particular order."}], "example": ">>> my_db.list_collection_names()\n['a_collection', 'another_col']"}} +{"id": "astrapy.database.Database.command", "text": "Send a POST request to the Data API for this database with\nan arbitrary, caller-provided payload.", "metadata": {"kind": "function", "name": "command", "path": "astrapy.database.Database.command", "parameters": [{"name": "body", "type": "dict[str, Any]", "description": "a JSON-serializable dictionary, the payload of the request.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace to use. Requests always target a keyspace:\nif not specified, the general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "collection_name", "type": "str | None", "description": "if provided, the collection name is appended at the end\nof the endpoint. In this way, this method allows collection-level\narbitrary POST requests as well.", "value": "None", "default": "None"}, {"name": "raise_api_errors", "type": "bool", "description": "if True, responses with a nonempty 'errors' field\nresult in an astrapy exception being raised.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary with the response of the HTTP request."}], "example": ">>> my_db.command({\"findCollections\": {}})\n{'status': {'collections': ['my_coll']}}\n>>> my_db.command({\"countDocuments\": {}}, collection_name=\"my_coll\")\n{'status': {'count': 123}}"}} +{"id": "astrapy.database.Database.get_database_admin", "text": "Return a DatabaseAdmin object corresponding to this database, for\nuse in admin tasks such as managing keyspaces.\n\nThis method, depending on the environment where the database resides,\nreturns an appropriate subclass of DatabaseAdmin.", "metadata": {"kind": "function", "name": "get_database_admin", "path": "astrapy.database.Database.get_database_admin", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission on the database to\nperform the desired tasks. 
If omitted (as it can generally be done),\nthe token of this Database is used.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) is\ndetermined from the API Endpoint.\nNote that this parameter is allowed only for Astra DB environments.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). Generally not needed.\nNote that this parameter is allowed only for Astra DB environments.", "value": "None", "default": "None"}], "returns": [{"type": "DatabaseAdmin", "description": "A DatabaseAdmin instance targeting this database. More precisely,"}, {"type": "DatabaseAdmin", "description": "for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;"}, {"type": "DatabaseAdmin", "description": "for other environments, an instance of `DataAPIDatabaseAdmin` is returned."}], "example": ">>> my_db_admin = my_db.get_database_admin()\n>>> if \"new_keyspace\" not in my_db_admin.list_keyspaces():\n... my_db_admin.create_keyspace(\"new_keyspace\")\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'new_keyspace']", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.admin.DatabaseAdmin"]}} +{"id": "astrapy.database.AsyncDatabase", "text": "A Data API database. This is the object for doing database-level\nDML, such as creating/deleting collections, and for obtaining Collection\nobjects themselves. 
This class has an asynchronous interface.\n\nThe usual way of obtaining one AsyncDatabase is through the `get_async_database`\nmethod of a `DataAPIClient`.\n\nOn Astra DB, an AsyncDatabase comes with an \"API Endpoint\", which implies\nan AsyncDatabase object instance reaches a specific region (relevant point in\ncase of multi-region databases).", "metadata": {"kind": "class", "name": "AsyncDatabase", "path": "astrapy.database.AsyncDatabase", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the full \"API Endpoint\" string used to reach the Data API.\nExample: \"https://<database_id>-<region>.apps.astra.datastax.com\"", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. Example: \"AstraCS:xyz...\"\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, on Astra DB the name \"default_keyspace\" is set,\nwhile on other environments the keyspace is left unspecified: in this case,\nmost operations are unavailable until a keyspace is set (through an explicit\n`use_keyspace` invocation or equivalent).", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. 
These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a string representing the target Data API environment.\nIt can be left unspecified for the default value of `Environment.PROD`;\nother values include `Environment.OTHER`, `Environment.DSE`.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default (sensibly chosen based on the environment).", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient\n>>> my_client = astrapy.DataAPIClient(\"AstraCS:...\")\n>>> my_db = my_client.get_async_database(\n... \"https://01234567-....apps.astra.datastax.com\"\n... )", "note": "creating an instance of AsyncDatabase does not trigger actual creation\nof the database itself, which should exist beforehand. 
To create databases,\nsee the AstraDBAdmin class.", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.database.AsyncDatabase.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.database.AsyncDatabase.environment", "value": "environment = environment or Environment.PROD.lower()", "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.database.AsyncDatabase.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.database.AsyncDatabase.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.database.AsyncDatabase.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.database.AsyncDatabase.api_endpoint", "value": "api_endpoint = api_endpoint.strip('/')"}} +{"id": "astrapy.database.AsyncDatabase.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.database.AsyncDatabase.api_path", "value": "api_path = _api_path", "gathered_types": ["_api_path"]}} +{"id": "astrapy.database.AsyncDatabase.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": "astrapy.database.AsyncDatabase.api_version", "value": "api_version = _api_version", "gathered_types": ["_api_version"]}} +{"id": "astrapy.database.AsyncDatabase.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.database.AsyncDatabase.callers", "value": "callers = callers_param"}} +{"id": "astrapy.database.AsyncDatabase.with_options", "text": "Create a clone of this database with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.database.AsyncDatabase.with_options", "parameters": [{"name": "keyspace", "type": "str | None", "description": "this is the keyspace 
all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, the name \"default_keyspace\" is set.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncDatabase", "description": "a new `AsyncDatabase` instance."}], "example": ">>> my_async_db_2 = my_async_db.with_options(\n... keyspace=\"the_other_keyspace\",\n... callers=[(\"the_caller\", \"0.1.0\")],\n... )", "gathered_types": ["astrapy.constants.CallerType", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.database.AsyncDatabase.to_sync", "text": "Create a (synchronous) Database from this one. 
Save for the arguments\nexplicitly provided as overrides, everything else is kept identical\nto this database in the copy.", "metadata": {"kind": "function", "name": "to_sync", "path": "astrapy.database.AsyncDatabase.to_sync", "parameters": [{"name": "api_endpoint", "type": "str | None", "description": "the full \"API Endpoint\" string used to reach the Data API.\nExample: \"https://<database_id>-<region>.apps.astra.datastax.com\"", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. Example: \"AstraCS:xyz...\"\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace all method calls will target, unless\none is explicitly specified in the call. If no keyspace is supplied\nwhen creating a Database, the name \"default_keyspace\" is set.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. 
*DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a string representing the target Data API environment.\nValues are, for example, `Environment.PROD`, `Environment.OTHER`,\nor `Environment.DSE`.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "the new copy, a `Database` instance."}], "example": ">>> my_sync_db = my_async_db.to_sync()\n>>> my_sync_db.list_collection_names()\n['a_collection', 'another_collection']", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database", "astrapy.constants.CallerType"]}} +{"id": "astrapy.database.AsyncDatabase.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe Data API calls are performed (the \"caller\").", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.database.AsyncDatabase.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe Data API calls are performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_db.set_caller(caller_name=\"the_caller\", caller_version=\"0.1.0\")"}} +{"id": "astrapy.database.AsyncDatabase.use_namespace", "text": "Switch to a new working namespace for this database.\nThis method changes (mutates) the AsyncDatabase instance.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"use_keyspace\" method.**\n\nNote that this method does not create the namespace, which should exist\nalready (created for instance with a `DatabaseAdmin.async_create_namespace` call).", "metadata": {"kind": "function", "name": "use_namespace", "path": "astrapy.database.AsyncDatabase.use_namespace", "parameters": [{"name": "namespace", "type": "str", "description": "the new namespace to use as the database working namespace.", "default": null}], "returns": [{"type": "None", "description": "None."}], "example": ">>> asyncio.run(my_async_db.list_collection_names())\n['coll_1', 'coll_2']\n>>> my_async_db.use_namespace(\"an_empty_namespace\")\n>>> asyncio.run(my_async_db.list_collection_names())\n[]"}} +{"id": "astrapy.database.AsyncDatabase.use_keyspace", "text": "Switch to a new working keyspace for this database.\nThis method changes (mutates) the AsyncDatabase instance.\n\nNote that this method does not create the keyspace, which should exist\nalready (created for instance with a `DatabaseAdmin.async_create_keyspace` call).", "metadata": {"kind": "function", "name": "use_keyspace", "path": "astrapy.database.AsyncDatabase.use_keyspace", "parameters": [{"name": "keyspace", "type": "str", "description": "the new keyspace to use as the database working keyspace.", "default": null}], "returns": [{"type": "None", "description": "None."}], "example": ">>> asyncio.run(my_async_db.list_collection_names())\n['coll_1', 
'coll_2']\n>>> my_async_db.use_keyspace(\"an_empty_keyspace\")\n>>> asyncio.run(my_async_db.list_collection_names())\n[]"}} +{"id": "astrapy.database.AsyncDatabase.info", "text": "Additional information on the database as a DatabaseInfo instance.\n\nSome of the returned properties are dynamic throughout the lifetime\nof the database (such as raw_info[\"keyspaces\"]). For this reason,\neach invocation of this method triggers a new request to the DevOps API.", "metadata": {"kind": "function", "name": "info", "path": "astrapy.database.AsyncDatabase.info", "returns": [{"type": "DatabaseInfo"}], "example": ">>> my_async_db.info().region\n'eu-west-1'\n\n>>> my_async_db.info().raw_info['datacenters'][0]['dateCreated']\n'2023-01-30T12:34:56Z'", "note": "see the DatabaseInfo documentation for a caveat about the difference\nbetween the `region` and the `raw_info[\"region\"]` attributes.", "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.database.AsyncDatabase.id", "text": "The ID of this database.", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.database.AsyncDatabase.id", "value": "id: str", "example": ">>> my_async_db.id\n'01234567-89ab-cdef-0123-456789abcdef'"}} +{"id": "astrapy.database.AsyncDatabase.name", "text": "The name of this database. Note that this bears no unicity guarantees.\n\nCalling this method the first time involves a request\nto the DevOps API (the resulting database name is then cached).\nSee the `info()` method for more details.", "metadata": {"kind": "function", "name": "name", "path": "astrapy.database.AsyncDatabase.name", "returns": [{"type": "str"}], "example": ">>> my_async_db.name()\n'the_application_database'"}} +{"id": "astrapy.database.AsyncDatabase.namespace", "text": "The namespace this database uses as target for all commands when\nno method-call-specific namespace is specified.\n\n*DEPRECATED* (removal in 2.0). 
Switch to the \"keyspace\" property.**", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.database.AsyncDatabase.namespace", "value": "namespace: str | None", "returns": [{"type": "str | None", "description": "the working namespace (a string), or None if not set."}], "example": ">>> my_async_db.namespace\n'the_keyspace'"}} +{"id": "astrapy.database.AsyncDatabase.keyspace", "text": "The keyspace this database uses as target for all commands when\nno method-call-specific keyspace is specified.", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.database.AsyncDatabase.keyspace", "value": "keyspace: str | None", "returns": [{"type": "str | None", "description": "the working keyspace (a string), or None if not set."}], "example": ">>> my_async_db.keyspace\n'the_keyspace'"}} +{"id": "astrapy.database.AsyncDatabase.get_collection", "text": "Spawn an `AsyncCollection` object instance representing a collection\non this database.\n\nCreating an `AsyncCollection` instance does not have any effect on the\nactual state of the database: in other words, for the created\n`AsyncCollection` instance to be used meaningfully, the collection\nmust exist already (for instance, it should have been created\npreviously by calling the `create_collection` method).", "metadata": {"kind": "function", "name": "get_collection", "path": "astrapy.database.AsyncDatabase.get_collection", "parameters": [{"name": "name", "type": "str", "description": "the name of the collection.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace containing the collection. If no keyspace\nis specified, the setting for this database is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based\nauthentication, specialized subclasses of\n`astrapy.authentication.EmbeddingHeadersProvider` should be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration\nof each operation on the collection. Individual timeouts can be\nprovided to each collection method call and will take precedence, with\nthis value being an overall default.\nNote that for some methods involving multiple API calls (such as `find`,\n`delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCollection", "description": "an `AsyncCollection` instance, representing the desired collection\n(but without any form of validation)."}], "example": ">>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:\n... async_col = await adb.get_collection(c_name)\n... return await async_col.count_documents({}, upper_bound=100)\n...\n>>> asyncio.run(count_docs(my_async_db, \"my_collection\"))\n45", "note": "as this method, returning an AsyncCollection, albeit\nin a synchronous way. 
In other words, the following are equivalent:\n await my_async_db.get_collection(\"coll_name\")\n my_async_db.coll_name\n my_async_db[\"coll_name\"]", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.database.AsyncDatabase.create_collection", "text": "Creates a collection on the database and return the AsyncCollection\ninstance that represents it.\n\nThis is a blocking operation: the method returns when the collection\nis ready to be used. As opposed to the `get_collection` instance,\nthis method triggers causes the collection to be actually created on DB.", "metadata": {"kind": "function", "name": "create_collection", "path": "astrapy.database.AsyncDatabase.create_collection", "parameters": [{"name": "name", "type": "str", "description": "the name of the collection.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace where the collection is to be created.\nIf not specified, the general setting for this database is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "dimension", "type": "int | None", "description": "for vector collections, the dimension of the vectors\n(i.e. the number of their components).", "value": "None", "default": "None"}, {"name": "metric", "type": "str | None", "description": "the similarity metric used for vector searches.\nAllowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`\nor `VectorMetric.COSINE` (default).", "value": "None", "default": "None"}, {"name": "service", "type": "CollectionVectorServiceOptions | dict[str, Any] | None", "description": "a dictionary describing a service for\nembedding computation, e.g. 
`{\"provider\": \"ab\", \"modelName\": \"xy\"}`.\nAlternatively, a CollectionVectorServiceOptions object to the same effect.", "value": "None", "default": "None"}, {"name": "indexing", "type": "dict[str, Any] | None", "description": "optional specification of the indexing options for\nthe collection, in the form of a dictionary such as\n {\"deny\": [...]}\nor\n {\"allow\": [...]}", "value": "None", "default": "None"}, {"name": "default_id_type", "type": "str | None", "description": "this sets what type of IDs the API server will\ngenerate when inserting documents that do not specify their\n`_id` field explicitly. Can be set to any of the values\n`DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,\n`DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,\n`DefaultIdType.DEFAULT`.", "value": "None", "default": "None"}, {"name": "additional_options", "type": "dict[str, Any] | None", "description": "any further set of key-value pairs that will\nbe added to the \"options\" part of the payload when sending\nthe Data API command to create a collection.", "value": "None", "default": "None"}, {"name": "check_exists", "type": "bool | None", "description": "whether to run an existence check for the collection\nname before attempting to create the collection:\nIf check_exists is True, an error is raised when creating\nan existing collection.\nIf it is False, the creation is attempted. 
In this case, for\npreexisting collections, the command will succeed or fail\ndepending on whether the options match or not.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCollection", "description": "an `AsyncCollection` instance, representing the newly-created collection."}], "example": ">>> async def create_and_insert(adb: AsyncDatabase) -> Dict[str, Any]:\n... new_a_col = await adb.create_collection(\"my_v_col\", dimension=3)\n... return await new_a_col.insert_one(\n... {\"name\": \"the_row\", \"$vector\": [0.4, 0.5, 0.7]},\n... 
)\n...\n>>> asyncio.run(create_and_insert(my_async_db))\nInsertOneResult(raw_results=..., inserted_id='08f05ecf-...-...-...')", "note": "A collection is considered a vector collection if at least one of\n`dimension` or `service` are provided and not null. In that case,\nand only in that case, is `metric` an accepted parameter.\nNote, moreover, that if passing both these parameters, then\nthe dimension must be compatible with the chosen service.", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider", "astrapy.info.CollectionVectorServiceOptions", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.database.AsyncDatabase.drop_collection", "text": "Drop a collection from the database, along with all documents therein.", "metadata": {"kind": "function", "name": "drop_collection", "path": "astrapy.database.AsyncDatabase.drop_collection", "parameters": [{"name": "name_or_collection", "type": "str | AsyncCollection", "description": "either the name of a collection or\nan `AsyncCollection` instance.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary in the form {\"ok\": 1} if the command succeeds."}], "example": ">>> asyncio.run(my_async_db.list_collection_names())\n['a_collection', 'my_v_col', 'another_col']\n>>> asyncio.run(my_async_db.drop_collection(\"my_v_col\"))\n{'ok': 1}\n>>> asyncio.run(my_async_db.list_collection_names())\n['a_collection', 'another_col']", "note": "when providing a collection name, it is assumed that the collection\nis to be found in the keyspace that was set at database instance level.", "gathered_types": ["astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.database.AsyncDatabase.list_collections", "text": "List all collections in a given keyspace for this database.", "metadata": {"kind": "function", "name": 
"list_collections", "path": "astrapy.database.AsyncDatabase.list_collections", "parameters": [{"name": "keyspace", "type": "str | None", "description": "the keyspace to be inspected. If not specified,\nthe general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCommandCursor[CollectionDescriptor]", "description": "an `AsyncCommandCursor` to iterate over CollectionDescriptor instances,"}, {"type": "AsyncCommandCursor[CollectionDescriptor]", "description": "each corresponding to a collection."}], "example": ">>> async def a_list_colls(adb: AsyncDatabase) -> None:\n... a_ccur = adb.list_collections()\n... print(\"* a_ccur:\", a_ccur)\n... print(\"* list:\", [coll async for coll in a_ccur])\n... async for coll in adb.list_collections():\n... print(\"* coll:\", coll)\n...\n>>> asyncio.run(a_list_colls(my_async_db))\n* a_ccur: <astrapy.cursors.AsyncCommandCursor object at ...>\n* list: [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]\n* coll: CollectionDescriptor(name='my_v_col', options=CollectionOptions())", "gathered_types": ["astrapy.info.CollectionDescriptor", "astrapy.cursors.AsyncCommandCursor"]}} +{"id": "astrapy.database.AsyncDatabase.list_collection_names", "text": "List the names of all collections in a given keyspace of this database.", "metadata": {"kind": "function", "name": "list_collection_names", "path": "astrapy.database.AsyncDatabase.list_collection_names", "parameters": [{"name": "keyspace", "type": "str | None", "description": "the keyspace to be inspected. 
If not specified,\nthe general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "a list of the collection names as strings, in no particular order."}], "example": ">>> asyncio.run(my_async_db.list_collection_names())\n['a_collection', 'another_col']"}} +{"id": "astrapy.database.AsyncDatabase.command", "text": "Send a POST request to the Data API for this database with\nan arbitrary, caller-provided payload.", "metadata": {"kind": "function", "name": "command", "path": "astrapy.database.AsyncDatabase.command", "parameters": [{"name": "body", "type": "dict[str, Any]", "description": "a JSON-serializable dictionary, the payload of the request.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the keyspace to use. Requests always target a keyspace:\nif not specified, the general setting for this database is assumed.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "collection_name", "type": "str | None", "description": "if provided, the collection name is appended at the end\nof the endpoint. 
In this way, this method allows collection-level\narbitrary POST requests as well.", "value": "None", "default": "None"}, {"name": "raise_api_errors", "type": "bool", "description": "if True, responses with a nonempty 'errors' field\nresult in an astrapy exception being raised.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary with the response of the HTTP request."}], "example": ">>> asyncio.run(my_async_db.command({\"findCollections\": {}}))\n{'status': {'collections': ['my_coll']}}\n>>> asyncio.run(my_async_db.command(\n... {\"countDocuments\": {}},\n... collection_name=\"my_coll\",\n... )\n{'status': {'count': 123}}"}} +{"id": "astrapy.database.AsyncDatabase.get_database_admin", "text": "Return a DatabaseAdmin object corresponding to this database, for\nuse in admin tasks such as managing keyspaces.\n\nThis method, depending on the environment where the database resides,\nreturns an appropriate subclass of DatabaseAdmin.", "metadata": {"kind": "function", "name": "get_database_admin", "path": "astrapy.database.AsyncDatabase.get_database_admin", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission on the database to\nperform the desired tasks. If omitted (as it can generally be done),\nthe token of this Database is used.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) 
is\ndetermined from the API Endpoint.\nNote that this parameter is allowed only for Astra DB environments.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). Generally not needed.\nNote that this parameter is allowed only for Astra DB environments.", "value": "None", "default": "None"}], "returns": [{"type": "DatabaseAdmin", "description": "A DatabaseAdmin instance targeting this database. More precisely,"}, {"type": "DatabaseAdmin", "description": "for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;"}, {"type": "DatabaseAdmin", "description": "for other environments, an instance of `DataAPIDatabaseAdmin` is returned."}], "example": ">>> my_db_admin = my_async_db.get_database_admin()\n>>> if \"new_keyspace\" not in my_db_admin.list_keyspaces():\n... my_db_admin.create_keyspace(\"new_keyspace\")\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'new_keyspace']", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.admin.DatabaseAdmin"]}} +{"id": "astrapy.client", "text": "", "metadata": {"kind": "module", "name": "client", "path": "astrapy.client", "imports": {"annotations": "__future__.annotations", "logging": "logging", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Sequence": "typing.Sequence", "deprecation": "deprecation", "__version__": "astrapy.__version__", "api_endpoint_parsing_error_message": "astrapy.admin.api_endpoint_parsing_error_message", "build_api_endpoint": "astrapy.admin.build_api_endpoint", "check_id_endpoint_parg_kwargs": "astrapy.admin.check_id_endpoint_parg_kwargs", "generic_api_url_parsing_error_message": "astrapy.admin.generic_api_url_parsing_error_message", "normalize_region_for_id": "astrapy.admin.normalize_region_for_id", "parse_api_endpoint": "astrapy.admin.parse_api_endpoint", "parse_generic_api_url": "astrapy.admin.parse_generic_api_url", 
"coerce_token_provider": "astrapy.authentication.coerce_token_provider", "redact_secret": "astrapy.authentication.redact_secret", "CallerType": "astrapy.constants.CallerType", "Environment": "astrapy.constants.Environment", "SET_CALLER_DEPRECATION_NOTICE": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "check_caller_parameters": "astrapy.meta.check_caller_parameters", "check_deprecated_id_region": "astrapy.meta.check_deprecated_id_region", "check_namespace_keyspace": "astrapy.meta.check_namespace_keyspace", "AsyncDatabase": "astrapy.AsyncDatabase", "Database": "astrapy.Database", "AstraDBAdmin": "astrapy.admin.AstraDBAdmin", "TokenProvider": "astrapy.authentication.TokenProvider"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.client.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.client.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.client.DataAPIClient", "text": "A client for using the Data API. This is the main entry point and sits\nat the top of the conceptual \"client -> database -> collection\" hierarchy.\n\nA client is created first, optionally passing it a suitable Access Token.\nStarting from the client, then:\n - databases (Database and AsyncDatabase) are created for working with data\n - AstraDBAdmin objects can be created for admin-level work", "metadata": {"kind": "class", "name": "DataAPIClient", "path": "astrapy.client.DataAPIClient", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. 
Example: `\"AstraCS:xyz...\"`.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a string representing the target Data API environment.\nIt can be left unspecified for the default value of `Environment.PROD`;\nother values include `Environment.OTHER`, `Environment.DSE`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API and DevOps API calls are performed.\nThese end up in the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API and\nDevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient\n>>> my_client = DataAPIClient(\"AstraCS:...\")\n>>> my_db0 = my_client.get_database(\n... \"https://01234567-....apps.astra.datastax.com\"\n... 
)\n>>> my_coll = my_db0.create_collection(\"movies\", dimension=2)\n>>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.1, 0.3]})\n>>> my_db1 = my_client.get_database(\"01234567-...\")\n>>> my_db2 = my_client.get_database(\"01234567-...\", region=\"us-east1\")\n>>> my_adm0 = my_client.get_admin()\n>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)\n>>> database_list = my_adm0.list_databases()", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.client.DataAPIClient.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.client.DataAPIClient.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.client.DataAPIClient.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.client.DataAPIClient.environment", "value": "environment = environment or Environment.PROD.lower()", "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.client.DataAPIClient.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.client.DataAPIClient.callers", "value": "callers = callers_param"}} +{"id": "astrapy.client.DataAPIClient.with_options", "text": "Create a clone of this DataAPIClient with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.client.DataAPIClient.with_options", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. Example: `\"AstraCS:xyz...\"`.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. 
applications, or frameworks,\non behalf of which Data API and DevOps API calls are performed.\nThese end up in the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API and\nDevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "DataAPIClient", "description": "a new DataAPIClient instance."}], "example": ">>> another_client = my_client.with_options(\n... callers=[(\"caller_identity\", \"1.2.0\")],\n... )", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.client.DataAPIClient", "astrapy.constants.CallerType"]}} +{"id": "astrapy.client.DataAPIClient.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe API calls will be performed (the \"caller\").\n\nNew objects spawned from this client afterwards will inherit the new settings.", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.client.DataAPIClient.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe API API calls are performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_client.set_caller(caller_name=\"the_caller\", caller_version=\"0.1.0\")"}} +{"id": "astrapy.client.DataAPIClient.get_database", "text": "Get a Database object from this client, for doing data-related work.", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.client.DataAPIClient.get_database", "parameters": [{"name": "api_endpoint_or_id", "type": "str | None", "description": "positional parameter that can stand for both\n`api_endpoint` and `id`. Passing them together is an error.", "value": "None", "default": "None"}, {"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.\nActual admin work can be achieved by using the AstraDBAdmin object.", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of the client token.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "if provided, it is passed to the Database; otherwise\nthe Database class will apply an environment-specific default.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "id", "type": "str | None", "description": "the target database ID. 
This is alternative to using the API Endpoint.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "the region to use for connecting to the database. The\ndatabase must be located in that region. This parameter can be used\nonly if the database is specified by its ID (instead of API Endpoint).\nIf this parameter is not passed, and cannot be inferred\nfrom the API endpoint, an additional DevOps API request is made\nto determine the default region and use it subsequently.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "a Database object with which to work on Data API collections."}], "example": ">>> my_db0 = my_client.get_database(\"01234567-...\")\n>>> my_db1 = my_client.get_database(\n... \"https://01234567-...us-west1.apps.astra.datastax.com\",\n... )\n>>> my_db2 = my_client.get_database(\"01234567-...\", token=\"AstraCS:...\")\n>>> my_db3 = my_client.get_database(\"01234567-...\", region=\"us-west1\")\n>>> my_coll = my_db0.create_collection(\"movies\", dimension=2)\n>>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.3, 0.4]})", "note": "This method does not perform any admin-level operation through\nthe DevOps API. 
For actual creation of a database, see the\n`create_database` method of class AstraDBAdmin.", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.client.DataAPIClient.get_async_database", "text": "Get an AsyncDatabase object from this client, for doing data-related work.", "metadata": {"kind": "function", "name": "get_async_database", "path": "astrapy.client.DataAPIClient.get_async_database", "parameters": [{"name": "api_endpoint_or_id", "type": "str | None", "description": "positional parameter that can stand for both\n`api_endpoint` and `id`. Passing them together is an error.", "value": "None", "default": "None"}, {"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.\nActual admin work can be achieved by using the AstraDBAdmin object.", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of the client token.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "if provided, it is passed to the Database; otherwise\nthe Database class will apply an environment-specific default.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "id", "type": "str | None", "description": "the target database ID. 
This is alternative to using the API Endpoint.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "the region to use for connecting to the database. The\ndatabase must be located in that region. This parameter can be used\nonly if the database is specified by its ID (instead of API Endpoint).\nIf this parameter is not passed, and cannot be inferred\nfrom the API endpoint, an additional DevOps API request is made\nto determine the default region and use it subsequently.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "returns": [{"type": "AsyncDatabase", "description": "a Database object with which to work on Data API collections."}], "example": ">>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:\n... async_db = cl.get_async_database(api_ep)\n... my_a_coll = await async_db.create_collection(\"movies\", dimension=2)\n... await my_a_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.3, 0.4]})\n...\n>>> asyncio.run(\n... create_use_db(\n... my_client,\n... \"https://01234567-...us-west1.apps.astra.datastax.com\",\n... )\n... )", "note": "This method does not perform any admin-level operation through\nthe DevOps API. 
For actual creation of a database, see the\n`create_database` method of class AstraDBAdmin.", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.client.DataAPIClient.get_database_by_api_endpoint", "text": "Get a Database object from this client, for doing data-related work.\nThe Database is specified by an API Endpoint instead of the ID and a region.\n\nNote that using this method is generally equivalent to passing\nan API Endpoint as parameter to the `get_database` method (see).", "metadata": {"kind": "function", "name": "get_database_by_api_endpoint", "path": "astrapy.client.DataAPIClient.get_database_by_api_endpoint", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the full \"API Endpoint\" string used to reach the Data API.\nExample: \"https://DATABASE_ID-REGION.apps.astra.datastax.com\"", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of the client token.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "if provided, it is passed to the Database; otherwise\nthe Database class will apply an environment-specific default.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. 
In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "a Database object with which to work on Data API collections."}], "example": ">>> my_db0 = my_client.get_database_by_api_endpoint(\"01234567-...\")\n>>> my_db1 = my_client.get_database_by_api_endpoint(\n... \"https://01234567-....apps.astra.datastax.com\",\n... token=\"AstraCS:...\",\n... )\n>>> my_db2 = my_client.get_database_by_api_endpoint(\n... \"https://01234567-....apps.astra.datastax.com\",\n... keyspace=\"the_other_keyspace\",\n... )\n>>> my_coll = my_db0.create_collection(\"movies\", dimension=2)\n>>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.5, 0.6]})", "note": "This method does not perform any admin-level operation through\nthe DevOps API. For actual creation of a database, see the\n`create_database` method of class AstraDBAdmin.", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.client.DataAPIClient.get_async_database_by_api_endpoint", "text": "Get an AsyncDatabase object from this client, for doing data-related work.\nThe Database is specified by an API Endpoint instead of the ID and a region.\n\nNote that using this method is generally equivalent to passing\nan API Endpoint as parameter to the `get_async_database` method (see).\n\nThis method has identical behavior and signature as the sync\ncounterpart `get_database_by_api_endpoint`: please see that one\nfor more details.", "metadata": {"kind": "function", "name": "get_async_database_by_api_endpoint", "path": "astrapy.client.DataAPIClient.get_async_database_by_api_endpoint", "parameters": [{"name": "api_endpoint", "type": "str"}, {"name": "token", "default": "None", "type": "str | TokenProvider | None"}, {"name": "keyspace", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", 
"type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}], "returns": [{"type": "AsyncDatabase"}], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.client.DataAPIClient.get_admin", "text": "Get an AstraDBAdmin instance corresponding to this client, for\nadmin work such as managing databases.", "metadata": {"kind": "function", "name": "get_admin", "path": "astrapy.client.DataAPIClient.get_admin", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Astra DB Admin instead of the\nclient token. This may be useful when switching to a more powerful,\nadmin-capable permission set.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) is\ndetermined from the API Endpoint.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). Generally not needed.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBAdmin", "description": "An AstraDBAdmin instance, wich which to perform management at the"}, {"type": "AstraDBAdmin", "description": "database level."}], "example": ">>> my_adm0 = my_client.get_admin()\n>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)\n>>> database_list = my_adm0.list_databases()\n>>> my_db_admin = my_adm0.create_database(\n... \"the_other_database\",\n... cloud_provider=\"AWS\",\n... region=\"eu-west-1\",\n... 
)\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'that_other_one']", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.admin.AstraDBAdmin"]}} +{"id": "astrapy.cursors", "text": "", "metadata": {"kind": "module", "name": "cursors", "path": "astrapy.cursors", "imports": {"annotations": "__future__.annotations", "hashlib": "hashlib", "json": "json", "logging": "logging", "time": "time", "warnings": "warnings", "AsyncIterator": "collections.abc.AsyncIterator", "Enum": "enum.Enum", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Callable": "typing.Callable", "Generic": "typing.Generic", "Iterable": "typing.Iterable", "Iterator": "typing.Iterator", "Optional": "typing.Optional", "Tuple": "typing.Tuple", "TypeVar": "typing.TypeVar", "deprecation": "deprecation", "DocumentType": "astrapy.constants.DocumentType", "ProjectionType": "astrapy.constants.ProjectionType", "normalize_optional_projection": "astrapy.constants.normalize_optional_projection", "CursorIsStartedException": "astrapy.exceptions.CursorIsStartedException", "DataAPIFaultyResponseException": "astrapy.exceptions.DataAPIFaultyResponseException", "DataAPITimeoutException": "astrapy.exceptions.DataAPITimeoutException", "normalize_payload_value": "astrapy.transform_payload.normalize_payload_value", "AsyncCollection": "astrapy.collection.AsyncCollection", "Collection": "astrapy.collection.Collection"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.cursors.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.cursors.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.cursors.BC", "text": "", "metadata": {"kind": "attribute", "name": "BC", "path": "astrapy.cursors.BC", "value": "BC = TypeVar('BC', bound='BaseCursor')"}} +{"id": "astrapy.cursors.T", "text": "", 
"metadata": {"kind": "attribute", "name": "T", "path": "astrapy.cursors.T", "value": "T = TypeVar('T')"}} +{"id": "astrapy.cursors.IndexPairType", "text": "", "metadata": {"kind": "attribute", "name": "IndexPairType", "path": "astrapy.cursors.IndexPairType", "value": "IndexPairType = Tuple[str, Optional[int]]"}} +{"id": "astrapy.cursors.CursorState", "text": "", "metadata": {"kind": "class", "name": "CursorState", "path": "astrapy.cursors.CursorState", "bases": ["Enum"], "gathered_types": ["Enum"]}} +{"id": "astrapy.cursors.CursorState.IDLE", "text": "", "metadata": {"kind": "attribute", "name": "IDLE", "path": "astrapy.cursors.CursorState.IDLE", "value": "IDLE = 'idle'"}} +{"id": "astrapy.cursors.CursorState.STARTED", "text": "", "metadata": {"kind": "attribute", "name": "STARTED", "path": "astrapy.cursors.CursorState.STARTED", "value": "STARTED = 'started'"}} +{"id": "astrapy.cursors.CursorState.CLOSED", "text": "", "metadata": {"kind": "attribute", "name": "CLOSED", "path": "astrapy.cursors.CursorState.CLOSED", "value": "CLOSED = 'closed'"}} +{"id": "astrapy.cursors._LookAheadIterator.iterator", "text": "", "metadata": {"kind": "attribute", "name": "iterator", "path": "astrapy.cursors._LookAheadIterator.iterator", "value": "iterator = iterator"}} +{"id": "astrapy.cursors._LookAheadIterator.preread_item", "text": "", "metadata": {"kind": "attribute", "name": "preread_item", "path": "astrapy.cursors._LookAheadIterator.preread_item", "value": "preread_item: DocumentType | None = None", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.cursors._LookAheadIterator.has_preread", "text": "", "metadata": {"kind": "attribute", "name": "has_preread", "path": "astrapy.cursors._LookAheadIterator.has_preread", "value": "has_preread = False"}} +{"id": "astrapy.cursors._LookAheadIterator.preread_exhausted", "text": "", "metadata": {"kind": "attribute", "name": "preread_exhausted", "path": "astrapy.cursors._LookAheadIterator.preread_exhausted", "value": 
"preread_exhausted = False"}} +{"id": "astrapy.cursors._LookAheadIterator.preread", "text": "", "metadata": {"kind": "function", "name": "preread", "path": "astrapy.cursors._LookAheadIterator.preread", "returns": [{"type": "None"}]}} +{"id": "astrapy.cursors._AsyncLookAheadIterator.async_iterator", "text": "", "metadata": {"kind": "attribute", "name": "async_iterator", "path": "astrapy.cursors._AsyncLookAheadIterator.async_iterator", "value": "async_iterator = async_iterator"}} +{"id": "astrapy.cursors._AsyncLookAheadIterator.preread_item", "text": "", "metadata": {"kind": "attribute", "name": "preread_item", "path": "astrapy.cursors._AsyncLookAheadIterator.preread_item", "value": "preread_item: DocumentType | None = None", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.cursors._AsyncLookAheadIterator.has_preread", "text": "", "metadata": {"kind": "attribute", "name": "has_preread", "path": "astrapy.cursors._AsyncLookAheadIterator.has_preread", "value": "has_preread = False"}} +{"id": "astrapy.cursors._AsyncLookAheadIterator.preread_exhausted", "text": "", "metadata": {"kind": "attribute", "name": "preread_exhausted", "path": "astrapy.cursors._AsyncLookAheadIterator.preread_exhausted", "value": "preread_exhausted = False"}} +{"id": "astrapy.cursors._AsyncLookAheadIterator.preread", "text": "", "metadata": {"kind": "function", "name": "preread", "path": "astrapy.cursors._AsyncLookAheadIterator.preread", "returns": [{"type": "None"}]}} +{"id": "astrapy.cursors.BaseCursor", "text": "Represents a generic Cursor over query results, regardless of whether\nsynchronous or asynchronous. 
It cannot be instantiated.\n\nSee classes Cursor and AsyncCursor for more information.", "metadata": {"kind": "class", "name": "BaseCursor", "path": "astrapy.cursors.BaseCursor", "parameters": [{"name": "collection", "type": "Collection | AsyncCollection"}, {"name": "filter", "type": "dict[str, Any] | None"}, {"name": "projection", "type": "ProjectionType | None"}, {"name": "max_time_ms", "type": "int | None"}, {"name": "overall_max_time_ms", "type": "int | None"}], "gathered_types": ["astrapy.constants.ProjectionType", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.cursors.BaseCursor.state", "text": "The current state of this cursor, which can be one of\nthe astrapy.cursors.CursorState enum.", "metadata": {"kind": "attribute", "name": "state", "path": "astrapy.cursors.BaseCursor.state", "value": "state: str"}} +{"id": "astrapy.cursors.BaseCursor.address", "text": "The API endpoint used by this cursor when issuing\nrequests to the database.", "metadata": {"kind": "attribute", "name": "address", "path": "astrapy.cursors.BaseCursor.address", "value": "address: str"}} +{"id": "astrapy.cursors.BaseCursor.alive", "text": "Whether the cursor has the potential to yield more data.", "metadata": {"kind": "attribute", "name": "alive", "path": "astrapy.cursors.BaseCursor.alive", "value": "alive: bool"}} +{"id": "astrapy.cursors.BaseCursor.clone", "text": "Clone the cursor into a new, fresh one.", "metadata": {"kind": "function", "name": "clone", "path": "astrapy.cursors.BaseCursor.clone", "returns": [{"type": "BC", "description": "a copy of this cursor, reset to its pristine state,"}, {"type": "BC", "description": "i.e. 
fully un-consumed."}], "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.close", "text": "Stop/kill the cursor, regardless of its status.", "metadata": {"kind": "function", "name": "close", "path": "astrapy.cursors.BaseCursor.close", "returns": [{"type": "None"}]}} +{"id": "astrapy.cursors.BaseCursor.cursor_id", "text": "An integer uniquely identifying this cursor.", "metadata": {"kind": "attribute", "name": "cursor_id", "path": "astrapy.cursors.BaseCursor.cursor_id", "value": "cursor_id: int"}} +{"id": "astrapy.cursors.BaseCursor.limit", "text": "Set a new `limit` value for this cursor.", "metadata": {"kind": "function", "name": "limit", "path": "astrapy.cursors.BaseCursor.limit", "parameters": [{"name": "limit", "type": "int | None", "description": "the new value to set", "default": null}], "returns": [{"type": "BC", "description": "this cursor itself."}], "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.include_similarity", "text": "Set a new `include_similarity` value for this cursor.", "metadata": {"kind": "function", "name": "include_similarity", "path": "astrapy.cursors.BaseCursor.include_similarity", "parameters": [{"name": "include_similarity", "type": "bool | None", "description": "the new value to set", "default": null}], "returns": [{"type": "BC", "description": "this cursor itself."}], "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.include_sort_vector", "text": "Set a new `include_sort_vector` value for this cursor.", "metadata": {"kind": "function", "name": "include_sort_vector", "path": "astrapy.cursors.BaseCursor.include_sort_vector", "parameters": [{"name": "include_sort_vector", "type": "bool | None", "description": "the new value to set", "default": null}], "returns": [{"type": "BC", "description": "this cursor itself."}], "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.retrieved", "text": "The number of documents consumed so far (by the code consuming the cursor).", "metadata": {"kind": 
"attribute", "name": "retrieved", "path": "astrapy.cursors.BaseCursor.retrieved", "value": "retrieved: int"}} +{"id": "astrapy.cursors.BaseCursor.consumed", "text": "The number of documents consumed so far (by the code consuming the cursor).", "metadata": {"kind": "attribute", "name": "consumed", "path": "astrapy.cursors.BaseCursor.consumed", "value": "consumed: int"}} +{"id": "astrapy.cursors.BaseCursor.rewind", "text": "Reset the cursor to its pristine state, i.e. fully unconsumed.", "metadata": {"kind": "function", "name": "rewind", "path": "astrapy.cursors.BaseCursor.rewind", "returns": [{"type": "BC", "description": "this cursor itself."}], "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.skip", "text": "Set a new `skip` value for this cursor.", "metadata": {"kind": "function", "name": "skip", "path": "astrapy.cursors.BaseCursor.skip", "parameters": [{"name": "skip", "type": "int | None", "description": "the new value to set", "default": null}], "returns": [{"type": "BC", "description": "this cursor itself."}], "note": "This parameter can be used only in conjunction with an explicit\n`sort` criterion of the ascending/descending type (i.e. it cannot\nbe used when not sorting, nor with vector-based ANN search).", "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.BaseCursor.sort", "text": "Set a new `sort` value for this cursor.", "metadata": {"kind": "function", "name": "sort", "path": "astrapy.cursors.BaseCursor.sort", "parameters": [{"name": "sort", "type": "dict[str, Any] | None", "description": "the new sorting prescription to set", "default": null}], "returns": [{"type": "BC", "description": "this cursor itself."}], "note": "Some combinations of arguments impose an implicit upper bound on the\nnumber of documents that are returned by the Data API. 
More specifically:\n(a) Vector ANN searches cannot return more than a number of documents\nthat at the time of writing is set to 1000 items.\n(b) When using a sort criterion of the ascending/descending type,\nthe Data API will return a smaller number of documents, set to 20\nat the time of writing, and stop there. The returned documents are\nthe top results across the whole collection according to the requested\ncriterion.\nThese provisions should be kept in mind even when subsequently running\na command such as `.distinct()` on a cursor.", "gathered_types": ["BC"]}} +{"id": "astrapy.cursors.Cursor", "text": "Represents a (synchronous) cursor over documents in a collection.\nA cursor is iterated over, e.g. with a for loop, and keeps track of\nits progress.\n\nGenerally cursors are not supposed to be instantiated directly,\nrather they are obtained by invoking the `find` method on a collection.", "metadata": {"kind": "class", "name": "Cursor", "path": "astrapy.cursors.Cursor", "parameters": [{"name": "collection", "type": "Collection"}, {"name": "filter", "type": "dict[str, Any] | None"}, {"name": "projection", "type": "ProjectionType | None"}, {"name": "max_time_ms", "type": "int | None"}, {"name": "overall_max_time_ms", "type": "int | None"}], "bases": ["BaseCursor"], "attributes": [{"name": "collection", "type": "Collection", "description": "the collection to find documents in\nfilter: a predicate expressed as a dictionary according to the\n Data API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$le\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$le\": 100}}]}\n See the Data API documentation for the full set of operators.\nprojection: used to select a subset of fields in the document being\n returned. 
The projection can be: an iterable over the field names\n to return; a dictionary {field_name: True} to positively select\n certain fields; or a dictionary {field_name: False} if one wants\n to discard some fields from the response.\n The default is to return the whole documents.\nmax_time_ms: a timeout, in milliseconds, for each single one\n of the underlying HTTP requests used to fetch documents as the\n cursor is iterated over."}], "note": "When not specifying sorting criteria at all (by vector or otherwise),\nthe cursor can scroll through an arbitrary number of documents as\nthe Data API and the client periodically exchange new chunks of documents.\nIt should be noted that the behavior of the cursor in the case documents\nhave been added/removed after the cursor was started depends on database\ninternals and it is not guaranteed, nor excluded, that such \"real-time\"\nchanges in the data would be picked up by the cursor.", "gathered_types": ["astrapy.constants.ProjectionType", "BaseCursor"]}} +{"id": "astrapy.cursors.Cursor.get_sort_vector", "text": "Return the vector used in this ANN search, if applicable.\nIf this is not an ANN search, or it was invoked without the\n`include_sort_vector` parameter, return None.\n\nInvoking this method on a pristine cursor will trigger an API call\nto get the first page of results.", "metadata": {"kind": "function", "name": "get_sort_vector", "path": "astrapy.cursors.Cursor.get_sort_vector", "returns": [{"type": "list[float] | None"}]}} +{"id": "astrapy.cursors.Cursor.collection", "text": "The (synchronous) collection this cursor is targeting.", "metadata": {"kind": "attribute", "name": "collection", "path": "astrapy.cursors.Cursor.collection", "value": "collection: Collection"}} +{"id": "astrapy.cursors.Cursor.data_source", "text": "The (synchronous) collection this cursor is targeting.", "metadata": {"kind": "attribute", "name": "data_source", "path": "astrapy.cursors.Cursor.data_source", "value": "data_source: Collection"}} 
+{"id": "astrapy.cursors.Cursor.distinct", "text": "Compute a list of unique values for a specific field across all\ndocuments the cursor iterates through.\n\nInvoking this method has no effect on the cursor state, i.e.\nthe position of the cursor is unchanged.", "metadata": {"kind": "function", "name": "distinct", "path": "astrapy.cursors.Cursor.distinct", "parameters": [{"name": "key", "type": "str", "description": "the name of the field whose value is inspected across documents.\nKeys can use dot-notation to descend to deeper document levels.\nExample of acceptable `key` values:\n \"field\"\n \"field.subfield\"\n \"field.3\"\n \"field.3.subfield\"\nif lists are encountered and no numeric index is specified,\nall items in the list are visited.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.", "value": "None", "default": "None"}], "returns": [{"type": "list[Any]"}], "note": "this operation works at client-side by scrolling through all\ndocuments matching the cursor parameters (such as `filter`).\nPlease be aware of this fact, especially for a very large\namount of documents, for this may have implications on latency,\nnetwork traffic and possibly billing."}} +{"id": "astrapy.cursors.AsyncCursor", "text": "Represents a (asynchronous) cursor over documents in a collection.\nAn asynchronous cursor is iterated over, e.g. 
with a for loop,\nand keeps track of its progress.\n\nGenerally cursors are not supposed to be instantiated directly,\nrather they are obtained by invoking the `find` method on a collection.", "metadata": {"kind": "class", "name": "AsyncCursor", "path": "astrapy.cursors.AsyncCursor", "parameters": [{"name": "collection", "type": "AsyncCollection"}, {"name": "filter", "type": "dict[str, Any] | None"}, {"name": "projection", "type": "ProjectionType | None"}, {"name": "max_time_ms", "type": "int | None"}, {"name": "overall_max_time_ms", "type": "int | None"}], "bases": ["BaseCursor"], "attributes": [{"name": "collection", "type": "AsyncCollection", "description": "the collection to find documents in\nfilter: a predicate expressed as a dictionary according to the\n Data API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$le\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$le\": 100}}]}\n See the Data API documentation for the full set of operators.\nprojection: used to select a subset of fields in the document being\n returned. 
The projection can be: an iterable over the field names\n to return; a dictionary {field_name: True} to positively select\n certain fields; or a dictionary {field_name: False} if one wants\n to discard some fields from the response.\n The default is to return the whole documents.\nmax_time_ms: a timeout, in milliseconds, for each single one\n of the underlying HTTP requests used to fetch documents as the\n cursor is iterated over."}], "note": "When not specifying sorting criteria at all (by vector or otherwise),\nthe cursor can scroll through an arbitrary number of documents as\nthe Data API and the client periodically exchange new chunks of documents.\nIt should be noted that the behavior of the cursor in the case documents\nhave been added/removed after the cursor was started depends on database\ninternals and it is not guaranteed, nor excluded, that such \"real-time\"\nchanges in the data would be picked up by the cursor.", "gathered_types": ["astrapy.constants.ProjectionType", "astrapy.collection.AsyncCollection", "BaseCursor"]}} +{"id": "astrapy.cursors.AsyncCursor.get_sort_vector", "text": "Return the vector used in this ANN search, if applicable.\nIf this is not an ANN search, or it was invoked without the\n`include_sort_vector` parameter, return None.\n\nInvoking this method on a pristine cursor will trigger an API call\nto get the first page of results.", "metadata": {"kind": "function", "name": "get_sort_vector", "path": "astrapy.cursors.AsyncCursor.get_sort_vector", "returns": [{"type": "list[float] | None"}]}} +{"id": "astrapy.cursors.AsyncCursor.collection", "text": "The (asynchronous) collection this cursor is targeting.", "metadata": {"kind": "attribute", "name": "collection", "path": "astrapy.cursors.AsyncCursor.collection", "value": "collection: AsyncCollection", "gathered_types": ["astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.cursors.AsyncCursor.data_source", "text": "The (asynchronous) collection this cursor is targeting.", "metadata": 
{"kind": "attribute", "name": "data_source", "path": "astrapy.cursors.AsyncCursor.data_source", "value": "data_source: AsyncCollection", "gathered_types": ["astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.cursors.AsyncCursor.distinct", "text": "Compute a list of unique values for a specific field across all\ndocuments the cursor iterates through.\n\nInvoking this method has no effect on the cursor state, i.e.\nthe position of the cursor is unchanged.", "metadata": {"kind": "function", "name": "distinct", "path": "astrapy.cursors.AsyncCursor.distinct", "parameters": [{"name": "key", "type": "str", "description": "the name of the field whose value is inspected across documents.\nKeys can use dot-notation to descend to deeper document levels.\nExample of acceptable `key` values:\n \"field\"\n \"field.subfield\"\n \"field.3\"\n \"field.3.subfield\"\nif lists are encountered and no numeric index is specified,\nall items in the list are visited.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.", "value": "None", "default": "None"}], "returns": [{"type": "list[Any]"}], "note": "this operation works at client-side by scrolling through all\ndocuments matching the cursor parameters (such as `filter`).\nPlease be aware of this fact, especially for a very large\namount of documents, for this may have implications on latency,\nnetwork traffic and possibly billing."}} +{"id": "astrapy.cursors.CommandCursor", "text": "A (synchronous) cursor over the results of a Data API command\n(as opposed to a cursor over data as one would get with a `find` method).\n\nCommand cursors are iterated over, e.g. 
with a for loop.\n\nGenerally command cursors are not supposed to be instantiated directly,\nrather they are obtained by invoking methods on a collection/database\n(such as the database `list_collections` method).", "metadata": {"kind": "class", "name": "CommandCursor", "path": "astrapy.cursors.CommandCursor", "parameters": [{"name": "address", "type": "str"}, {"name": "items", "type": "list[T]"}], "bases": ["Generic[T]"]}} +{"id": "astrapy.cursors.CommandCursor.items", "text": "", "metadata": {"kind": "attribute", "name": "items", "path": "astrapy.cursors.CommandCursor.items", "value": "items = items"}} +{"id": "astrapy.cursors.CommandCursor.iterable", "text": "", "metadata": {"kind": "attribute", "name": "iterable", "path": "astrapy.cursors.CommandCursor.iterable", "value": "iterable = items.__iter__()", "gathered_types": ["__iter__"]}} +{"id": "astrapy.cursors.CommandCursor.state", "text": "The current state of this cursor, which can be:\n - \"alive\": the cursor has still the potential to return items.\n - \"exhausted\": the cursor has finished and won't return documents", "metadata": {"kind": "attribute", "name": "state", "path": "astrapy.cursors.CommandCursor.state", "value": "state: str"}} +{"id": "astrapy.cursors.CommandCursor.address", "text": "The API endpoint used by this cursor when issuing\nrequests to the database.", "metadata": {"kind": "attribute", "name": "address", "path": "astrapy.cursors.CommandCursor.address", "value": "address: str"}} +{"id": "astrapy.cursors.CommandCursor.alive", "text": "Whether the cursor has the potential to yield more data.", "metadata": {"kind": "attribute", "name": "alive", "path": "astrapy.cursors.CommandCursor.alive", "value": "alive: bool"}} +{"id": "astrapy.cursors.CommandCursor.cursor_id", "text": "An integer uniquely identifying this cursor.", "metadata": {"kind": "attribute", "name": "cursor_id", "path": "astrapy.cursors.CommandCursor.cursor_id", "value": "cursor_id: int"}} +{"id": 
"astrapy.cursors.CommandCursor.close", "text": "Stop/kill the cursor, regardless of its status.", "metadata": {"kind": "function", "name": "close", "path": "astrapy.cursors.CommandCursor.close", "returns": [{"type": "None"}]}} +{"id": "astrapy.cursors.AsyncCommandCursor", "text": "A (asynchronous) cursor over the results of a Data API command\n(as opposed to a cursor over data as one would get with a `find` method).\n\nAsynchronous command cursors are iterated over, e.g. with an async for loop.\n\nGenerally command cursors are not supposed to be instantiated directly,\nrather they are obtained by invoking methods on a collection/database\n(such as the database `list_collections` method).", "metadata": {"kind": "class", "name": "AsyncCommandCursor", "path": "astrapy.cursors.AsyncCommandCursor", "parameters": [{"name": "address", "type": "str"}, {"name": "items", "type": "list[T]"}], "bases": ["Generic[T]"]}} +{"id": "astrapy.cursors.AsyncCommandCursor.items", "text": "", "metadata": {"kind": "attribute", "name": "items", "path": "astrapy.cursors.AsyncCommandCursor.items", "value": "items = items"}} +{"id": "astrapy.cursors.AsyncCommandCursor.iterable", "text": "", "metadata": {"kind": "attribute", "name": "iterable", "path": "astrapy.cursors.AsyncCommandCursor.iterable", "value": "iterable = items.__iter__()", "gathered_types": ["__iter__"]}} +{"id": "astrapy.cursors.AsyncCommandCursor.state", "text": "The current state of this cursor, which can be:\n - \"alive\": the cursor has still the potential to return items.\n - \"exhausted\": the cursor has finished and won't return documents", "metadata": {"kind": "attribute", "name": "state", "path": "astrapy.cursors.AsyncCommandCursor.state", "value": "state: str"}} +{"id": "astrapy.cursors.AsyncCommandCursor.address", "text": "The API endpoint used by this cursor when issuing\nrequests to the database.", "metadata": {"kind": "attribute", "name": "address", "path": "astrapy.cursors.AsyncCommandCursor.address", "value": 
"address: str"}} +{"id": "astrapy.cursors.AsyncCommandCursor.alive", "text": "Whether the cursor has the potential to yield more data.", "metadata": {"kind": "attribute", "name": "alive", "path": "astrapy.cursors.AsyncCommandCursor.alive", "value": "alive: bool"}} +{"id": "astrapy.cursors.AsyncCommandCursor.cursor_id", "text": "An integer uniquely identifying this cursor.", "metadata": {"kind": "attribute", "name": "cursor_id", "path": "astrapy.cursors.AsyncCommandCursor.cursor_id", "value": "cursor_id: int"}} +{"id": "astrapy.cursors.AsyncCommandCursor.close", "text": "Stop/kill the cursor, regardless of its status.", "metadata": {"kind": "function", "name": "close", "path": "astrapy.cursors.AsyncCommandCursor.close", "returns": [{"type": "None"}]}} +{"id": "astrapy.constants", "text": "", "metadata": {"kind": "module", "name": "constants", "path": "astrapy.constants", "imports": {"annotations": "__future__.annotations", "Any": "typing.Any", "Dict": "typing.Dict", "Iterable": "typing.Iterable", "Tuple": "typing.Tuple", "Union": "typing.Union", "DATA_API_ENVIRONMENT_CASSANDRA": "astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA", "DATA_API_ENVIRONMENT_DEV": "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "DATA_API_ENVIRONMENT_DSE": "astrapy.defaults.DATA_API_ENVIRONMENT_DSE", "DATA_API_ENVIRONMENT_HCD": "astrapy.defaults.DATA_API_ENVIRONMENT_HCD", "DATA_API_ENVIRONMENT_OTHER": "astrapy.defaults.DATA_API_ENVIRONMENT_OTHER", "DATA_API_ENVIRONMENT_PROD": "astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "DATA_API_ENVIRONMENT_TEST": "astrapy.defaults.DATA_API_ENVIRONMENT_TEST"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.constants.DocumentType", "text": "", "metadata": {"kind": "attribute", "name": "DocumentType", "path": "astrapy.constants.DocumentType", "value": "DocumentType = Dict[str, Any]"}} +{"id": "astrapy.constants.ProjectionType", "text": "", 
"metadata": {"kind": "attribute", "name": "ProjectionType", "path": "astrapy.constants.ProjectionType", "value": "ProjectionType = Union[Iterable[str], Dict[str, Union[bool, Dict[str, Union[int, Iterable[int]]]]]]"}} +{"id": "astrapy.constants.SortType", "text": "", "metadata": {"kind": "attribute", "name": "SortType", "path": "astrapy.constants.SortType", "value": "SortType = Dict[str, Any]"}} +{"id": "astrapy.constants.FilterType", "text": "", "metadata": {"kind": "attribute", "name": "FilterType", "path": "astrapy.constants.FilterType", "value": "FilterType = Dict[str, Any]"}} +{"id": "astrapy.constants.VectorType", "text": "", "metadata": {"kind": "attribute", "name": "VectorType", "path": "astrapy.constants.VectorType", "value": "VectorType = Iterable[float]"}} +{"id": "astrapy.constants.CallerType", "text": "", "metadata": {"kind": "attribute", "name": "CallerType", "path": "astrapy.constants.CallerType", "value": "CallerType = Tuple[Union[str, None], Union[str, None]]"}} +{"id": "astrapy.constants.normalize_optional_projection", "text": "", "metadata": {"kind": "function", "name": "normalize_optional_projection", "path": "astrapy.constants.normalize_optional_projection", "parameters": [{"name": "projection", "type": "ProjectionType | None"}], "returns": [{"type": "dict[str, bool | dict[str, int | Iterable[int]]] | None"}], "gathered_types": ["astrapy.constants.ProjectionType"]}} +{"id": "astrapy.constants.ReturnDocument", "text": "Admitted values for the `return_document` parameter in\n`find_one_and_replace` and `find_one_and_update` collection\nmethods.", "metadata": {"kind": "class", "name": "ReturnDocument", "path": "astrapy.constants.ReturnDocument"}} +{"id": "astrapy.constants.ReturnDocument.BEFORE", "text": "", "metadata": {"kind": "attribute", "name": "BEFORE", "path": "astrapy.constants.ReturnDocument.BEFORE", "value": "BEFORE = 'before'"}} +{"id": "astrapy.constants.ReturnDocument.AFTER", "text": "", "metadata": {"kind": "attribute", "name": 
"AFTER", "path": "astrapy.constants.ReturnDocument.AFTER", "value": "AFTER = 'after'"}} +{"id": "astrapy.constants.SortDocuments", "text": "Admitted values for the `sort` parameter in the find collection methods,\ne.g. `sort={\"field\": SortDocuments.ASCENDING}`.", "metadata": {"kind": "class", "name": "SortDocuments", "path": "astrapy.constants.SortDocuments"}} +{"id": "astrapy.constants.SortDocuments.ASCENDING", "text": "", "metadata": {"kind": "attribute", "name": "ASCENDING", "path": "astrapy.constants.SortDocuments.ASCENDING", "value": "ASCENDING = 1"}} +{"id": "astrapy.constants.SortDocuments.DESCENDING", "text": "", "metadata": {"kind": "attribute", "name": "DESCENDING", "path": "astrapy.constants.SortDocuments.DESCENDING", "value": "DESCENDING = -1"}} +{"id": "astrapy.constants.VectorMetric", "text": "Admitted values for the \"metric\" parameter when creating vector collections\nthrough the database `create_collection` method.", "metadata": {"kind": "class", "name": "VectorMetric", "path": "astrapy.constants.VectorMetric"}} +{"id": "astrapy.constants.VectorMetric.DOT_PRODUCT", "text": "", "metadata": {"kind": "attribute", "name": "DOT_PRODUCT", "path": "astrapy.constants.VectorMetric.DOT_PRODUCT", "value": "DOT_PRODUCT = 'dot_product'"}} +{"id": "astrapy.constants.VectorMetric.EUCLIDEAN", "text": "", "metadata": {"kind": "attribute", "name": "EUCLIDEAN", "path": "astrapy.constants.VectorMetric.EUCLIDEAN", "value": "EUCLIDEAN = 'euclidean'"}} +{"id": "astrapy.constants.VectorMetric.COSINE", "text": "", "metadata": {"kind": "attribute", "name": "COSINE", "path": "astrapy.constants.VectorMetric.COSINE", "value": "COSINE = 'cosine'"}} +{"id": "astrapy.constants.DefaultIdType", "text": "Admitted values for the \"default_id_type\" parameter when creating collections\nthrough the database `create_collection` method.", "metadata": {"kind": "class", "name": "DefaultIdType", "path": "astrapy.constants.DefaultIdType"}} +{"id": "astrapy.constants.DefaultIdType.UUID", 
"text": "", "metadata": {"kind": "attribute", "name": "UUID", "path": "astrapy.constants.DefaultIdType.UUID", "value": "UUID = 'uuid'"}} +{"id": "astrapy.constants.DefaultIdType.OBJECTID", "text": "", "metadata": {"kind": "attribute", "name": "OBJECTID", "path": "astrapy.constants.DefaultIdType.OBJECTID", "value": "OBJECTID = 'objectId'"}} +{"id": "astrapy.constants.DefaultIdType.UUIDV6", "text": "", "metadata": {"kind": "attribute", "name": "UUIDV6", "path": "astrapy.constants.DefaultIdType.UUIDV6", "value": "UUIDV6 = 'uuidv6'"}} +{"id": "astrapy.constants.DefaultIdType.UUIDV7", "text": "", "metadata": {"kind": "attribute", "name": "UUIDV7", "path": "astrapy.constants.DefaultIdType.UUIDV7", "value": "UUIDV7 = 'uuidv7'"}} +{"id": "astrapy.constants.DefaultIdType.DEFAULT", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT", "path": "astrapy.constants.DefaultIdType.DEFAULT", "value": "DEFAULT = 'uuid'"}} +{"id": "astrapy.constants.Environment", "text": "Admitted values for `environment` property,\ndenoting the targeted API deployment type.", "metadata": {"kind": "class", "name": "Environment", "path": "astrapy.constants.Environment"}} +{"id": "astrapy.constants.Environment.PROD", "text": "", "metadata": {"kind": "attribute", "name": "PROD", "path": "astrapy.constants.Environment.PROD", "value": "PROD = DATA_API_ENVIRONMENT_PROD", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_PROD"]}} +{"id": "astrapy.constants.Environment.DEV", "text": "", "metadata": {"kind": "attribute", "name": "DEV", "path": "astrapy.constants.Environment.DEV", "value": "DEV = DATA_API_ENVIRONMENT_DEV", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_DEV"]}} +{"id": "astrapy.constants.Environment.TEST", "text": "", "metadata": {"kind": "attribute", "name": "TEST", "path": "astrapy.constants.Environment.TEST", "value": "TEST = DATA_API_ENVIRONMENT_TEST", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_TEST"]}} +{"id": 
"astrapy.constants.Environment.DSE", "text": "", "metadata": {"kind": "attribute", "name": "DSE", "path": "astrapy.constants.Environment.DSE", "value": "DSE = DATA_API_ENVIRONMENT_DSE", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_DSE"]}} +{"id": "astrapy.constants.Environment.HCD", "text": "", "metadata": {"kind": "attribute", "name": "HCD", "path": "astrapy.constants.Environment.HCD", "value": "HCD = DATA_API_ENVIRONMENT_HCD", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_HCD"]}} +{"id": "astrapy.constants.Environment.CASSANDRA", "text": "", "metadata": {"kind": "attribute", "name": "CASSANDRA", "path": "astrapy.constants.Environment.CASSANDRA", "value": "CASSANDRA = DATA_API_ENVIRONMENT_CASSANDRA", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA"]}} +{"id": "astrapy.constants.Environment.OTHER", "text": "", "metadata": {"kind": "attribute", "name": "OTHER", "path": "astrapy.constants.Environment.OTHER", "value": "OTHER = DATA_API_ENVIRONMENT_OTHER", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_OTHER"]}} +{"id": "astrapy.constants.Environment.values", "text": "", "metadata": {"kind": "attribute", "name": "values", "path": "astrapy.constants.Environment.values", "value": "values = {PROD, DEV, TEST, DSE, HCD, CASSANDRA, OTHER}", "gathered_types": ["PROD", "OTHER", "DSE", "DEV", "HCD", "TEST", "CASSANDRA"]}} +{"id": "astrapy.constants.Environment.astra_db_values", "text": "", "metadata": {"kind": "attribute", "name": "astra_db_values", "path": "astrapy.constants.Environment.astra_db_values", "value": "astra_db_values = {PROD, DEV, TEST}", "gathered_types": ["PROD", "TEST", "DEV"]}} +{"id": "astrapy.operations", "text": "", "metadata": {"kind": "module", "name": "operations", "path": "astrapy.operations", "imports": {"annotations": "__future__.annotations", "ABC": "abc.ABC", "abstractmethod": "abc.abstractmethod", "dataclass": "dataclasses.dataclass", "reduce": "functools.reduce", "Any": "typing.Any", 
"Iterable": "typing.Iterable", "AsyncCollection": "astrapy.collection.AsyncCollection", "Collection": "astrapy.collection.Collection", "DocumentType": "astrapy.constants.DocumentType", "SortType": "astrapy.constants.SortType", "VectorType": "astrapy.constants.VectorType", "check_deprecated_vector_ize": "astrapy.meta.check_deprecated_vector_ize", "BulkWriteResult": "astrapy.results.BulkWriteResult", "DeleteResult": "astrapy.results.DeleteResult", "InsertManyResult": "astrapy.results.InsertManyResult", "InsertOneResult": "astrapy.results.InsertOneResult", "UpdateResult": "astrapy.results.UpdateResult"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.operations.reduce_bulk_write_results", "text": "Reduce a list of bulk write results into a single one.", "metadata": {"kind": "function", "name": "reduce_bulk_write_results", "path": "astrapy.operations.reduce_bulk_write_results", "parameters": [{"name": "results", "type": "list[BulkWriteResult]", "description": "a list of BulkWriteResult instances.", "default": null}], "returns": [{"type": "BulkWriteResult", "description": "A new BulkWRiteResult object which summarized the whole input list."}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.BaseOperation", "text": "Base class for all operations amenable to be used\nin bulk writes on (sync) collections.", "metadata": {"kind": "class", "name": "BaseOperation", "path": "astrapy.operations.BaseOperation", "bases": ["ABC"], "gathered_types": ["ABC"], "implemented_by": ["astrapy.operations.ReplaceOne", "astrapy.operations.UpdateMany", "astrapy.operations.DeleteMany", "astrapy.operations.UpdateOne", "astrapy.operations.InsertOne", "astrapy.operations.DeleteOne", "astrapy.operations.InsertMany"]}} +{"id": "astrapy.operations.BaseOperation.execute", "text": "", "metadata": {"kind": "function", "name": "execute", "path": 
"astrapy.operations.BaseOperation.execute", "parameters": [{"name": "collection", "type": "Collection"}, {"name": "index_in_bulk_write", "type": "int"}, {"name": "bulk_write_timeout_ms", "type": "int | None"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.InsertOne", "text": "Represents an `insert_one` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "InsertOne", "path": "astrapy.operations.InsertOne", "parameters": [{"name": "document", "type": "DocumentType"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "document", "type": "DocumentType", "description": "the document to insert."}, {"name": "vector", "type": "VectorType | None", "description": "an optional suitable vector to enrich the document at insertion.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the documents instead."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.InsertOne.document", "text": "", "metadata": {"kind": "attribute", "name": "document", "path": "astrapy.operations.InsertOne.document", "value": "document: DocumentType = document", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.InsertOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.InsertOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.InsertOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.InsertOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.InsertOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.InsertOne.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.InsertMany", "text": "Represents an `insert_many` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "InsertMany", "path": "astrapy.operations.InsertMany", "parameters": [{"name": "documents", "type": "Iterable[DocumentType]"}, {"name": "vectors", "default": "None", "type": "Iterable[VectorType | None] | None"}, {"name": "vectorize", "default": "None", "type": "Iterable[str | None] | None"}, 
{"name": "ordered", "default": "True", "type": "bool"}, {"name": "chunk_size", "default": "None", "type": "int | None"}, {"name": "concurrency", "default": "None", "type": "int | None"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "documents", "type": "Iterable[DocumentType]", "description": "the list document to insert."}, {"name": "vectors", "type": "Iterable[VectorType | None] | None", "description": "an optional list of vectors to enrich the documents at insertion.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead."}, {"name": "vectorize", "type": "Iterable[str | None] | None", "description": "an optional list of texts achieving the same effect as `vectors`\nexcept through an embedding service, if one is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead."}, {"name": "ordered", "type": "bool", "description": "whether the inserts should be done in sequence."}, {"name": "chunk_size", "type": "int | None", "description": "how many documents to include in a single API request.\nExceeding the server maximum allowed value results in an error.\nLeave it unspecified (recommended) to use the system default."}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent requests to the API at\na given time. 
It cannot be more than one for ordered insertions."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.InsertMany.documents", "text": "", "metadata": {"kind": "attribute", "name": "documents", "path": "astrapy.operations.InsertMany.documents", "value": "documents: Iterable[DocumentType] = documents", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.InsertMany.vectors", "text": "", "metadata": {"kind": "attribute", "name": "vectors", "path": "astrapy.operations.InsertMany.vectors", "value": "vectors: Iterable[VectorType | None] | None = vectors", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.InsertMany.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.InsertMany.vectorize", "value": "vectorize: Iterable[str | None] | None = vectorize"}} +{"id": "astrapy.operations.InsertMany.ordered", "text": "", "metadata": {"kind": "attribute", "name": "ordered", "path": "astrapy.operations.InsertMany.ordered", "value": "ordered: bool = ordered"}} +{"id": "astrapy.operations.InsertMany.chunk_size", "text": "", "metadata": {"kind": "attribute", "name": "chunk_size", "path": "astrapy.operations.InsertMany.chunk_size", "value": "chunk_size: int | None = chunk_size"}} +{"id": "astrapy.operations.InsertMany.concurrency", "text": "", "metadata": {"kind": "attribute", "name": "concurrency", "path": "astrapy.operations.InsertMany.concurrency", "value": "concurrency: int | None = concurrency"}} +{"id": "astrapy.operations.InsertMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.InsertMany.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": 
"insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.UpdateOne", "text": "Represents an `update_one` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "UpdateOne", "path": "astrapy.operations.UpdateOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "update", "type": "dict[str, Any]"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "update", "type": "dict[str, Any]", "description": "an update prescription to apply to the document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.SortType", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.UpdateOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.UpdateOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.UpdateOne.update", "text": "", "metadata": {"kind": "attribute", "name": "update", "path": "astrapy.operations.UpdateOne.update", "value": "update: dict[str, Any] = update"}} +{"id": "astrapy.operations.UpdateOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.UpdateOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.UpdateOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.UpdateOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.UpdateOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.UpdateOne.sort", "value": "sort: SortType | None = sort", "gathered_types": ["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.UpdateOne.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.UpdateOne.upsert", "value": "upsert: bool = upsert"}} +{"id": "astrapy.operations.UpdateOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.UpdateOne.execute", "parameters": [{"name": "collection", "type": 
"Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.UpdateMany", "text": "Represents an `update_many` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "UpdateMany", "path": "astrapy.operations.UpdateMany", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "update", "type": "dict[str, Any]"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select target documents."}, {"name": "update", "type": "dict[str, Any]", "description": "an update prescription to apply to the documents."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.UpdateMany.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.UpdateMany.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.UpdateMany.update", "text": "", "metadata": {"kind": "attribute", "name": "update", "path": "astrapy.operations.UpdateMany.update", "value": "update: dict[str, Any] = update"}} +{"id": "astrapy.operations.UpdateMany.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.UpdateMany.upsert", "value": "upsert: bool = upsert"}} +{"id": "astrapy.operations.UpdateMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": 
"astrapy.operations.UpdateMany.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.ReplaceOne", "text": "Represents a `replace_one` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "ReplaceOne", "path": "astrapy.operations.ReplaceOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "replacement", "type": "DocumentType"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "replacement", "type": "DocumentType", "description": "the replacement document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.constants.SortType", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.ReplaceOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.ReplaceOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.ReplaceOne.replacement", "text": "", "metadata": {"kind": "attribute", "name": "replacement", "path": "astrapy.operations.ReplaceOne.replacement", "value": "replacement: DocumentType = replacement", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.ReplaceOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.ReplaceOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.ReplaceOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.ReplaceOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.ReplaceOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.ReplaceOne.sort", "value": "sort: SortType | None = sort", "gathered_types": ["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.ReplaceOne.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.ReplaceOne.upsert", "value": "upsert: bool = upsert"}} +{"id": "astrapy.operations.ReplaceOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": 
"function", "name": "execute", "path": "astrapy.operations.ReplaceOne.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.DeleteOne", "text": "Represents a `delete_one` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "DeleteOne", "path": "astrapy.operations.DeleteOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.SortType", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.DeleteOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.DeleteOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.DeleteOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.DeleteOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.DeleteOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.DeleteOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.DeleteOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.DeleteOne.sort", "value": "sort: SortType | None = sort", "gathered_types": ["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.DeleteOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.DeleteOne.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.DeleteMany", "text": "Represents a `delete_many` operation on a (sync) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": 
"class", "name": "DeleteMany", "path": "astrapy.operations.DeleteMany", "parameters": [{"name": "filter", "type": "dict[str, Any]"}], "bases": ["astrapy.operations.BaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select target documents."}], "gathered_types": ["astrapy.operations.BaseOperation"]}} +{"id": "astrapy.operations.DeleteMany.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.DeleteMany.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.DeleteMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.DeleteMany.execute", "parameters": [{"name": "collection", "type": "Collection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.operations.AsyncBaseOperation", "text": "Base class for all operations amenable to be used\nin bulk writes on (async) collections.", "metadata": {"kind": "class", "name": "AsyncBaseOperation", "path": "astrapy.operations.AsyncBaseOperation", "bases": ["ABC"], "gathered_types": ["ABC"], "implemented_by": ["astrapy.operations.AsyncInsertMany", "astrapy.operations.AsyncDeleteMany", "astrapy.operations.AsyncInsertOne", "astrapy.operations.AsyncUpdateMany", "astrapy.operations.AsyncReplaceOne", "astrapy.operations.AsyncDeleteOne", "astrapy.operations.AsyncUpdateOne"]}} +{"id": "astrapy.operations.AsyncBaseOperation.execute", "text": "", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncBaseOperation.execute", "parameters": [{"name": "collection", "type": "AsyncCollection"}, {"name": 
"index_in_bulk_write", "type": "int"}, {"name": "bulk_write_timeout_ms", "type": "int | None"}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncInsertOne", "text": "Represents an `insert_one` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncInsertOne", "path": "astrapy.operations.AsyncInsertOne", "parameters": [{"name": "document", "type": "DocumentType"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "document", "type": "DocumentType", "description": "the document to insert."}, {"name": "vector", "type": "VectorType | None", "description": "an optional suitable vector to enrich the document at insertion.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the document instead."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncInsertOne.document", "text": "", "metadata": {"kind": "attribute", "name": "document", "path": "astrapy.operations.AsyncInsertOne.document", "value": "document: DocumentType = document", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.AsyncInsertOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.AsyncInsertOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.AsyncInsertOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.AsyncInsertOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.AsyncInsertOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncInsertOne.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncInsertMany", "text": "Represents an `insert_many` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncInsertMany", "path": "astrapy.operations.AsyncInsertMany", "parameters": [{"name": "documents", "type": "Iterable[DocumentType]"}, {"name": "vectors", "default": "None", "type": "Iterable[VectorType 
| None] | None"}, {"name": "vectorize", "default": "None", "type": "Iterable[str | None] | None"}, {"name": "ordered", "default": "True", "type": "bool"}, {"name": "chunk_size", "default": "None", "type": "int | None"}, {"name": "concurrency", "default": "None", "type": "int | None"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "documents", "type": "Iterable[DocumentType]", "description": "the list document to insert."}, {"name": "vectors", "type": "Iterable[VectorType | None] | None", "description": "an optional list of vectors to enrich the documents at insertion.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead."}, {"name": "vectorize", "type": "Iterable[str | None] | None", "description": "an optional list of texts achieving the same effect as `vectors`\nexcept through an embedding service, if one is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead."}, {"name": "ordered", "type": "bool", "description": "whether the inserts should be done in sequence."}, {"name": "chunk_size", "type": "int | None", "description": "how many documents to include in a single API request.\nExceeding the server maximum allowed value results in an error.\nLeave it unspecified (recommended) to use the system default."}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent requests to the API at\na given time. 
It cannot be more than one for ordered insertions."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncInsertMany.documents", "text": "", "metadata": {"kind": "attribute", "name": "documents", "path": "astrapy.operations.AsyncInsertMany.documents", "value": "documents: Iterable[DocumentType] = documents", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.AsyncInsertMany.vectors", "text": "", "metadata": {"kind": "attribute", "name": "vectors", "path": "astrapy.operations.AsyncInsertMany.vectors", "value": "vectors: Iterable[VectorType | None] | None = vectors", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.AsyncInsertMany.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.AsyncInsertMany.vectorize", "value": "vectorize: Iterable[str | None] | None = vectorize"}} +{"id": "astrapy.operations.AsyncInsertMany.ordered", "text": "", "metadata": {"kind": "attribute", "name": "ordered", "path": "astrapy.operations.AsyncInsertMany.ordered", "value": "ordered: bool = ordered"}} +{"id": "astrapy.operations.AsyncInsertMany.chunk_size", "text": "", "metadata": {"kind": "attribute", "name": "chunk_size", "path": "astrapy.operations.AsyncInsertMany.chunk_size", "value": "chunk_size: int | None = chunk_size"}} +{"id": "astrapy.operations.AsyncInsertMany.concurrency", "text": "", "metadata": {"kind": "attribute", "name": "concurrency", "path": "astrapy.operations.AsyncInsertMany.concurrency", "value": "concurrency: int | None = concurrency"}} +{"id": "astrapy.operations.AsyncInsertMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncInsertMany.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", 
"description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncUpdateOne", "text": "Represents an `update_one` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncUpdateOne", "path": "astrapy.operations.AsyncUpdateOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "update", "type": "dict[str, Any]"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "update", "type": "dict[str, Any]", "description": "an update prescription to apply to the document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.SortType", "astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncUpdateOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.AsyncUpdateOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.AsyncUpdateOne.update", "text": "", "metadata": {"kind": "attribute", "name": "update", "path": "astrapy.operations.AsyncUpdateOne.update", "value": "update: dict[str, Any] = update"}} +{"id": "astrapy.operations.AsyncUpdateOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.AsyncUpdateOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.AsyncUpdateOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.AsyncUpdateOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.AsyncUpdateOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.AsyncUpdateOne.sort", "value": "sort: SortType | None = sort", "gathered_types": ["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.AsyncUpdateOne.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.AsyncUpdateOne.upsert", "value": "upsert: bool = upsert"}} +{"id": "astrapy.operations.AsyncUpdateOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": 
"astrapy.operations.AsyncUpdateOne.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncUpdateMany", "text": "Represents an `update_many` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncUpdateMany", "path": "astrapy.operations.AsyncUpdateMany", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "update", "type": "dict[str, Any]"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select target documents."}, {"name": "update", "type": "dict[str, Any]", "description": "an update prescription to apply to the documents."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncUpdateMany.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.AsyncUpdateMany.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.AsyncUpdateMany.update", "text": "", "metadata": {"kind": "attribute", "name": "update", "path": "astrapy.operations.AsyncUpdateMany.update", "value": "update: dict[str, Any] = update"}} +{"id": "astrapy.operations.AsyncUpdateMany.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.AsyncUpdateMany.upsert", "value": "upsert: bool = upsert"}} +{"id": 
"astrapy.operations.AsyncUpdateMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncUpdateMany.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncReplaceOne", "text": "Represents a `replace_one` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncReplaceOne", "path": "astrapy.operations.AsyncReplaceOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "replacement", "type": "DocumentType"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}, {"name": "upsert", "default": "False", "type": "bool"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "replacement", "type": "DocumentType", "description": "the replacement document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}, {"name": "upsert", "type": "bool", "description": "controls what to do when no documents are found."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.constants.SortType", "astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncReplaceOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.AsyncReplaceOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.AsyncReplaceOne.replacement", "text": "", "metadata": {"kind": "attribute", "name": "replacement", "path": "astrapy.operations.AsyncReplaceOne.replacement", "value": "replacement: DocumentType = replacement", "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.operations.AsyncReplaceOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.AsyncReplaceOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.AsyncReplaceOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.AsyncReplaceOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.AsyncReplaceOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.AsyncReplaceOne.sort", "value": "sort: SortType | None = sort", "gathered_types": 
["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.AsyncReplaceOne.upsert", "text": "", "metadata": {"kind": "attribute", "name": "upsert", "path": "astrapy.operations.AsyncReplaceOne.upsert", "value": "upsert: bool = upsert"}} +{"id": "astrapy.operations.AsyncReplaceOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncReplaceOne.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncDeleteOne", "text": "Represents a `delete_one` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncDeleteOne", "path": "astrapy.operations.AsyncDeleteOne", "parameters": [{"name": "filter", "type": "dict[str, Any]"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select a target document."}, {"name": "vector", "type": "VectorType | None", "description": "a vector of numbers to use for ANN (vector-search) sorting.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead."}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, with the same result as the\n`vector` attribute, through an embedding service, assuming one is\nconfigured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead."}, {"name": "sort", "type": "SortType | None", "description": "controls ordering of results, hence which document is affected."}], "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.SortType", "astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncDeleteOne.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.AsyncDeleteOne.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.AsyncDeleteOne.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.operations.AsyncDeleteOne.vector", "value": "vector: VectorType | None = vector", "gathered_types": ["astrapy.constants.VectorType"]}} +{"id": "astrapy.operations.AsyncDeleteOne.vectorize", "text": "", "metadata": {"kind": "attribute", "name": "vectorize", "path": "astrapy.operations.AsyncDeleteOne.vectorize", "value": "vectorize: str | None = vectorize"}} +{"id": "astrapy.operations.AsyncDeleteOne.sort", "text": "", "metadata": {"kind": "attribute", "name": "sort", "path": "astrapy.operations.AsyncDeleteOne.sort", "value": "sort: SortType | None = sort", "gathered_types": ["astrapy.constants.SortType"]}} +{"id": "astrapy.operations.AsyncDeleteOne.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncDeleteOne.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", 
"description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.operations.AsyncDeleteMany", "text": "Represents a `delete_many` operation on a (async) collection.\nSee the documentation on the collection method for more information.", "metadata": {"kind": "class", "name": "AsyncDeleteMany", "path": "astrapy.operations.AsyncDeleteMany", "parameters": [{"name": "filter", "type": "dict[str, Any]"}], "bases": ["astrapy.operations.AsyncBaseOperation"], "attributes": [{"name": "filter", "type": "dict[str, Any]", "description": "a filter condition to select target documents."}], "gathered_types": ["astrapy.operations.AsyncBaseOperation"]}} +{"id": "astrapy.operations.AsyncDeleteMany.filter", "text": "", "metadata": {"kind": "attribute", "name": "filter", "path": "astrapy.operations.AsyncDeleteMany.filter", "value": "filter: dict[str, Any] = filter"}} +{"id": "astrapy.operations.AsyncDeleteMany.execute", "text": "Execute this operation against a collection as part of a bulk write.", "metadata": {"kind": "function", "name": "execute", "path": "astrapy.operations.AsyncDeleteMany.execute", "parameters": [{"name": "collection", "type": "AsyncCollection", "description": "the collection this write targets.", "default": null}, {"name": "insert_in_bulk_write", "description": "the index in the list of bulkoperations", "default": null}], "returns": [{"type": "BulkWriteResult"}], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.collection.AsyncCollection"]}} +{"id": "astrapy.admin", "text": "", "metadata": {"kind": "module", "name": "admin", "path": "astrapy.admin", "imports": {"annotations": "__future__.annotations", "asyncio": "asyncio", "logging": "logging", "re": "re", "time": "time", "warnings": "warnings", "ABC": "abc.ABC", "abstractmethod": "abc.abstractmethod", "dataclass": "dataclasses.dataclass", 
"TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Sequence": "typing.Sequence", "deprecation": "deprecation", "__version__": "astrapy.__version__", "APICommander": "astrapy.api_commander.APICommander", "coerce_token_provider": "astrapy.authentication.coerce_token_provider", "redact_secret": "astrapy.authentication.redact_secret", "CallerType": "astrapy.constants.CallerType", "Environment": "astrapy.constants.Environment", "CommandCursor": "astrapy.cursors.CommandCursor", "API_ENDPOINT_TEMPLATE_ENV_MAP": "astrapy.defaults.API_ENDPOINT_TEMPLATE_ENV_MAP", "API_PATH_ENV_MAP": "astrapy.defaults.API_PATH_ENV_MAP", "API_VERSION_ENV_MAP": "astrapy.defaults.API_VERSION_ENV_MAP", "DEFAULT_DATA_API_AUTH_HEADER": "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", "DEFAULT_DEV_OPS_AUTH_HEADER": "astrapy.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "DEFAULT_DEV_OPS_AUTH_PREFIX": "astrapy.defaults.DEFAULT_DEV_OPS_AUTH_PREFIX", "DEV_OPS_DATABASE_POLL_INTERVAL_S": "astrapy.defaults.DEV_OPS_DATABASE_POLL_INTERVAL_S", "DEV_OPS_DATABASE_STATUS_ACTIVE": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_ACTIVE", "DEV_OPS_DATABASE_STATUS_INITIALIZING": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_INITIALIZING", "DEV_OPS_DATABASE_STATUS_MAINTENANCE": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_MAINTENANCE", "DEV_OPS_DATABASE_STATUS_PENDING": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_PENDING", "DEV_OPS_DATABASE_STATUS_TERMINATING": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_TERMINATING", "DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE": "astrapy.defaults.DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE", "DEV_OPS_KEYSPACE_POLL_INTERVAL_S": "astrapy.defaults.DEV_OPS_KEYSPACE_POLL_INTERVAL_S", "DEV_OPS_RESPONSE_HTTP_ACCEPTED": "astrapy.defaults.DEV_OPS_RESPONSE_HTTP_ACCEPTED", "DEV_OPS_RESPONSE_HTTP_CREATED": "astrapy.defaults.DEV_OPS_RESPONSE_HTTP_CREATED", "DEV_OPS_URL_ENV_MAP": "astrapy.defaults.DEV_OPS_URL_ENV_MAP", "DEV_OPS_VERSION_ENV_MAP": "astrapy.defaults.DEV_OPS_VERSION_ENV_MAP", 
"NAMESPACE_DEPRECATION_NOTICE_METHOD": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_METHOD", "SET_CALLER_DEPRECATION_NOTICE": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "DataAPIFaultyResponseException": "astrapy.exceptions.DataAPIFaultyResponseException", "DevOpsAPIException": "astrapy.exceptions.DevOpsAPIException", "MultiCallTimeoutManager": "astrapy.exceptions.MultiCallTimeoutManager", "base_timeout_info": "astrapy.exceptions.base_timeout_info", "AdminDatabaseInfo": "astrapy.info.AdminDatabaseInfo", "DatabaseInfo": "astrapy.info.DatabaseInfo", "FindEmbeddingProvidersResult": "astrapy.info.FindEmbeddingProvidersResult", "check_caller_parameters": "astrapy.meta.check_caller_parameters", "check_namespace_keyspace": "astrapy.meta.check_namespace_keyspace", "check_update_db_namespace_keyspace": "astrapy.meta.check_update_db_namespace_keyspace", "HttpMethod": "astrapy.request_tools.HttpMethod", "AsyncDatabase": "astrapy.AsyncDatabase", "Database": "astrapy.Database", "TokenProvider": "astrapy.authentication.TokenProvider"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.admin.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.admin.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.admin.database_id_matcher", "text": "", "metadata": {"kind": "attribute", "name": "database_id_matcher", "path": "astrapy.admin.database_id_matcher", "value": "database_id_matcher = re.compile('^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$')"}} +{"id": "astrapy.admin.api_endpoint_parser", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint_parser", "path": "astrapy.admin.api_endpoint_parser", "value": "api_endpoint_parser = 
re.compile('https://([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})-([a-z0-9\\\\-]+).apps.astra[\\\\-]{0,1}(dev|test)?.datastax.com')"}} +{"id": "astrapy.admin.api_endpoint_description", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint_description", "path": "astrapy.admin.api_endpoint_description", "value": "api_endpoint_description = 'https://<db uuid, 8-4-4-4-12 hex format>-<db region>.apps.astra.datastax.com'"}} +{"id": "astrapy.admin.generic_api_url_matcher", "text": "", "metadata": {"kind": "attribute", "name": "generic_api_url_matcher", "path": "astrapy.admin.generic_api_url_matcher", "value": "generic_api_url_matcher = re.compile('^https?:\\\\/\\\\/[a-zA-Z0-9\\\\-.]+(\\\\:[0-9]{1,6}){0,1}$')"}} +{"id": "astrapy.admin.generic_api_url_descriptor", "text": "", "metadata": {"kind": "attribute", "name": "generic_api_url_descriptor", "path": "astrapy.admin.generic_api_url_descriptor", "value": "generic_api_url_descriptor = 'http[s]://<domain name or IP>[:port]'"}} +{"id": "astrapy.admin.ParsedAPIEndpoint", "text": "The results of successfully parsing an Astra DB API endpoint, for internal\nby database metadata-related functions.", "metadata": {"kind": "class", "name": "ParsedAPIEndpoint", "path": "astrapy.admin.ParsedAPIEndpoint", "parameters": [{"name": "database_id", "type": "str"}, {"name": "region", "type": "str"}, {"name": "environment", "type": "str"}], "attributes": [{"name": "database_id", "type": "str", "description": "e. g. 
\"01234567-89ab-cdef-0123-456789abcdef\"."}, {"name": "region", "type": "str", "description": "a region ID, such as \"us-west1\"."}, {"name": "environment", "type": "str", "description": "a label, whose value is one of Environment.PROD,\nEnvironment.DEV or Environment.TEST."}]}} +{"id": "astrapy.admin.ParsedAPIEndpoint.database_id", "text": "", "metadata": {"kind": "attribute", "name": "database_id", "path": "astrapy.admin.ParsedAPIEndpoint.database_id", "value": "database_id: str"}} +{"id": "astrapy.admin.ParsedAPIEndpoint.region", "text": "", "metadata": {"kind": "attribute", "name": "region", "path": "astrapy.admin.ParsedAPIEndpoint.region", "value": "region: str"}} +{"id": "astrapy.admin.ParsedAPIEndpoint.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.admin.ParsedAPIEndpoint.environment", "value": "environment: str"}} +{"id": "astrapy.admin.parse_api_endpoint", "text": "Parse an API Endpoint into a ParsedAPIEndpoint structure.", "metadata": {"kind": "function", "name": "parse_api_endpoint", "path": "astrapy.admin.parse_api_endpoint", "parameters": [{"name": "api_endpoint", "type": "str", "description": "a full API endpoint for the Data API.", "default": null}], "returns": [{"type": "ParsedAPIEndpoint | None", "description": "The parsed ParsedAPIEndpoint. 
If parsing fails, return None."}], "gathered_types": ["ParsedAPIEndpoint"]}} +{"id": "astrapy.admin.api_endpoint_parsing_error_message", "text": "Format an error message with a suggestion for the expected url format.", "metadata": {"kind": "function", "name": "api_endpoint_parsing_error_message", "path": "astrapy.admin.api_endpoint_parsing_error_message", "parameters": [{"name": "failing_url", "type": "str"}], "returns": [{"type": "str"}]}} +{"id": "astrapy.admin.parse_generic_api_url", "text": "Validate a generic API Endpoint string,\nsuch as `http://10.1.1.1:123` or `https://my.domain`.", "metadata": {"kind": "function", "name": "parse_generic_api_url", "path": "astrapy.admin.parse_generic_api_url", "parameters": [{"name": "api_endpoint", "type": "str", "description": "a string supposedly expressing a valid API Endpoint", "default": null}], "returns": [{"type": "str | None", "description": "a normalized (stripped) version of the endpoint if valid. If invalid,"}, {"type": "str | None", "description": "return None."}]}} +{"id": "astrapy.admin.generic_api_url_parsing_error_message", "text": "Format an error message with a suggestion for the expected url format.", "metadata": {"kind": "function", "name": "generic_api_url_parsing_error_message", "path": "astrapy.admin.generic_api_url_parsing_error_message", "parameters": [{"name": "failing_url", "type": "str"}], "returns": [{"type": "str"}]}} +{"id": "astrapy.admin.build_api_endpoint", "text": "Build the API Endpoint full strings from database parameters.", "metadata": {"kind": "function", "name": "build_api_endpoint", "path": "astrapy.admin.build_api_endpoint", "parameters": [{"name": "environment", "type": "str", "description": "a label, whose value can be Environment.PROD\nor another of Environment.* for which this operation makes sense.", "default": null}, {"name": "database_id", "type": "str", "description": "e. g. 
\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "region", "type": "str", "description": "a region ID, such as \"us-west1\".", "default": null}], "returns": [{"type": "str", "description": "the endpoint string, such as \"https://01234567-...-eu-west1.apps.datastax.com\""}]}} +{"id": "astrapy.admin.check_id_endpoint_parg_kwargs", "text": "Utility function helping with the transition to endpoint-first constructors,\nwith ID being the other possibility.\n\nIt is called with the positional argument, the api_endpoint and id kwargs: it\nthen verifies legitimacy and returns a normalized (endpoint, id) \"either\" value.\n\nNote: this uses the ID regexp to recognize IDs. Crucially, no endpoint regexp\nhere, since even non-Astra endpoints must be properly processed by this validator.", "metadata": {"kind": "function", "name": "check_id_endpoint_parg_kwargs", "path": "astrapy.admin.check_id_endpoint_parg_kwargs", "parameters": [{"name": "p_arg", "type": "str | None"}, {"name": "api_endpoint", "type": "str | None"}, {"name": "id", "type": "str | None"}], "returns": [{"type": "tuple[str | None, str | None]"}]}} +{"id": "astrapy.admin.fetch_raw_database_info_from_id_token", "text": "Fetch database information through the DevOps API and return it in\nfull, exactly like the API gives it back.", "metadata": {"kind": "function", "name": "fetch_raw_database_info_from_id_token", "path": "astrapy.admin.fetch_raw_database_info_from_id_token", "parameters": [{"name": "id", "type": "str", "description": "e. g. 
\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "token", "type": "str | None", "description": "a valid token to access the database information.", "default": null}, {"name": "environment", "type": "str", "description": "a string representing the target Data API environment.\nIt can be left unspecified for the default value of `Environment.PROD`.\nOnly Astra DB environments can be meaningfully supplied.", "value": "Environment.PROD", "default": "Environment.PROD"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for waiting on a response.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "The full response from the DevOps API about the database."}], "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.admin.async_fetch_raw_database_info_from_id_token", "text": "Fetch database information through the DevOps API and return it in\nfull, exactly like the API gives it back.\nAsync version of the function, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_fetch_raw_database_info_from_id_token", "path": "astrapy.admin.async_fetch_raw_database_info_from_id_token", "parameters": [{"name": "id", "type": "str", "description": "e. g. 
\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "token", "type": "str | None", "description": "a valid token to access the database information.", "default": null}, {"name": "environment", "type": "str", "description": "a string representing the target Data API environment.\nIt can be left unspecified for the default value of `Environment.PROD`.\nOnly Astra DB environments can be meaningfully supplied.", "value": "Environment.PROD", "default": "Environment.PROD"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for waiting on a response.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "The full response from the DevOps API about the database."}], "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.admin.fetch_database_info", "text": "Fetch database information through the DevOps API.", "metadata": {"kind": "function", "name": "fetch_database_info", "path": "astrapy.admin.fetch_database_info", "parameters": [{"name": "api_endpoint", "type": "str", "description": "a full API endpoint for the Data API.", "default": null}, {"name": "token", "type": "str | None", "description": "a valid token to access the database information.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the desired keyspace that will be used in the result.\nIf not specified, the resulting database info will show it as None.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for waiting on a response.", "value": "None", "default": "None"}], "returns": [{"type": "DatabaseInfo | None", "description": "A DatabaseInfo object."}, {"type": "DatabaseInfo | None", "description": "If the API endpoint fails to be parsed, None is returned."}, {"type": "DatabaseInfo | None", "description": "For valid-looking endpoints, if something goes wrong an exception is raised."}], "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.admin.async_fetch_database_info", "text": "Fetch database information through the DevOps API.\nAsync version of the function, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_fetch_database_info", "path": "astrapy.admin.async_fetch_database_info", "parameters": [{"name": "api_endpoint", "type": "str", "description": "a full API endpoint for the Data API.", "default": null}, {"name": "token", "type": "str | None", "description": "a valid token to access the database information.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "the desired keyspace that will be used in the result.\nIf not specified, the resulting database info will show it as None.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for waiting on a response.", "value": "None", "default": "None"}], "returns": [{"type": "DatabaseInfo | None", "description": "A DatabaseInfo object."}, {"type": "DatabaseInfo | None", "description": "If the API endpoint fails to be parsed, None is returned."}, {"type": "DatabaseInfo | None", "description": "For valid-looking endpoints, if something goes wrong an exception is raised."}], "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.admin.normalize_region_for_id", "text": "", "metadata": {"kind": "function", "name": "normalize_region_for_id", "path": "astrapy.admin.normalize_region_for_id", "parameters": [{"name": "database_id", "type": "str"}, {"name": "token_str", "type": "str | None"}, {"name": "environment", "type": "str"}, {"name": "region_param", "type": "str | None"}, {"name": "max_time_ms", "type": "int | None"}], "returns": [{"type": "str"}]}} +{"id": "astrapy.admin.AstraDBAdmin", "text": "An \"admin\" object, able to perform administrative tasks at the databases\nlevel, such as creating, listing or dropping databases.", "metadata": {"kind": "class", "name": "AstraDBAdmin", "path": "astrapy.admin.AstraDBAdmin", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission to perform admin tasks.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a label, whose value is one of Environment.PROD (default),\nEnvironment.DEV or Environment.TEST.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which DevOps API calls are performed. 
These end up in\nthe request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the DevOps API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) is\ndetermined from the API Endpoint.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). 
Generally not needed.", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient\n>>> my_client = DataAPIClient(\"AstraCS:...\")\n>>> my_astra_db_admin = my_client.get_admin()\n>>> database_list = my_astra_db_admin.list_databases()\n>>> len(database_list)\n3\n>>> database_list[2].id\n'01234567-...'\n>>> my_db_admin = my_astra_db_admin.get_database_admin(\"01234567-...\")\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'staging_keyspace']", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.admin.AstraDBAdmin.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.admin.AstraDBAdmin.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.admin.AstraDBAdmin.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.admin.AstraDBAdmin.environment", "value": "environment = environment or Environment.PROD.lower()", "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.admin.AstraDBAdmin.dev_ops_url", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_url", "path": "astrapy.admin.AstraDBAdmin.dev_ops_url", "value": "dev_ops_url = DEV_OPS_URL_ENV_MAP[self.environment]", "gathered_types": ["astrapy.defaults.DEV_OPS_URL_ENV_MAP"]}} +{"id": "astrapy.admin.AstraDBAdmin.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.admin.AstraDBAdmin.callers", "value": "callers = callers_param"}} +{"id": "astrapy.admin.AstraDBAdmin.with_options", "text": "Create a clone of this AstraDBAdmin with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.admin.AstraDBAdmin.with_options", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the 
database. Example: `\"AstraCS:xyz...\"`.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which DevOps API calls are performed. These end up in\nthe request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the DevOps API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBAdmin", "description": "a new AstraDBAdmin instance."}], "example": ">>> another_astra_db_admin = my_astra_db_admin.with_options(\n... callers=[(\"caller_identity\", \"1.2.0\")],\n... )", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.admin.AstraDBAdmin", "astrapy.constants.CallerType"]}} +{"id": "astrapy.admin.AstraDBAdmin.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe DevOps API calls will be performed (the \"caller\").\n\nNew objects spawned from this client afterwards will inherit the new settings.", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.admin.AstraDBAdmin.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe DevOps API calls are performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_astra_db_admin.set_caller(\n... callers=[(\"the_caller\", \"0.1.0\")],\n... )"}} +{"id": "astrapy.admin.AstraDBAdmin.list_databases", "text": "Get the list of databases, as obtained with a request to the DevOps API.", "metadata": {"kind": "function", "name": "list_databases", "path": "astrapy.admin.AstraDBAdmin.list_databases", "parameters": [{"name": "include", "type": "str | None", "description": "a filter on what databases are to be returned. As per\nDevOps API, defaults to \"nonterminated\". Pass \"all\" to include\nthe already terminated databases.", "value": "None", "default": "None"}, {"name": "provider", "type": "str | None", "description": "a filter on the cloud provider for the databases.\nAs per DevOps API, defaults to \"ALL\". Pass e.g. \"AWS\" to\nrestrict the results.", "value": "None", "default": "None"}, {"name": "page_size", "type": "int | None", "description": "number of results per page from the DevOps API. 
Optional.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the API request.", "value": "None", "default": "None"}], "returns": [{"type": "CommandCursor[AdminDatabaseInfo]", "description": "A CommandCursor to iterate over the detected databases,"}, {"type": "CommandCursor[AdminDatabaseInfo]", "description": "represented as AdminDatabaseInfo objects."}], "example": ">>> database_cursor = my_astra_db_admin.list_databases()\n>>> database_list = list(database_cursor)\n>>> len(database_list)\n3\n>>> database_list[2].id\n'01234567-...'\n>>> database_list[2].status\n'ACTIVE'\n>>> database_list[2].info.region\n'eu-west-1'", "gathered_types": ["astrapy.info.AdminDatabaseInfo", "astrapy.cursors.CommandCursor"]}} +{"id": "astrapy.admin.AstraDBAdmin.async_list_databases", "text": "Get the list of databases, as obtained with a request to the DevOps API.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_list_databases", "path": "astrapy.admin.AstraDBAdmin.async_list_databases", "parameters": [{"name": "include", "type": "str | None", "description": "a filter on what databases are to be returned. As per\nDevOps API, defaults to \"nonterminated\". Pass \"all\" to include\nthe already terminated databases.", "value": "None", "default": "None"}, {"name": "provider", "type": "str | None", "description": "a filter on the cloud provider for the databases.\nAs per DevOps API, defaults to \"ALL\". Pass e.g. \"AWS\" to\nrestrict the results.", "value": "None", "default": "None"}, {"name": "page_size", "type": "int | None", "description": "number of results per page from the DevOps API. 
Optional.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the API request.", "value": "None", "default": "None"}], "returns": [{"type": "CommandCursor[AdminDatabaseInfo]", "description": "A CommandCursor to iterate over the detected databases,"}, {"type": "CommandCursor[AdminDatabaseInfo]", "description": "represented as AdminDatabaseInfo objects."}, {"type": "CommandCursor[AdminDatabaseInfo]", "description": "Note that the return type is not an awaitable, rather"}, {"type": "CommandCursor[AdminDatabaseInfo]", "description": "a regular iterable, e.g. for use in ordinary \"for\" loops."}], "example": ">>> async def check_if_db_exists(db_id: str) -> bool:\n... db_cursor = await my_astra_db_admin.async_list_databases()\n... db_list = list(db_cursor)\n... return db_id in db_list\n...\n>>> asyncio.run(check_if_db_exists(\"xyz\"))\nTrue\n>>> asyncio.run(check_if_db_exists(\"01234567-...\"))\nFalse", "gathered_types": ["astrapy.info.AdminDatabaseInfo", "astrapy.cursors.CommandCursor"]}} +{"id": "astrapy.admin.AstraDBAdmin.database_info", "text": "Get the full information on a given database, through a request to the DevOps API.", "metadata": {"kind": "function", "name": "database_info", "path": "astrapy.admin.AstraDBAdmin.database_info", "parameters": [{"name": "id", "type": "str", "description": "the ID of the target database, e. 
g.\n\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the API request.", "value": "None", "default": "None"}], "returns": [{"type": "AdminDatabaseInfo", "description": "An AdminDatabaseInfo object."}], "example": ">>> details_of_my_db = my_astra_db_admin.database_info(\"01234567-...\")\n>>> details_of_my_db.id\n'01234567-...'\n>>> details_of_my_db.status\n'ACTIVE'\n>>> details_of_my_db.info.region\n'eu-west-1'", "gathered_types": ["astrapy.info.AdminDatabaseInfo"]}} +{"id": "astrapy.admin.AstraDBAdmin.async_database_info", "text": "Get the full information on a given database, through a request to the DevOps API.\nThis is an awaitable method suitable for use within an asyncio event loop.", "metadata": {"kind": "function", "name": "async_database_info", "path": "astrapy.admin.AstraDBAdmin.async_database_info", "parameters": [{"name": "id", "type": "str", "description": "the ID of the target database, e. g.\n\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the API request.", "value": "None", "default": "None"}], "returns": [{"type": "AdminDatabaseInfo", "description": "An AdminDatabaseInfo object."}], "example": ">>> async def check_if_db_active(db_id: str) -> bool:\n... db_info = await my_astra_db_admin.async_database_info(db_id)\n... 
return db_info.status == \"ACTIVE\"\n...\n>>> asyncio.run(check_if_db_active(\"01234567-...\"))\nTrue", "gathered_types": ["astrapy.info.AdminDatabaseInfo"]}} +{"id": "astrapy.admin.AstraDBAdmin.create_database", "text": "Create a database as requested, optionally waiting for it to be ready.", "metadata": {"kind": "function", "name": "create_database", "path": "astrapy.admin.AstraDBAdmin.create_database", "parameters": [{"name": "name", "type": "str", "description": "the desired name for the database.", "default": null}, {"name": "cloud_provider", "type": "str", "description": "one of 'aws', 'gcp' or 'azure'.", "default": null}, {"name": "region", "type": "str", "description": "any of the available cloud regions.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "name for the one keyspace the database starts with.\nIf omitted, DevOps API will use its default.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe newly-created database is in ACTIVE state (a few minutes,\nusually). If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status before working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "An AstraDBDatabaseAdmin instance."}], "example": ">>> my_new_db_admin = my_astra_db_admin.create_database(\n... \"new_database\",\n... cloud_provider=\"aws\",\n... 
region=\"ap-south-1\",\n... )\n>>> my_new_db = my_new_db_admin.get_database()\n>>> my_coll = my_new_db.create_collection(\"movies\", dimension=2)\n>>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.1, 0.2]})", "gathered_types": ["astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBAdmin.async_create_database", "text": "Create a database as requested, optionally waiting for it to be ready.\nThis is an awaitable method suitable for use within an asyncio event loop.", "metadata": {"kind": "function", "name": "async_create_database", "path": "astrapy.admin.AstraDBAdmin.async_create_database", "parameters": [{"name": "name", "type": "str", "description": "the desired name for the database.", "default": null}, {"name": "cloud_provider", "type": "str", "description": "one of 'aws', 'gcp' or 'azure'.", "default": null}, {"name": "region", "type": "str", "description": "any of the available cloud regions.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "name for the one keyspace the database starts with.\nIf omitted, DevOps API will use its default.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe newly-created database is in ACTIVE state (a few minutes,\nusually). 
If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status before working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "An AstraDBDatabaseAdmin instance."}], "example": ">>> asyncio.run(\n... my_astra_db_admin.async_create_database(\n... \"new_database\",\n... cloud_provider=\"aws\",\n... region=\"ap-south-1\",\n... )\n... )\nAstraDBDatabaseAdmin(id=...)", "gathered_types": ["astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBAdmin.drop_database", "text": "Drop a database, i.e. delete it completely and permanently with all its data.", "metadata": {"kind": "function", "name": "drop_database", "path": "astrapy.admin.AstraDBAdmin.drop_database", "parameters": [{"name": "id", "type": "str", "description": "The ID of the database to drop, e. 
g.\n\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe database has actually been deleted (generally a few minutes).\nIf False, it will return right after issuing the\ndrop request to the DevOps API, and it will be responsibility\nof the caller to check the database status/availability\nafter that, if desired.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> database_list_pre = my_astra_db_admin.list_databases()\n>>> len(database_list_pre)\n3\n>>> my_astra_db_admin.drop_database(\"01234567-...\")\n{'ok': 1}\n>>> database_list_post = my_astra_db_admin.list_databases()\n>>> len(database_list_post)\n2"}} +{"id": "astrapy.admin.AstraDBAdmin.async_drop_database", "text": "Drop a database, i.e. delete it completely and permanently with all its data.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_drop_database", "path": "astrapy.admin.AstraDBAdmin.async_drop_database", "parameters": [{"name": "id", "type": "str", "description": "The ID of the database to drop, e. 
g.\n\"01234567-89ab-cdef-0123-456789abcdef\".", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe database has actually been deleted (generally a few minutes).\nIf False, it will return right after issuing the\ndrop request to the DevOps API, and it will be responsibility\nof the caller to check the database status/availability\nafter that, if desired.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> asyncio.run(\n... my_astra_db_admin.async_drop_database(\"01234567-...\")\n... )\n{'ok': 1}"}} +{"id": "astrapy.admin.AstraDBAdmin.get_database_admin", "text": "Create an AstraDBDatabaseAdmin object for admin work within a certain database.", "metadata": {"kind": "function", "name": "get_database_admin", "path": "astrapy.admin.AstraDBAdmin.get_database_admin", "parameters": [{"name": "api_endpoint_or_id", "type": "str | None", "description": "positional parameter that can stand for both\n`api_endpoint` and `id`. Passing them together is an error.", "value": "None", "default": "None"}, {"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. 
`https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "value": "None", "default": "None"}, {"name": "id", "type": "str | None", "description": "the target database ID. This is alternative to using the API Endpoint.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "the region to use for connecting to the database. The\ndatabase must be located in that region. This parameter can be used\nonly if the database is specified by its ID (instead of API Endpoint).\nIf this parameter is not passed, and cannot be inferred\nfrom the API endpoint, an additional DevOps API request is made\nto determine the default region and use it subsequently.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "An AstraDBDatabaseAdmin instance representing the requested database."}], "example": ">>> my_db_admin = my_astra_db_admin.get_database_admin(\"01234567-...\")\n>>> my_db_admin.list_keyspaces()\n['default_keyspace']\n>>> my_db_admin.create_keyspace(\"that_other_one\")\n{'ok': 1}\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'that_other_one']", "note": "This method does not perform any admin-level operation through\nthe DevOps API. 
For actual creation of a database, see the\n`create_database` method.", "gathered_types": ["astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBAdmin.get_database", "text": "Create a Database instance for a specific database, to be used\nwhen doing data-level work (such as creating/managing collections).", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.admin.AstraDBAdmin.get_database", "parameters": [{"name": "api_endpoint_or_id", "type": "str | None", "description": "positional parameter that can stand for both\n`api_endpoint` and `id`. Passing them together is an error.", "value": "None", "default": "None"}, {"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of\nthe one set for this object.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "used to specify a certain keyspace the resulting\nDatabase will primarily work on. If not specified, an additional\nDevOps API call reveals the default keyspace for the target database.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "id", "type": "str | None", "description": "the target database ID. 
This is alternative to using the API Endpoint.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "the region to use for connecting to the database. The\ndatabase must be located in that region. This parameter can be used\nonly if the database is specified by its ID (instead of API Endpoint).\nIf this parameter is not passed, and cannot be inferred\nfrom the API endpoint, an additional DevOps API request is made\nto determine the default region and use it subsequently.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "A Database object ready to be used."}], "example": ">>> my_db = my_astra_db_admin.get_database(\n... \"https://<ID>-<REGION>.apps.astra.datastax.com\",\n... keyspace=\"my_prod_keyspace\",\n... 
)\n>>> coll = my_db.create_collection(\"movies\", dimension=2)\n>>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.3, 0.4]})", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.admin.AstraDBAdmin.get_async_database", "text": "Create an AsyncDatabase instance for a specific database, to be used\nwhen doing data-level work (such as creating/managing collections).", "metadata": {"kind": "function", "name": "get_async_database", "path": "astrapy.admin.AstraDBAdmin.get_async_database", "parameters": [{"name": "api_endpoint_or_id", "type": "str | None", "description": "positional parameter that can stand for both\n`api_endpoint` and `id`. Passing them together is an error.", "value": "None", "default": "None"}, {"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of\nthe one set for this object.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "used to specify a certain keyspace the resulting\nAsyncDatabase will primarily work on. If not specified, an additional\nDevOps API call reveals the default keyspace for the target database.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "id", "type": "str | None", "description": "the target database ID. 
This is alternative to using the API Endpoint.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "the region to use for connecting to the database. The\ndatabase must be located in that region. This parameter can be used\nonly if the database is specified by its ID (instead of API Endpoint).\nIf this parameter is not passed, and cannot be inferred\nfrom the API endpoint, an additional DevOps API request is made\nto determine the default region and use it subsequently.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}, {"name": "max_time_ms", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "default": null}], "returns": [{"type": "AsyncDatabase", "description": "An AsyncDatabase object ready to be used."}], "example": ">>> async def create_use_collection(\n... admin: AstraDBAdmin,\n... api_endpoint: str,\n... keyspace: str,\n... ) -> None:\n... my_async_db = admin.get_async_database(\n... api_endpoint,\n... keyspace=keyspace,\n... )\n... a_coll = await my_async_db.create_collection(\"movies\", dimension=2)\n... await a_coll.insert_one(\n... {\"title\": \"The Title\", \"$vector\": [0.3, 0.4]}\n... )\n...\n>>> asyncio.run(create_use_collection(\n... my_admin,\n... \"https://<ID>-<REGION>.apps.astra.datastax.com\",\n... \"default_keyspace\",\n... 
))\n>>>", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.admin.DatabaseAdmin", "text": "An abstract class defining the interface for a database admin object.\nThis supports generic keyspace crud, as well as spawning databases,\nwithout committing to a specific database architecture (e.g. Astra DB).", "metadata": {"kind": "class", "name": "DatabaseAdmin", "path": "astrapy.admin.DatabaseAdmin", "bases": ["ABC"], "gathered_types": ["ABC"], "implemented_by": ["astrapy.admin.AstraDBDatabaseAdmin", "astrapy.admin.DataAPIDatabaseAdmin"]}} +{"id": "astrapy.admin.DatabaseAdmin.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.admin.DatabaseAdmin.environment", "value": "environment: str"}} +{"id": "astrapy.admin.DatabaseAdmin.spawner_database", "text": "", "metadata": {"kind": "attribute", "name": "spawner_database", "path": "astrapy.admin.DatabaseAdmin.spawner_database", "value": "spawner_database: Database | AsyncDatabase", "gathered_types": ["astrapy.database.Database", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.admin.DatabaseAdmin.list_namespaces", "text": "Get a list of namespaces for the database.", "metadata": {"kind": "function", "name": "list_namespaces", "path": "astrapy.admin.DatabaseAdmin.list_namespaces", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "list[str]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.list_keyspaces", "text": "Get a list of keyspaces for the database.", "metadata": {"kind": "function", "name": "list_keyspaces", "path": "astrapy.admin.DatabaseAdmin.list_keyspaces", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "list[str]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.create_namespace", "text": "Create a namespace in the database, returning 
{'ok': 1} if successful.", "metadata": {"kind": "function", "name": "create_namespace", "path": "astrapy.admin.DatabaseAdmin.create_namespace", "parameters": [{"name": "name", "type": "str"}, {"name": "update_db_keyspace", "default": "None", "type": "bool | None"}, {"name": "update_db_namespace", "default": "None", "type": "bool | None"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.create_keyspace", "text": "Create a keyspace in the database, returning {'ok': 1} if successful.", "metadata": {"kind": "function", "name": "create_keyspace", "path": "astrapy.admin.DatabaseAdmin.create_keyspace", "parameters": [{"name": "name", "type": "str"}, {"name": "update_db_keyspace", "default": "None", "type": "bool | None"}, {"name": "update_db_namespace", "default": "None", "type": "bool | None"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.drop_namespace", "text": "Drop (delete) a namespace from the database, returning {'ok': 1} if successful.", "metadata": {"kind": "function", "name": "drop_namespace", "path": "astrapy.admin.DatabaseAdmin.drop_namespace", "parameters": [{"name": "name", "type": "str"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.drop_keyspace", "text": "Drop (delete) a keyspace from the database, returning {'ok': 1} if successful.", "metadata": {"kind": "function", "name": "drop_keyspace", "path": "astrapy.admin.DatabaseAdmin.drop_keyspace", "parameters": [{"name": "name", "type": "str"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_list_namespaces", "text": "Get a list of namespaces for the database.\n(Async 
version of the method.)", "metadata": {"kind": "function", "name": "async_list_namespaces", "path": "astrapy.admin.DatabaseAdmin.async_list_namespaces", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "list[str]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_list_keyspaces", "text": "Get a list of keyspaces for the database.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_list_keyspaces", "path": "astrapy.admin.DatabaseAdmin.async_list_keyspaces", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "list[str]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_create_namespace", "text": "Create a namespace in the database, returning {'ok': 1} if successful.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_create_namespace", "path": "astrapy.admin.DatabaseAdmin.async_create_namespace", "parameters": [{"name": "name", "type": "str"}, {"name": "update_db_keyspace", "default": "None", "type": "bool | None"}, {"name": "update_db_namespace", "default": "None", "type": "bool | None"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_create_keyspace", "text": "Create a keyspace in the database, returning {'ok': 1} if successful.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_create_keyspace", "path": "astrapy.admin.DatabaseAdmin.async_create_keyspace", "parameters": [{"name": "name", "type": "str"}, {"name": "update_db_keyspace", "default": "None", "type": "bool | None"}, {"name": "update_db_namespace", "default": "None", "type": "bool | None"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_drop_namespace", "text": "Drop 
(delete) a namespace from the database, returning {'ok': 1} if successful.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_drop_namespace", "path": "astrapy.admin.DatabaseAdmin.async_drop_namespace", "parameters": [{"name": "name", "type": "str"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.async_drop_keyspace", "text": "Drop (delete) a keyspace from the database, returning {'ok': 1} if successful.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_drop_keyspace", "path": "astrapy.admin.DatabaseAdmin.async_drop_keyspace", "parameters": [{"name": "name", "type": "str"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.admin.DatabaseAdmin.get_database", "text": "Get a Database object from this database admin.", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.admin.DatabaseAdmin.get_database", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "Database"}], "gathered_types": ["astrapy.database.Database"]}} +{"id": "astrapy.admin.DatabaseAdmin.get_async_database", "text": "Get an AsyncDatabase object from this database admin.", "metadata": {"kind": "function", "name": "get_async_database", "path": "astrapy.admin.DatabaseAdmin.get_async_database", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "AsyncDatabase"}], "gathered_types": ["astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.admin.DatabaseAdmin.find_embedding_providers", "text": "Query the Data API for the available embedding providers.", "metadata": {"kind": "function", "name": 
"find_embedding_providers", "path": "astrapy.admin.DatabaseAdmin.find_embedding_providers", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "FindEmbeddingProvidersResult"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.admin.DatabaseAdmin.async_find_embedding_providers", "text": "Query the Data API for the available embedding providers.\n(Async version of the method.)", "metadata": {"kind": "function", "name": "async_find_embedding_providers", "path": "astrapy.admin.DatabaseAdmin.async_find_embedding_providers", "parameters": [{"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "FindEmbeddingProvidersResult"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin", "text": "An \"admin\" object, able to perform administrative tasks at the keyspaces level\n(i.e. within a certain database), such as creating/listing/dropping keyspaces.\n\nThis is one layer below the AstraDBAdmin concept, in that it is tied to\na single database and enables admin work within it. As such, it is generally\ncreated by a method call on an AstraDBAdmin.", "metadata": {"kind": "class", "name": "AstraDBDatabaseAdmin", "path": "astrapy.admin.AstraDBDatabaseAdmin", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the API Endpoint for the target database\n(e.g. 
`https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission to perform admin tasks.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a label, whose value is one of Environment.PROD (default),\nEnvironment.DEV or Environment.TEST.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API and DevOps API calls are performed.\nThese end up in the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API and\nDevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) is\ndetermined from the API Endpoint.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). 
Generally not needed.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nclass is created by a method such as `Database.get_database_admin()`,\nwhich passes the matching value. Generally to be left to its Astra DB\ndefault of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this class is created by a method such as\n`Database.get_database_admin()`, which passes the matching value.\nGenerally to be left to its Astra DB default of \"/v1\".", "value": "None", "default": "None"}, {"name": "spawner_database", "type": "Database | AsyncDatabase | None", "description": "either a Database or an AsyncDatabase instance. This represents\nthe database class which spawns this admin object, so that, if required,\na keyspace creation can retroactively \"use\" the new keyspace in the spawner.\nUsed to enable the Async/Database.get_admin_database().create_keyspace() pattern.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "bases": ["astrapy.admin.DatabaseAdmin"], "example": ">>> from astrapy import DataAPIClient\n>>> my_client = DataAPIClient(\"AstraCS:...\")\n>>> admin_for_my_db = my_client.get_admin().get_database_admin(\n... \"https://<ID>-<REGION>.apps.astra.datastax.com\"\n... )\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'staging_keyspace']\n>>> admin_for_my_db.info().status\n'ACTIVE'", "note": "creating an instance of AstraDBDatabaseAdmin does not trigger actual creation\nof the database itself, which should exist beforehand. 
To create databases,\nsee the AstraDBAdmin class.", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.constants.CallerType", "astrapy.admin.DatabaseAdmin", "astrapy.database.AsyncDatabase", "astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.admin.AstraDBDatabaseAdmin.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.admin.AstraDBDatabaseAdmin.environment", "value": "environment = environment or Environment.PROD.lower()", "gathered_types": ["astrapy.constants.Environment", "PROD"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.admin.AstraDBDatabaseAdmin.api_endpoint", "value": "api_endpoint = api_endpoint"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.admin.AstraDBDatabaseAdmin.callers", "value": "callers = callers_param"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.admin.AstraDBDatabaseAdmin.api_path", "value": "api_path = api_path if api_path is not None else API_PATH_ENV_MAP[self.environment]", "gathered_types": ["astrapy.defaults.API_PATH_ENV_MAP"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": "astrapy.admin.AstraDBDatabaseAdmin.api_version", "value": "api_version = api_version if api_version is not None else API_VERSION_ENV_MAP[self.environment]", "gathered_types": ["astrapy.defaults.API_VERSION_ENV_MAP"]}} +{"id": 
"astrapy.admin.AstraDBDatabaseAdmin.spawner_database", "text": "", "metadata": {"kind": "attribute", "name": "spawner_database", "path": "astrapy.admin.AstraDBDatabaseAdmin.spawner_database", "value": "spawner_database = spawner_database"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.dev_ops_url", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_url", "path": "astrapy.admin.AstraDBDatabaseAdmin.dev_ops_url", "value": "dev_ops_url = dev_ops_url if dev_ops_url is not None else DEV_OPS_URL_ENV_MAP[self.environment].rstrip('/')", "gathered_types": ["astrapy.defaults.DEV_OPS_URL_ENV_MAP"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.dev_ops_api_version", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_api_version", "path": "astrapy.admin.AstraDBDatabaseAdmin.dev_ops_api_version", "value": "dev_ops_api_version = dev_ops_api_version if dev_ops_api_version is not None else DEV_OPS_VERSION_ENV_MAP[self.environment].strip('/')", "gathered_types": ["astrapy.defaults.DEV_OPS_VERSION_ENV_MAP"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.with_options", "text": "Create a clone of this AstraDBDatabaseAdmin with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.admin.AstraDBDatabaseAdmin.with_options", "parameters": [{"name": "api_endpoint", "type": "str | None", "description": "the API Endpoint for the target database\n(e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "an Access Token to the database. 
Example: `\"AstraCS:xyz...\"`.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API and DevOps API calls are performed.\nThese end up in the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API and\nDevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "a new AstraDBDatabaseAdmin instance."}], "example": ">>> admin_for_my_other_db = admin_for_my_db.with_options(\n... \"https://<ID>-<REGION>.apps.astra.datastax.com\",\n... )", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType", "astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe DevOps API calls will be performed (the \"caller\").\n\nNew objects spawned from this client afterwards will inherit the new settings.", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.admin.AstraDBDatabaseAdmin.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe DevOps API calls are performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> admin_for_my_db.set_caller(\n... caller_name=\"the_caller\",\n... caller_version=\"0.1.0\",\n... )"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.id", "text": "The ID of this database admin.", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.admin.AstraDBDatabaseAdmin.id", "value": "id: str", "example": ">>> my_db_admin.id\n'01234567-89ab-cdef-0123-456789abcdef'"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.region", "text": "The region for this database admin.", "metadata": {"kind": "attribute", "name": "region", "path": "astrapy.admin.AstraDBDatabaseAdmin.region", "value": "region: str", "example": ">>> my_db_admin.region\n'us-east-1'"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.from_astra_db_admin", "text": "Create an AstraDBDatabaseAdmin from an AstraDBAdmin and an API Endpoint.", "metadata": {"kind": "function", "name": "from_astra_db_admin", "path": "astrapy.admin.AstraDBDatabaseAdmin.from_astra_db_admin", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the API Endpoint for the target database\n(e.g. 
`https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "default": null}, {"name": "astra_db_admin", "type": "AstraDBAdmin", "description": "an AstraDBAdmin object that has visibility over\nthe target database.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API\nHTTP request should it be necessary (see the `region` argument).", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "An AstraDBDatabaseAdmin object, for admin work within the database."}], "example": ">>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin\n>>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(\n... \"https://<ID>-<REGION>.apps.astra.datastax.com\",\n... astra_db_admin=DataAPIClient(\"AstraCS:...\").get_admin(),\n... )\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'staging_keyspace']\n>>> admin_for_my_db.info().status\n'ACTIVE'", "note": "Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation\nof the database itself, which should exist beforehand. To create databases,\nsee the AstraDBAdmin class.", "references": ["astrapy.admin.AstraDBDatabaseAdmin", "astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.admin.AstraDBAdmin", "astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.from_api_endpoint", "text": "Create an AstraDBDatabaseAdmin from an API Endpoint and optionally a token.", "metadata": {"kind": "function", "name": "from_api_endpoint", "path": "astrapy.admin.AstraDBDatabaseAdmin.from_api_endpoint", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the API Endpoint for the target database\n(e.g. 
`https://<ID>-<REGION>.apps.astra.datastax.com`).\nThe database must exist already for the resulting object\nto be effectively used; in other words, this invocation\ndoes not create the database, just the object instance.", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permissions to do admin work.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API and DevOps API calls are performed.\nThese end up in the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API and\nDevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}, {"name": "dev_ops_url", "type": "str | None", "description": "in case of custom deployments, this can be used to specify\nthe URL to the DevOps API, such as \"https://api.astra.datastax.com\".\nGenerally it can be omitted. The environment (prod/dev/...) is\ndetermined from the API Endpoint.", "value": "None", "default": "None"}, {"name": "dev_ops_api_version", "type": "str | None", "description": "this can specify a custom version of the DevOps API\n(such as \"v2\"). 
Generally not needed.", "value": "None", "default": "None"}], "returns": [{"type": "AstraDBDatabaseAdmin", "description": "An AstraDBDatabaseAdmin object, for admin work within the database."}], "example": ">>> from astrapy import AstraDBDatabaseAdmin\n>>> admin_for_my_db = AstraDBDatabaseAdmin.from_api_endpoint(\n... api_endpoint=\"https://01234567-....apps.astra.datastax.com\",\n... token=\"AstraCS:...\",\n... )\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'another_keyspace']\n>>> admin_for_my_db.info().status\n'ACTIVE'", "note": "Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation\nof the database itself, which should exist beforehand. To create databases,\nsee the AstraDBAdmin class.", "references": ["astrapy.admin.AstraDBDatabaseAdmin"], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType", "astrapy.admin.AstraDBDatabaseAdmin"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.info", "text": "Query the DevOps API for the full info on this database.", "metadata": {"kind": "function", "name": "info", "path": "astrapy.admin.AstraDBDatabaseAdmin.info", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "AdminDatabaseInfo", "description": "An AdminDatabaseInfo object."}], "example": ">>> my_db_info = admin_for_my_db.info()\n>>> my_db_info.status\n'ACTIVE'\n>>> my_db_info.info.region\n'us-east1'", "gathered_types": ["astrapy.info.AdminDatabaseInfo"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_info", "text": "Query the DevOps API for the full info on this database.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_info", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_info", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for 
the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "AdminDatabaseInfo", "description": "An AdminDatabaseInfo object."}], "example": ">>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:\n... while True:\n... info = await db_admin.async_info()\n... if info.status == \"ACTIVE\":\n... return\n...\n>>> asyncio.run(wait_until_active(admin_for_my_db))", "gathered_types": ["astrapy.info.AdminDatabaseInfo"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.list_namespaces", "text": "Query the DevOps API for a list of the namespaces in the database.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"list_keyspaces\" method.**", "metadata": {"kind": "function", "name": "list_namespaces", "path": "astrapy.admin.AstraDBDatabaseAdmin.list_namespaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the namespaces, each a string, in no particular order."}], "example": ">>> admin_for_my_db.list_namespaces()\n['default_keyspace', 'staging_namespace']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.list_keyspaces", "text": "Query the DevOps API for a list of the keyspaces in the database.", "metadata": {"kind": "function", "name": "list_keyspaces", "path": "astrapy.admin.AstraDBDatabaseAdmin.list_keyspaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the keyspaces, each a string, in no particular order."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'staging_keyspace']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_list_namespaces", "text": "Query the DevOps API for a list of the namespaces in the database.\nAsync version of the 
method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"keyspace\" property.**", "metadata": {"kind": "function", "name": "async_list_namespaces", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_list_namespaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the namespaces, each a string, in no particular order."}], "example": ">>> async def check_if_ns_exists(\n... db_admin: AstraDBDatabaseAdmin, namespace: str\n... ) -> bool:\n... ns_list = await db_admin.async_list_namespaces()\n... return namespace in ns_list\n...\n>>> asyncio.run(check_if_ns_exists(admin_for_my_db, \"dragons\"))\nFalse\n>>> asyncio.run(check_if_db_exists(admin_for_my_db, \"app_namespace\"))\nTrue"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_list_keyspaces", "text": "Query the DevOps API for a list of the keyspaces in the database.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_list_keyspaces", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_list_keyspaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the keyspaces, each a string, in no particular order."}], "example": ">>> async def check_if_ks_exists(\n... db_admin: AstraDBDatabaseAdmin, keyspace: str\n... ) -> bool:\n... ks_list = await db_admin.async_list_keyspaces()\n... 
return keyspace in ks_list\n...\n>>> asyncio.run(check_if_ks_exists(admin_for_my_db, \"dragons\"))\nFalse\n>>> asyncio.run(check_if_db_exists(admin_for_my_db, \"app_keyspace\"))\nTrue"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.create_namespace", "text": "Create a namespace in this database as requested,\noptionally waiting for it to be ready.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"keyspace\" property.**", "metadata": {"kind": "function", "name": "create_namespace", "path": "astrapy.admin.AstraDBDatabaseAdmin.create_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace name. If supplying a namespace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status/namespace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": 
"dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> my_db_admin.list_namespaces()\n['default_keyspace']\n>>> my_db_admin.create_namespace(\"that_other_one\")\n{'ok': 1}\n>>> my_db_admin.list_namespaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.create_keyspace", "text": "Create a keyspace in this database as requested,\noptionally waiting for it to be ready.", "metadata": {"kind": "function", "name": "create_keyspace", "path": "astrapy.admin.AstraDBDatabaseAdmin.create_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace name. If supplying a keyspace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). 
If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status/keyspace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> my_db_admin.keyspaces()\n['default_keyspace']\n>>> my_db_admin.create_keyspace(\"that_other_one\")\n{'ok': 1}\n>>> my_db_admin.list_keyspaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_create_namespace", "text": "Create a namespace in this database as requested,\noptionally waiting for it to be ready.\nAsync version of the method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"async_create_keyspace\" method.**", "metadata": {"kind": "function", "name": "async_create_namespace", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_create_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace name. 
If supplying a namespace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status/namespace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> asyncio.run(\n... my_db_admin.async_create_namespace(\"app_namespace\")\n... 
)\n{'ok': 1}"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_create_keyspace", "text": "Create a keyspace in this database as requested,\noptionally waiting for it to be ready.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_create_keyspace", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_create_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace name. If supplying a keyspace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ncreation request to the DevOps API, and it will be responsibility\nof the caller to check the database status/keyspace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], 
"example": ">>> asyncio.run(\n... my_db_admin.async_create_keyspace(\"app_keyspace\")\n... )\n{'ok': 1}"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.drop_namespace", "text": "Delete a namespace from the database, optionally waiting for the database\nto become active again.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"drop_keyspace\" method.**", "metadata": {"kind": "function", "name": "drop_namespace", "path": "astrapy.admin.AstraDBDatabaseAdmin.drop_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace to delete. If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ndeletion request to the DevOps API, and it will be responsibility\nof the caller to check the database status/namespace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> my_db_admin.list_namespaces()\n['default_keyspace', 'that_other_one']\n>>> my_db_admin.drop_namespace(\"that_other_one\")\n{'ok': 1}\n>>> my_db_admin.list_namespaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.drop_keyspace", "text": "Delete a keyspace from the database, optionally waiting for the database\nto become active again.", "metadata": {"kind": "function", "name": "drop_keyspace", "path": 
"astrapy.admin.AstraDBDatabaseAdmin.drop_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace to delete. If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ndeletion request to the DevOps API, and it will be responsibility\nof the caller to check the database status/keyspace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> my_db_admin.list_keyspaces()\n['default_keyspace', 'that_other_one']\n>>> my_db_admin.drop_keyspace(\"that_other_one\")\n{'ok': 1}\n>>> my_db_admin.list_keyspaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_drop_namespace", "text": "Delete a namespace from the database, optionally waiting for the database\nto become active again.\nAsync version of the method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"async_drop_namespace\" method.**", "metadata": {"kind": "function", "name": "async_drop_namespace", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_drop_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace to delete. 
If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). If False, it will return right after issuing the\ndeletion request to the DevOps API, and it will be responsibility\nof the caller to check the database status/namespace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> asyncio.run(\n... my_db_admin.async_drop_namespace(\"app_namespace\")\n... )\n{'ok': 1}"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_drop_keyspace", "text": "Delete a keyspace from the database, optionally waiting for the database\nto become active again.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_drop_keyspace", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_drop_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace to delete. If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe target database is in ACTIVE state again (a few\nseconds, usually). 
If False, it will return right after issuing the\ndeletion request to the DevOps API, and it will be responsibility\nof the caller to check the database status/keyspace availability\nbefore working with it.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> asyncio.run(\n... my_db_admin.async_drop_keyspace(\"app_keyspace\")\n... )\n{'ok': 1}"}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.drop", "text": "Drop this database, i.e. delete it completely and permanently with all its data.\n\nThis method wraps the `drop_database` method of the AstraDBAdmin class,\nwhere more information may be found.", "metadata": {"kind": "function", "name": "drop", "path": "astrapy.admin.AstraDBDatabaseAdmin.drop", "parameters": [{"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe database has actually been deleted (generally a few minutes).\nIf False, it will return right after issuing the\ndrop request to the DevOps API, and it will be responsibility\nof the caller to check the database status/availability\nafter that, if desired.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, 
{"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> my_db_admin.list_keyspaces()\n['default_keyspace', 'that_other_one']\n>>> my_db_admin.drop()\n{'ok': 1}\n>>> my_db_admin.list_keyspaces() # raises a 404 Not Found http error", "note": "Once the method succeeds, methods on this object -- such as `info()`,\nor `list_keyspaces()` -- can still be invoked: however, this hardly\nmakes sense as the underlying actual database is no more.\nIt is responsibility of the developer to design a correct flow\nwhich avoids using a deceased database any further."}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_drop", "text": "Drop this database, i.e. delete it completely and permanently with all its data.\nAsync version of the method, for use in an asyncio context.\n\nThis method wraps the `drop_database` method of the AstraDBAdmin class,\nwhere more information may be found.", "metadata": {"kind": "function", "name": "async_drop", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_drop", "parameters": [{"name": "wait_until_active", "type": "bool", "description": "if True (default), the method returns only after\nthe database has actually been deleted (generally a few minutes).\nIf False, it will return right after issuing the\ndrop request to the DevOps API, and it will be responsibility\nof the caller to check the database status/availability\nafter that, if desired.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> asyncio.run(my_db_admin.async_drop())\n{'ok': 
1}", "note": "Once the method succeeds, methods on this object -- such as `info()`,\nor `list_keyspaces()` -- can still be invoked: however, this hardly\nmakes sense as the underlying actual database is no more.\nIt is responsibility of the developer to design a correct flow\nwhich avoids using a deceased database any further."}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.get_database", "text": "Create a Database instance from this database admin, for data-related tasks.", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.admin.AstraDBDatabaseAdmin.get_database", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of\nthe one set for this object. Useful if one wants to work in\na least-privilege manner, limiting the permissions for non-admin work.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "an optional keyspace to set in the resulting Database.\nThe same default logic as for `AstraDBAdmin.get_database` applies.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "region", "type": "str | None", "description": "*This parameter is deprecated and should not be used.*\nIgnored in the method.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"/api/json\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. 
In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "A Database object, ready to be used for working with data and collections."}], "example": ">>> my_db = my_db_admin.get_database()\n>>> my_db.list_collection_names()\n['movies', 'another_collection']", "note": "creating an instance of Database does not trigger actual creation\nof the database itself, which should exist beforehand. To create databases,\nsee the AstraDBAdmin class.", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.get_async_database", "text": "Create an AsyncDatabase instance out of this class for working\nwith the data in it.\n\nThis method has identical behavior and signature as the sync\ncounterpart `get_database`: please see that one for more details.", "metadata": {"kind": "function", "name": "get_async_database", "path": "astrapy.admin.AstraDBDatabaseAdmin.get_async_database", "parameters": [{"name": "token", "default": "None", "type": "str | TokenProvider | None"}, {"name": "keyspace", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "region", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "max_time_ms", "default": "None", "type": "int | None"}], "returns": [{"type": "AsyncDatabase"}], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.find_embedding_providers", "text": "Example (output abridged and indented for clarity):\n >>> admin_for_my_db.find_embedding_providers()\n FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)\n >>> admin_for_my_db.find_embedding_providers().embedding_providers\n {\n 'openai': 
EmbeddingProvider(\n display_name='OpenAI',\n models=[\n EmbeddingProviderModel(name='text-embedding-3-small'),\n ...\n ]\n ),\n ...\n }", "metadata": {"kind": "function", "name": "find_embedding_providers", "path": "astrapy.admin.AstraDBDatabaseAdmin.find_embedding_providers", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "FindEmbeddingProvidersResult", "description": "A `FindEmbeddingProvidersResult` object with the complete information"}, {"type": "FindEmbeddingProvidersResult", "description": "returned by the API about available embedding providers"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.admin.AstraDBDatabaseAdmin.async_find_embedding_providers", "text": "Example (output abridged and indented for clarity):\n >>> admin_for_my_db.find_embedding_providers()\n FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)\n >>> admin_for_my_db.find_embedding_providers().embedding_providers\n {\n 'openai': EmbeddingProvider(\n display_name='OpenAI',\n models=[\n EmbeddingProviderModel(name='text-embedding-3-small'),\n ...\n ]\n ),\n ...\n }", "metadata": {"kind": "function", "name": "async_find_embedding_providers", "path": "astrapy.admin.AstraDBDatabaseAdmin.async_find_embedding_providers", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "FindEmbeddingProvidersResult", "description": "A `FindEmbeddingProvidersResult` object with the complete information"}, {"type": "FindEmbeddingProvidersResult", "description": "returned by the API about available embedding providers"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin", "text": "An \"admin\" object for non-Astra 
Data API environments, to perform administrative\ntasks at the keyspaces level such as creating/listing/dropping keyspaces.\n\nConforming to the architecture of non-Astra deployments of the Data API,\nthis object works within the one existing database. It is within that database\nthat the keyspace CRUD operations (and possibly other admin operations)\nare performed. Since non-Astra environment lack the concept of an overall\nadmin (such as the all-databases AstraDBAdmin class), a `DataAPIDatabaseAdmin`\nis generally created by invoking the `get_database_admin` method of the\ncorresponding `Database` object (which in turn is spawned by a DataAPIClient).", "metadata": {"kind": "class", "name": "DataAPIDatabaseAdmin", "path": "astrapy.admin.DataAPIDatabaseAdmin", "parameters": [{"name": "api_endpoint", "type": "str", "description": "the full URI to access the Data API,\ne.g. \"http://localhost:8181\".", "default": null}, {"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission to perform admin tasks.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "environment", "type": "str | None", "description": "a label, whose value is one of Environment.OTHER (default)\nor other non-Astra environment values in the `Environment` enum.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nclass is created by a method such as `Database.get_database_admin()`,\nwhich passes the matching value. Defaults to this portion of the path\nbeing absent.", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. 
In typical\nusage, this class is created by a method such as\n`Database.get_database_admin()`, which passes the matching value.\nDefaults to this portion of the path being absent.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API calls are performed. These end up in the\nrequest user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}, {"name": "spawner_database", "type": "Database | AsyncDatabase | None", "description": "either a Database or an AsyncDatabase instance.\nThis represents the database class which spawns this admin object, so that,\nif required, a keyspace creation can retroactively \"use\" the new keyspace\nin the spawner. 
Used to enable the\nAsync/Database.get_admin_database().create_keyspace() pattern.", "value": "None", "default": "None"}], "bases": ["astrapy.admin.DatabaseAdmin"], "example": ">>> from astrapy import DataAPIClient\n>>> from astrapy.constants import Environment\n>>> from astrapy.authentication import UsernamePasswordTokenProvider\n>>>\n>>> token_provider = UsernamePasswordTokenProvider(\"username\", \"password\")\n>>> endpoint = \"http://localhost:8181\"\n>>>\n>>> client = DataAPIClient(\n>>> token=token_provider,\n>>> environment=Environment.OTHER,\n>>> )\n>>> database = client.get_database(endpoint)\n>>> admin_for_my_db = database.get_database_admin()\n>>>\n>>> admin_for_my_db.list_keyspaces()\n['keyspace1', 'keyspace2']", "references": ["astrapy.constants.Environment", "astrapy.client.DataAPIClient", "astrapy.authentication.UsernamePasswordTokenProvider"], "gathered_types": ["astrapy.constants.CallerType", "astrapy.admin.DatabaseAdmin", "astrapy.database.AsyncDatabase", "astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.admin.DataAPIDatabaseAdmin.environment", "value": "environment = environment or Environment.OTHER.lower()", "gathered_types": ["astrapy.constants.Environment", "OTHER"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.token_provider", "text": "", "metadata": {"kind": "attribute", "name": "token_provider", "path": "astrapy.admin.DataAPIDatabaseAdmin.token_provider", "value": "token_provider = coerce_token_provider(token)"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.admin.DataAPIDatabaseAdmin.api_endpoint", "value": "api_endpoint = api_endpoint"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": 
"astrapy.admin.DataAPIDatabaseAdmin.callers", "value": "callers = callers_param"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.admin.DataAPIDatabaseAdmin.api_path", "value": "api_path = api_path if api_path is not None else ''"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": "astrapy.admin.DataAPIDatabaseAdmin.api_version", "value": "api_version = api_version if api_version is not None else ''"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.spawner_database", "text": "", "metadata": {"kind": "attribute", "name": "spawner_database", "path": "astrapy.admin.DataAPIDatabaseAdmin.spawner_database", "value": "spawner_database = spawner_database"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.with_options", "text": "Create a clone of this DataAPIDatabaseAdmin with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.admin.DataAPIDatabaseAdmin.with_options", "parameters": [{"name": "api_endpoint", "type": "str | None", "description": "the full URI to access the Data API,\ne.g. \"http://localhost:8181\".", "value": "None", "default": "None"}, {"name": "token", "type": "str | TokenProvider | None", "description": "an access token with enough permission to perform admin tasks.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which Data API calls are performed. These end up in the\nrequest user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. 
Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "DataAPIDatabaseAdmin", "description": "a new DataAPIDatabaseAdmin instance."}], "example": ">>> admin_for_my_other_db = admin_for_my_db.with_options(\n... api_endpoint=\"http://10.1.1.5:8181\",\n... )", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.constants.CallerType", "astrapy.admin.DataAPIDatabaseAdmin"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe DevOps API calls will be performed (the \"caller\").\n\nNew objects spawned from this client afterwards will inherit the new settings.", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.admin.DataAPIDatabaseAdmin.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe DevOps API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> admin_for_my_db.set_caller(\n... caller_name=\"the_caller\",\n... caller_version=\"0.1.0\",\n... )"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.list_namespaces", "text": "Query the API for a list of the namespaces in the database.\n\n*DEPRECATED* (removal in 2.0). 
Switch to the \"list_keyspaces\" method.**", "metadata": {"kind": "function", "name": "list_namespaces", "path": "astrapy.admin.DataAPIDatabaseAdmin.list_namespaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the namespaces, each a string, in no particular order."}], "example": ">>> admin_for_my_db.list_namespaces()\n['default_keyspace', 'staging_namespace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.list_keyspaces", "text": "Query the API for a list of the keyspaces in the database.", "metadata": {"kind": "function", "name": "list_keyspaces", "path": "astrapy.admin.DataAPIDatabaseAdmin.list_keyspaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the keyspaces, each a string, in no particular order."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'staging_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.create_namespace", "text": "Create a namespace in the database, returning {'ok': 1} if successful.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"create_keyspace\" method.**", "metadata": {"kind": "function", "name": "create_namespace", "path": "astrapy.admin.DataAPIDatabaseAdmin.create_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace name. If supplying a namespace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "replication_options", "type": "dict[str, Any] | None", "description": "this dictionary can specify the options about\nreplication of the namespace (across database nodes). 
If provided,\nit must have a structure similar to:\n`{\"class\": \"SimpleStrategy\", \"replication_factor\": 1}`.", "value": "None", "default": "None"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_namespaces()\n['default_keyspace']\n>>> admin_for_my_db.create_namespace(\"that_other_one\")\n{'ok': 1}\n>>> admin_for_my_db.list_namespaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.create_keyspace", "text": "Create a keyspace in the database, returning {'ok': 1} if successful.", "metadata": {"kind": "function", "name": "create_keyspace", "path": "astrapy.admin.DataAPIDatabaseAdmin.create_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace name. 
If supplying a keyspace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "replication_options", "type": "dict[str, Any] | None", "description": "this dictionary can specify the options about\nreplication of the keyspace (across database nodes). If provided,\nit must have a structure similar to:\n`{\"class\": \"SimpleStrategy\", \"replication_factor\": 1}`.", "value": "None", "default": "None"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['default_keyspace']\n>>> admin_for_my_db.create_keyspace(\"that_other_one\")\n{'ok': 1}\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.drop_namespace", "text": "Drop (delete) a namespace from the database.\n\n*DEPRECATED* (removal in 2.0). 
Switch to the \"drop_keyspace\" method.**", "metadata": {"kind": "function", "name": "drop_namespace", "path": "astrapy.admin.DataAPIDatabaseAdmin.drop_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace to delete. If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_namespaces()\n['default_keyspace', 'that_other_one']\n>>> admin_for_my_db.drop_namespace(\"that_other_one\")\n{'ok': 1}\n>>> admin_for_my_db.list_namespaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.drop_keyspace", "text": "Drop (delete) a keyspace from the database.", "metadata": {"kind": "function", "name": "drop_keyspace", "path": "astrapy.admin.DataAPIDatabaseAdmin.drop_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace to delete. 
If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'that_other_one']\n>>> admin_for_my_db.drop_keyspace(\"that_other_one\")\n{'ok': 1}\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_list_namespaces", "text": "Query the API for a list of the namespaces in the database.\nAsync version of the method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"async_list_keyspaces\" method.**", "metadata": {"kind": "function", "name": "async_list_namespaces", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_list_namespaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the namespaces, each a string, in no particular order."}], "example": ">>> asyncio.run(admin_for_my_db.async_list_namespaces())\n['default_keyspace', 'staging_namespace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_list_keyspaces", "text": "Query the API for a list of the keyspaces in the database.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_list_keyspaces", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_list_keyspaces", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": 
"a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "list[str]", "description": "A list of the keyspaces, each a string, in no particular order."}], "example": ">>> asyncio.run(admin_for_my_db.async_list_keyspaces())\n['default_keyspace', 'staging_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_create_namespace", "text": "Create a namespace in the database, returning {'ok': 1} if successful.\nAsync version of the method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"async_create_keyspace\" method.**", "metadata": {"kind": "function", "name": "async_create_namespace", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_create_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace name. If supplying a namespace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "replication_options", "type": "dict[str, Any] | None", "description": "this dictionary can specify the options about\nreplication of the namespace (across database nodes). 
If provided,\nit must have a structure similar to:\n`{\"class\": \"SimpleStrategy\", \"replication_factor\": 1}`.", "value": "None", "default": "None"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_namespaces()\n['default_keyspace']\n>>> asyncio.run(admin_for_my_db.async_create_namespace(\n... \"that_other_one\"\n... ))\n{'ok': 1}\n>>> admin_for_my_db.list_namespaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_create_keyspace", "text": "Create a keyspace in the database, returning {'ok': 1} if successful.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_create_keyspace", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_create_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace name. 
If supplying a keyspace that exists\nalready, the method call proceeds as usual, no errors are\nraised, and the whole invocation is a no-op.", "default": null}, {"name": "replication_options", "type": "dict[str, Any] | None", "description": "this dictionary can specify the options about\nreplication of the keyspace (across database nodes). If provided,\nit must have a structure similar to:\n`{\"class\": \"SimpleStrategy\", \"replication_factor\": 1}`.", "value": "None", "default": "None"}, {"name": "update_db_keyspace", "type": "bool | None", "description": "if True, the `Database` or `AsyncDatabase` class\nthat spawned this DatabaseAdmin, if any, gets updated to work on\nthe newly-created keyspace starting when this method returns.", "value": "None", "default": "None"}, {"name": "update_db_namespace", "type": "bool | None", "description": "an alias for update_db_keyspace.\n*DEPRECATED* as of v1.5.0, removal in v2.0.0.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the creation request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['default_keyspace']\n>>> asyncio.run(admin_for_my_db.async_create_keyspace(\n... \"that_other_one\"\n... ))\n{'ok': 1}\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace', 'that_other_one']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_drop_namespace", "text": "Drop (delete) a namespace from the database.\nAsync version of the method, for use in an asyncio context.\n\n*DEPRECATED* (removal in 2.0). 
Switch to the \"async_drop_keyspace\" method.**", "metadata": {"kind": "function", "name": "async_drop_namespace", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_drop_namespace", "parameters": [{"name": "name", "type": "str", "description": "the namespace to delete. If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_namespaces()\n['that_other_one', 'default_keyspace']\n>>> asyncio.run(admin_for_my_db.async_drop_namespace(\n... \"that_other_one\"\n... ))\n{'ok': 1}\n>>> admin_for_my_db.list_namespaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_drop_keyspace", "text": "Drop (delete) a keyspace from the database.\nAsync version of the method, for use in an asyncio context.", "metadata": {"kind": "function", "name": "async_drop_keyspace", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_drop_keyspace", "parameters": [{"name": "name", "type": "str", "description": "the keyspace to delete. 
If it does not exist in this database,\nan error is raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole requested\noperation to complete.\nNote that a timeout is no guarantee that the deletion request\nhas not reached the API server.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "A dictionary of the form {\"ok\": 1} in case of success."}, {"type": "dict[str, Any]", "description": "Otherwise, an exception is raised."}], "example": ">>> admin_for_my_db.list_keyspaces()\n['that_other_one', 'default_keyspace']\n>>> asyncio.run(admin_for_my_db.async_drop_keyspace(\n... \"that_other_one\"\n... ))\n{'ok': 1}\n>>> admin_for_my_db.list_keyspaces()\n['default_keyspace']"}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.get_database", "text": "Create a Database instance out of this class for working with the data in it.", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.admin.DataAPIDatabaseAdmin.get_database", "parameters": [{"name": "token", "type": "str | TokenProvider | None", "description": "if supplied, is passed to the Database instead of\nthe one set for this object. Useful if one wants to work in\na least-privilege manner, limiting the permissions for non-admin work.\nThis can be either a literal token string or a subclass of\n`astrapy.authentication.TokenProvider`.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "an optional keyspace to set in the resulting Database.\nIf not provided, no keyspace is set, limiting what the Database\ncan do until setting it with e.g. a `use_keyspace` method call.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "api_path", "type": "str | None", "description": "path to append to the API Endpoint. In typical usage, this\nshould be left to its default of \"\".", "value": "None", "default": "None"}, {"name": "api_version", "type": "str | None", "description": "version specifier to append to the API path. In typical\nusage, this should be left to its default of \"v1\".", "value": "None", "default": "None"}], "returns": [{"type": "Database", "description": "A Database object, ready to be used for working with data and collections."}], "example": ">>> my_db = admin_for_my_db.get_database()\n>>> my_db.list_collection_names()\n['movies', 'another_collection']", "note": "creating an instance of Database does not trigger actual creation\nof the database itself, which should exist beforehand.", "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.Database"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.get_async_database", "text": "Create an AsyncDatabase instance for the database, to be used\nwhen doing data-level work (such as creating/managing collections).\n\nThis method has identical behavior and signature as the sync\ncounterpart `get_database`: please see that one for more details.", "metadata": {"kind": "function", "name": "get_async_database", "path": "astrapy.admin.DataAPIDatabaseAdmin.get_async_database", "parameters": [{"name": "token", "default": "None", "type": "str | TokenProvider | None"}, {"name": "keyspace", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}], "returns": [{"type": "AsyncDatabase"}], "gathered_types": ["astrapy.authentication.TokenProvider", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.find_embedding_providers", "text": "Example (output 
abridged and indented for clarity):\n >>> admin_for_my_db.find_embedding_providers()\n FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)\n >>> admin_for_my_db.find_embedding_providers().embedding_providers\n {\n 'openai': EmbeddingProvider(\n display_name='OpenAI',\n models=[\n EmbeddingProviderModel(name='text-embedding-3-small'),\n ...\n ]\n ),\n ...\n }", "metadata": {"kind": "function", "name": "find_embedding_providers", "path": "astrapy.admin.DataAPIDatabaseAdmin.find_embedding_providers", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "FindEmbeddingProvidersResult", "description": "A `FindEmbeddingProvidersResult` object with the complete information"}, {"type": "FindEmbeddingProvidersResult", "description": "returned by the API about available embedding providers"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.admin.DataAPIDatabaseAdmin.async_find_embedding_providers", "text": "Example (output abridged and indented for clarity):\n >>> admin_for_my_db.find_embedding_providers()\n FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)\n >>> admin_for_my_db.find_embedding_providers().embedding_providers\n {\n 'openai': EmbeddingProvider(\n display_name='OpenAI',\n models=[\n EmbeddingProviderModel(name='text-embedding-3-small'),\n ...\n ]\n ),\n ...\n }", "metadata": {"kind": "function", "name": "async_find_embedding_providers", "path": "astrapy.admin.DataAPIDatabaseAdmin.async_find_embedding_providers", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the DevOps API request.", "value": "None", "default": "None"}], "returns": [{"type": "FindEmbeddingProvidersResult", "description": "A `FindEmbeddingProvidersResult` object with the complete information"}, {"type": 
"FindEmbeddingProvidersResult", "description": "returned by the API about available embedding providers"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.transform_payload", "text": "", "metadata": {"kind": "module", "name": "transform_payload", "path": "astrapy.transform_payload", "imports": {"annotations": "__future__.annotations", "datetime": "datetime", "time": "time", "Any": "typing.Any", "Dict": "typing.Dict", "Iterable": "typing.Iterable", "cast": "typing.cast", "DocumentType": "astrapy.constants.DocumentType", "UUID": "astrapy.ids.UUID", "ObjectId": "astrapy.ids.ObjectId"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.transform_payload.convert_vector_to_floats", "text": "Convert a vector of strings to a vector of floats.", "metadata": {"kind": "function", "name": "convert_vector_to_floats", "path": "astrapy.transform_payload.convert_vector_to_floats", "parameters": [{"name": "vector", "type": "list", "description": "A vector of objects.", "default": null}], "returns": [{"name": "list", "type": "list[float]", "description": "A vector of floats."}]}} +{"id": "astrapy.transform_payload.is_list_of_floats", "text": "Safely determine if it's a list of floats.\nAssumption: if list, and first item is float, then all items are.", "metadata": {"kind": "function", "name": "is_list_of_floats", "path": "astrapy.transform_payload.is_list_of_floats", "parameters": [{"name": "vector", "type": "Iterable[Any]"}], "returns": [{"type": "bool"}]}} +{"id": "astrapy.transform_payload.convert_to_ejson_date_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_date_object", "path": "astrapy.transform_payload.convert_to_ejson_date_object", "parameters": [{"name": "date_value", "type": "datetime.date | datetime.datetime"}], "returns": [{"type": "dict[str, int]"}]}} +{"id": 
"astrapy.transform_payload.convert_to_ejson_uuid_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_uuid_object", "path": "astrapy.transform_payload.convert_to_ejson_uuid_object", "parameters": [{"name": "uuid_value", "type": "UUID"}], "returns": [{"type": "dict[str, str]"}], "gathered_types": ["UUID"]}} +{"id": "astrapy.transform_payload.convert_to_ejson_objectid_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_objectid_object", "path": "astrapy.transform_payload.convert_to_ejson_objectid_object", "parameters": [{"name": "objectid_value", "type": "ObjectId"}], "returns": [{"type": "dict[str, str]"}], "gathered_types": ["ObjectId"]}} +{"id": "astrapy.transform_payload.convert_ejson_date_object_to_datetime", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_date_object_to_datetime", "path": "astrapy.transform_payload.convert_ejson_date_object_to_datetime", "parameters": [{"name": "date_object", "type": "dict[str, int]"}], "returns": [{"type": "datetime.datetime"}]}} +{"id": "astrapy.transform_payload.convert_ejson_uuid_object_to_uuid", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_uuid_object_to_uuid", "path": "astrapy.transform_payload.convert_ejson_uuid_object_to_uuid", "parameters": [{"name": "uuid_object", "type": "dict[str, str]"}], "returns": [{"type": "UUID"}], "gathered_types": ["UUID"]}} +{"id": "astrapy.transform_payload.convert_ejson_objectid_object_to_objectid", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_objectid_object_to_objectid", "path": "astrapy.transform_payload.convert_ejson_objectid_object_to_objectid", "parameters": [{"name": "objectid_object", "type": "dict[str, str]"}], "returns": [{"type": "ObjectId"}], "gathered_types": ["ObjectId"]}} +{"id": "astrapy.transform_payload.normalize_payload_value", "text": "The path helps determining special treatments", "metadata": {"kind": "function", "name": "normalize_payload_value", 
"path": "astrapy.transform_payload.normalize_payload_value", "parameters": [{"name": "path", "type": "list[str]"}, {"name": "value", "type": "Any"}], "returns": [{"type": "Any"}]}} +{"id": "astrapy.transform_payload.normalize_for_api", "text": "Normalize a payload for API calls.\nThis includes e.g. ensuring values for \"$vector\" key\nare made into plain lists of floats.", "metadata": {"kind": "function", "name": "normalize_for_api", "path": "astrapy.transform_payload.normalize_for_api", "parameters": [{"name": "payload", "type": "Dict[str, Any]", "description": "A dict expressing a payload for an API call", "default": null}], "returns": [{"type": "dict[str, Any] | None", "description": "Dict[str, Any]: a \"normalized\" payload dict"}]}} +{"id": "astrapy.transform_payload.restore_response_value", "text": "The path helps determining special treatments", "metadata": {"kind": "function", "name": "restore_response_value", "path": "astrapy.transform_payload.restore_response_value", "parameters": [{"name": "path", "type": "list[str]"}, {"name": "value", "type": "Any"}], "returns": [{"type": "Any"}]}} +{"id": "astrapy.transform_payload.restore_from_api", "text": "Process a dictionary just returned from the API.\nThis is the place where e.g. 
`{\"$date\": 123}` is\nconverted back into a datetime object.", "metadata": {"kind": "function", "name": "restore_from_api", "path": "astrapy.transform_payload.restore_from_api", "parameters": [{"name": "response", "type": "DocumentType"}], "returns": [{"type": "DocumentType"}], "gathered_types": ["astrapy.constants.DocumentType"]}} +{"id": "astrapy.authentication", "text": "", "metadata": {"kind": "module", "name": "authentication", "path": "astrapy.authentication", "imports": {"annotations": "__future__.annotations", "base64": "base64", "ABC": "abc.ABC", "abstractmethod": "abc.abstractmethod", "Any": "typing.Any", "EMBEDDING_HEADER_API_KEY": "astrapy.defaults.EMBEDDING_HEADER_API_KEY", "EMBEDDING_HEADER_AWS_ACCESS_ID": "astrapy.defaults.EMBEDDING_HEADER_AWS_ACCESS_ID", "EMBEDDING_HEADER_AWS_SECRET_ID": "astrapy.defaults.EMBEDDING_HEADER_AWS_SECRET_ID", "SECRETS_REDACT_CHAR": "astrapy.defaults.SECRETS_REDACT_CHAR", "SECRETS_REDACT_ENDING": "astrapy.defaults.SECRETS_REDACT_ENDING", "SECRETS_REDACT_ENDING_LENGTH": "astrapy.defaults.SECRETS_REDACT_ENDING_LENGTH"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.authentication.coerce_token_provider", "text": "", "metadata": {"kind": "function", "name": "coerce_token_provider", "path": "astrapy.authentication.coerce_token_provider", "parameters": [{"name": "token", "type": "str | TokenProvider | None"}], "returns": [{"type": "TokenProvider"}], "gathered_types": ["astrapy.authentication.TokenProvider"]}} +{"id": "astrapy.authentication.coerce_embedding_headers_provider", "text": "", "metadata": {"kind": "function", "name": "coerce_embedding_headers_provider", "path": "astrapy.authentication.coerce_embedding_headers_provider", "parameters": [{"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None"}], "returns": [{"type": "EmbeddingHeadersProvider"}], "gathered_types": 
["astrapy.authentication.EmbeddingHeadersProvider"]}} +{"id": "astrapy.authentication.redact_secret", "text": "Return a shortened-if-necessary version of a 'secret' string (with ellipsis).", "metadata": {"kind": "function", "name": "redact_secret", "path": "astrapy.authentication.redact_secret", "parameters": [{"name": "secret", "type": "str", "description": "a secret string to redact", "default": null}, {"name": "max_length", "type": "int", "description": "if the secret and the fixed ending exceed this size,\nshortening takes place.", "default": null}, {"name": "hide_if_short", "type": "bool", "description": "this controls what to do when the input secret is\nshorter, i.e. when no shortening takes place.\nif False, the secret is returned as-is;\nIf True, a masked string is returned of the same length as secret.", "value": "True", "default": "True"}], "returns": [{"type": "str", "description": "a 'redacted' form of the secret string as per the rules outlined above."}]}} +{"id": "astrapy.authentication.TokenProvider", "text": "Abstract base class for a token provider.\nThe relevant method in this interface is returning a string to use as token.\n\nThe __str__ / __repr__ methods are NOT to be used as source of tokens:\nuse get_token instead.\n\nNote that equality (__eq__) checks if the generated tokens match\nunder all circumstances (e.g. 
a literal passthrough matches a\ndifferent-encoding token provider that yields the same token).\nIf a token provider comes that encodes a recipe for nondeterministic\nperiodic renewal, its __eq__ method will have to override the one in this class.", "metadata": {"kind": "class", "name": "TokenProvider", "path": "astrapy.authentication.TokenProvider", "bases": ["ABC"], "gathered_types": ["ABC"], "implemented_by": ["astrapy.authentication.UsernamePasswordTokenProvider", "astrapy.authentication.StaticTokenProvider"]}} +{"id": "astrapy.authentication.TokenProvider.get_token", "text": "Produce a string for direct use as token in a subsequent API request,\nor None for no token.", "metadata": {"kind": "function", "name": "get_token", "path": "astrapy.authentication.TokenProvider.get_token", "returns": [{"type": "str | None"}]}} +{"id": "astrapy.authentication.StaticTokenProvider", "text": "A \"pass-through\" provider that wraps a supplied literal token.", "metadata": {"kind": "class", "name": "StaticTokenProvider", "path": "astrapy.authentication.StaticTokenProvider", "parameters": [{"name": "token", "type": "str | None", "description": "an access token for subsequent use in the client.", "default": null}], "bases": ["astrapy.authentication.TokenProvider"], "example": ">>> from astrapy import DataAPIClient\n>>> from astrapy.authentication import StaticTokenProvider\n>>> token_provider = StaticTokenProvider(\"AstraCS:xyz...\")\n>>> database = DataAPIClient().get_database(\n... \"https://01234567-...-eu-west1.apps.datastax.com\",\n... token=token_provider,\n... 
)", "references": ["astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider"]}} +{"id": "astrapy.authentication.StaticTokenProvider.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.authentication.StaticTokenProvider.token", "value": "token = token"}} +{"id": "astrapy.authentication.StaticTokenProvider.get_token", "text": "", "metadata": {"kind": "function", "name": "get_token", "path": "astrapy.authentication.StaticTokenProvider.get_token", "returns": [{"type": "str | None"}]}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider", "text": "A token provider encoding username/password-based authentication,\nas used e.g. for DSE and HCD. These are base64-encoded and concatenated\nby colons, with a prepended suffix 'Cassandra', as required by\nthis authentication scheme.", "metadata": {"kind": "class", "name": "UsernamePasswordTokenProvider", "path": "astrapy.authentication.UsernamePasswordTokenProvider", "parameters": [{"name": "username", "type": "str", "description": "the username for accessing the database.", "default": null}, {"name": "password", "type": "str", "description": "the corresponding password.", "default": null}], "bases": ["astrapy.authentication.TokenProvider"], "example": ">>> from astrapy import DataAPIClient\n>>> from astrapy.constants imort Environment\n>>> from astrapy.authentication import UsernamePasswordTokenProvider\n>>> token_provider = UsernamePasswordTokenProvider(\"username\", \"password\")\n>>> database = DataAPIClient(environment=Environment.HCD).get_database(\n... \"http://localhost:8181\",\n... token=token_provider,\n... 
)", "references": ["astrapy.constants.Environment", "astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.TokenProvider"]}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider.PREFIX", "text": "", "metadata": {"kind": "attribute", "name": "PREFIX", "path": "astrapy.authentication.UsernamePasswordTokenProvider.PREFIX", "value": "PREFIX = 'Cassandra'"}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider.username", "text": "", "metadata": {"kind": "attribute", "name": "username", "path": "astrapy.authentication.UsernamePasswordTokenProvider.username", "value": "username = username"}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider.password", "text": "", "metadata": {"kind": "attribute", "name": "password", "path": "astrapy.authentication.UsernamePasswordTokenProvider.password", "value": "password = password"}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.authentication.UsernamePasswordTokenProvider.token", "value": "token = f'{self.PREFIX}:{self._b64(self.username)}:{self._b64(self.password)}'", "gathered_types": ["_b64", "PREFIX"]}} +{"id": "astrapy.authentication.UsernamePasswordTokenProvider.get_token", "text": "", "metadata": {"kind": "function", "name": "get_token", "path": "astrapy.authentication.UsernamePasswordTokenProvider.get_token", "returns": [{"type": "str"}]}} +{"id": "astrapy.authentication.EmbeddingHeadersProvider", "text": "Abstract base class for a provider of embedding-related headers (such as API Keys).\nThe relevant method in this interface is returning a dict to use as\n(part of the) headers in Data API requests for a collection.\n\nThis class captures the fact that, depending on the embedding provider for\nthe collection, there may be zero, one *or more* headers to be passed\nif relying on the HEADERS auth method for Vectorize.", "metadata": {"kind": "class", "name": 
"EmbeddingHeadersProvider", "path": "astrapy.authentication.EmbeddingHeadersProvider", "bases": ["ABC"], "gathered_types": ["ABC"], "implemented_by": ["astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "astrapy.authentication.AWSEmbeddingHeadersProvider"]}} +{"id": "astrapy.authentication.EmbeddingHeadersProvider.get_headers", "text": "Produce a dictionary for use as (part of) the headers in HTTP requests\nto the Data API.", "metadata": {"kind": "function", "name": "get_headers", "path": "astrapy.authentication.EmbeddingHeadersProvider.get_headers", "returns": [{"type": "dict[str, str]"}]}} +{"id": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "text": "A \"pass-through\" header provider representing the single-header\n(typically \"X-Embedding-Api-Key\") auth scheme, in use by most of the\nembedding models in Vectorize.", "metadata": {"kind": "class", "name": "EmbeddingAPIKeyHeaderProvider", "path": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider", "parameters": [{"name": "embedding_api_key", "type": "str | None", "description": "a string that will be the value for the header.\nIf None is passed, this results in a no-headers provider (such\nas the one used for non-Vectorize collections).", "default": null}], "bases": ["astrapy.authentication.EmbeddingHeadersProvider"], "example": ">>> from astrapy import DataAPIClient\n>>> from astrapy.authentication import (\n CollectionVectorServiceOptions,\n EmbeddingAPIKeyHeaderProvider,\n)\n>>> my_emb_api_key = EmbeddingAPIKeyHeaderProvider(\"abc012...\")\n>>> service_options = CollectionVectorServiceOptions(\n... provider=\"a-certain-provider\",\n... model_name=\"some-embedding-model\",\n... )\n>>>\n>>> database = DataAPIClient().get_database(\n... \"https://01234567-...-eu-west1.apps.datastax.com\",\n... token=\"AstraCS:...\",\n... )\n>>> collection = database.create_collection(\n... \"vectorize_collection\",\n... service=service_options,\n... embedding_api_key=my_emb_api_key,\n... 
)\n>>> # likewise:\n>>> collection_b = database.get_collection(\n... \"vectorize_collection\",\n... embedding_api_key=my_emb_api_key,\n... )", "references": ["astrapy.authentication.CollectionVectorServiceOptions", "astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider"]}} +{"id": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider.embedding_api_key", "text": "", "metadata": {"kind": "attribute", "name": "embedding_api_key", "path": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider.embedding_api_key", "value": "embedding_api_key = embedding_api_key"}} +{"id": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider.get_headers", "text": "", "metadata": {"kind": "function", "name": "get_headers", "path": "astrapy.authentication.EmbeddingAPIKeyHeaderProvider.get_headers", "returns": [{"type": "dict[str, str]"}]}} +{"id": "astrapy.authentication.AWSEmbeddingHeadersProvider", "text": "A header provider representing the two-header auth scheme in use\nby the Amazon Web Services (e.g. AWS Bedrock) when using header-based\nauthentication.", "metadata": {"kind": "class", "name": "AWSEmbeddingHeadersProvider", "path": "astrapy.authentication.AWSEmbeddingHeadersProvider", "parameters": [{"name": "embedding_access_id", "type": "str", "description": "value of the \"Access ID\" secret. This will become\nthe value for the corresponding header.", "default": null}, {"name": "embedding_secret_id", "type": "str", "description": "value of the \"Secret ID\" secret. 
This will become\nthe value for the corresponding header.", "default": null}], "bases": ["astrapy.authentication.EmbeddingHeadersProvider"], "example": ">>> from astrapy import DataAPIClient\n>>> from astrapy.authentication import (\n CollectionVectorServiceOptions,\n AWSEmbeddingHeadersProvider,\n)\n>>> my_aws_emb_api_key = AWSEmbeddingHeadersProvider(\n embedding_access_id=\"my-access-id-012...\",\n embedding_secret_id=\"my-secret-id-abc...\",\n)\n>>> service_options = CollectionVectorServiceOptions(\n... provider=\"bedrock\",\n... model_name=\"some-aws-bedrock-model\",\n... )\n>>>\n>>> database = DataAPIClient().get_database(\n... \"https://01234567-...-eu-west1.apps.datastax.com\",\n... token=\"AstraCS:...\",\n... )\n>>> collection = database.create_collection(\n... \"vectorize_aws_collection\",\n... service=service_options,\n... embedding_api_key=my_aws_emb_api_key,\n... )\n>>> # likewise:\n>>> collection_b = database.get_collection(\n... \"vectorize_aws_collection\",\n... embedding_api_key=my_aws_emb_api_key,\n... 
)", "references": ["astrapy.authentication.CollectionVectorServiceOptions", "astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider"]}} +{"id": "astrapy.authentication.AWSEmbeddingHeadersProvider.embedding_access_id", "text": "", "metadata": {"kind": "attribute", "name": "embedding_access_id", "path": "astrapy.authentication.AWSEmbeddingHeadersProvider.embedding_access_id", "value": "embedding_access_id = embedding_access_id"}} +{"id": "astrapy.authentication.AWSEmbeddingHeadersProvider.embedding_secret_id", "text": "", "metadata": {"kind": "attribute", "name": "embedding_secret_id", "path": "astrapy.authentication.AWSEmbeddingHeadersProvider.embedding_secret_id", "value": "embedding_secret_id = embedding_secret_id"}} +{"id": "astrapy.authentication.AWSEmbeddingHeadersProvider.get_headers", "text": "", "metadata": {"kind": "function", "name": "get_headers", "path": "astrapy.authentication.AWSEmbeddingHeadersProvider.get_headers", "returns": [{"type": "dict[str, str]"}]}} +{"id": "astrapy.collection", "text": "", "metadata": {"kind": "module", "name": "collection", "path": "astrapy.collection", "imports": {"annotations": "__future__.annotations", "asyncio": "asyncio", "json": "json", "logging": "logging", "warnings": "warnings", "ThreadPoolExecutor": "concurrent.futures.ThreadPoolExecutor", "TracebackType": "types.TracebackType", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Iterable": "typing.Iterable", "Sequence": "typing.Sequence", "deprecation": "deprecation", "__version__": "astrapy.__version__", "APICommander": "astrapy.api_commander.APICommander", "CollectionAPIOptions": "astrapy.api_options.CollectionAPIOptions", "coerce_embedding_headers_provider": "astrapy.authentication.coerce_embedding_headers_provider", "CallerType": "astrapy.constants.CallerType", "DocumentType": "astrapy.constants.DocumentType", "FilterType": "astrapy.constants.FilterType", "ProjectionType": 
"astrapy.constants.ProjectionType", "ReturnDocument": "astrapy.constants.ReturnDocument", "SortType": "astrapy.constants.SortType", "VectorType": "astrapy.constants.VectorType", "normalize_optional_projection": "astrapy.constants.normalize_optional_projection", "AsyncCursor": "astrapy.cursors.AsyncCursor", "Cursor": "astrapy.cursors.Cursor", "AsyncDatabase": "astrapy.database.AsyncDatabase", "Database": "astrapy.database.Database", "DEFAULT_BULK_WRITE_CONCURRENCY": "astrapy.defaults.DEFAULT_BULK_WRITE_CONCURRENCY", "DEFAULT_DATA_API_AUTH_HEADER": "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", "DEFAULT_INSERT_MANY_CHUNK_SIZE": "astrapy.defaults.DEFAULT_INSERT_MANY_CHUNK_SIZE", "DEFAULT_INSERT_MANY_CONCURRENCY": "astrapy.defaults.DEFAULT_INSERT_MANY_CONCURRENCY", "NAMESPACE_DEPRECATION_NOTICE_METHOD": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_METHOD", "SET_CALLER_DEPRECATION_NOTICE": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "BulkWriteException": "astrapy.exceptions.BulkWriteException", "CollectionNotFoundException": "astrapy.exceptions.CollectionNotFoundException", "CumulativeOperationException": "astrapy.exceptions.CumulativeOperationException", "DataAPIFaultyResponseException": "astrapy.exceptions.DataAPIFaultyResponseException", "DataAPIResponseException": "astrapy.exceptions.DataAPIResponseException", "DeleteManyException": "astrapy.exceptions.DeleteManyException", "InsertManyException": "astrapy.exceptions.InsertManyException", "MultiCallTimeoutManager": "astrapy.exceptions.MultiCallTimeoutManager", "TooManyDocumentsToCountException": "astrapy.exceptions.TooManyDocumentsToCountException", "UpdateManyException": "astrapy.exceptions.UpdateManyException", "base_timeout_info": "astrapy.exceptions.base_timeout_info", "CollectionInfo": "astrapy.info.CollectionInfo", "CollectionOptions": "astrapy.info.CollectionOptions", "check_caller_parameters": "astrapy.meta.check_caller_parameters", "check_deprecated_vector_ize": 
"astrapy.meta.check_deprecated_vector_ize", "check_namespace_keyspace": "astrapy.meta.check_namespace_keyspace", "BulkWriteResult": "astrapy.results.BulkWriteResult", "DeleteResult": "astrapy.results.DeleteResult", "InsertManyResult": "astrapy.results.InsertManyResult", "InsertOneResult": "astrapy.results.InsertOneResult", "UpdateResult": "astrapy.results.UpdateResult", "EmbeddingHeadersProvider": "astrapy.authentication.EmbeddingHeadersProvider", "AsyncBaseOperation": "astrapy.operations.AsyncBaseOperation", "BaseOperation": "astrapy.operations.BaseOperation"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.collection.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.collection.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.collection.Collection", "text": "A Data API collection, the main object to interact with the Data API,\nespecially for DDL operations.\nThis class has a synchronous interface.\n\nA Collection is spawned from a Database object, from which it inherits\nthe details on how to reach the API server (endpoint, authentication token).", "metadata": {"kind": "class", "name": "Collection", "path": "astrapy.collection.Collection", "parameters": [{"name": "database", "type": "Database", "description": "a Database object, instantiated earlier. This represents\nthe database the collection belongs to.", "default": null}, {"name": "name", "type": "str", "description": "the collection name. 
This parameter should match an existing\ncollection on the database.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace to which the collection belongs.\nIf not specified, the database's working keyspace is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "api_options", "type": "CollectionAPIOptions | None", "description": "An instance of `astrapy.api_options.CollectionAPIOptions`\nproviding the general settings for interacting with the Data API.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient, Collection\n>>> my_client = astrapy.DataAPIClient(\"AstraCS:...\")\n>>> my_db = my_client.get_database(\n... \"https://01234567-....apps.astra.datastax.com\"\n... )\n>>> my_coll_1 = Collection(database=my_db, name=\"my_collection\")\n>>> my_coll_2 = my_db.create_collection(\n... \"my_v_collection\",\n... dimension=3,\n... metric=\"cosine\",\n... 
)\n>>> my_coll_3a = my_db.get_collection(\"my_already_existing_collection\")\n>>> my_coll_3b = my_db.my_already_existing_collection\n>>> my_coll_3c = my_db[\"my_already_existing_collection\"]", "note": "creating an instance of Collection does not trigger actual creation\nof the collection on the database. The latter should have been created\nbeforehand, e.g. through the `create_collection` method of a Database.", "references": ["astrapy.collection.Collection", "astrapy.client.DataAPIClient"], "gathered_types": ["astrapy.constants.CallerType", "astrapy.database.Database", "astrapy.api_options.CollectionAPIOptions"]}} +{"id": "astrapy.collection.Collection.api_options", "text": "", "metadata": {"kind": "attribute", "name": "api_options", "path": "astrapy.collection.Collection.api_options", "value": "api_options = CollectionAPIOptions()", "gathered_types": ["astrapy.api_options.CollectionAPIOptions"]}} +{"id": "astrapy.collection.Collection.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.collection.Collection.callers", "value": "callers = callers_param"}} +{"id": "astrapy.collection.Collection.with_options", "text": "Create a clone of this collection with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.collection.Collection.with_options", "parameters": [{"name": "name", "type": "str | None", "description": "the name of the collection. This parameter is useful to\nquickly spawn Collection instances each pointing to a different\ncollection existing in the same keyspace.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. 
If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "Collection", "description": "a new Collection instance."}], "example": ">>> my_other_coll = my_coll.with_options(\n... name=\"the_other_coll\",\n... callers=[(\"caller_identity\", \"0.1.2\")],\n... 
)", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.collection.Collection.to_async", "text": "Create an AsyncCollection from this one. Save for the arguments\nexplicitly provided as overrides, everything else is kept identical\nto this collection in the copy (the database is converted into\nan async object).", "metadata": {"kind": "function", "name": "to_async", "path": "astrapy.collection.Collection.to_async", "parameters": [{"name": "database", "type": "AsyncDatabase | None", "description": "an AsyncDatabase object, instantiated earlier.\nThis represents the database the new collection belongs to.", "value": "None", "default": "None"}, {"name": "name", "type": "str | None", "description": "the collection name. This parameter should match an existing\ncollection on the database.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace to which the collection belongs.\nIf not specified, the database's working keyspace is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. 
`astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. 
*DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCollection", "description": "the new copy, an AsyncCollection instance."}], "example": ">>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))\n77", "gathered_types": ["astrapy.collection.AsyncCollection", "astrapy.authentication.EmbeddingHeadersProvider", "astrapy.constants.CallerType", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.collection.Collection.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe Data API calls are performed (the \"caller\").", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.collection.Collection.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe Data API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_coll.set_caller(caller_name=\"the_caller\", caller_version=\"0.1.0\")"}} +{"id": "astrapy.collection.Collection.options", "text": "Get the collection options, i.e. 
its configuration as read from the database.\n\nThe method issues a request to the Data API each time is invoked,\nwithout caching mechanisms: this ensures up-to-date information\nfor usages such as real-time collection validation by the application.", "metadata": {"kind": "function", "name": "options", "path": "astrapy.collection.Collection.options", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "CollectionOptions", "description": "a CollectionOptions instance describing the collection."}, {"type": "CollectionOptions", "description": "(See also the database `list_collections` method.)"}], "example": ">>> my_coll.options()\nCollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))", "gathered_types": ["astrapy.info.CollectionOptions"]}} +{"id": "astrapy.collection.Collection.info", "text": "Information on the collection (name, location, database), in the\nform of a CollectionInfo object.\n\nNot to be confused with the collection `options` method (related\nto the collection internal configuration).", "metadata": {"kind": "function", "name": "info", "path": "astrapy.collection.Collection.info", "returns": [{"type": "CollectionInfo"}], "example": ">>> my_coll.info().database_info.region\n'eu-west-1'\n>>> my_coll.info().full_name\n'default_keyspace.my_v_collection'", "note": "the returned CollectionInfo wraps, among other things,\nthe database information: as such, calling this method\ntriggers the same-named method of a Database object (which, in turn,\nperforms a HTTP request to the DevOps API).\nSee the documentation for `Database.info()` for more details.", "gathered_types": ["astrapy.info.CollectionInfo"]}} +{"id": "astrapy.collection.Collection.database", "text": "a Database object, the database this collection belongs to.", 
"metadata": {"kind": "attribute", "name": "database", "path": "astrapy.collection.Collection.database", "value": "database: Database", "example": ">>> my_coll.database.name\n'the_application_database'", "gathered_types": ["astrapy.database.Database"]}} +{"id": "astrapy.collection.Collection.namespace", "text": "The namespace this collection is in.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"keyspace\" property.**", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.collection.Collection.namespace", "value": "namespace: str", "example": ">>> my_coll.namespace\n'default_keyspace'"}} +{"id": "astrapy.collection.Collection.keyspace", "text": "The keyspace this collection is in.", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.collection.Collection.keyspace", "value": "keyspace: str", "example": ">>> my_coll.keyspace\n'default_keyspace'"}} +{"id": "astrapy.collection.Collection.name", "text": "The name of this collection.", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.collection.Collection.name", "value": "name: str", "example": ">>> my_coll.name\n'my_v_collection'"}} +{"id": "astrapy.collection.Collection.full_name", "text": "The fully-qualified collection name within the database,\nin the form \"keyspace.collection_name\".", "metadata": {"kind": "attribute", "name": "full_name", "path": "astrapy.collection.Collection.full_name", "value": "full_name: str", "example": ">>> my_coll.full_name\n'default_keyspace.my_v_collection'"}} +{"id": "astrapy.collection.Collection.insert_one", "text": "Insert a single document in the collection in an atomic operation.", "metadata": {"kind": "function", "name": "insert_one", "path": "astrapy.collection.Collection.insert_one", "parameters": [{"name": "document", "type": "DocumentType", "description": "the dictionary expressing the document to insert.\nThe `_id` field of the document can be left out, in which\ncase it will be created automatically.", "default": 
null}, {"name": "vector", "type": "VectorType | None", "description": "a vector (a list of numbers appropriate for the collection)\nfor the document. Passing this parameter is equivalent to\nproviding a `$vector` field within the document itself,\nhowever the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, if such a service\nis configured for the collection. Passing this parameter is\nequivalent to providing a `$vectorize` field in the document itself,\nhowever the two are mutually exclusive.\nMoreover, this parameter cannot coexist with `vector`.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "InsertOneResult", "description": "an InsertOneResult object."}], "example": ">>> my_coll.count_documents({}, upper_bound=10)\n0\n>>> my_coll.insert_one(\n... {\n... \"age\": 30,\n... \"name\": \"Smith\",\n... \"food\": [\"pear\", \"peach\"],\n... \"likes_fruit\": True,\n... },\n... 
)\nInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')\n>>> my_coll.insert_one({\"_id\": \"user-123\", \"age\": 50, \"name\": \"Maccio\"})\nInsertOneResult(raw_results=..., inserted_id='user-123')\n>>> my_coll.count_documents({}, upper_bound=10)\n2/n/n/n>>> my_coll.insert_one({\"tag\": \"v\", \"$vector\": [10, 11]})\nInsertOneResult(...)", "note": "If an `_id` is explicitly provided, which corresponds to a document\nthat exists already in the collection, an error is raised and\nthe insertion fails.", "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.results.InsertOneResult"]}} +{"id": "astrapy.collection.Collection.insert_many", "text": "Insert a list of documents into the collection.\nThis is not an atomic operation.", "metadata": {"kind": "function", "name": "insert_many", "path": "astrapy.collection.Collection.insert_many", "parameters": [{"name": "documents", "type": "Iterable[DocumentType]", "description": "an iterable of dictionaries, each a document to insert.\nDocuments may specify their `_id` field or leave it out, in which\ncase it will be added automatically.", "default": null}, {"name": "vectors", "type": "Iterable[VectorType | None] | None", "description": "an optional list of vectors (as many vectors as the provided\ndocuments) to associate to the documents when inserting.\nPassing vectors this way is indeed equivalent to the \"$vector\" field\nof the documents, however the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the documents instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "Iterable[str | None] | None", "description": "an optional list of strings to be made into as many vectors\n(one per document), if such a service is configured for the collection.\nPassing this parameter is equivalent to providing a `$vectorize`\nfield in the documents themselves, however the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead.", "value": "None", "default": "None"}, {"name": "ordered", "type": "bool", "description": "if False (default), the insertions can occur in arbitrary order\nand possibly concurrently. If True, they are processed sequentially.\nIf there are no specific reasons against it, unordered insertions are to\nbe preferred as they complete much faster.", "value": "False", "default": "False"}, {"name": "chunk_size", "type": "int | None", "description": "how many documents to include in a single API request.\nExceeding the server maximum allowed value results in an error.\nLeave it unspecified (recommended) to use the system default.", "value": "None", "default": "None"}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent requests to the API at\na given time. It cannot be more than one for ordered insertions.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nIf many documents are being inserted, this method corresponds\nto several HTTP requests: in such cases one may want to specify\na more tolerant timeout here.", "value": "None", "default": "None"}], "returns": [{"type": "InsertManyResult", "description": "an InsertManyResult object."}], "example": ">>> my_coll.count_documents({}, upper_bound=10)\n0\n>>> my_coll.insert_many(\n... 
[{\"a\": 10}, {\"a\": 5}, {\"b\": [True, False, False]}],\n... ordered=True,\n... )\nInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])\n>>> my_coll.count_documents({}, upper_bound=100)\n3\n>>> my_coll.insert_many(\n... [{\"seq\": i} for i in range(50)],\n... concurrency=5,\n... )\nInsertManyResult(raw_results=..., inserted_ids=[... ...])\n>>> my_coll.count_documents({}, upper_bound=100)\n53\n>>> my_coll.insert_many(\n... [\n... {\"tag\": \"a\", \"$vector\": [1, 2]},\n... {\"tag\": \"b\", \"$vector\": [3, 4]},\n... ]\n... )\nInsertManyResult(...)", "note": "A failure mode for this command is related to certain faulty documents\nfound among those to insert: a document may have the an `_id` already\npresent on the collection, or its vector dimension may not\nmatch the collection setting.\n\nFor an ordered insertion, the method will raise an exception at\nthe first such faulty document -- nevertheless, all documents processed\nuntil then will end up being written to the database.\n\nFor unordered insertions, if the error stems from faulty documents\nthe insertion proceeds until exhausting the input documents: then,\nan exception is raised -- and all insertable documents will have been\nwritten to the database, including those \"after\" the troublesome ones.\n\nIf, on the other hand, there are errors not related to individual\ndocuments (such as a network connectivity error), the whole\n`insert_many` operation will stop in mid-way, an exception will be raised,\nand only a certain amount of the input documents will\nhave made their way to the database.", "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.results.InsertManyResult"]}} +{"id": "astrapy.collection.Collection.find", "text": "Find documents on the collection, matching a certain provided filter.\n\nThe method returns a Cursor that can then be iterated over. 
Depending\non the method call pattern, the iteration over all documents can reflect\ncollection mutations occurred since the `find` method was called, or not.\nIn cases where the cursor reflects mutations in real-time, it will iterate\nover cursors in an approximate way (i.e. exhibiting occasional skipped\nor duplicate documents). This happens when making use of the `sort`\noption in a non-vector-search manner.", "metadata": {"kind": "function", "name": "find", "path": "astrapy.collection.Collection.find", "parameters": [{"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", 
"default": "None"}, {"name": "skip", "type": "int | None", "description": "with this integer parameter, what would be the first `skip`\ndocuments returned by the query are discarded, and the results\nstart from the (skip+1)-th document.\nThis parameter can be used only in conjunction with an explicit\n`sort` criterion of the ascending/descending type (i.e. it cannot\nbe used when not sorting, nor with vector-based ANN search).", "value": "None", "default": "None"}, {"name": "limit", "type": "int | None", "description": "this (integer) parameter sets a limit over how many documents\nare returned. Once `limit` is reached (or the cursor is exhausted\nfor lack of matching documents), nothing more is returned.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to perform vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search).\nWhen running similarity search on a collection, no other sorting\ncriteria can be specified. Moreover, there is an upper bound\nto the number of documents that can be returned. For details,\nsee the Note about upper bounds and the Data API documentation.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nThis can be supplied in (exclusive) alternative to `vector`,\nprovided such a service is configured for the collection,\nand achieves the same effect.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool | None", "description": "a boolean to request the numeric value of the\nsimilarity to be returned as an added \"$similarity\" key in each\nreturned document. 
Can only be used for vector ANN search, i.e.\nwhen either `vector` is supplied or the `sort` parameter has the\nshape {\"$vector\": ...}.", "value": "None", "default": "None"}, {"name": "include_sort_vector", "type": "bool | None", "description": "a boolean to request query vector used in this search.\nIf set to True (and if the invocation is a vector search), calling\nthe `get_sort_vector` method on the returned cursor will yield\nthe vector used for the ANN search.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the order\nthe documents are returned. See the Note about sorting, as well as\nthe one about upper bounds, for details.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for each single one\nof the underlying HTTP requests used to fetch documents as the\ncursor is iterated over.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "Cursor", "description": "a Cursor object representing iterations over the matching documents"}, {"type": "Cursor", "description": "(see the Cursor object for how to use it. The simplest thing is to"}, {"type": "Cursor", "description": "run a for loop: `for document in collection.sort(...):`)."}], "example": ">>> filter = {\"seq\": {\"$exists\": True}}\n>>> for doc in my_coll.find(filter, projection={\"seq\": True}, limit=5):\n... print(doc[\"seq\"])\n...\n37\n35\n10\n36\n27\n>>> cursor1 = my_coll.find(\n... {},\n... limit=4,\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... 
)\n>>> [doc[\"_id\"] for doc in cursor1]\n['97e85f81-...', '1581efe4-...', '...', '...']\n>>> cursor2 = my_coll.find({}, limit=3)\n>>> cursor2.distinct(\"seq\")\n[37, 35, 10]/n/n/n>>> my_coll.insert_many([\n... {\"tag\": \"A\", \"$vector\": [4, 5]},\n... {\"tag\": \"B\", \"$vector\": [3, 4]},\n... {\"tag\": \"C\", \"$vector\": [3, 2]},\n... {\"tag\": \"D\", \"$vector\": [4, 1]},\n... {\"tag\": \"E\", \"$vector\": [2, 5]},\n... ])\n>>> ann_tags = [\n... document[\"tag\"]\n... for document in my_coll.find(\n... {},\n... sort={\"$vector\": [3, 3]},\n... limit=3,\n... )\n... ]\n>>> ann_tags\n['A', 'B', 'C']\n>>> # (assuming the collection has metric VectorMetric.COSINE)/n/n/n>>> cursor = my_coll.find(\n... sort={\"$vector\": [3, 3]},\n... limit=3,\n... include_sort_vector=True,\n... )\n>>> cursor.get_sort_vector()\n[3.0, 3.0]\n>>> matches = list(cursor)\n>>> cursor.get_sort_vector()\n[3.0, 3.0]", "note": "When not specifying sorting criteria at all (by vector or otherwise),\nthe cursor can scroll through an arbitrary number of documents as\nthe Data API and the client periodically exchange new chunks of documents.\nIt should be noted that the behavior of the cursor in the case documents\nhave been added/removed after the `find` was started depends on database\ninternals and it is not guaranteed, nor excluded, that such \"real-time\"\nchanges in the data would be picked up by the cursor.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.cursors.Cursor", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.Collection.find_one", "text": "Run a search, returning the first document in the collection that matches\nprovided filters, if any is found.", "metadata": {"kind": "function", "name": "find_one", "path": "astrapy.collection.Collection.find_one", "parameters": [{"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData 
API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to perform vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), extracting the most\nsimilar document in the collection matching the filter.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool | None", "description": "a boolean to request the numeric value of the\nsimilarity to be returned as an added \"$similarity\" key in the\nreturned document. Can only be used for vector ANN search, i.e.\nwhen either `vector` is supplied or the `sort` parameter has the\nshape {\"$vector\": ...}.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the order\nthe documents are returned. See the Note about sorting for details.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "a dictionary expressing the required document, otherwise None."}], "example": ">>> my_coll.find_one({})\n{'_id': '68d1e515-...', 'seq': 37}\n>>> my_coll.find_one({\"seq\": 10})\n{'_id': 'd560e217-...', 'seq': 10}\n>>> my_coll.find_one({\"seq\": 1011})\n>>> # (returns None for no matches)\n>>> my_coll.find_one({}, projection={\"seq\": False})\n{'_id': '68d1e515-...'}\n>>> my_coll.find_one(\n... {},\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... 
)\n{'_id': '97e85f81-...', 'seq': 69}\n>>> my_coll.find_one({}, sort={\"$vector\": [1, 0]}, projection={\"*\": True})\n{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}", "note": "See the `find` method for more details on the accepted parameters\n(whereas `skip` and `limit` are not valid parameters for `find_one`).", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.Collection.distinct", "text": "Return a list of the unique values of `key` across the documents\nin the collection that match the provided filter.", "metadata": {"kind": "function", "name": "distinct", "path": "astrapy.collection.Collection.distinct", "parameters": [{"name": "key", "type": "str", "description": "the name of the field whose value is inspected across documents.\nKeys can use dot-notation to descend to deeper document levels.\nExample of acceptable `key` values:\n \"field\"\n \"field.subfield\"\n \"field.3\"\n \"field.3.subfield\"\nIf lists are encountered and no numeric index is specified,\nall items in the list are visited.", "default": null}, {"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, with the same meaning as for `find`.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "list[Any]", "description": "a list of all different values for `key` found across the documents"}, {"type": "list[Any]", "description": "that match the filter. 
The result list has no repeated items."}], "example": ">>> my_coll.insert_many(\n... [\n... {\"name\": \"Marco\", \"food\": [\"apple\", \"orange\"], \"city\": \"Helsinki\"},\n... {\"name\": \"Emma\", \"food\": {\"likes_fruit\": True, \"allergies\": []}},\n... ]\n... )\nInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])\n>>> my_coll.distinct(\"name\")\n['Marco', 'Emma']\n>>> my_coll.distinct(\"city\")\n['Helsinki']\n>>> my_coll.distinct(\"food\")\n['apple', 'orange', {'likes_fruit': True, 'allergies': []}]\n>>> my_coll.distinct(\"food.1\")\n['orange']\n>>> my_coll.distinct(\"food.allergies\")\n[]\n>>> my_coll.distinct(\"food.likes_fruit\")\n[True]", "note": "For details on the behaviour of \"distinct\" in conjunction with\nreal-time changes in the collection contents, see the\nNote of the `find` command.", "gathered_types": ["astrapy.constants.FilterType"]}} +{"id": "astrapy.collection.Collection.count_documents", "text": "Count the documents in the collection matching the specified filter.", "metadata": {"kind": "function", "name": "count_documents", "path": "astrapy.collection.Collection.count_documents", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "upper_bound", "type": "int", "description": "a required ceiling on the result of the count operation.\nIf the actual number of documents exceeds this value,\nan exception will be raised.\nFurthermore, if the actual number of documents exceeds the maximum\ncount that the Data API can reach (regardless of upper_bound),\nan exception will be raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "int", "description": "the exact count of matching documents."}], "example": ">>> my_coll.insert_many([{\"seq\": i} for i in range(20)])\nInsertManyResult(...)\n>>> my_coll.count_documents({}, upper_bound=100)\n20\n>>> my_coll.count_documents({\"seq\":{\"$gt\": 15}}, upper_bound=100)\n4\n>>> my_coll.count_documents({}, upper_bound=10)\nTraceback (most recent call last):\n ... ...\nastrapy.exceptions.TooManyDocumentsToCountException", "note": "Count operations are expensive: for this reason, the best practice\nis to provide a reasonable `upper_bound` according to the caller\nexpectations. Moreover, indiscriminate usage of count operations\nfor sizeable amounts of documents (i.e. 
in the thousands and more)\nis discouraged in favor of alternative application-specific solutions.\nKeep in mind that the Data API has a hard upper limit on the amount\nof documents it will count, and that an exception will be thrown\nby this method if this limit is encountered.", "gathered_types": ["astrapy.constants.FilterType"]}} +{"id": "astrapy.collection.Collection.estimated_document_count", "text": "Query the API server for an estimate of the document count in the collection.\n\nContrary to `count_documents`, this method has no filtering parameters.", "metadata": {"kind": "function", "name": "estimated_document_count", "path": "astrapy.collection.Collection.estimated_document_count", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "int", "description": "a server-provided estimate count of the documents in the collection."}], "example": ">>> my_coll.estimated_document_count()\n35700"}} +{"id": "astrapy.collection.Collection.find_one_and_replace", "text": "Find a document on the collection and replace it entirely with a new one,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "find_one_and_replace", "path": "astrapy.collection.Collection.find_one_and_replace", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "replacement", "type": "DocumentType", "description": "the new document to write into the collection.", "default": null}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, `replacement` is inserted as a new document\nif no matches are found on the collection. If False,\nthe operation silently does nothing in case of no matches.", "value": "False", "default": "False"}, {"name": "return_document", "type": "str", "description": "a flag controlling what document is returned:\nif set to `ReturnDocument.BEFORE`, or the string \"before\",\nthe document found on database is returned; if set to\n`ReturnDocument.AFTER`, or the string \"after\", the new\ndocument is returned. 
The default is \"before\".", "value": "ReturnDocument.BEFORE", "default": "ReturnDocument.BEFORE"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "A document (or a projection thereof, as required), either the one"}, {"type": "DocumentType | None", "description": "before the replace operation or the one after that."}, {"type": "DocumentType | None", "description": "Alternatively, the method returns None to represent"}, {"type": "DocumentType | None", "description": "that no matching document was found, or that no replacement"}, {"type": "DocumentType | None", "description": "was inserted (depending on the `return_document` parameter)."}], "example": ">>> my_coll.insert_one({\"_id\": \"rule1\", \"text\": \"all animals are equal\"})\nInsertOneResult(...)\n>>> my_coll.find_one_and_replace(\n... {\"_id\": \"rule1\"},\n... {\"text\": \"some animals are more equal!\"},\n... )\n{'_id': 'rule1', 'text': 'all animals are equal'}\n>>> my_coll.find_one_and_replace(\n... {\"text\": \"some animals are more equal!\"},\n... {\"text\": \"and the pigs are the rulers\"},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n{'_id': 'rule1', 'text': 'and the pigs are the rulers'}\n>>> my_coll.find_one_and_replace(\n... {\"_id\": \"rule2\"},\n... {\"text\": \"F=ma^2\"},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n>>> # (returns None for no matches)\n>>> my_coll.find_one_and_replace(\n... {\"_id\": \"rule2\"},\n... {\"text\": \"F=ma\"},\n... upsert=True,\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... projection={\"_id\": False},\n... 
)\n{'text': 'F=ma'}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.ReturnDocument", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "BEFORE", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.Collection.replace_one", "text": "Replace a single document on the collection with a new one,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "replace_one", "path": "astrapy.collection.Collection.replace_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "replacement", "type": "DocumentType", "description": "the new document to write into the collection.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, `replacement` is inserted as a new document\nif no matches are found on the collection. If False,\nthe operation silently does nothing in case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the replace operation."}], "example": ">>> my_coll.insert_one({\"Marco\": \"Polo\"})\nInsertOneResult(...)\n>>> my_coll.replace_one({\"Marco\": {\"$exists\": True}}, {\"Buda\": \"Pest\"})\nUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})\n>>> my_coll.find_one({\"Buda\": \"Pest\"})\n{'_id': '8424905a-...', 'Buda': 'Pest'}\n>>> my_coll.replace_one({\"Mirco\": {\"$exists\": True}}, {\"Oh\": \"yeah?\"})\nUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})\n>>> my_coll.replace_one({\"Mirco\": {\"$exists\": True}}, {\"Oh\": \"yeah?\"}, upsert=True)\nUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})", "gathered_types": 
["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.results.UpdateResult", "astrapy.constants.VectorType"]}} +{"id": "astrapy.collection.Collection.find_one_and_update", "text": "Find a document on the collection and update it as requested,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "find_one_and_update", "path": "astrapy.collection.Collection.find_one_and_update", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the document, expressed\nas a dictionary as per Data API syntax. 
Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a new document (resulting from applying the `update`\nto an empty document) is inserted if no matches are found on\nthe collection. If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "return_document", "type": "str", "description": "a flag controlling what document is returned:\nif set to `ReturnDocument.BEFORE`, or the string \"before\",\nthe document found on database is returned; if set to\n`ReturnDocument.AFTER`, or the string \"after\", the new\ndocument is returned. 
The default is \"before\".", "value": "ReturnDocument.BEFORE", "default": "ReturnDocument.BEFORE"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "A document (or a projection thereof, as required), either the one"}, {"type": "DocumentType | None", "description": "before the replace operation or the one after that."}, {"type": "DocumentType | None", "description": "Alternatively, the method returns None to represent"}, {"type": "DocumentType | None", "description": "that no matching document was found, or that no update"}, {"type": "DocumentType | None", "description": "was applied (depending on the `return_document` parameter)."}], "example": ">>> my_coll.insert_one({\"Marco\": \"Polo\"})\nInsertOneResult(...)\n>>> my_coll.find_one_and_update(\n... {\"Marco\": {\"$exists\": True}},\n... {\"$set\": {\"title\": \"Mr.\"}},\n... )\n{'_id': 'a80106f2-...', 'Marco': 'Polo'}\n>>> my_coll.find_one_and_update(\n... {\"title\": \"Mr.\"},\n... {\"$inc\": {\"rank\": 3}},\n... projection=[\"title\", \"rank\"],\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n{'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}\n>>> my_coll.find_one_and_update(\n... {\"name\": \"Johnny\"},\n... {\"$set\": {\"rank\": 0}},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n>>> # (returns None for no matches)\n>>> my_coll.find_one_and_update(\n... {\"name\": \"Johnny\"},\n... {\"$set\": {\"rank\": 0}},\n... upsert=True,\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... 
)\n{'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.ReturnDocument", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "BEFORE", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.Collection.update_one", "text": "Update a single document on the collection as requested,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "update_one", "path": "astrapy.collection.Collection.update_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the document, expressed\nas a dictionary as per Data API syntax. Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a new document (resulting from applying the `update`\nto an empty document) is inserted if no matches are found on\nthe collection. 
If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the update operation."}], "example": ">>> my_coll.insert_one({\"Marco\": \"Polo\"})\nInsertOneResult(...)\n>>> my_coll.update_one({\"Marco\": {\"$exists\": True}}, {\"$inc\": {\"rank\": 3}})\nUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})\n>>> my_coll.update_one({\"Mirko\": {\"$exists\": True}}, {\"$inc\": {\"rank\": 3}})\nUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})\n>>> my_coll.update_one({\"Mirko\": {\"$exists\": True}}, {\"$inc\": {\"rank\": 3}}, upsert=True)\nUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.results.UpdateResult"]}} +{"id": "astrapy.collection.Collection.update_many", "text": "Apply an update operations to all documents matching a condition,\noptionally inserting one documents in absence of matches.", "metadata": {"kind": "function", "name": "update_many", "path": "astrapy.collection.Collection.update_many", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the documents, expressed\nas a dictionary as per Data API syntax. Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a single new document (resulting from applying `update`\nto an empty document) is inserted if no matches are found on\nthe collection. If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nif a large number of document updates is anticipated, it is suggested\nto specify a larger timeout than in most other operations as the\nupdate will span several HTTP calls to the API in sequence.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the update operation."}], "example": ">>> my_coll.insert_many([{\"c\": \"red\"}, {\"c\": \"green\"}, {\"c\": \"blue\"}])\nInsertManyResult(...)\n>>> my_coll.update_many({\"c\": {\"$ne\": \"green\"}}, {\"$set\": {\"nongreen\": True}})\nUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})\n>>> my_coll.update_many({\"c\": \"orange\"}, {\"$set\": {\"is_also_fruit\": True}})\nUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})\n>>> 
my_coll.update_many(\n... {\"c\": \"orange\"},\n... {\"$set\": {\"is_also_fruit\": True}},\n... upsert=True,\n... )\nUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})", "note": "Similarly to the case of `find` (see its docstring for more details),\nrunning this command while, at the same time, another process is\ninserting new documents which match the filter of the `update_many`\ncan result in an unpredictable fraction of these documents being updated.\nIn other words, it cannot be easily predicted whether a given\nnewly-inserted document will be picked up by the update_many command or not.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.results.UpdateResult"]}} +{"id": "astrapy.collection.Collection.find_one_and_delete", "text": "Find a document in the collection and delete it. The deleted document,\nhowever, is the return value of the method.", "metadata": {"kind": "function", "name": "find_one_and_delete", "path": "astrapy.collection.Collection.find_one_and_delete", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\nThis parameter cannot be used together with `sort`.\nSee the `find` method for more details on this parameter.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nThis can be supplied in (exclusive) alternative to `vector`,\nprovided such a service is configured for the collection,\nand achieves the same effect.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\ndeleted one. See the `find` method for more on sorting.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "Either the document (or a projection thereof, as requested), or None"}, {"type": "DocumentType | None", "description": "if no matches were found in the first place."}], "example": ">>> my_coll.insert_many(\n... [\n... {\"species\": \"swan\", \"class\": \"Aves\"},\n... {\"species\": \"frog\", \"class\": \"Amphibia\"},\n... ],\n... )\nInsertManyResult(...)\n>>> my_coll.find_one_and_delete(\n... {\"species\": {\"$ne\": \"frog\"}},\n... projection=[\"species\"],\n... 
)\n{'_id': '5997fb48-...', 'species': 'swan'}\n>>> my_coll.find_one_and_delete({\"species\": {\"$ne\": \"frog\"}})\n>>> # (returns None for no matches)", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.Collection.delete_one", "text": "Delete one document matching a provided filter.\nThis method never deletes more than a single document, regardless\nof the number of matches to the provided filters.", "metadata": {"kind": "function", "name": "delete_one", "path": "astrapy.collection.Collection.delete_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\nThis parameter cannot be used together with `sort`.\nSee the `find` method for more details on this parameter.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nThis can be supplied in (exclusive) alternative to `vector`,\nprovided such a service is configured for the collection,\nand achieves the same effect.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\ndeleted one. See the `find` method for more on sorting.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DeleteResult", "description": "a DeleteResult object summarizing the outcome of the delete operation."}], "example": ">>> my_coll.insert_many([{\"seq\": 1}, {\"seq\": 0}, {\"seq\": 2}])\nInsertManyResult(...)\n>>> my_coll.delete_one({\"seq\": 1})\nDeleteResult(raw_results=..., deleted_count=1)\n>>> my_coll.distinct(\"seq\")\n[0, 2]\n>>> my_coll.delete_one(\n... {\"seq\": {\"$exists\": True}},\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... )\nDeleteResult(raw_results=..., deleted_count=1)\n>>> my_coll.distinct(\"seq\")\n[0]\n>>> my_coll.delete_one({\"seq\": 2})\nDeleteResult(raw_results=..., deleted_count=0)", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.results.DeleteResult"]}} +{"id": "astrapy.collection.Collection.delete_many", "text": "Delete all documents matching a provided filter.", "metadata": {"kind": "function", "name": "delete_many", "path": "astrapy.collection.Collection.delete_many", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.\nPassing an empty filter, `{}`, completely erases all contents\nof the collection.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nkeep in mind that this method entails successive HTTP requests\nto the API, depending on how many documents are to be deleted.\nFor this reason, in most cases it is suggested to relax the\ntimeout compared to other method calls.", "value": "None", "default": "None"}], "returns": [{"type": "DeleteResult", "description": "a DeleteResult object summarizing the outcome of the delete operation."}], "example": ">>> my_coll.insert_many([{\"seq\": 1}, {\"seq\": 0}, {\"seq\": 2}])\nInsertManyResult(...)\n>>> my_coll.delete_many({\"seq\": {\"$lte\": 1}})\nDeleteResult(raw_results=..., deleted_count=2)\n>>> my_coll.distinct(\"seq\")\n[2]\n>>> my_coll.delete_many({\"seq\": {\"$lte\": 1}})\nDeleteResult(raw_results=..., deleted_count=0)", "note": "This operation is in general not atomic. Depending on the amount\nof matching documents, it can keep running (in a blocking way)\nfor a macroscopic time. In that case, new documents that are\nmeanwhile inserted (e.g. 
from another process/application) will be\ndeleted during the execution of this method call until the\ncollection is devoid of matches.\nAn exception is the `filter={}` case, whereby the operation is atomic.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.results.DeleteResult"]}} +{"id": "astrapy.collection.Collection.delete_all", "text": "Delete all documents in a collection.", "metadata": {"kind": "function", "name": "delete_all", "path": "astrapy.collection.Collection.delete_all", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary of the form {\"ok\": 1} to signal successful deletion."}], "example": ">>> my_coll.distinct(\"seq\")\n[2, 1, 0]\n>>> my_coll.count_documents({}, upper_bound=100)\n4\n>>> my_coll.delete_all()\n{'ok': 1}\n>>> my_coll.count_documents({}, upper_bound=100)\n0", "note": "Use with caution."}} +{"id": "astrapy.collection.Collection.bulk_write", "text": "Execute an arbitrary amount of operations such as inserts, updates, deletes\neither sequentially or concurrently.\n\nThis method does not execute atomically, i.e. individual operations are\neach performed in the same way as the corresponding collection method,\nand each one is a different and unrelated database mutation.", "metadata": {"kind": "function", "name": "bulk_write", "path": "astrapy.collection.Collection.bulk_write", "parameters": [{"name": "requests", "type": "Iterable[BaseOperation]", "description": "an iterable over concrete subclasses of `BaseOperation`,\nsuch as `InsertMany` or `ReplaceOne`. 
Each such object\nrepresents an operation ready to be executed on a collection,\nand is instantiated by passing the same parameters as one\nwould the corresponding collection method.", "default": null}, {"name": "ordered", "type": "bool", "description": "whether to launch the `requests` one after the other or\nin arbitrary order, possibly in a concurrent fashion. For\nperformance reasons, False (default) should be preferred\nwhen compatible with the needs of the application flow.", "value": "False", "default": "False"}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent operations executing at\na given time. It cannot be more than one for ordered bulk writes.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole bulk write.\nRemember that, if the method call times out, then there's no\nguarantee about what portion of the bulk write has been received\nand successfully executed by the Data API.\nIf not passed, the collection-level setting is used instead:\nin most cases, however, one should pass a relaxed timeout\nif longer sequences of operations are to be executed in bulk.", "value": "None", "default": "None"}], "returns": [{"type": "BulkWriteResult", "description": "A single BulkWriteResult summarizing the whole list of requested"}, {"type": "BulkWriteResult", "description": "operations. 
The keys in the map attributes of BulkWriteResult"}, {"type": "BulkWriteResult", "description": "(when present) are the integer indices of the corresponding operation"}, {"type": "BulkWriteResult", "description": "in the `requests` iterable."}], "example": ">>> from astrapy.operations import InsertMany, ReplaceOne\n>>> op1 = InsertMany([{\"a\": 1}, {\"a\": 2}])\n>>> op2 = ReplaceOne({\"z\": 9}, replacement={\"z\": 9, \"replaced\": True}, upsert=True)\n>>> my_coll.bulk_write([op1, op2])\nBulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: '2addd676-...'})\n>>> my_coll.count_documents({}, upper_bound=100)\n3\n>>> my_coll.distinct(\"replaced\")\n[True]", "references": ["astrapy.operations.ReplaceOne", "astrapy.operations.InsertMany"], "gathered_types": ["astrapy.results.BulkWriteResult", "astrapy.operations.BaseOperation"]}} +{"id": "astrapy.collection.Collection.drop", "text": "Drop the collection, i.e. delete it from the database along with\nall the documents it contains.", "metadata": {"kind": "function", "name": "drop", "path": "astrapy.collection.Collection.drop", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.\nRemember there is not guarantee that a request that has\ntimed out us not in fact honored.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary of the form {\"ok\": 1} to signal successful deletion."}], "example": ">>> my_coll.find_one({})\n{'_id': '...', 'a': 100}\n>>> my_coll.drop()\n{'ok': 1}\n>>> my_coll.find_one({})\nTraceback (most recent call last):\n ... 
...\nastrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection", "note": "Once the method succeeds, methods on this object can still be invoked:\nhowever, this hardly makes sense as the underlying actual collection\nis no more.\nIt is responsibility of the developer to design a correct flow\nwhich avoids using a deceased collection any further."}} +{"id": "astrapy.collection.Collection.command", "text": "Send a POST request to the Data API for this collection with\nan arbitrary, caller-provided payload.", "metadata": {"kind": "function", "name": "command", "path": "astrapy.collection.Collection.command", "parameters": [{"name": "body", "type": "dict[str, Any]", "description": "a JSON-serializable dictionary, the payload of the request.", "default": null}, {"name": "raise_api_errors", "type": "bool", "description": "if True, responses with a nonempty 'errors' field\nresult in an astrapy exception being raised.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary with the response of the HTTP request."}], "example": ">>> my_coll.command({\"countDocuments\": {}})\n{'status': {'count': 123}}"}} +{"id": "astrapy.collection.AsyncCollection", "text": "A Data API collection, the main object to interact with the Data API,\nespecially for DDL operations.\nThis class has an asynchronous interface for use with asyncio.\n\nAn AsyncCollection is spawned from a Database object, from which it inherits\nthe details on how to reach the API server (endpoint, authentication token).", "metadata": {"kind": "class", "name": "AsyncCollection", "path": "astrapy.collection.AsyncCollection", "parameters": [{"name": "database", "type": "AsyncDatabase", "description": "a 
Database object, instantiated earlier. This represents\nthe database the collection belongs to.", "default": null}, {"name": "name", "type": "str", "description": "the collection name. This parameter should match an existing\ncollection on the database.", "default": null}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace to which the collection belongs.\nIf not specified, the database's working keyspace is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. *DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "api_options", "type": "CollectionAPIOptions | None", "description": "An instance of `astrapy.api_options.CollectionAPIOptions`\nproviding the general settings for interacting with the Data API.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.", "value": "None", "default": "None"}], "example": ">>> from astrapy import DataAPIClient, AsyncCollection\n>>> my_client = astrapy.DataAPIClient(\"AstraCS:...\")\n>>> my_async_db = my_client.get_async_database(\n... \"https://01234567-....apps.astra.datastax.com\"\n... 
)\n>>> my_async_coll_1 = AsyncCollection(database=my_async_db, name=\"my_collection\")\n>>> my_async coll_2 = asyncio.run(my_async_db.create_collection(\n... \"my_v_collection\",\n... dimension=3,\n... metric=\"cosine\",\n... ))\n>>> my_async_coll_3a = asyncio.run(my_async_db.get_collection(\n... \"my_already_existing_collection\",\n... ))\n>>> my_async_coll_3b = my_async_db.my_already_existing_collection\n>>> my_async_coll_3c = my_async_db[\"my_already_existing_collection\"]", "note": "creating an instance of AsyncCollection does not trigger actual creation\nof the collection on the database. The latter should have been created\nbeforehand, e.g. through the `create_collection` method of an AsyncDatabase.", "references": ["astrapy.client.DataAPIClient", "astrapy.collection.AsyncCollection"], "gathered_types": ["astrapy.constants.CallerType", "astrapy.api_options.CollectionAPIOptions", "astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.collection.AsyncCollection.api_options", "text": "", "metadata": {"kind": "attribute", "name": "api_options", "path": "astrapy.collection.AsyncCollection.api_options", "value": "api_options = CollectionAPIOptions()", "gathered_types": ["astrapy.api_options.CollectionAPIOptions"]}} +{"id": "astrapy.collection.AsyncCollection.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.collection.AsyncCollection.callers", "value": "callers = callers_param"}} +{"id": "astrapy.collection.AsyncCollection.with_options", "text": "Create a clone of this collection with some changed attributes.", "metadata": {"kind": "function", "name": "with_options", "path": "astrapy.collection.AsyncCollection.with_options", "parameters": [{"name": "name", "type": "str | None", "description": "the name of the collection. 
This parameter is useful to\nquickly spawn AsyncCollection instances each pointing to a different\ncollection existing in the same keyspace.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCollection", "description": "a new AsyncCollection instance."}], "example": ">>> my_other_async_coll = my_async_coll.with_options(\n... name=\"the_other_coll\",\n... callers=[(\"caller_identity\", \"0.1.2\")],\n... )", "gathered_types": ["astrapy.collection.AsyncCollection", "astrapy.authentication.EmbeddingHeadersProvider", "astrapy.constants.CallerType"]}} +{"id": "astrapy.collection.AsyncCollection.to_sync", "text": "Create a Collection from this one. Save for the arguments\nexplicitly provided as overrides, everything else is kept identical\nto this collection in the copy (the database is converted into\na sync object).", "metadata": {"kind": "function", "name": "to_sync", "path": "astrapy.collection.AsyncCollection.to_sync", "parameters": [{"name": "database", "type": "Database | None", "description": "a Database object, instantiated earlier.\nThis represents the database the new collection belongs to.", "value": "None", "default": "None"}, {"name": "name", "type": "str | None", "description": "the collection name. This parameter should match an existing\ncollection on the database.", "value": "None", "default": "None"}, {"name": "keyspace", "type": "str | None", "description": "this is the keyspace to which the collection belongs.\nIf not specified, the database's working keyspace is used.", "value": "None", "default": "None"}, {"name": "namespace", "type": "str | None", "description": "an alias for `keyspace`. 
*DEPRECATED*, removal in 2.0.", "value": "None", "default": "None"}, {"name": "embedding_api_key", "type": "str | EmbeddingHeadersProvider | None", "description": "optional API key(s) for interacting with the collection.\nIf an embedding service is configured, and this parameter is not None,\neach Data API call will include the necessary embedding-related headers\nas specified by this parameter. If a string is passed, it translates\ninto the one \"embedding api key\" header\n(i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).\nFor some vectorize providers/models, if using header-based authentication,\nspecialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`\nshould be supplied.", "value": "None", "default": "None"}, {"name": "collection_max_time_ms", "type": "int | None", "description": "a default timeout, in millisecond, for the duration of each\noperation on the collection. Individual timeouts can be provided to\neach collection method call and will take precedence, with this value\nbeing an overall default.\nNote that for some methods involving multiple API calls (such as\n`find`, `delete_many`, `insert_many` and so on), it is strongly suggested\nto provide a specific timeout as the default one likely wouldn't make\nmuch sense.", "value": "None", "default": "None"}, {"name": "callers", "type": "Sequence[CallerType]", "description": "a list of caller identities, i.e. applications, or frameworks,\non behalf of which the Data API calls are performed. These end up\nin the request user-agent.\nEach caller identity is a (\"caller_name\", \"caller_version\") pair.", "value": "[]", "default": "[]"}, {"name": "caller_name", "type": "str | None", "description": "*DEPRECATED*, use `callers`. Removal 2.0. Name of the\napplication, or framework, on behalf of which the Data API calls\nare performed. 
This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller. *DEPRECATED*, use `callers`.\nRemoval 2.0.", "value": "None", "default": "None"}], "returns": [{"type": "Collection", "description": "the new copy, a Collection instance."}], "example": ">>> my_async_coll.to_sync().count_documents({}, upper_bound=100)\n77", "gathered_types": ["astrapy.authentication.EmbeddingHeadersProvider", "astrapy.database.Database", "astrapy.constants.CallerType"]}} +{"id": "astrapy.collection.AsyncCollection.set_caller", "text": "Set a new identity for the application/framework on behalf of which\nthe Data API calls are performed (the \"caller\").", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.collection.AsyncCollection.set_caller", "parameters": [{"name": "caller_name", "type": "str | None", "description": "name of the application, or framework, on behalf of which\nthe Data API calls are performed. This ends up in the request user-agent.", "value": "None", "default": "None"}, {"name": "caller_version", "type": "str | None", "description": "version of the caller.", "value": "None", "default": "None"}], "returns": [{"type": "None"}], "example": ">>> my_coll.set_caller(caller_name=\"the_caller\", caller_version=\"0.1.0\")"}} +{"id": "astrapy.collection.AsyncCollection.options", "text": "Get the collection options, i.e. 
its configuration as read from the database.\n\nThe method issues a request to the Data API each time is invoked,\nwithout caching mechanisms: this ensures up-to-date information\nfor usages such as real-time collection validation by the application.", "metadata": {"kind": "function", "name": "options", "path": "astrapy.collection.AsyncCollection.options", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "CollectionOptions", "description": "a CollectionOptions instance describing the collection."}, {"type": "CollectionOptions", "description": "(See also the database `list_collections` method.)"}], "example": ">>> asyncio.run(my_async_coll.options())\nCollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))", "gathered_types": ["astrapy.info.CollectionOptions"]}} +{"id": "astrapy.collection.AsyncCollection.info", "text": "Information on the collection (name, location, database), in the\nform of a CollectionInfo object.\n\nNot to be confused with the collection `options` method (related\nto the collection internal configuration).", "metadata": {"kind": "function", "name": "info", "path": "astrapy.collection.AsyncCollection.info", "returns": [{"type": "CollectionInfo"}], "example": ">>> my_async_coll.info().database_info.region\n'us-east1'\n>>> my_async_coll.info().full_name\n'default_keyspace.my_v_collection'", "note": "the returned CollectionInfo wraps, among other things,\nthe database information: as such, calling this method\ntriggers the same-named method of a Database object (which, in turn,\nperforms a HTTP request to the DevOps API).\nSee the documentation for `Database.info()` for more details.", "gathered_types": ["astrapy.info.CollectionInfo"]}} +{"id": "astrapy.collection.AsyncCollection.database", "text": "a Database 
object, the database this collection belongs to.", "metadata": {"kind": "attribute", "name": "database", "path": "astrapy.collection.AsyncCollection.database", "value": "database: AsyncDatabase", "example": ">>> my_async_coll.database.name\n'quicktest'", "gathered_types": ["astrapy.database.AsyncDatabase"]}} +{"id": "astrapy.collection.AsyncCollection.namespace", "text": "The namespace this collection is in.\n\n*DEPRECATED* (removal in 2.0). Switch to the \"keyspace\" property.**", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.collection.AsyncCollection.namespace", "value": "namespace: str", "example": ">>> my_async_coll.namespace\n'default_keyspace'"}} +{"id": "astrapy.collection.AsyncCollection.keyspace", "text": "The keyspace this collection is in.", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.collection.AsyncCollection.keyspace", "value": "keyspace: str", "example": ">>> my_coll.keyspace\n'default_keyspace'"}} +{"id": "astrapy.collection.AsyncCollection.name", "text": "The name of this collection.", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.collection.AsyncCollection.name", "value": "name: str", "example": ">>> my_async_coll.name\n'my_v_collection'"}} +{"id": "astrapy.collection.AsyncCollection.full_name", "text": "The fully-qualified collection name within the database,\nin the form \"keyspace.collection_name\".", "metadata": {"kind": "attribute", "name": "full_name", "path": "astrapy.collection.AsyncCollection.full_name", "value": "full_name: str", "example": ">>> my_async_coll.full_name\n'default_keyspace.my_v_collection'"}} +{"id": "astrapy.collection.AsyncCollection.insert_one", "text": "Insert a single document in the collection in an atomic operation.", "metadata": {"kind": "function", "name": "insert_one", "path": "astrapy.collection.AsyncCollection.insert_one", "parameters": [{"name": "document", "type": "DocumentType", "description": "the dictionary expressing the document 
to insert.\nThe `_id` field of the document can be left out, in which\ncase it will be created automatically.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a vector (a list of numbers appropriate for the collection)\nfor the document. Passing this parameter is equivalent to\nproviding a `$vector` field within the document itself,\nhowever the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector, if such a service\nis configured for the collection. Passing this parameter is\nequivalent to providing a `$vectorize` field in the document itself,\nhowever the two are mutually exclusive.\nMoreover, this parameter cannot coexist with `vector`.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "InsertOneResult", "description": "an InsertOneResult object."}], "example": ">>> async def write_and_count(acol: AsyncCollection) -> None:\n... count0 = await acol.count_documents({}, upper_bound=10)\n... print(\"count0\", count0)\n... await acol.insert_one(\n... {\n... \"age\": 30,\n... \"name\": \"Smith\",\n... \"food\": [\"pear\", \"peach\"],\n... \"likes_fruit\": True,\n... },\n... )\n... await acol.insert_one({\"_id\": \"user-123\", \"age\": 50, \"name\": \"Maccio\"})\n... count1 = await acol.count_documents({}, upper_bound=10)\n... 
print(\"count1\", count1)\n...\n>>> asyncio.run(write_and_count(my_async_coll))\ncount0 0\ncount1 2\n\n>>> asyncio.run(my_async_coll.insert_one({\"tag\": v\", \"$vector\": [10, 11]}))\nInsertOneResult(...)", "note": "If an `_id` is explicitly provided, which corresponds to a document\nthat exists already in the collection, an error is raised and\nthe insertion fails.", "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.results.InsertOneResult"]}} +{"id": "astrapy.collection.AsyncCollection.insert_many", "text": "Returns:\n an InsertManyResult object.", "metadata": {"kind": "function", "name": "insert_many", "path": "astrapy.collection.AsyncCollection.insert_many", "parameters": [{"name": "documents", "type": "Iterable[DocumentType]", "description": "an iterable of dictionaries, each a document to insert.\nDocuments may specify their `_id` field or leave it out, in which\ncase it will be added automatically.", "default": null}, {"name": "vectors", "type": "Iterable[VectorType | None] | None", "description": "an optional list of vectors (as many vectors as the provided\ndocuments) to associate to the documents when inserting.\nPassing vectors this way is indeed equivalent to the \"$vector\" field\nof the documents, however the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "Iterable[str | None] | None", "description": "an optional list of strings to be made into as many vectors\n(one per document), if such a service is configured for the collection.\nPassing this parameter is equivalent to providing a `$vectorize`\nfield in the documents themselves, however the two are mutually exclusive.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the documents instead.", "value": "None", "default": "None"}, {"name": "ordered", "type": "bool", "description": "if False (default), the insertions can occur in arbitrary order\nand possibly concurrently. If True, they are processed sequentially.\nIf there are no specific reasons against it, unordered insertions are to\nbe preferred as they complete much faster.", "value": "False", "default": "False"}, {"name": "chunk_size", "type": "int | None", "description": "how many documents to include in a single API request.\nExceeding the server maximum allowed value results in an error.\nLeave it unspecified (recommended) to use the system default.", "value": "None", "default": "None"}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent requests to the API at\na given time. It cannot be more than one for ordered insertions.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nIf many documents are being inserted, this method corresponds\nto several HTTP requests: in such cases one may want to specify\na more tolerant timeout here.", "value": "None", "default": "None"}], "returns": [{"type": "InsertManyResult"}], "example": ">>> async def write_and_count(acol: AsyncCollection) -> None:\n... count0 = await acol.count_documents({}, upper_bound=10)\n... print(\"count0\", count0)\n... im_result1 = await acol.insert_many(\n... [\n... {\"a\": 10},\n... {\"a\": 5},\n... {\"b\": [True, False, False]},\n... ],\n... ordered=True,\n... )\n... print(\"inserted1\", im_result1.inserted_ids)\n... count1 = await acol.count_documents({}, upper_bound=100)\n... print(\"count1\", count1)\n... await acol.insert_many(\n... [{\"seq\": i} for i in range(50)],\n... concurrency=5,\n... )\n... count2 = await acol.count_documents({}, upper_bound=100)\n... 
print(\"count2\", count2)\n...\n>>> asyncio.run(write_and_count(my_async_coll))\ncount0 0\ninserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']\ncount1 3\ncount2 53\n>>> asyncio.run(my_async_coll.insert_many(\n... [\n... {\"tag\": \"a\", \"$vector\": [1, 2]},\n... {\"tag\": \"b\", \"$vector\": [3, 4]},\n... ]\n... ))\nInsertManyResult(...)", "note": "A failure mode for this command is related to certain faulty documents\nfound among those to insert: a document may have the an `_id` already\npresent on the collection, or its vector dimension may not\nmatch the collection setting.\n\nFor an ordered insertion, the method will raise an exception at\nthe first such faulty document -- nevertheless, all documents processed\nuntil then will end up being written to the database.\n\nFor unordered insertions, if the error stems from faulty documents\nthe insertion proceeds until exhausting the input documents: then,\nan exception is raised -- and all insertable documents will have been\nwritten to the database, including those \"after\" the troublesome ones.\n\nIf, on the other hand, there are errors not related to individual\ndocuments (such as a network connectivity error), the whole\n`insert_many` operation will stop in mid-way, an exception will be raised,\nand only a certain amount of the input documents will\nhave made their way to the database.", "gathered_types": ["astrapy.constants.VectorType", "astrapy.constants.DocumentType", "astrapy.results.InsertManyResult"]}} +{"id": "astrapy.collection.AsyncCollection.find", "text": "Find documents on the collection, matching a certain provided filter.\n\nThe method returns a Cursor that can then be iterated over. Depending\non the method call pattern, the iteration over all documents can reflect\ncollection mutations occurred since the `find` method was called, or not.\nIn cases where the cursor reflects mutations in real-time, it will iterate\nover cursors in an approximate way (i.e. 
exhibiting occasional skipped\nor duplicate documents). This happens when making use of the `sort`\noption in a non-vector-search manner.", "metadata": {"kind": "function", "name": "find", "path": "astrapy.collection.AsyncCollection.find", "parameters": [{"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "skip", "type": "int | None", "description": "with this integer parameter, what would be the first `skip`\ndocuments returned by the query are discarded, and the results\nstart from the (skip+1)-th document.\nThis parameter can be used only in 
conjunction with an explicit\n`sort` criterion of the ascending/descending type (i.e. it cannot\nbe used when not sorting, nor with vector-based ANN search).", "value": "None", "default": "None"}, {"name": "limit", "type": "int | None", "description": "this (integer) parameter sets a limit over how many documents\nare returned. Once `limit` is reached (or the cursor is exhausted\nfor lack of matching documents), nothing more is returned.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to perform vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search).\nWhen running similarity search on a collection, no other sorting\ncriteria can be specified. Moreover, there is an upper bound\nto the number of documents that can be returned. For details,\nsee the Note about upper bounds and the Data API documentation.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nThis can be supplied in (exclusive) alternative to `vector`,\nprovided such a service is configured for the collection,\nand achieves the same effect.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool | None", "description": "a boolean to request the numeric value of the\nsimilarity to be returned as an added \"$similarity\" key in each\nreturned document. 
Can only be used for vector ANN search, i.e.\nwhen either `vector` is supplied or the `sort` parameter has the\nshape {\"$vector\": ...}.", "value": "None", "default": "None"}, {"name": "include_sort_vector", "type": "bool | None", "description": "a boolean to request query vector used in this search.\nIf set to True (and if the invocation is a vector search), calling\nthe `get_sort_vector` method on the returned cursor will yield\nthe vector used for the ANN search.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the order\nthe documents are returned. See the Note about sorting, as well as\nthe one about upper bounds, for details.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for each single one\nof the underlying HTTP requests used to fetch documents as the\ncursor is iterated over.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncCursor", "description": "an AsyncCursor object representing iterations over the matching documents"}, {"type": "AsyncCursor", "description": "(see the AsyncCursor object for how to use it. The simplest thing is to"}, {"type": "AsyncCursor", "description": "run a for loop: `for document in collection.sort(...):`)."}], "example": ">>> async def run_finds(acol: AsyncCollection) -> None:\n... filter = {\"seq\": {\"$exists\": True}}\n... print(\"find results 1:\")\n... async for doc in acol.find(filter, projection={\"seq\": True}, limit=5):\n... print(doc[\"seq\"])\n... async_cursor1 = acol.find(\n... {},\n... limit=4,\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... )\n... ids = [doc[\"_id\"] async for doc in async_cursor1]\n... print(\"find results 2:\", ids)\n... 
async_cursor2 = acol.find({}, limit=3)\n... seqs = await async_cursor2.distinct(\"seq\")\n... print(\"distinct results 3:\", seqs)\n...\n>>> asyncio.run(run_finds(my_async_coll))\nfind results 1:\n48\n35\n7\n11\n13\nfind results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']\ndistinct results 3: [48, 35, 7]/n/n/n>>> async def run_vector_finds(acol: AsyncCollection) -> None:\n... await acol.insert_many([\n... {\"tag\": \"A\", \"$vector\": [4, 5]},\n... {\"tag\": \"B\", \"$vector\": [3, 4]},\n... {\"tag\": \"C\", \"$vector\": [3, 2]},\n... {\"tag\": \"D\", \"$vector\": [4, 1]},\n... {\"tag\": \"E\", \"$vector\": [2, 5]},\n... ])\n... ann_tags = [\n... document[\"tag\"]\n... async for document in acol.find(\n... {},\n... sort={\"$vector\": [3, 3]},\n... limit=3,\n... )\n... ]\n... return ann_tags\n...\n>>> asyncio.run(run_vector_finds(my_async_coll))\n['A', 'B', 'C']\n>>> # (assuming the collection has metric VectorMetric.COSINE)/n/n/n>>> async_cursor = my_async_coll.find(\n... sort={\"$vector\": [3, 3]},\n... limit=3,\n... include_sort_vector=True,\n... 
)\n>>> asyncio.run(async_cursor.get_sort_vector())\n[3.0, 3.0]\n>>> asyncio.run(async_cursor.__anext__())\n{'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}\n>>> asyncio.run(async_cursor.get_sort_vector())\n[3.0, 3.0]", "note": "When not specifying sorting criteria at all (by vector or otherwise),\nthe cursor can scroll through an arbitrary number of documents as\nthe Data API and the client periodically exchange new chunks of documents.\nIt should be noted that the behavior of the cursor in the case documents\nhave been added/removed after the `find` was started depends on database\ninternals and it is not guaranteed, nor excluded, that such \"real-time\"\nchanges in the data would be picked up by the cursor.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.constants.ProjectionType", "astrapy.cursors.AsyncCursor"]}} +{"id": "astrapy.collection.AsyncCollection.find_one", "text": "Run a search, returning the first document in the collection that matches\nprovided filters, if any is found.", "metadata": {"kind": "function", "name": "find_one", "path": "astrapy.collection.AsyncCollection.find_one", "parameters": [{"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to perform vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), extracting the most\nsimilar document in the collection matching the filter.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool | None", "description": "a boolean to request the numeric value of the\nsimilarity to be returned as an added \"$similarity\" key in the\nreturned document. Can only be used for vector ANN search, i.e.\nwhen either `vector` is supplied or the `sort` parameter has the\nshape {\"$vector\": ...}.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the order\nthe documents are returned. See the Note about sorting for details.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "a dictionary expressing the required document, otherwise None."}], "example": ">>> async def demo_find_one(acol: AsyncCollection) -> None:\n.... print(\"Count:\", await acol.count_documents({}, upper_bound=100))\n... result0 = await acol.find_one({})\n... print(\"result0\", result0)\n... result1 = await acol.find_one({\"seq\": 10})\n... print(\"result1\", result1)\n... result2 = await acol.find_one({\"seq\": 1011})\n... print(\"result2\", result2)\n... result3 = await acol.find_one({}, projection={\"seq\": False})\n... print(\"result3\", result3)\n... 
result4 = await acol.find_one(\n... {},\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... )\n... print(\"result4\", result4)\n...\n>>>\n>>> asyncio.run(demo_find_one(my_async_coll))\nCount: 50\nresult0 {'_id': '479c7ce8-...', 'seq': 48}\nresult1 {'_id': '93e992c4-...', 'seq': 10}\nresult2 None\nresult3 {'_id': '479c7ce8-...'}\nresult4 {'_id': 'd656cd9d-...', 'seq': 49}\n\n>>> asyncio.run(my_async_coll.find_one(\n... {},\n... sort={\"$vector\": [1, 0]},\n... projection={\"*\": True},\n... ))\n{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}", "note": "See the `find` method for more details on the accepted parameters\n(whereas `skip` and `limit` are not valid parameters for `find_one`).", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.AsyncCollection.distinct", "text": "Return a list of the unique values of `key` across the documents\nin the collection that match the provided filter.", "metadata": {"kind": "function", "name": "distinct", "path": "astrapy.collection.AsyncCollection.distinct", "parameters": [{"name": "key", "type": "str", "description": "the name of the field whose value is inspected across documents.\nKeys can use dot-notation to descend to deeper document levels.\nExample of acceptable `key` values:\n \"field\"\n \"field.subfield\"\n \"field.3\"\n \"field.3.subfield\"\nIf lists are encountered and no numeric index is specified,\nall items in the list are visited.", "default": null}, {"name": "filter", "type": "FilterType | None", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, with the same meaning as for `find`.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "list[Any]", "description": "a list of all different values for `key` found across the documents"}, {"type": "list[Any]", "description": "that match the filter. The result list has no repeated items."}], "example": ">>> async def run_distinct(acol: AsyncCollection) -> None:\n... await acol.insert_many(\n... [\n... {\"name\": \"Marco\", \"food\": [\"apple\", \"orange\"], \"city\": \"Helsinki\"},\n... {\"name\": \"Emma\", \"food\": {\"likes_fruit\": True, \"allergies\": []}},\n... ]\n... )\n... distinct0 = await acol.distinct(\"name\")\n... print(\"distinct('name')\", distinct0)\n... distinct1 = await acol.distinct(\"city\")\n... print(\"distinct('city')\", distinct1)\n... distinct2 = await acol.distinct(\"food\")\n... print(\"distinct('food')\", distinct2)\n... distinct3 = await acol.distinct(\"food.1\")\n... print(\"distinct('food.1')\", distinct3)\n... distinct4 = await acol.distinct(\"food.allergies\")\n... print(\"distinct('food.allergies')\", distinct4)\n... distinct5 = await acol.distinct(\"food.likes_fruit\")\n... 
print(\"distinct('food.likes_fruit')\", distinct5)\n...\n>>> asyncio.run(run_distinct(my_async_coll))\ndistinct('name') ['Emma', 'Marco']\ndistinct('city') ['Helsinki']\ndistinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']\ndistinct('food.1') ['orange']\ndistinct('food.allergies') []\ndistinct('food.likes_fruit') [True]", "note": "For details on the behaviour of \"distinct\" in conjunction with\nreal-time changes in the collection contents, see the\nNote of the `find` command.", "gathered_types": ["astrapy.constants.FilterType"]}} +{"id": "astrapy.collection.AsyncCollection.count_documents", "text": "Count the documents in the collection matching the specified filter.", "metadata": {"kind": "function", "name": "count_documents", "path": "astrapy.collection.AsyncCollection.count_documents", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "upper_bound", "type": "int", "description": "a required ceiling on the result of the count operation.\nIf the actual number of documents exceeds this value,\nan exception will be raised.\nFurthermore, if the actual number of documents exceeds the maximum\ncount that the Data API can reach (regardless of upper_bound),\nan exception will be raised.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "int", "description": "the exact count of matching documents."}], "example": ">>> async def do_count_docs(acol: AsyncCollection) -> None:\n... 
await acol.insert_many([{\"seq\": i} for i in range(20)])\n... count0 = await acol.count_documents({}, upper_bound=100)\n... print(\"count0\", count0)\n... count1 = await acol.count_documents({\"seq\":{\"$gt\": 15}}, upper_bound=100)\n... print(\"count1\", count1)\n... count2 = await acol.count_documents({}, upper_bound=10)\n... print(\"count2\", count2)\n...\n>>> asyncio.run(do_count_docs(my_async_coll))\ncount0 20\ncount1 4\nTraceback (most recent call last):\n ... ...\nastrapy.exceptions.TooManyDocumentsToCountException", "note": "Count operations are expensive: for this reason, the best practice\nis to provide a reasonable `upper_bound` according to the caller\nexpectations. Moreover, indiscriminate usage of count operations\nfor sizeable amounts of documents (i.e. in the thousands and more)\nis discouraged in favor of alternative application-specific solutions.\nKeep in mind that the Data API has a hard upper limit on the amount\nof documents it will count, and that an exception will be thrown\nby this method if this limit is encountered.", "gathered_types": ["astrapy.constants.FilterType"]}} +{"id": "astrapy.collection.AsyncCollection.estimated_document_count", "text": "Query the API server for an estimate of the document count in the collection.\n\nContrary to `count_documents`, this method has no filtering parameters.", "metadata": {"kind": "function", "name": "estimated_document_count", "path": "astrapy.collection.AsyncCollection.estimated_document_count", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "int", "description": "a server-provided estimate count of the documents in the collection."}], "example": ">>> asyncio.run(my_async_coll.estimated_document_count())\n35700"}} +{"id": "astrapy.collection.AsyncCollection.find_one_and_replace", 
"text": "Find a document on the collection and replace it entirely with a new one,\noptionally inserting a new one if no match is found.\n\nArgs:\n\n filter: a predicate expressed as a dictionary according to the\n Data API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\n See the Data API documentation for the full set of operators.\n replacement: the new document to write into the collection.\n projection: it controls which parts of the document are returned.\n It can be an allow-list: `{\"f1\": True, \"f2\": True}`,\n or a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n (except for the `_id` and other special fields, which can be\n associated to both True or False independently of the rest\n of the specification).\n The special star-projections `{\"*\": True}` and `{\"*\": False}`\n have the effect of returning the whole document and `{}` respectively.\n For lists in documents, slice directives can be passed to select\n portions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n `{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n `{\"array\": {\"$slice\": [-4, 2]}}`.\n An iterable over strings will be treated implicitly as an allow-list.\n The default projection (used if this parameter is not passed) does not\n necessarily include \"special\" fields such as `$vector` or `$vectorize`.\n See the Data API documentation for more on projections.\n vector: a suitable vector, i.e. a list of float numbers of the appropriate\n dimensionality, to use vector search (i.e. ANN,\n or \"approximate nearest-neighbours\" search), as the sorting criterion.\n In this way, the matched document (if any) will be the one\n that is most similar to the provided vector.\n *DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\n sort clause dict instead.\n vectorize: a string to be made into a vector to perform vector search.\n Using vectorize assumes a suitable service is configured for the collection.\n *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\n sort clause dict instead.\n sort: with this dictionary parameter one can control the sorting\n order of the documents matching the filter, effectively\n determining what document will come first and hence be the\n replaced one. See the `find` method for more on sorting.\n Vector-based ANN sorting is achieved by providing a \"$vector\"\n or a \"$vectorize\" key in `sort`.\n upsert: this parameter controls the behavior in absence of matches.\n If True, `replacement` is inserted as a new document\n if no matches are found on the collection. If False,\n the operation silently does nothing in case of no matches.\n return_document: a flag controlling what document is returned:\n if set to `ReturnDocument.BEFORE`, or the string \"before\",\n the document found on database is returned; if set to\n `ReturnDocument.AFTER`, or the string \"after\", the new\n document is returned. 
The default is \"before\".\n max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.\n If not passed, the collection-level setting is used instead.", "metadata": {"kind": "function", "name": "find_one_and_replace", "path": "astrapy.collection.AsyncCollection.find_one_and_replace", "parameters": [{"name": "filter", "type": "FilterType"}, {"name": "replacement", "type": "DocumentType"}, {"name": "projection", "default": "None", "type": "ProjectionType | None"}, {"name": "vector", "default": "None", "type": "VectorType | None"}, {"name": "vectorize", "default": "None", "type": "str | None"}, {"name": "sort", "default": "None", "type": "SortType | None"}, {"name": "upsert", "default": "False", "type": "bool"}, {"name": "return_document", "default": "ReturnDocument.BEFORE", "type": "str"}, {"name": "max_time_ms", "default": "None", "type": "int | None"}], "returns": [{"type": "DocumentType | None", "description": "A document, either the one before the replace operation or the"}, {"type": "DocumentType | None", "description": "one after that. Alternatively, the method returns None to represent"}, {"type": "DocumentType | None", "description": "that no matching document was found, or that no replacement"}, {"type": "DocumentType | None", "description": "was inserted (depending on the `return_document` parameter)."}], "example": ">>> async def do_find_one_and_replace(acol: AsyncCollection) -> None:\n... await acol.insert_one({\"_id\": \"rule1\", \"text\": \"all animals are equal\"})\n... result0 = await acol.find_one_and_replace(\n... {\"_id\": \"rule1\"},\n... {\"text\": \"some animals are more equal!\"},\n... )\n... print(\"result0\", result0)\n... result1 = await acol.find_one_and_replace(\n... {\"text\": \"some animals are more equal!\"},\n... {\"text\": \"and the pigs are the rulers\"},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n... print(\"result1\", result1)\n... result2 = await acol.find_one_and_replace(\n... 
{\"_id\": \"rule2\"},\n... {\"text\": \"F=ma^2\"},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n... print(\"result2\", result2)\n... result3 = await acol.find_one_and_replace(\n... {\"_id\": \"rule2\"},\n... {\"text\": \"F=ma\"},\n... upsert=True,\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... projection={\"_id\": False},\n... )\n... print(\"result3\", result3)\n...\n>>> asyncio.run(do_find_one_and_replace(my_async_coll))\nresult0 {'_id': 'rule1', 'text': 'all animals are equal'}\nresult1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}\nresult2 None\nresult3 {'text': 'F=ma'}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.ReturnDocument", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "BEFORE", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.AsyncCollection.replace_one", "text": "Replace a single document on the collection with a new one,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "replace_one", "path": "astrapy.collection.AsyncCollection.replace_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "replacement", "type": "DocumentType", "description": "the new document to write into the collection.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. 
ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, `replacement` is inserted as a new document\nif no matches are found on the collection. If False,\nthe operation silently does nothing in case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the replace operation."}], "example": ">>> async def do_replace_one(acol: AsyncCollection) -> None:\n... await acol.insert_one({\"Marco\": \"Polo\"})\n... result0 = await acol.replace_one(\n... {\"Marco\": {\"$exists\": True}},\n... {\"Buda\": \"Pest\"},\n... )\n... 
print(\"result0.update_info\", result0.update_info)\n... doc1 = await acol.find_one({\"Buda\": \"Pest\"})\n... print(\"doc1\", doc1)\n... result1 = await acol.replace_one(\n... {\"Mirco\": {\"$exists\": True}},\n... {\"Oh\": \"yeah?\"},\n... )\n... print(\"result1.update_info\", result1.update_info)\n... result2 = await acol.replace_one(\n... {\"Mirco\": {\"$exists\": True}},\n... {\"Oh\": \"yeah?\"},\n... upsert=True,\n... )\n... print(\"result2.update_info\", result2.update_info)\n...\n>>> asyncio.run(do_replace_one(my_async_coll))\nresult0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}\ndoc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}\nresult1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}\nresult2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.results.UpdateResult", "astrapy.constants.VectorType"]}} +{"id": "astrapy.collection.AsyncCollection.find_one_and_update", "text": "Find a document on the collection and update it as requested,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "find_one_and_update", "path": "astrapy.collection.AsyncCollection.find_one_and_update", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the document, expressed\nas a dictionary as per Data API syntax. 
Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a new document (resulting from applying the `update`\nto an empty document) is inserted if no matches are found on\nthe collection. If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "return_document", "type": "str", "description": "a flag controlling what document is returned:\nif set to `ReturnDocument.BEFORE`, or the string \"before\",\nthe document found on database is returned; if set to\n`ReturnDocument.AFTER`, or the string \"after\", the new\ndocument is returned. 
The default is \"before\".", "value": "ReturnDocument.BEFORE", "default": "ReturnDocument.BEFORE"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "A document (or a projection thereof, as required), either the one"}, {"type": "DocumentType | None", "description": "before the replace operation or the one after that."}, {"type": "DocumentType | None", "description": "Alternatively, the method returns None to represent"}, {"type": "DocumentType | None", "description": "that no matching document was found, or that no update"}, {"type": "DocumentType | None", "description": "was applied (depending on the `return_document` parameter)."}], "example": ">>> async def do_find_one_and_update(acol: AsyncCollection) -> None:\n... await acol.insert_one({\"Marco\": \"Polo\"})\n... result0 = await acol.find_one_and_update(\n... {\"Marco\": {\"$exists\": True}},\n... {\"$set\": {\"title\": \"Mr.\"}},\n... )\n... print(\"result0\", result0)\n... result1 = await acol.find_one_and_update(\n... {\"title\": \"Mr.\"},\n... {\"$inc\": {\"rank\": 3}},\n... projection=[\"title\", \"rank\"],\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n... print(\"result1\", result1)\n... result2 = await acol.find_one_and_update(\n... {\"name\": \"Johnny\"},\n... {\"$set\": {\"rank\": 0}},\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n... print(\"result2\", result2)\n... result3 = await acol.find_one_and_update(\n... {\"name\": \"Johnny\"},\n... {\"$set\": {\"rank\": 0}},\n... upsert=True,\n... return_document=astrapy.constants.ReturnDocument.AFTER,\n... )\n... 
print(\"result3\", result3)\n...\n>>> asyncio.run(do_find_one_and_update(my_async_coll))\nresult0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}\nresult1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}\nresult2 None\nresult3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.ReturnDocument", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "BEFORE", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.AsyncCollection.update_one", "text": "Update a single document on the collection as requested,\noptionally inserting a new one if no match is found.", "metadata": {"kind": "function", "name": "update_one", "path": "astrapy.collection.AsyncCollection.update_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the document, expressed\nas a dictionary as per Data API syntax. Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a new document (resulting from applying the `update`\nto an empty document) is inserted if no matches are found on\nthe collection. If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the update operation."}], "example": ">>> async def do_update_one(acol: AsyncCollection) -> None:\n... await acol.insert_one({\"Marco\": \"Polo\"})\n... result0 = await acol.update_one(\n... {\"Marco\": {\"$exists\": True}},\n... {\"$inc\": {\"rank\": 3}},\n... )\n... print(\"result0.update_info\", result0.update_info)\n... result1 = await acol.update_one(\n... {\"Mirko\": {\"$exists\": True}},\n... {\"$inc\": {\"rank\": 3}},\n... )\n... 
print(\"result1.update_info\", result1.update_info)\n... result2 = await acol.update_one(\n... {\"Mirko\": {\"$exists\": True}},\n... {\"$inc\": {\"rank\": 3}},\n... upsert=True,\n... )\n... print(\"result2.update_info\", result2.update_info)\n...\n>>> asyncio.run(do_update_one(my_async_coll))\nresult0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})\nresult1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})\nresult2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.results.UpdateResult"]}} +{"id": "astrapy.collection.AsyncCollection.update_many", "text": "Apply an update operations to all documents matching a condition,\noptionally inserting one documents in absence of matches.", "metadata": {"kind": "function", "name": "update_many", "path": "astrapy.collection.AsyncCollection.update_many", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "update", "type": "dict[str, Any]", "description": "the update prescription to apply to the documents, expressed\nas a dictionary as per Data API syntax. 
Examples are:\n {\"$set\": {\"field\": \"value}}\n {\"$inc\": {\"counter\": 10}}\n {\"$unset\": {\"field\": \"\"}}\nSee the Data API documentation for the full syntax.", "default": null}, {"name": "upsert", "type": "bool", "description": "this parameter controls the behavior in absence of matches.\nIf True, a single new document (resulting from applying `update`\nto an empty document) is inserted if no matches are found on\nthe collection. If False, the operation silently does nothing\nin case of no matches.", "value": "False", "default": "False"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nif a large number of document updates is anticipated, it is suggested\nto specify a larger timeout than in most other operations as the\nupdate will span several HTTP calls to the API in sequence.", "value": "None", "default": "None"}], "returns": [{"type": "UpdateResult", "description": "an UpdateResult object summarizing the outcome of the update operation."}], "example": ">>> async def do_update_many(acol: AsyncCollection) -> None:\n... await acol.insert_many([{\"c\": \"red\"}, {\"c\": \"green\"}, {\"c\": \"blue\"}])\n... result0 = await acol.update_many(\n... {\"c\": {\"$ne\": \"green\"}},\n... {\"$set\": {\"nongreen\": True}},\n... )\n... print(\"result0.update_info\", result0.update_info)\n... result1 = await acol.update_many(\n... {\"c\": \"orange\"},\n... {\"$set\": {\"is_also_fruit\": True}},\n... )\n... print(\"result1.update_info\", result1.update_info)\n... result2 = await acol.update_many(\n... {\"c\": \"orange\"},\n... {\"$set\": {\"is_also_fruit\": True}},\n... upsert=True,\n... )\n... 
print(\"result2.update_info\", result2.update_info)\n...\n>>> asyncio.run(do_update_many(my_async_coll))\nresult0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}\nresult1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}\nresult2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}", "note": "Similarly to the case of `find` (see its docstring for more details),\nrunning this command while, at the same time, another process is\ninserting new documents which match the filter of the `update_many`\ncan result in an unpredictable fraction of these documents being updated.\nIn other words, it cannot be easily predicted whether a given\nnewly-inserted document will be picked up by the update_many command or not.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.results.UpdateResult"]}} +{"id": "astrapy.collection.AsyncCollection.find_one_and_delete", "text": "Find a document in the collection and delete it. The deleted document,\nhowever, is the return value of the method.", "metadata": {"kind": "function", "name": "find_one_and_delete", "path": "astrapy.collection.AsyncCollection.find_one_and_delete", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "projection", "type": "ProjectionType | None", "description": "it controls which parts of the document are returned.\nIt can be an allow-list: `{\"f1\": True, \"f2\": True}`,\nor a deny-list: `{\"fx\": False, \"fy\": False}`, but not a mixture\n(except for the `_id` and other special fields, which can be\nassociated to both True or False independently of the rest\nof the specification).\nThe special star-projections `{\"*\": True}` and `{\"*\": False}`\nhave the effect of returning the whole document and `{}` respectively.\nFor lists in documents, slice directives can be passed to select\nportions of the list: for instance, `{\"array\": {\"$slice\": 2}}`,\n`{\"array\": {\"$slice\": -2}}`, `{\"array\": {\"$slice\": [4, 2]}}` or\n`{\"array\": {\"$slice\": [-4, 2]}}`.\nAn iterable over strings will be treated implicitly as an allow-list.\nThe default projection (used if this parameter is not passed) does not\nnecessarily include \"special\" fields such as `$vector` or `$vectorize`.\nSee the Data API documentation for more on projections.", "value": "None", "default": "None"}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). 
Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DocumentType | None", "description": "Either the document (or a projection thereof, as requested), or None"}, {"type": "DocumentType | None", "description": "if no matches were found in the first place."}], "example": ">>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:\n... await acol.insert_many(\n... [\n... {\"species\": \"swan\", \"class\": \"Aves\"},\n... {\"species\": \"frog\", \"class\": \"Amphibia\"},\n... ],\n... )\n... delete_result0 = await acol.find_one_and_delete(\n... {\"species\": {\"$ne\": \"frog\"}},\n... projection=[\"species\"],\n... )\n... print(\"delete_result0\", delete_result0)\n... delete_result1 = await acol.find_one_and_delete(\n... {\"species\": {\"$ne\": \"frog\"}},\n... )\n... 
print(\"delete_result1\", delete_result1)\n...\n>>> asyncio.run(do_find_one_and_delete(my_async_coll))\ndelete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}\ndelete_result1 None", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.DocumentType", "astrapy.constants.VectorType", "astrapy.constants.ProjectionType"]}} +{"id": "astrapy.collection.AsyncCollection.delete_one", "text": "Delete one document matching a provided filter.\nThis method never deletes more than a single document, regardless\nof the number of matches to the provided filters.", "metadata": {"kind": "function", "name": "delete_one", "path": "astrapy.collection.AsyncCollection.delete_one", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.", "default": null}, {"name": "vector", "type": "VectorType | None", "description": "a suitable vector, i.e. a list of float numbers of the appropriate\ndimensionality, to use vector search (i.e. ANN,\nor \"approximate nearest-neighbours\" search), as the sorting criterion.\nIn this way, the matched document (if any) will be the one\nthat is most similar to the provided vector.\n*DEPRECATED* (removal in 2.0). Use a `$vector` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "vectorize", "type": "str | None", "description": "a string to be made into a vector to perform vector search.\nUsing vectorize assumes a suitable service is configured for the collection.\n*DEPRECATED* (removal in 2.0). 
Use a `$vectorize` key in the\nsort clause dict instead.", "value": "None", "default": "None"}, {"name": "sort", "type": "SortType | None", "description": "with this dictionary parameter one can control the sorting\norder of the documents matching the filter, effectively\ndetermining what document will come first and hence be the\nreplaced one. See the `find` method for more on sorting.\nVector-based ANN sorting is achieved by providing a \"$vector\"\nor a \"$vectorize\" key in `sort`.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "DeleteResult", "description": "a DeleteResult object summarizing the outcome of the delete operation."}], "example": ">>> my_coll.insert_many([{\"seq\": 1}, {\"seq\": 0}, {\"seq\": 2}])\nInsertManyResult(...)\n>>> my_coll.delete_one({\"seq\": 1})\nDeleteResult(raw_results=..., deleted_count=1)\n>>> my_coll.distinct(\"seq\")\n[0, 2]\n>>> my_coll.delete_one(\n... {\"seq\": {\"$exists\": True}},\n... sort={\"seq\": astrapy.constants.SortDocuments.DESCENDING},\n... )\nDeleteResult(raw_results=..., deleted_count=1)\n>>> my_coll.distinct(\"seq\")\n[0]\n>>> my_coll.delete_one({\"seq\": 2})\nDeleteResult(raw_results=..., deleted_count=0)", "gathered_types": ["astrapy.constants.FilterType", "astrapy.constants.SortType", "astrapy.constants.VectorType", "astrapy.results.DeleteResult"]}} +{"id": "astrapy.collection.AsyncCollection.delete_many", "text": "Delete all documents matching a provided filter.", "metadata": {"kind": "function", "name": "delete_many", "path": "astrapy.collection.AsyncCollection.delete_many", "parameters": [{"name": "filter", "type": "FilterType", "description": "a predicate expressed as a dictionary according to the\nData API filter syntax. 
Examples are:\n {}\n {\"name\": \"John\"}\n {\"price\": {\"$lt\": 100}}\n {\"$and\": [{\"name\": \"John\"}, {\"price\": {\"$lt\": 100}}]}\nSee the Data API documentation for the full set of operators.\nPassing an empty filter, `{}`, completely erases all contents\nof the collection.", "default": null}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the operation.\nIf not passed, the collection-level setting is used instead:\nkeep in mind that this method entails successive HTTP requests\nto the API, depending on how many documents are to be deleted.\nFor this reason, in most cases it is suggested to relax the\ntimeout compared to other method calls.", "value": "None", "default": "None"}], "returns": [{"type": "DeleteResult", "description": "a DeleteResult object summarizing the outcome of the delete operation."}], "example": ">>> async def do_delete_many(acol: AsyncCollection) -> None:\n... await acol.insert_many([{\"seq\": 1}, {\"seq\": 0}, {\"seq\": 2}])\n... delete_result0 = await acol.delete_many({\"seq\": {\"$lte\": 1}})\n... print(\"delete_result0.deleted_count\", delete_result0.deleted_count)\n... distinct1 = await acol.distinct(\"seq\")\n... print(\"distinct1\", distinct1)\n... delete_result2 = await acol.delete_many({\"seq\": {\"$lte\": 1}})\n... print(\"delete_result2.deleted_count\", delete_result2.deleted_count)\n...\n>>> asyncio.run(do_delete_many(my_async_coll))\ndelete_result0.deleted_count 2\ndistinct1 [2]\ndelete_result2.deleted_count 0", "note": "This operation is in general not atomic. Depending on the amount\nof matching documents, it can keep running (in a blocking way)\nfor a macroscopic time. In that case, new documents that are\nmeanwhile inserted (e.g. 
from another process/application) will be\ndeleted during the execution of this method call until the\ncollection is devoid of matches.\nAn exception is the `filter={}` case, whereby the operation is atomic.", "gathered_types": ["astrapy.constants.FilterType", "astrapy.results.DeleteResult"]}} +{"id": "astrapy.collection.AsyncCollection.delete_all", "text": "Delete all documents in a collection.", "metadata": {"kind": "function", "name": "delete_all", "path": "astrapy.collection.AsyncCollection.delete_all", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary of the form {\"ok\": 1} to signal successful deletion."}], "example": ">>> async def do_delete_all(acol: AsyncCollection) -> None:\n... distinct0 = await acol.distinct(\"seq\")\n... print(\"distinct0\", distinct0)\n... count1 = await acol.count_documents({}, upper_bound=100)\n... print(\"count1\", count1)\n... delete_result2 = await acol.delete_all()\n... print(\"delete_result2\", delete_result2)\n... count3 = await acol.count_documents({}, upper_bound=100)\n... print(\"count3\", count3)\n...\n>>> asyncio.run(do_delete_all(my_async_coll))\ndistinct0 [4, 2, 3, 0, 1]\ncount1 5\ndelete_result2 {'ok': 1}\ncount3 0", "note": "Use with caution."}} +{"id": "astrapy.collection.AsyncCollection.bulk_write", "text": "Execute an arbitrary amount of operations such as inserts, updates, deletes\neither sequentially or concurrently.\n\nThis method does not execute atomically, i.e. 
individual operations are\neach performed in the same way as the corresponding collection method,\nand each one is a different and unrelated database mutation.", "metadata": {"kind": "function", "name": "bulk_write", "path": "astrapy.collection.AsyncCollection.bulk_write", "parameters": [{"name": "requests", "type": "Iterable[AsyncBaseOperation]", "description": "an iterable over concrete subclasses of `BaseOperation`,\nsuch as `AsyncInsertMany` or `AsyncReplaceOne`. Each such object\nrepresents an operation ready to be executed on a collection,\nand is instantiated by passing the same parameters as one\nwould the corresponding collection method.", "default": null}, {"name": "ordered", "type": "bool", "description": "whether to launch the `requests` one after the other or\nin arbitrary order, possibly in a concurrent fashion. For\nperformance reasons, False (default) should be preferred\nwhen compatible with the needs of the application flow.", "value": "False", "default": "False"}, {"name": "concurrency", "type": "int | None", "description": "maximum number of concurrent operations executing at\na given time. It cannot be more than one for ordered bulk writes.", "value": "None", "default": "None"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the whole bulk write.\nRemember that, if the method call times out, then there's no\nguarantee about what portion of the bulk write has been received\nand successfully executed by the Data API.\nIf not passed, the collection-level setting is used instead:\nin most cases, however, one should pass a relaxed timeout\nif longer sequences of operations are to be executed in bulk.", "value": "None", "default": "None"}], "returns": [{"type": "BulkWriteResult", "description": "A single BulkWriteResult summarizing the whole list of requested"}, {"type": "BulkWriteResult", "description": "operations. 
The keys in the map attributes of BulkWriteResult"}, {"type": "BulkWriteResult", "description": "(when present) are the integer indices of the corresponding operation"}, {"type": "BulkWriteResult", "description": "in the `requests` iterable."}], "example": ">>> from astrapy.operations import AsyncInsertMany, AsyncReplaceOne, AsyncOperation\n>>> from astrapy.results import BulkWriteResult\n>>>\n>>> async def do_bulk_write(\n... acol: AsyncCollection,\n... async_operations: List[AsyncOperation],\n... ) -> BulkWriteResult:\n... bw_result = await acol.bulk_write(async_operations)\n... count0 = await acol.count_documents({}, upper_bound=100)\n... print(\"count0\", count0)\n... distinct0 = await acol.distinct(\"replaced\")\n... print(\"distinct0\", distinct0)\n... return bw_result\n...\n>>> op1 = AsyncInsertMany([{\"a\": 1}, {\"a\": 2}])\n>>> op2 = AsyncReplaceOne(\n... {\"z\": 9},\n... replacement={\"z\": 9, \"replaced\": True},\n... upsert=True,\n... )\n>>> result = asyncio.run(do_bulk_write(my_async_coll, [op1, op2]))\ncount0 3\ndistinct0 [True]\n>>> print(\"result\", result)\nresult BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: 'ccd0a800-...'})", "references": ["astrapy.results.BulkWriteResult", "astrapy.operations.AsyncReplaceOne", "astrapy.operations.AsyncInsertMany", "astrapy.operations.AsyncOperation"], "gathered_types": ["astrapy.operations.AsyncBaseOperation", "astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.collection.AsyncCollection.drop", "text": "Drop the collection, i.e. 
delete it from the database along with\nall the documents it contains.", "metadata": {"kind": "function", "name": "drop", "path": "astrapy.collection.AsyncCollection.drop", "parameters": [{"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.\nRemember there is not guarantee that a request that has\ntimed out us not in fact honored.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary of the form {\"ok\": 1} to signal successful deletion."}], "example": ">>> async def drop_and_check(acol: AsyncCollection) -> None:\n... doc0 = await acol.find_one({})\n... print(\"doc0\", doc0)\n... drop_result = await acol.drop()\n... print(\"drop_result\", drop_result)\n... doc1 = await acol.find_one({})\n...\n>>> asyncio.run(drop_and_check(my_async_coll))\ndoc0 {'_id': '...', 'z': -10}\ndrop_result {'ok': 1}\nTraceback (most recent call last):\n ... 
...\nastrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection", "note": "Once the method succeeds, methods on this object can still be invoked:\nhowever, this hardly makes sense as the underlying actual collection\nis no more.\nIt is responsibility of the developer to design a correct flow\nwhich avoids using a deceased collection any further."}} +{"id": "astrapy.collection.AsyncCollection.command", "text": "Send a POST request to the Data API for this collection with\nan arbitrary, caller-provided payload.", "metadata": {"kind": "function", "name": "command", "path": "astrapy.collection.AsyncCollection.command", "parameters": [{"name": "body", "type": "dict[str, Any]", "description": "a JSON-serializable dictionary, the payload of the request.", "default": null}, {"name": "raise_api_errors", "type": "bool", "description": "if True, responses with a nonempty 'errors' field\nresult in an astrapy exception being raised.", "value": "True", "default": "True"}, {"name": "max_time_ms", "type": "int | None", "description": "a timeout, in milliseconds, for the underlying HTTP request.\nIf not passed, the collection-level setting is used instead.", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, Any]", "description": "a dictionary with the response of the HTTP request."}], "example": ">>> asyncio.await(my_async_coll.command({\"countDocuments\": {}}))\n{'status': {'count': 123}}"}} +{"id": "astrapy.exceptions", "text": "", "metadata": {"kind": "module", "name": "exceptions", "path": "astrapy.exceptions", "imports": {"annotations": "__future__.annotations", "time": "time", "dataclass": "dataclasses.dataclass", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "httpx": "httpx", "TimeoutInfo": "astrapy.request_tools.TimeoutInfo", "BulkWriteResult": "astrapy.results.BulkWriteResult", "DeleteResult": "astrapy.results.DeleteResult", "InsertManyResult": "astrapy.results.InsertManyResult", 
"OperationResult": "astrapy.results.OperationResult", "UpdateResult": "astrapy.results.UpdateResult"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.exceptions.DevOpsAPIException", "text": "An exception specific to issuing requests to the DevOps API.", "metadata": {"kind": "class", "name": "DevOpsAPIException", "path": "astrapy.exceptions.DevOpsAPIException", "parameters": [{"name": "text", "default": "''", "type": "str"}], "bases": ["ValueError"], "gathered_types": ["ValueError"], "implemented_by": ["astrapy.exceptions.DevOpsAPIFaultyResponseException", "astrapy.exceptions.DevOpsAPIResponseException", "astrapy.exceptions.DevOpsAPITimeoutException", "astrapy.exceptions.DevOpsAPIHttpException"]}} +{"id": "astrapy.exceptions.DevOpsAPIHttpException", "text": "A request to the DevOps API resulted in an HTTP 4xx or 5xx response.\n\nThough the DevOps API seldom enriches such errors with a response text,\nthis class acts as the DevOps counterpart to DataAPIHttpException\nto facilitate a symmetryc handling of errors at application lebel.", "metadata": {"kind": "class", "name": "DevOpsAPIHttpException", "path": "astrapy.exceptions.DevOpsAPIHttpException", "parameters": [{"name": "text", "type": "str | None"}, {"name": "httpx_error", "type": "httpx.HTTPStatusError"}, {"name": "error_descriptors", "type": "list[DevOpsAPIErrorDescriptor]"}], "bases": ["astrapy.exceptions.DevOpsAPIException", "httpx.HTTPStatusError"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DevOpsAPIErrorDescriptor]", "description": "a list of all DevOpsAPIErrorDescriptor objects\nfound in the response."}], "gathered_types": ["DevOpsAPIErrorDescriptor", "HTTPStatusError", "astrapy.exceptions.DevOpsAPIException"]}} +{"id": "astrapy.exceptions.DevOpsAPIHttpException.text", "text": 
"", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DevOpsAPIHttpException.text", "value": "text: str | None = text"}} +{"id": "astrapy.exceptions.DevOpsAPIHttpException.error_descriptors", "text": "", "metadata": {"kind": "attribute", "name": "error_descriptors", "path": "astrapy.exceptions.DevOpsAPIHttpException.error_descriptors", "value": "error_descriptors: list[DevOpsAPIErrorDescriptor] = error_descriptors", "gathered_types": ["DevOpsAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DevOpsAPIHttpException.httpx_error", "text": "", "metadata": {"kind": "attribute", "name": "httpx_error", "path": "astrapy.exceptions.DevOpsAPIHttpException.httpx_error", "value": "httpx_error = httpx_error"}} +{"id": "astrapy.exceptions.DevOpsAPIHttpException.from_httpx_error", "text": "Parse a httpx status error into this exception.", "metadata": {"kind": "function", "name": "from_httpx_error", "path": "astrapy.exceptions.DevOpsAPIHttpException.from_httpx_error", "parameters": [{"name": "cls"}, {"name": "httpx_error", "type": "httpx.HTTPStatusError"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "DevOpsAPIHttpException"}], "gathered_types": ["astrapy.exceptions.DevOpsAPIHttpException", "HTTPStatusError"]}} +{"id": "astrapy.exceptions.DevOpsAPITimeoutException", "text": "A DevOps API operation timed out.", "metadata": {"kind": "class", "name": "DevOpsAPITimeoutException", "path": "astrapy.exceptions.DevOpsAPITimeoutException", "parameters": [{"name": "text", "type": "str"}, {"name": "timeout_type", "type": "str"}, {"name": "endpoint", "type": "str | None"}, {"name": "raw_payload", "type": "str | None"}], "bases": ["astrapy.exceptions.DevOpsAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a textual description of the error"}, {"name": "timeout_type", "type": "str", "description": "this denotes the phase of the HTTP request when the event\noccurred (\"connect\", \"read\", \"write\", \"pool\") 
or \"generic\" if there is\nnot a specific request associated to the exception."}, {"name": "endpoint", "type": "str | None", "description": "if the timeout is tied to a specific request, this is the\nURL that the request was targeting."}, {"name": "raw_payload", "type": "str | None", "description": "if the timeout is tied to a specific request, this is the\nassociated payload (as a string)."}], "gathered_types": ["astrapy.exceptions.DevOpsAPIException"]}} +{"id": "astrapy.exceptions.DevOpsAPITimeoutException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DevOpsAPITimeoutException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.DevOpsAPITimeoutException.timeout_type", "text": "", "metadata": {"kind": "attribute", "name": "timeout_type", "path": "astrapy.exceptions.DevOpsAPITimeoutException.timeout_type", "value": "timeout_type: str = timeout_type"}} +{"id": "astrapy.exceptions.DevOpsAPITimeoutException.endpoint", "text": "", "metadata": {"kind": "attribute", "name": "endpoint", "path": "astrapy.exceptions.DevOpsAPITimeoutException.endpoint", "value": "endpoint: str | None = endpoint"}} +{"id": "astrapy.exceptions.DevOpsAPITimeoutException.raw_payload", "text": "", "metadata": {"kind": "attribute", "name": "raw_payload", "path": "astrapy.exceptions.DevOpsAPITimeoutException.raw_payload", "value": "raw_payload: str | None = raw_payload"}} +{"id": "astrapy.exceptions.DevOpsAPIErrorDescriptor", "text": "An object representing a single error returned from the DevOps API,\ntypically with an error code and a text message.\n\nA single response from the Devops API may return zero, one or more of these.", "metadata": {"kind": "class", "name": "DevOpsAPIErrorDescriptor", "path": "astrapy.exceptions.DevOpsAPIErrorDescriptor", "parameters": [{"name": "error_dict", "type": "dict[str, Any]"}], "attributes": [{"name": "id", "type": "int | None", "description": "a numeric code as found in the API \"ID\" item."}, 
{"name": "message", "type": "str | None", "description": "the text found in the API \"error\" item."}, {"name": "attributes", "type": "dict[str, Any]", "description": "a dict with any further key-value pairs returned by the API."}]}} +{"id": "astrapy.exceptions.DevOpsAPIErrorDescriptor.id", "text": "", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.exceptions.DevOpsAPIErrorDescriptor.id", "value": "id: int | None = error_dict.get('ID')"}} +{"id": "astrapy.exceptions.DevOpsAPIErrorDescriptor.message", "text": "", "metadata": {"kind": "attribute", "name": "message", "path": "astrapy.exceptions.DevOpsAPIErrorDescriptor.message", "value": "message: str | None = error_dict.get('message')"}} +{"id": "astrapy.exceptions.DevOpsAPIErrorDescriptor.attributes", "text": "", "metadata": {"kind": "attribute", "name": "attributes", "path": "astrapy.exceptions.DevOpsAPIErrorDescriptor.attributes", "value": "attributes: dict[str, Any] = {k: vfor (k, v) in error_dict.items() if k not in {'ID', 'message'}}"}} +{"id": "astrapy.exceptions.DevOpsAPIFaultyResponseException", "text": "The DevOps API response is malformed in that it does not have\nexpected field(s), or they are of the wrong type.", "metadata": {"kind": "class", "name": "DevOpsAPIFaultyResponseException", "path": "astrapy.exceptions.DevOpsAPIFaultyResponseException", "parameters": [{"name": "text", "type": "str"}, {"name": "raw_response", "type": "dict[str, Any] | None"}], "bases": ["astrapy.exceptions.DevOpsAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "raw_response", "type": "dict[str, Any] | None", "description": "the response returned by the API in the form of a dict."}], "gathered_types": ["astrapy.exceptions.DevOpsAPIException"]}} +{"id": "astrapy.exceptions.DevOpsAPIFaultyResponseException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": 
"astrapy.exceptions.DevOpsAPIFaultyResponseException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.DevOpsAPIFaultyResponseException.raw_response", "text": "", "metadata": {"kind": "attribute", "name": "raw_response", "path": "astrapy.exceptions.DevOpsAPIFaultyResponseException.raw_response", "value": "raw_response: dict[str, Any] | None = raw_response"}} +{"id": "astrapy.exceptions.DevOpsAPIResponseException", "text": "A request to the DevOps API returned with a non-success return code\nand one of more errors in the HTTP response.", "metadata": {"kind": "class", "name": "DevOpsAPIResponseException", "path": "astrapy.exceptions.DevOpsAPIResponseException", "parameters": [{"name": "text", "default": "None", "type": "str | None"}, {"name": "command", "default": "None", "type": "dict[str, Any] | None"}, {"name": "error_descriptors", "default": "[]", "type": "list[DevOpsAPIErrorDescriptor]"}], "bases": ["astrapy.exceptions.DevOpsAPIException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "command", "type": "dict[str, Any] | None", "description": "the raw payload that was sent to the DevOps API."}, {"name": "error_descriptors", "type": "list[DevOpsAPIErrorDescriptor]", "description": "a list of all DevOpsAPIErrorDescriptor objects\nreturned by the API in the response."}], "gathered_types": ["DevOpsAPIErrorDescriptor", "astrapy.exceptions.DevOpsAPIException"]}} +{"id": "astrapy.exceptions.DevOpsAPIResponseException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DevOpsAPIResponseException.text", "value": "text: str | None = text"}} +{"id": "astrapy.exceptions.DevOpsAPIResponseException.command", "text": "", "metadata": {"kind": "attribute", "name": "command", "path": "astrapy.exceptions.DevOpsAPIResponseException.command", "value": "command: dict[str, Any] | None = command"}} +{"id": 
"astrapy.exceptions.DevOpsAPIResponseException.error_descriptors", "text": "", "metadata": {"kind": "attribute", "name": "error_descriptors", "path": "astrapy.exceptions.DevOpsAPIResponseException.error_descriptors", "value": "error_descriptors: list[DevOpsAPIErrorDescriptor] = error_descriptors", "gathered_types": ["DevOpsAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DevOpsAPIResponseException.from_response", "text": "Parse a raw response from the API into this exception.", "metadata": {"kind": "function", "name": "from_response", "path": "astrapy.exceptions.DevOpsAPIResponseException.from_response", "parameters": [{"name": "command", "type": "dict[str, Any] | None"}, {"name": "raw_response", "type": "dict[str, Any]"}], "returns": [{"type": "DevOpsAPIResponseException"}], "gathered_types": ["astrapy.exceptions.DevOpsAPIResponseException"]}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor", "text": "An object representing a single error returned from the Data API,\ntypically with an error code and a text message.\nAn API request would return with an HTTP 200 success error code,\nbut contain a nonzero amount of these.\n\nA single response from the Data API may return zero, one or more of these.\nMoreover, some operations, such as an insert_many, may partally succeed\nyet return these errors about the rest of the operation (such as,\nsome of the input documents could not be inserted).", "metadata": {"kind": "class", "name": "DataAPIErrorDescriptor", "path": "astrapy.exceptions.DataAPIErrorDescriptor", "parameters": [{"name": "error_dict", "type": "dict[str, str]"}], "attributes": [{"name": "error_code", "type": "str | None", "description": "a string code as found in the API \"error\" item."}, {"name": "message", "type": "str | None", "description": "the text found in the API \"error\" item."}, {"name": "attributes", "type": "dict[str, Any]", "description": "a dict with any further key-value pairs returned by the API."}]}} +{"id": 
"astrapy.exceptions.DataAPIErrorDescriptor.title", "text": "", "metadata": {"kind": "attribute", "name": "title", "path": "astrapy.exceptions.DataAPIErrorDescriptor.title", "value": "title: str | None = error_dict.get('title')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.error_code", "text": "", "metadata": {"kind": "attribute", "name": "error_code", "path": "astrapy.exceptions.DataAPIErrorDescriptor.error_code", "value": "error_code: str | None = error_dict.get('errorCode')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.message", "text": "", "metadata": {"kind": "attribute", "name": "message", "path": "astrapy.exceptions.DataAPIErrorDescriptor.message", "value": "message: str | None = error_dict.get('message')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.family", "text": "", "metadata": {"kind": "attribute", "name": "family", "path": "astrapy.exceptions.DataAPIErrorDescriptor.family", "value": "family: str | None = error_dict.get('family')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.scope", "text": "", "metadata": {"kind": "attribute", "name": "scope", "path": "astrapy.exceptions.DataAPIErrorDescriptor.scope", "value": "scope: str | None = error_dict.get('scope')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.id", "text": "", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.exceptions.DataAPIErrorDescriptor.id", "value": "id: str | None = error_dict.get('id')"}} +{"id": "astrapy.exceptions.DataAPIErrorDescriptor.attributes", "text": "", "metadata": {"kind": "attribute", "name": "attributes", "path": "astrapy.exceptions.DataAPIErrorDescriptor.attributes", "value": "attributes: dict[str, Any] = {k: vfor (k, v) in error_dict.items() if k not in self._known_dict_fields}", "gathered_types": ["_known_dict_fields"]}} +{"id": "astrapy.exceptions.DataAPIDetailedErrorDescriptor", "text": "An object representing an errorful response from the Data API.\nErrors specific to the Data API (as opposed to e.g. 
network failures)\nwould result in an HTTP 200 success response code but coming with\none or more DataAPIErrorDescriptor objects.\n\nThis object corresponds to one response, and as such its attributes\nare a single request payload, a single response, but a list of\nDataAPIErrorDescriptor instances.", "metadata": {"kind": "class", "name": "DataAPIDetailedErrorDescriptor", "path": "astrapy.exceptions.DataAPIDetailedErrorDescriptor", "parameters": [{"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]"}, {"name": "command", "type": "dict[str, Any] | None"}, {"name": "raw_response", "type": "dict[str, Any]"}], "attributes": [{"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of DataAPIErrorDescriptor objects."}, {"name": "command", "type": "dict[str, Any] | None", "description": "the raw payload of the API request."}, {"name": "raw_response", "type": "dict[str, Any]", "description": "the full API response in the form of a dict."}], "gathered_types": ["DataAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.error_descriptors", "text": "", "metadata": {"kind": "attribute", "name": "error_descriptors", "path": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.error_descriptors", "value": "error_descriptors: list[DataAPIErrorDescriptor]", "gathered_types": ["DataAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.command", "text": "", "metadata": {"kind": "attribute", "name": "command", "path": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.command", "value": "command: dict[str, Any] | None"}} +{"id": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.raw_response", "text": "", "metadata": {"kind": "attribute", "name": "raw_response", "path": "astrapy.exceptions.DataAPIDetailedErrorDescriptor.raw_response", "value": "raw_response: dict[str, Any]"}} +{"id": "astrapy.exceptions.DataAPIException", "text": "Any exception occurred while issuing requests to 
the Data API\nand specific to it, such as:\n - a collection is found not to exist when gettings its metadata,\n - the API return a response with an error,\nbut not, for instance,\n - a network error while sending an HTTP request to the API.", "metadata": {"kind": "class", "name": "DataAPIException", "path": "astrapy.exceptions.DataAPIException", "bases": ["ValueError"], "gathered_types": ["ValueError"]}} +{"id": "astrapy.exceptions.DataAPIHttpException", "text": "A request to the Data API resulted in an HTTP 4xx or 5xx response.\n\nIn most cases this comes with additional information: the purpose\nof this class is to present such information in a structured way,\nakin to what happens for the DataAPIResponseException, while\nstill raising (a subclass of) `httpx.HTTPStatusError`.", "metadata": {"kind": "class", "name": "DataAPIHttpException", "path": "astrapy.exceptions.DataAPIHttpException", "parameters": [{"name": "text", "type": "str | None"}, {"name": "httpx_error", "type": "httpx.HTTPStatusError"}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]"}], "bases": ["DataAPIException", "httpx.HTTPStatusError"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound in the response."}], "gathered_types": ["HTTPStatusError", "DataAPIErrorDescriptor", "DataAPIException"]}} +{"id": "astrapy.exceptions.DataAPIHttpException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DataAPIHttpException.text", "value": "text: str | None = text"}} +{"id": "astrapy.exceptions.DataAPIHttpException.error_descriptors", "text": "", "metadata": {"kind": "attribute", "name": "error_descriptors", "path": "astrapy.exceptions.DataAPIHttpException.error_descriptors", "value": "error_descriptors: list[DataAPIErrorDescriptor] = 
error_descriptors", "gathered_types": ["DataAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DataAPIHttpException.httpx_error", "text": "", "metadata": {"kind": "attribute", "name": "httpx_error", "path": "astrapy.exceptions.DataAPIHttpException.httpx_error", "value": "httpx_error = httpx_error"}} +{"id": "astrapy.exceptions.DataAPIHttpException.from_httpx_error", "text": "Parse a httpx status error into this exception.", "metadata": {"kind": "function", "name": "from_httpx_error", "path": "astrapy.exceptions.DataAPIHttpException.from_httpx_error", "parameters": [{"name": "cls"}, {"name": "httpx_error", "type": "httpx.HTTPStatusError"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "DataAPIHttpException"}], "gathered_types": ["HTTPStatusError", "astrapy.exceptions.DataAPIHttpException"]}} +{"id": "astrapy.exceptions.DataAPITimeoutException", "text": "A Data API operation timed out. This can be a request timeout occurring\nduring a specific HTTP request, or can happen over the course of a method\ninvolving several requests in a row, such as a paginated find.", "metadata": {"kind": "class", "name": "DataAPITimeoutException", "path": "astrapy.exceptions.DataAPITimeoutException", "parameters": [{"name": "text", "type": "str"}, {"name": "timeout_type", "type": "str"}, {"name": "endpoint", "type": "str | None"}, {"name": "raw_payload", "type": "str | None"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a textual description of the error"}, {"name": "timeout_type", "type": "str", "description": "this denotes the phase of the HTTP request when the event\noccurred (\"connect\", \"read\", \"write\", \"pool\") or \"generic\" if there is\nnot a specific request associated to the exception."}, {"name": "endpoint", "type": "str | None", "description": "if the timeout is tied to a specific request, this is the\nURL that the request was targeting."}, {"name": "raw_payload", "type": "str | None", 
"description": "if the timeout is tied to a specific request, this is the\nassociated payload (as a string)."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.DataAPITimeoutException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DataAPITimeoutException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.DataAPITimeoutException.timeout_type", "text": "", "metadata": {"kind": "attribute", "name": "timeout_type", "path": "astrapy.exceptions.DataAPITimeoutException.timeout_type", "value": "timeout_type: str = timeout_type"}} +{"id": "astrapy.exceptions.DataAPITimeoutException.endpoint", "text": "", "metadata": {"kind": "attribute", "name": "endpoint", "path": "astrapy.exceptions.DataAPITimeoutException.endpoint", "value": "endpoint: str | None = endpoint"}} +{"id": "astrapy.exceptions.DataAPITimeoutException.raw_payload", "text": "", "metadata": {"kind": "attribute", "name": "raw_payload", "path": "astrapy.exceptions.DataAPITimeoutException.raw_payload", "value": "raw_payload: str | None = raw_payload"}} +{"id": "astrapy.exceptions.CursorIsStartedException", "text": "The cursor operation cannot be invoked if a cursor is not in its pristine\nstate (i.e. is already being consumed or is exhausted altogether).", "metadata": {"kind": "class", "name": "CursorIsStartedException", "path": "astrapy.exceptions.CursorIsStartedException", "parameters": [{"name": "text", "type": "str"}, {"name": "cursor_state", "type": "str"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "cursor_state", "type": "str", "description": "a string description of the current state\nof the cursor. 
See the documentation for Cursor."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.CursorIsStartedException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.CursorIsStartedException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.CursorIsStartedException.cursor_state", "text": "", "metadata": {"kind": "attribute", "name": "cursor_state", "path": "astrapy.exceptions.CursorIsStartedException.cursor_state", "value": "cursor_state: str = cursor_state"}} +{"id": "astrapy.exceptions.CollectionNotFoundException", "text": "A collection is found non-existing and the requested operation\ncannot be performed.", "metadata": {"kind": "class", "name": "CollectionNotFoundException", "path": "astrapy.exceptions.CollectionNotFoundException", "parameters": [{"name": "text", "type": "str"}, {"name": "keyspace", "type": "str"}, {"name": "collection_name", "type": "str"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "keyspace", "type": "str", "description": "the keyspace where the collection was supposed to be."}, {"name": "namespace", "type": "str", "description": "an alias for 'keyspace'. 
*DEPRECATED*, removal in 2.0"}, {"name": "collection_name", "type": "str", "description": "the name of the expected collection."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.CollectionNotFoundException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.CollectionNotFoundException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.CollectionNotFoundException.keyspace", "text": "", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.exceptions.CollectionNotFoundException.keyspace", "value": "keyspace: str = keyspace"}} +{"id": "astrapy.exceptions.CollectionNotFoundException.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.exceptions.CollectionNotFoundException.namespace", "value": "namespace: str = keyspace"}} +{"id": "astrapy.exceptions.CollectionNotFoundException.collection_name", "text": "", "metadata": {"kind": "attribute", "name": "collection_name", "path": "astrapy.exceptions.CollectionNotFoundException.collection_name", "value": "collection_name: str = collection_name"}} +{"id": "astrapy.exceptions.CollectionAlreadyExistsException", "text": "An operation expected a collection not to exist, yet it has\nbeen detected as pre-existing.", "metadata": {"kind": "class", "name": "CollectionAlreadyExistsException", "path": "astrapy.exceptions.CollectionAlreadyExistsException", "parameters": [{"name": "text", "type": "str"}, {"name": "keyspace", "type": "str"}, {"name": "collection_name", "type": "str"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "keyspace", "type": "str", "description": "the keyspace where the collection was expected not to exist."}, {"name": "namespace", "type": "str", "description": "an alias for 'keyspace'. 
*DEPRECATED*, removal in 2.0"}, {"name": "collection_name", "type": "str", "description": "the name of the collection."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.CollectionAlreadyExistsException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.CollectionAlreadyExistsException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.CollectionAlreadyExistsException.keyspace", "text": "", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.exceptions.CollectionAlreadyExistsException.keyspace", "value": "keyspace: str = keyspace"}} +{"id": "astrapy.exceptions.CollectionAlreadyExistsException.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.exceptions.CollectionAlreadyExistsException.namespace", "value": "namespace: str = keyspace"}} +{"id": "astrapy.exceptions.CollectionAlreadyExistsException.collection_name", "text": "", "metadata": {"kind": "attribute", "name": "collection_name", "path": "astrapy.exceptions.CollectionAlreadyExistsException.collection_name", "value": "collection_name: str = collection_name"}} +{"id": "astrapy.exceptions.TooManyDocumentsToCountException", "text": "A `count_documents()` operation failed because the resulting number of documents\nexceeded either the upper bound set by the caller or the hard limit imposed\nby the Data API.", "metadata": {"kind": "class", "name": "TooManyDocumentsToCountException", "path": "astrapy.exceptions.TooManyDocumentsToCountException", "parameters": [{"name": "text", "type": "str"}, {"name": "server_max_count_exceeded", "type": "bool"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "server_max_count_exceeded", "type": "bool", "description": "True if the count limit imposed by the API\nis reached. 
In that case, increasing the upper bound in the method\ninvocation is of no help."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.TooManyDocumentsToCountException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.TooManyDocumentsToCountException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.TooManyDocumentsToCountException.server_max_count_exceeded", "text": "", "metadata": {"kind": "attribute", "name": "server_max_count_exceeded", "path": "astrapy.exceptions.TooManyDocumentsToCountException.server_max_count_exceeded", "value": "server_max_count_exceeded: bool = server_max_count_exceeded"}} +{"id": "astrapy.exceptions.DataAPIFaultyResponseException", "text": "The Data API response is malformed in that it does not have\nexpected field(s), or they are of the wrong type.", "metadata": {"kind": "class", "name": "DataAPIFaultyResponseException", "path": "astrapy.exceptions.DataAPIFaultyResponseException", "parameters": [{"name": "text", "type": "str"}, {"name": "raw_response", "type": "dict[str, Any] | None"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str", "description": "a text message about the exception."}, {"name": "raw_response", "type": "dict[str, Any] | None", "description": "the response returned by the API in the form of a dict."}], "gathered_types": ["DataAPIException"]}} +{"id": "astrapy.exceptions.DataAPIFaultyResponseException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DataAPIFaultyResponseException.text", "value": "text: str = text"}} +{"id": "astrapy.exceptions.DataAPIFaultyResponseException.raw_response", "text": "", "metadata": {"kind": "attribute", "name": "raw_response", "path": "astrapy.exceptions.DataAPIFaultyResponseException.raw_response", "value": "raw_response: dict[str, Any] | None = raw_response"}} +{"id": "astrapy.exceptions.DataAPIResponseException", "text": "The Data 
API returned an HTTP 200 success response, which however\nreports about API-specific error(s), possibly alongside partial successes.\n\nThis exception is related to an operation that can have spanned several\nHTTP requests in sequence (e.g. a chunked insert_many). For this\nreason, it should be not thought as being in a 1:1 relation with\nactual API requests, rather with operations invoked by the user,\nsuch as the methods of the Collection object.", "metadata": {"kind": "class", "name": "DataAPIResponseException", "path": "astrapy.exceptions.DataAPIResponseException", "parameters": [{"name": "text", "type": "str | None"}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]"}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]"}], "bases": ["DataAPIException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in this exception, which are\npossibly more than one."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during this operation.\nFor single-request methods, such as insert_one, this list always\nhas a single element."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "DataAPIErrorDescriptor", "DataAPIException"], "implemented_by": ["astrapy.exceptions.CumulativeOperationException", "astrapy.exceptions.BulkWriteException"]}} +{"id": "astrapy.exceptions.DataAPIResponseException.text", "text": "", "metadata": {"kind": "attribute", "name": "text", "path": "astrapy.exceptions.DataAPIResponseException.text", "value": "text: str | None = text"}} +{"id": "astrapy.exceptions.DataAPIResponseException.error_descriptors", "text": "", "metadata": 
{"kind": "attribute", "name": "error_descriptors", "path": "astrapy.exceptions.DataAPIResponseException.error_descriptors", "value": "error_descriptors: list[DataAPIErrorDescriptor] = error_descriptors", "gathered_types": ["DataAPIErrorDescriptor"]}} +{"id": "astrapy.exceptions.DataAPIResponseException.detailed_error_descriptors", "text": "", "metadata": {"kind": "attribute", "name": "detailed_error_descriptors", "path": "astrapy.exceptions.DataAPIResponseException.detailed_error_descriptors", "value": "detailed_error_descriptors: list[DataAPIDetailedErrorDescriptor] = detailed_error_descriptors", "gathered_types": ["DataAPIDetailedErrorDescriptor"]}} +{"id": "astrapy.exceptions.DataAPIResponseException.from_response", "text": "Parse a raw response from the API into this exception.", "metadata": {"kind": "function", "name": "from_response", "path": "astrapy.exceptions.DataAPIResponseException.from_response", "parameters": [{"name": "cls"}, {"name": "command", "type": "dict[str, Any] | None"}, {"name": "raw_response", "type": "dict[str, Any]"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "DataAPIResponseException"}], "gathered_types": ["astrapy.exceptions.DataAPIResponseException"]}} +{"id": "astrapy.exceptions.DataAPIResponseException.from_responses", "text": "Parse a list of raw responses from the API into this exception.", "metadata": {"kind": "function", "name": "from_responses", "path": "astrapy.exceptions.DataAPIResponseException.from_responses", "parameters": [{"name": "cls"}, {"name": "commands", "type": "list[dict[str, Any] | None]"}, {"name": "raw_responses", "type": "list[dict[str, Any]]"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "DataAPIResponseException"}], "gathered_types": ["astrapy.exceptions.DataAPIResponseException"]}} +{"id": "astrapy.exceptions.DataAPIResponseException.data_api_response_exception", "text": "Cast the exception, whatever the subclass, into this parent superclass.", 
"metadata": {"kind": "function", "name": "data_api_response_exception", "path": "astrapy.exceptions.DataAPIResponseException.data_api_response_exception", "returns": [{"type": "DataAPIResponseException"}], "gathered_types": ["astrapy.exceptions.DataAPIResponseException"]}} +{"id": "astrapy.exceptions.CumulativeOperationException", "text": "An exception of type DataAPIResponseException (see) occurred\nduring an operation that in general spans several requests.\nAs such, besides information on the error, it may have accumulated\na partial result from past successful Data API requests.", "metadata": {"kind": "class", "name": "CumulativeOperationException", "path": "astrapy.exceptions.CumulativeOperationException", "parameters": [{"name": "text", "type": "str | None"}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]"}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]"}], "bases": ["astrapy.exceptions.DataAPIResponseException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in this exception, which are\npossibly more than one."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during this operation.\nFor single-request methods, such as insert_one, this list always\nhas a single element."}, {"name": "partial_result", "type": "OperationResult", "description": "an OperationResult object, just like the one that would\nbe the return value of the operation, had it succeeded completely."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "astrapy.exceptions.DataAPIResponseException", "DataAPIErrorDescriptor", "astrapy.results.OperationResult"], 
"implemented_by": ["astrapy.exceptions.InsertManyException", "astrapy.exceptions.DeleteManyException", "astrapy.exceptions.UpdateManyException"]}} +{"id": "astrapy.exceptions.CumulativeOperationException.partial_result", "text": "", "metadata": {"kind": "attribute", "name": "partial_result", "path": "astrapy.exceptions.CumulativeOperationException.partial_result", "value": "partial_result: OperationResult", "gathered_types": ["astrapy.results.OperationResult"]}} +{"id": "astrapy.exceptions.InsertManyException", "text": "An exception of type DataAPIResponseException (see) occurred\nduring an insert_many (that in general spans several requests).\nAs such, besides information on the error, it may have accumulated\na partial result from past successful Data API requests.", "metadata": {"kind": "class", "name": "InsertManyException", "path": "astrapy.exceptions.InsertManyException", "parameters": [{"name": "text", "type": "str"}, {"name": "partial_result", "type": "InsertManyResult"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "bases": ["astrapy.exceptions.CumulativeOperationException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in this exception, which are\npossibly more than one."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during this operation.\nFor single-request methods, such as insert_one, this list always\nhas a single element."}, {"name": "partial_result", "type": "InsertManyResult", "description": "an InsertManyResult object, just like the one that would\nbe the return value of the operation, had it succeeded 
completely."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "DataAPIErrorDescriptor", "astrapy.exceptions.CumulativeOperationException", "astrapy.results.InsertManyResult"]}} +{"id": "astrapy.exceptions.InsertManyException.partial_result", "text": "", "metadata": {"kind": "attribute", "name": "partial_result", "path": "astrapy.exceptions.InsertManyException.partial_result", "value": "partial_result: InsertManyResult = partial_result", "gathered_types": ["astrapy.results.InsertManyResult"]}} +{"id": "astrapy.exceptions.DeleteManyException", "text": "An exception of type DataAPIResponseException (see) occurred\nduring a delete_many (that in general spans several requests).\nAs such, besides information on the error, it may have accumulated\na partial result from past successful Data API requests.", "metadata": {"kind": "class", "name": "DeleteManyException", "path": "astrapy.exceptions.DeleteManyException", "parameters": [{"name": "text", "type": "str"}, {"name": "partial_result", "type": "DeleteResult"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "bases": ["astrapy.exceptions.CumulativeOperationException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in this exception, which are\npossibly more than one."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during this operation.\nFor single-request methods, such as insert_one, this list always\nhas a single element."}, {"name": "partial_result", "type": "DeleteResult", "description": "a DeleteResult object, just like the one that would\nbe the return value of the operation, had it 
succeeded completely."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "astrapy.results.DeleteResult", "DataAPIErrorDescriptor", "astrapy.exceptions.CumulativeOperationException"]}} +{"id": "astrapy.exceptions.DeleteManyException.partial_result", "text": "", "metadata": {"kind": "attribute", "name": "partial_result", "path": "astrapy.exceptions.DeleteManyException.partial_result", "value": "partial_result: DeleteResult = partial_result", "gathered_types": ["astrapy.results.DeleteResult"]}} +{"id": "astrapy.exceptions.UpdateManyException", "text": "An exception of type DataAPIResponseException (see) occurred\nduring an update_many (that in general spans several requests).\nAs such, besides information on the error, it may have accumulated\na partial result from past successful Data API requests.", "metadata": {"kind": "class", "name": "UpdateManyException", "path": "astrapy.exceptions.UpdateManyException", "parameters": [{"name": "text", "type": "str"}, {"name": "partial_result", "type": "UpdateResult"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "bases": ["astrapy.exceptions.CumulativeOperationException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in this exception, which are\npossibly more than one."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during this operation.\nFor single-request methods, such as insert_one, this list always\nhas a single element."}, {"name": "partial_result", "type": "UpdateResult", "description": "an UpdateResult object, just like the one that would\nbe the return value of the operation, had it 
succeeded completely."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "astrapy.results.UpdateResult", "DataAPIErrorDescriptor", "astrapy.exceptions.CumulativeOperationException"]}} +{"id": "astrapy.exceptions.UpdateManyException.partial_result", "text": "", "metadata": {"kind": "attribute", "name": "partial_result", "path": "astrapy.exceptions.UpdateManyException.partial_result", "value": "partial_result: UpdateResult = partial_result", "gathered_types": ["astrapy.results.UpdateResult"]}} +{"id": "astrapy.exceptions.BulkWriteException", "text": "An exception of type DataAPIResponseException (see) occurred\nduring a bulk_write of a list of operations.\nAs such, besides information on the error, it may have accumulated\na partial result from past successful operations.", "metadata": {"kind": "class", "name": "BulkWriteException", "path": "astrapy.exceptions.BulkWriteException", "parameters": [{"name": "text", "type": "str | None"}, {"name": "partial_result", "type": "BulkWriteResult"}, {"name": "exceptions", "type": "list[DataAPIResponseException]"}, {"name": "pargs", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "bases": ["astrapy.exceptions.DataAPIResponseException"], "attributes": [{"name": "text", "type": "str | None", "description": "a text message about the exception."}, {"name": "error_descriptors", "type": "list[DataAPIErrorDescriptor]", "description": "a list of all DataAPIErrorDescriptor objects\nfound across all requests involved in the first\noperation that has failed."}, {"name": "detailed_error_descriptors", "type": "list[DataAPIDetailedErrorDescriptor]", "description": "a list of DataAPIDetailedErrorDescriptor\nobjects, one for each of the requests performed during the first operation\nthat has failed."}, {"name": "partial_result", "type": "BulkWriteResult", "description": "a BulkWriteResult object, just like the one that would\nbe the return value of the operation, had it succeeded completely."}, 
{"name": "exceptions", "type": "list[DataAPIResponseException]", "description": "a list of DataAPIResponseException objects, one for each\noperation in the bulk that has failed. This information is made\navailable here since the top-level fields of this error\nonly surface the first such failure that is detected across the bulk.\nIn case of bulk_writes with ordered=True, this trivially contains\na single element, the same described by the top-level fields\ntext, error_descriptors and detailed_error_descriptors."}], "gathered_types": ["DataAPIDetailedErrorDescriptor", "DataAPIErrorDescriptor", "astrapy.exceptions.DataAPIResponseException", "astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.exceptions.BulkWriteException.partial_result", "text": "", "metadata": {"kind": "attribute", "name": "partial_result", "path": "astrapy.exceptions.BulkWriteException.partial_result", "value": "partial_result: BulkWriteResult = partial_result", "gathered_types": ["astrapy.results.BulkWriteResult"]}} +{"id": "astrapy.exceptions.BulkWriteException.exceptions", "text": "", "metadata": {"kind": "attribute", "name": "exceptions", "path": "astrapy.exceptions.BulkWriteException.exceptions", "value": "exceptions: list[DataAPIResponseException] = exceptions", "gathered_types": ["astrapy.exceptions.DataAPIResponseException"]}} +{"id": "astrapy.exceptions.to_dataapi_timeout_exception", "text": "", "metadata": {"kind": "function", "name": "to_dataapi_timeout_exception", "path": "astrapy.exceptions.to_dataapi_timeout_exception", "parameters": [{"name": "httpx_timeout", "type": "httpx.TimeoutException"}], "returns": [{"type": "DataAPITimeoutException"}], "gathered_types": ["astrapy.exceptions.DataAPITimeoutException", "TimeoutException"]}} +{"id": "astrapy.exceptions.to_devopsapi_timeout_exception", "text": "", "metadata": {"kind": "function", "name": "to_devopsapi_timeout_exception", "path": "astrapy.exceptions.to_devopsapi_timeout_exception", "parameters": [{"name": "httpx_timeout", "type": 
"httpx.TimeoutException"}], "returns": [{"type": "DevOpsAPITimeoutException"}], "gathered_types": ["DevOpsAPITimeoutException", "TimeoutException"]}} +{"id": "astrapy.exceptions.base_timeout_info", "text": "", "metadata": {"kind": "function", "name": "base_timeout_info", "path": "astrapy.exceptions.base_timeout_info", "parameters": [{"name": "max_time_ms", "type": "int | None"}], "returns": [{"type": "TimeoutInfo | None"}], "gathered_types": ["astrapy.request_tools.TimeoutInfo"]}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager", "text": "A helper class to keep track of timing and timeouts\nin a multi-call method context.", "metadata": {"kind": "class", "name": "MultiCallTimeoutManager", "path": "astrapy.exceptions.MultiCallTimeoutManager", "parameters": [{"name": "overall_max_time_ms", "type": "int | None", "description": "an optional max duration to track (milliseconds)", "default": null}], "attributes": [{"name": "overall_max_time_ms", "type": "int | None", "description": "an optional max duration to track (milliseconds)"}, {"name": "started_ms", "type": "int", "description": "timestamp of the instance construction (milliseconds)"}, {"name": "deadline_ms", "type": "int | None", "description": "optional deadline in milliseconds (computed by the class)."}]}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.overall_max_time_ms", "text": "", "metadata": {"kind": "attribute", "name": "overall_max_time_ms", "path": "astrapy.exceptions.MultiCallTimeoutManager.overall_max_time_ms", "value": "overall_max_time_ms: int | None = overall_max_time_ms"}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.started_ms", "text": "", "metadata": {"kind": "attribute", "name": "started_ms", "path": "astrapy.exceptions.MultiCallTimeoutManager.started_ms", "value": "started_ms: int = int(time.time() * 1000)"}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.deadline_ms", "text": "", "metadata": {"kind": "attribute", "name": "deadline_ms", "path": 
"astrapy.exceptions.MultiCallTimeoutManager.deadline_ms", "value": "deadline_ms: int | None"}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.dev_ops_api", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_api", "path": "astrapy.exceptions.MultiCallTimeoutManager.dev_ops_api", "value": "dev_ops_api = dev_ops_api"}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.remaining_timeout_ms", "text": "Ensure the deadline, if any, is not yet in the past.\nIf it is, raise an appropriate timeout error.\nIf not, return either None (if no timeout) or the remaining milliseconds.\nFor use within the multi-call method.", "metadata": {"kind": "function", "name": "remaining_timeout_ms", "path": "astrapy.exceptions.MultiCallTimeoutManager.remaining_timeout_ms", "returns": [{"type": "int | None"}]}} +{"id": "astrapy.exceptions.MultiCallTimeoutManager.remaining_timeout_info", "text": "Ensure the deadline, if any, is not yet in the past.\nIf it is, raise an appropriate timeout error.\nIt it is not, or there is no deadline, return a suitable TimeoutInfo\nfor use within the multi-call method.", "metadata": {"kind": "function", "name": "remaining_timeout_info", "path": "astrapy.exceptions.MultiCallTimeoutManager.remaining_timeout_info", "returns": [{"type": "TimeoutInfo | None"}], "gathered_types": ["astrapy.request_tools.TimeoutInfo"]}} +{"id": "astrapy.defaults", "text": "", "metadata": {"kind": "module", "name": "defaults", "path": "astrapy.defaults", "imports": {"annotations": "__future__.annotations"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_PROD", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "value": "DATA_API_ENVIRONMENT_PROD = 'prod'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "text": "", "metadata": 
{"kind": "attribute", "name": "DATA_API_ENVIRONMENT_DEV", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "value": "DATA_API_ENVIRONMENT_DEV = 'dev'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_TEST", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_TEST", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_TEST", "value": "DATA_API_ENVIRONMENT_TEST = 'test'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_DSE", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_DSE", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_DSE", "value": "DATA_API_ENVIRONMENT_DSE = 'dse'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_HCD", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_HCD", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_HCD", "value": "DATA_API_ENVIRONMENT_HCD = 'hcd'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_CASSANDRA", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA", "value": "DATA_API_ENVIRONMENT_CASSANDRA = 'cassandra'"}} +{"id": "astrapy.defaults.DATA_API_ENVIRONMENT_OTHER", "text": "", "metadata": {"kind": "attribute", "name": "DATA_API_ENVIRONMENT_OTHER", "path": "astrapy.defaults.DATA_API_ENVIRONMENT_OTHER", "value": "DATA_API_ENVIRONMENT_OTHER = 'other'"}} +{"id": "astrapy.defaults.DEFAULT_ASTRA_DB_KEYSPACE", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_ASTRA_DB_KEYSPACE", "path": "astrapy.defaults.DEFAULT_ASTRA_DB_KEYSPACE", "value": "DEFAULT_ASTRA_DB_KEYSPACE = 'default_keyspace'"}} +{"id": "astrapy.defaults.API_ENDPOINT_TEMPLATE_ENV_MAP", "text": "", "metadata": {"kind": "attribute", "name": "API_ENDPOINT_TEMPLATE_ENV_MAP", "path": "astrapy.defaults.API_ENDPOINT_TEMPLATE_ENV_MAP", "value": "API_ENDPOINT_TEMPLATE_ENV_MAP = {DATA_API_ENVIRONMENT_PROD: 'https://{database_id}-{region}.apps.astra.datastax.com', DATA_API_ENVIRONMENT_DEV: 
'https://{database_id}-{region}.apps.astra-dev.datastax.com', DATA_API_ENVIRONMENT_TEST: 'https://{database_id}-{region}.apps.astra-test.datastax.com'}", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "astrapy.defaults.DATA_API_ENVIRONMENT_TEST"]}} +{"id": "astrapy.defaults.API_PATH_ENV_MAP", "text": "", "metadata": {"kind": "attribute", "name": "API_PATH_ENV_MAP", "path": "astrapy.defaults.API_PATH_ENV_MAP", "value": "API_PATH_ENV_MAP = {DATA_API_ENVIRONMENT_PROD: '/api/json', DATA_API_ENVIRONMENT_DEV: '/api/json', DATA_API_ENVIRONMENT_TEST: '/api/json', DATA_API_ENVIRONMENT_DSE: '', DATA_API_ENVIRONMENT_HCD: '', DATA_API_ENVIRONMENT_CASSANDRA: '', DATA_API_ENVIRONMENT_OTHER: ''}", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "astrapy.defaults.DATA_API_ENVIRONMENT_TEST", "astrapy.defaults.DATA_API_ENVIRONMENT_DSE", "astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA", "astrapy.defaults.DATA_API_ENVIRONMENT_OTHER", "astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "astrapy.defaults.DATA_API_ENVIRONMENT_HCD"]}} +{"id": "astrapy.defaults.API_VERSION_ENV_MAP", "text": "", "metadata": {"kind": "attribute", "name": "API_VERSION_ENV_MAP", "path": "astrapy.defaults.API_VERSION_ENV_MAP", "value": "API_VERSION_ENV_MAP = {DATA_API_ENVIRONMENT_PROD: '/v1', DATA_API_ENVIRONMENT_DEV: '/v1', DATA_API_ENVIRONMENT_TEST: '/v1', DATA_API_ENVIRONMENT_DSE: 'v1', DATA_API_ENVIRONMENT_HCD: 'v1', DATA_API_ENVIRONMENT_CASSANDRA: 'v1', DATA_API_ENVIRONMENT_OTHER: 'v1'}", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "astrapy.defaults.DATA_API_ENVIRONMENT_TEST", "astrapy.defaults.DATA_API_ENVIRONMENT_DSE", "astrapy.defaults.DATA_API_ENVIRONMENT_CASSANDRA", "astrapy.defaults.DATA_API_ENVIRONMENT_OTHER", "astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "astrapy.defaults.DATA_API_ENVIRONMENT_HCD"]}} +{"id": "astrapy.defaults.DEFAULT_INSERT_MANY_CHUNK_SIZE", "text": "", "metadata": {"kind": "attribute", 
"name": "DEFAULT_INSERT_MANY_CHUNK_SIZE", "path": "astrapy.defaults.DEFAULT_INSERT_MANY_CHUNK_SIZE", "value": "DEFAULT_INSERT_MANY_CHUNK_SIZE = 50"}} +{"id": "astrapy.defaults.DEFAULT_INSERT_MANY_CONCURRENCY", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_INSERT_MANY_CONCURRENCY", "path": "astrapy.defaults.DEFAULT_INSERT_MANY_CONCURRENCY", "value": "DEFAULT_INSERT_MANY_CONCURRENCY = 20"}} +{"id": "astrapy.defaults.DEFAULT_BULK_WRITE_CONCURRENCY", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_BULK_WRITE_CONCURRENCY", "path": "astrapy.defaults.DEFAULT_BULK_WRITE_CONCURRENCY", "value": "DEFAULT_BULK_WRITE_CONCURRENCY = 10"}} +{"id": "astrapy.defaults.DEFAULT_REQUEST_TIMEOUT_MS", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_REQUEST_TIMEOUT_MS", "path": "astrapy.defaults.DEFAULT_REQUEST_TIMEOUT_MS", "value": "DEFAULT_REQUEST_TIMEOUT_MS = 30000"}} +{"id": "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DATA_API_AUTH_HEADER", "path": "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", "value": "DEFAULT_DATA_API_AUTH_HEADER = 'Token'"}} +{"id": "astrapy.defaults.EMBEDDING_HEADER_AWS_ACCESS_ID", "text": "", "metadata": {"kind": "attribute", "name": "EMBEDDING_HEADER_AWS_ACCESS_ID", "path": "astrapy.defaults.EMBEDDING_HEADER_AWS_ACCESS_ID", "value": "EMBEDDING_HEADER_AWS_ACCESS_ID = 'X-Embedding-Access-Id'"}} +{"id": "astrapy.defaults.EMBEDDING_HEADER_AWS_SECRET_ID", "text": "", "metadata": {"kind": "attribute", "name": "EMBEDDING_HEADER_AWS_SECRET_ID", "path": "astrapy.defaults.EMBEDDING_HEADER_AWS_SECRET_ID", "value": "EMBEDDING_HEADER_AWS_SECRET_ID = 'X-Embedding-Secret-Id'"}} +{"id": "astrapy.defaults.EMBEDDING_HEADER_API_KEY", "text": "", "metadata": {"kind": "attribute", "name": "EMBEDDING_HEADER_API_KEY", "path": "astrapy.defaults.EMBEDDING_HEADER_API_KEY", "value": "EMBEDDING_HEADER_API_KEY = 'X-Embedding-Api-Key'"}} +{"id": 
"astrapy.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DEV_OPS_AUTH_HEADER", "path": "astrapy.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "value": "DEFAULT_DEV_OPS_AUTH_HEADER = 'Authorization'"}} +{"id": "astrapy.defaults.DEFAULT_DEV_OPS_AUTH_PREFIX", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DEV_OPS_AUTH_PREFIX", "path": "astrapy.defaults.DEFAULT_DEV_OPS_AUTH_PREFIX", "value": "DEFAULT_DEV_OPS_AUTH_PREFIX = 'Bearer '"}} +{"id": "astrapy.defaults.DEV_OPS_KEYSPACE_POLL_INTERVAL_S", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_KEYSPACE_POLL_INTERVAL_S", "path": "astrapy.defaults.DEV_OPS_KEYSPACE_POLL_INTERVAL_S", "value": "DEV_OPS_KEYSPACE_POLL_INTERVAL_S = 2"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_POLL_INTERVAL_S", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_POLL_INTERVAL_S", "path": "astrapy.defaults.DEV_OPS_DATABASE_POLL_INTERVAL_S", "value": "DEV_OPS_DATABASE_POLL_INTERVAL_S = 15"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_MAINTENANCE", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_MAINTENANCE", "path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_MAINTENANCE", "value": "DEV_OPS_DATABASE_STATUS_MAINTENANCE = 'MAINTENANCE'"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_ACTIVE", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_ACTIVE", "path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_ACTIVE", "value": "DEV_OPS_DATABASE_STATUS_ACTIVE = 'ACTIVE'"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_PENDING", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_PENDING", "path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_PENDING", "value": "DEV_OPS_DATABASE_STATUS_PENDING = 'PENDING'"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_INITIALIZING", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_INITIALIZING", 
"path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_INITIALIZING", "value": "DEV_OPS_DATABASE_STATUS_INITIALIZING = 'INITIALIZING'"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_ERROR", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_ERROR", "path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_ERROR", "value": "DEV_OPS_DATABASE_STATUS_ERROR = 'ERROR'"}} +{"id": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_TERMINATING", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DATABASE_STATUS_TERMINATING", "path": "astrapy.defaults.DEV_OPS_DATABASE_STATUS_TERMINATING", "value": "DEV_OPS_DATABASE_STATUS_TERMINATING = 'TERMINATING'"}} +{"id": "astrapy.defaults.DEV_OPS_URL_ENV_MAP", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_URL_ENV_MAP", "path": "astrapy.defaults.DEV_OPS_URL_ENV_MAP", "value": "DEV_OPS_URL_ENV_MAP = {DATA_API_ENVIRONMENT_PROD: 'https://api.astra.datastax.com', DATA_API_ENVIRONMENT_DEV: 'https://api.dev.cloud.datastax.com', DATA_API_ENVIRONMENT_TEST: 'https://api.test.cloud.datastax.com'}", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "astrapy.defaults.DATA_API_ENVIRONMENT_TEST"]}} +{"id": "astrapy.defaults.DEV_OPS_VERSION_ENV_MAP", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_VERSION_ENV_MAP", "path": "astrapy.defaults.DEV_OPS_VERSION_ENV_MAP", "value": "DEV_OPS_VERSION_ENV_MAP = {DATA_API_ENVIRONMENT_PROD: 'v2', DATA_API_ENVIRONMENT_DEV: 'v2', DATA_API_ENVIRONMENT_TEST: 'v2'}", "gathered_types": ["astrapy.defaults.DATA_API_ENVIRONMENT_PROD", "astrapy.defaults.DATA_API_ENVIRONMENT_DEV", "astrapy.defaults.DATA_API_ENVIRONMENT_TEST"]}} +{"id": "astrapy.defaults.DEV_OPS_RESPONSE_HTTP_ACCEPTED", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_RESPONSE_HTTP_ACCEPTED", "path": "astrapy.defaults.DEV_OPS_RESPONSE_HTTP_ACCEPTED", "value": "DEV_OPS_RESPONSE_HTTP_ACCEPTED = 202"}} +{"id": 
"astrapy.defaults.DEV_OPS_RESPONSE_HTTP_CREATED", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_RESPONSE_HTTP_CREATED", "path": "astrapy.defaults.DEV_OPS_RESPONSE_HTTP_CREATED", "value": "DEV_OPS_RESPONSE_HTTP_CREATED = 201"}} +{"id": "astrapy.defaults.DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE", "text": "", "metadata": {"kind": "attribute", "name": "DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE", "path": "astrapy.defaults.DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE", "value": "DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE = 25"}} +{"id": "astrapy.defaults.SECRETS_REDACT_ENDING", "text": "", "metadata": {"kind": "attribute", "name": "SECRETS_REDACT_ENDING", "path": "astrapy.defaults.SECRETS_REDACT_ENDING", "value": "SECRETS_REDACT_ENDING = '...'"}} +{"id": "astrapy.defaults.SECRETS_REDACT_CHAR", "text": "", "metadata": {"kind": "attribute", "name": "SECRETS_REDACT_CHAR", "path": "astrapy.defaults.SECRETS_REDACT_CHAR", "value": "SECRETS_REDACT_CHAR = '*'"}} +{"id": "astrapy.defaults.SECRETS_REDACT_ENDING_LENGTH", "text": "", "metadata": {"kind": "attribute", "name": "SECRETS_REDACT_ENDING_LENGTH", "path": "astrapy.defaults.SECRETS_REDACT_ENDING_LENGTH", "value": "SECRETS_REDACT_ENDING_LENGTH = 3"}} +{"id": "astrapy.defaults.HEADER_REDACT_PLACEHOLDER", "text": "", "metadata": {"kind": "attribute", "name": "HEADER_REDACT_PLACEHOLDER", "path": "astrapy.defaults.HEADER_REDACT_PLACEHOLDER", "value": "HEADER_REDACT_PLACEHOLDER = '***'"}} +{"id": "astrapy.defaults.DEFAULT_REDACTED_HEADER_NAMES", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_REDACTED_HEADER_NAMES", "path": "astrapy.defaults.DEFAULT_REDACTED_HEADER_NAMES", "value": "DEFAULT_REDACTED_HEADER_NAMES = {DEFAULT_DATA_API_AUTH_HEADER, DEFAULT_DEV_OPS_AUTH_HEADER, EMBEDDING_HEADER_AWS_ACCESS_ID, EMBEDDING_HEADER_AWS_SECRET_ID, EMBEDDING_HEADER_API_KEY}", "gathered_types": ["astrapy.defaults.EMBEDDING_HEADER_API_KEY", "astrapy.defaults.EMBEDDING_HEADER_AWS_SECRET_ID", "astrapy.defaults.DEFAULT_DATA_API_AUTH_HEADER", 
"astrapy.defaults.EMBEDDING_HEADER_AWS_ACCESS_ID", "astrapy.core.defaults.DEFAULT_DEV_OPS_AUTH_HEADER"]}} +{"id": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_METHOD", "text": "", "metadata": {"kind": "attribute", "name": "NAMESPACE_DEPRECATION_NOTICE_METHOD", "path": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_METHOD", "value": "NAMESPACE_DEPRECATION_NOTICE_METHOD = \"The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: `db_admin.findNamespaces` => `db_admin.findKeyspaces`; `collection.namespace` => `collection.keyspace`; `database.list_collections(namespace=...)` => `database.list_collections(keyspace=...)`). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.\""}} +{"id": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT", "text": "", "metadata": {"kind": "attribute", "name": "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT", "path": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT", "value": "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT = 'Parameter `update_db_namespace`'"}} +{"id": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS", "text": "", "metadata": {"kind": "attribute", "name": "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS", "path": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS", "value": "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS = 'Please replace the parameter with `update_db_keyspace`. 
See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.'"}} +{"id": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT", "text": "", "metadata": {"kind": "attribute", "name": "NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT", "path": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT", "value": "NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT = 'Parameter `namespace`'"}} +{"id": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS", "text": "", "metadata": {"kind": "attribute", "name": "NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS", "path": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS", "value": "NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS = 'Please replace the parameter with `keyspace`. See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.'"}} +{"id": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "text": "", "metadata": {"kind": "attribute", "name": "SET_CALLER_DEPRECATION_NOTICE", "path": "astrapy.defaults.SET_CALLER_DEPRECATION_NOTICE", "value": "SET_CALLER_DEPRECATION_NOTICE = 'Please provide the caller(s) at constructor time through the `callers` list parameter.'"}} +{"id": "astrapy.defaults.CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS", "text": "", "metadata": {"kind": "attribute", "name": "CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS", "path": "astrapy.defaults.CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS", "value": "CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS = \"Please pass any caller identities as the `callers` constructor parameter: `callers=[('cname1', 'cversion1'), ('cname2', 'cversion2'), ...]`.\""}} +{"id": "astrapy.ids", "text": "", "metadata": {"kind": "module", "name": "ids", "path": "astrapy.ids", "imports": {"annotations": "__future__.annotations", "UUID": "uuid.UUID", "uuid1": "uuid.uuid1", "uuid3": "uuid.uuid3", "uuid4": "uuid.uuid4", "uuid5": "uuid.uuid5", "ObjectId": 
"bson.objectid.ObjectId", "uuid6": "uuid6.uuid6", "uuid7": "uuid6.uuid7", "uuid8": "uuid6.uuid8"}, "exports": ["uuid6", "uuid4", "uuid1", "uuid7", "UUID", "ObjectId", "uuid3", "uuid8", "uuid5"], "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.user_agents", "text": "", "metadata": {"kind": "module", "name": "user_agents", "path": "astrapy.user_agents", "imports": {"annotations": "__future__.annotations", "Sequence": "typing.Sequence", "__version__": "astrapy.__version__", "CallerType": "astrapy.constants.CallerType"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.user_agents.detect_astrapy_user_agent", "text": "", "metadata": {"kind": "function", "name": "detect_astrapy_user_agent", "path": "astrapy.user_agents.detect_astrapy_user_agent", "returns": [{"type": "CallerType"}], "gathered_types": ["astrapy.constants.CallerType"]}} +{"id": "astrapy.user_agents.compose_user_agent_string", "text": "", "metadata": {"kind": "function", "name": "compose_user_agent_string", "path": "astrapy.user_agents.compose_user_agent_string", "parameters": [{"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}], "returns": [{"type": "str | None"}]}} +{"id": "astrapy.user_agents.compose_full_user_agent", "text": "", "metadata": {"kind": "function", "name": "compose_full_user_agent", "path": "astrapy.user_agents.compose_full_user_agent", "parameters": [{"name": "callers", "type": "Sequence[CallerType]"}], "returns": [{"type": "str | None"}], "gathered_types": ["astrapy.constants.CallerType"]}} +{"id": "astrapy.api_commander", "text": "", "metadata": {"kind": "module", "name": "api_commander", "path": "astrapy.api_commander", "imports": {"annotations": "__future__.annotations", "json": "json", "logging": 
"logging", "TracebackType": "types.TracebackType", "TYPE_CHECKING": "typing.TYPE_CHECKING", "Any": "typing.Any", "Dict": "typing.Dict", "Iterable": "typing.Iterable", "Sequence": "typing.Sequence", "cast": "typing.cast", "httpx": "httpx", "CallerType": "astrapy.constants.CallerType", "DEFAULT_REDACTED_HEADER_NAMES": "astrapy.defaults.DEFAULT_REDACTED_HEADER_NAMES", "DEFAULT_REQUEST_TIMEOUT_MS": "astrapy.defaults.DEFAULT_REQUEST_TIMEOUT_MS", "HEADER_REDACT_PLACEHOLDER": "astrapy.defaults.HEADER_REDACT_PLACEHOLDER", "DataAPIFaultyResponseException": "astrapy.exceptions.DataAPIFaultyResponseException", "DataAPIHttpException": "astrapy.exceptions.DataAPIHttpException", "DataAPIResponseException": "astrapy.exceptions.DataAPIResponseException", "DevOpsAPIFaultyResponseException": "astrapy.exceptions.DevOpsAPIFaultyResponseException", "DevOpsAPIHttpException": "astrapy.exceptions.DevOpsAPIHttpException", "DevOpsAPIResponseException": "astrapy.exceptions.DevOpsAPIResponseException", "to_dataapi_timeout_exception": "astrapy.exceptions.to_dataapi_timeout_exception", "to_devopsapi_timeout_exception": "astrapy.exceptions.to_devopsapi_timeout_exception", "HttpMethod": "astrapy.request_tools.HttpMethod", "log_httpx_request": "astrapy.request_tools.log_httpx_request", "log_httpx_response": "astrapy.request_tools.log_httpx_response", "to_httpx_timeout": "astrapy.request_tools.to_httpx_timeout", "normalize_for_api": "astrapy.transform_payload.normalize_for_api", "restore_from_api": "astrapy.transform_payload.restore_from_api", "compose_full_user_agent": "astrapy.user_agents.compose_full_user_agent", "detect_astrapy_user_agent": "astrapy.user_agents.detect_astrapy_user_agent", "TimeoutInfoWideType": "astrapy.request_tools.TimeoutInfoWideType"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.api_commander.user_agent_astrapy", "text": "", "metadata": {"kind": 
"attribute", "name": "user_agent_astrapy", "path": "astrapy.api_commander.user_agent_astrapy", "value": "user_agent_astrapy = detect_astrapy_user_agent()"}} +{"id": "astrapy.api_commander.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.api_commander.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.api_commander.APICommander", "text": "", "metadata": {"kind": "class", "name": "APICommander", "path": "astrapy.api_commander.APICommander", "parameters": [{"name": "api_endpoint", "type": "str"}, {"name": "path", "type": "str"}, {"name": "headers", "default": "{}", "type": "dict[str, str | None]"}, {"name": "callers", "default": "[]", "type": "Sequence[CallerType]"}, {"name": "redacted_header_names", "default": "DEFAULT_REDACTED_HEADER_NAMES", "type": "Iterable[str]"}, {"name": "dev_ops_api", "default": "False", "type": "bool"}], "gathered_types": ["astrapy.defaults.DEFAULT_REDACTED_HEADER_NAMES", "astrapy.constants.CallerType"]}} +{"id": "astrapy.api_commander.APICommander.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.api_commander.APICommander.client", "value": "client = httpx.Client()", "gathered_types": ["Client"]}} +{"id": "astrapy.api_commander.APICommander.async_client", "text": "", "metadata": {"kind": "attribute", "name": "async_client", "path": "astrapy.api_commander.APICommander.async_client", "value": "async_client = httpx.AsyncClient()", "gathered_types": ["AsyncClient"]}} +{"id": "astrapy.api_commander.APICommander.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.api_commander.APICommander.api_endpoint", "value": "api_endpoint = api_endpoint.rstrip('/')"}} +{"id": "astrapy.api_commander.APICommander.path", "text": "", "metadata": {"kind": "attribute", "name": "path", "path": "astrapy.api_commander.APICommander.path", "value": "path = path.lstrip('/')"}} +{"id": 
"astrapy.api_commander.APICommander.headers", "text": "", "metadata": {"kind": "attribute", "name": "headers", "path": "astrapy.api_commander.APICommander.headers", "value": "headers = headers"}} +{"id": "astrapy.api_commander.APICommander.callers", "text": "", "metadata": {"kind": "attribute", "name": "callers", "path": "astrapy.api_commander.APICommander.callers", "value": "callers = callers"}} +{"id": "astrapy.api_commander.APICommander.redacted_header_names", "text": "", "metadata": {"kind": "attribute", "name": "redacted_header_names", "path": "astrapy.api_commander.APICommander.redacted_header_names", "value": "redacted_header_names = set(redacted_header_names)"}} +{"id": "astrapy.api_commander.APICommander.dev_ops_api", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_api", "path": "astrapy.api_commander.APICommander.dev_ops_api", "value": "dev_ops_api = dev_ops_api"}} +{"id": "astrapy.api_commander.APICommander.caller_header", "text": "", "metadata": {"kind": "attribute", "name": "caller_header", "path": "astrapy.api_commander.APICommander.caller_header", "value": "caller_header: dict[str, str] = {'User-Agent': full_user_agent_string} if full_user_agent_string else {}"}} +{"id": "astrapy.api_commander.APICommander.full_headers", "text": "", "metadata": {"kind": "attribute", "name": "full_headers", "path": "astrapy.api_commander.APICommander.full_headers", "value": "full_headers: dict[str, str] = {None: {k: vfor (k, v) in self.headers.items() if v is not None}, None: self.caller_header, None: {'Content-Type': 'application/json'}}"}} +{"id": "astrapy.api_commander.APICommander.full_path", "text": "", "metadata": {"kind": "attribute", "name": "full_path", "path": "astrapy.api_commander.APICommander.full_path", "value": "full_path = '/'.join([self.api_endpoint, self.path]).rstrip('/')"}} +{"id": "astrapy.api_commander.APICommander.raw_request", "text": "", "metadata": {"kind": "function", "name": "raw_request", "path": 
"astrapy.api_commander.APICommander.raw_request", "parameters": [{"name": "http_method", "default": "HttpMethod.POST", "type": "str"}, {"name": "payload", "default": "None", "type": "dict[str, Any] | None"}, {"name": "additional_path", "default": "None", "type": "str | None"}, {"name": "request_params", "default": "{}", "type": "dict[str, Any]"}, {"name": "raise_api_errors", "default": "True", "type": "bool"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "httpx.Response"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.request_tools.HttpMethod", "POST", "Response"]}} +{"id": "astrapy.api_commander.APICommander.async_raw_request", "text": "", "metadata": {"kind": "function", "name": "async_raw_request", "path": "astrapy.api_commander.APICommander.async_raw_request", "parameters": [{"name": "http_method", "default": "HttpMethod.POST", "type": "str"}, {"name": "payload", "default": "None", "type": "dict[str, Any] | None"}, {"name": "additional_path", "default": "None", "type": "str | None"}, {"name": "request_params", "default": "{}", "type": "dict[str, Any]"}, {"name": "raise_api_errors", "default": "True", "type": "bool"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "httpx.Response"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.request_tools.HttpMethod", "POST", "Response"]}} +{"id": "astrapy.api_commander.APICommander.request", "text": "", "metadata": {"kind": "function", "name": "request", "path": "astrapy.api_commander.APICommander.request", "parameters": [{"name": "http_method", "default": "HttpMethod.POST", "type": "str"}, {"name": "payload", "default": "None", "type": "dict[str, Any] | None"}, {"name": "additional_path", "default": "None", "type": "str | None"}, {"name": "request_params", "default": "{}", "type": "dict[str, Any]"}, {"name": "raise_api_errors", "default": "True", "type": "bool"}, {"name": 
"timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "dict[str, Any]"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.request_tools.HttpMethod", "POST"]}} +{"id": "astrapy.api_commander.APICommander.async_request", "text": "", "metadata": {"kind": "function", "name": "async_request", "path": "astrapy.api_commander.APICommander.async_request", "parameters": [{"name": "http_method", "default": "HttpMethod.POST", "type": "str"}, {"name": "payload", "default": "None", "type": "dict[str, Any] | None"}, {"name": "additional_path", "default": "None", "type": "str | None"}, {"name": "request_params", "default": "{}", "type": "dict[str, Any]"}, {"name": "raise_api_errors", "default": "True", "type": "bool"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "dict[str, Any]"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.request_tools.HttpMethod", "POST"]}} +{"id": "astrapy.info", "text": "", "metadata": {"kind": "module", "name": "info", "path": "astrapy.info", "imports": {"annotations": "__future__.annotations", "warnings": "warnings", "dataclass": "dataclasses.dataclass", "Any": "typing.Any"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.info.DatabaseInfo", "text": "Represents the identifying information for a database,\nincluding the region the connection is established to.", "metadata": {"kind": "class", "name": "DatabaseInfo", "path": "astrapy.info.DatabaseInfo", "parameters": [{"name": "id", "type": "str"}, {"name": "region", "type": "str"}, {"name": "keyspace", "type": "str | None"}, {"name": "namespace", "type": "str | None"}, {"name": "name", "type": "str"}, {"name": "environment", "type": "str"}, {"name": "raw_info", "type": "dict[str, Any] | None"}], "attributes": [{"name": "id", "type": "str", "description": 
"the database ID."}, {"name": "region", "type": "str", "description": "the ID of the region through which the connection to DB is done."}, {"name": "keyspace", "type": "str | None", "description": "the namespace this DB is set to work with. None if not set."}, {"name": "namespace", "type": "str | None", "description": "an alias for 'keyspace'. *DEPRECATED*, removal in 2.0"}, {"name": "name", "type": "str", "description": "the database name. Not necessarily unique: there can be multiple\ndatabases with the same name."}, {"name": "environment", "type": "str", "description": "a label, whose value can be `Environment.PROD`,\nor another value in `Environment.*`."}, {"name": "raw_info", "type": "dict[str, Any] | None", "description": "the full response from the DevOPS API call to get this info."}], "note": "The `raw_info` dictionary usually has a `region` key describing\nthe default region as configured in the database, which does not\nnecessarily (for multi-region databases) match the region through\nwhich the connection is established: the latter is the one specified\nby the \"api endpoint\" used for connecting. In other words, for multi-region\ndatabases it is possible that\n database_info.region != database_info.raw_info[\"region\"]\nConversely, in case of a DatabaseInfo not obtained through a\nconnected database, such as when calling `Admin.list_databases()`,\nall fields except `environment` (e.g. 
keyspace, region, etc)\nare set as found on the DevOps API response directly."}} +{"id": "astrapy.info.DatabaseInfo.id", "text": "", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.info.DatabaseInfo.id", "value": "id: str"}} +{"id": "astrapy.info.DatabaseInfo.region", "text": "", "metadata": {"kind": "attribute", "name": "region", "path": "astrapy.info.DatabaseInfo.region", "value": "region: str"}} +{"id": "astrapy.info.DatabaseInfo.keyspace", "text": "", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.info.DatabaseInfo.keyspace", "value": "keyspace: str | None"}} +{"id": "astrapy.info.DatabaseInfo.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.info.DatabaseInfo.namespace", "value": "namespace: str | None"}} +{"id": "astrapy.info.DatabaseInfo.name", "text": "", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.info.DatabaseInfo.name", "value": "name: str"}} +{"id": "astrapy.info.DatabaseInfo.environment", "text": "", "metadata": {"kind": "attribute", "name": "environment", "path": "astrapy.info.DatabaseInfo.environment", "value": "environment: str"}} +{"id": "astrapy.info.DatabaseInfo.raw_info", "text": "", "metadata": {"kind": "attribute", "name": "raw_info", "path": "astrapy.info.DatabaseInfo.raw_info", "value": "raw_info: dict[str, Any] | None"}} +{"id": "astrapy.info.AdminDatabaseInfo", "text": "Represents the full response from the DevOps API about a database info.\n\nMost attributes just contain the corresponding part of the raw response:\nfor this reason, please consult the DevOps API documentation for details.", "metadata": {"kind": "class", "name": "AdminDatabaseInfo", "path": "astrapy.info.AdminDatabaseInfo", "parameters": [{"name": "info", "type": "DatabaseInfo"}, {"name": "available_actions", "type": "list[str] | None"}, {"name": "cost", "type": "dict[str, Any]"}, {"name": "cqlsh_url", "type": "str"}, {"name": "creation_time", "type": "str"}, 
{"name": "data_endpoint_url", "type": "str"}, {"name": "grafana_url", "type": "str"}, {"name": "graphql_url", "type": "str"}, {"name": "id", "type": "str"}, {"name": "last_usage_time", "type": "str"}, {"name": "metrics", "type": "dict[str, Any]"}, {"name": "observed_status", "type": "str"}, {"name": "org_id", "type": "str"}, {"name": "owner_id", "type": "str"}, {"name": "status", "type": "str"}, {"name": "storage", "type": "dict[str, Any]"}, {"name": "termination_time", "type": "str"}, {"name": "raw_info", "type": "dict[str, Any] | None"}], "attributes": [{"name": "info", "type": "DatabaseInfo", "description": "a DatabaseInfo instance for the underlying database.\nThe DatabaseInfo is a subset of the information described by\nAdminDatabaseInfo - in terms of the DevOps API response,\nit corresponds to just its \"info\" subdictionary."}, {"name": "available_actions", "type": "list[str] | None", "description": "the \"availableActions\" value in the full API response."}, {"name": "cost", "type": "dict[str, Any]", "description": "the \"cost\" value in the full API response."}, {"name": "cqlsh_url", "type": "str", "description": "the \"cqlshUrl\" value in the full API response."}, {"name": "creation_time", "type": "str", "description": "the \"creationTime\" value in the full API response."}, {"name": "data_endpoint_url", "type": "str", "description": "the \"dataEndpointUrl\" value in the full API response."}, {"name": "grafana_url", "type": "str", "description": "the \"grafanaUrl\" value in the full API response."}, {"name": "graphql_url", "type": "str", "description": "the \"graphqlUrl\" value in the full API response."}, {"name": "id", "type": "str", "description": "the \"id\" value in the full API response."}, {"name": "last_usage_time", "type": "str", "description": "the \"lastUsageTime\" value in the full API response."}, {"name": "metrics", "type": "dict[str, Any]", "description": "the \"metrics\" value in the full API response."}, {"name": "observed_status", 
"type": "str", "description": "the \"observedStatus\" value in the full API response."}, {"name": "org_id", "type": "str", "description": "the \"orgId\" value in the full API response."}, {"name": "owner_id", "type": "str", "description": "the \"ownerId\" value in the full API response."}, {"name": "status", "type": "str", "description": "the \"status\" value in the full API response."}, {"name": "storage", "type": "dict[str, Any]", "description": "the \"storage\" value in the full API response."}, {"name": "termination_time", "type": "str", "description": "the \"terminationTime\" value in the full API response."}, {"name": "raw_info", "type": "dict[str, Any] | None", "description": "the full raw response from the DevOps API."}], "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.info.AdminDatabaseInfo.info", "text": "", "metadata": {"kind": "attribute", "name": "info", "path": "astrapy.info.AdminDatabaseInfo.info", "value": "info: DatabaseInfo", "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.info.AdminDatabaseInfo.available_actions", "text": "", "metadata": {"kind": "attribute", "name": "available_actions", "path": "astrapy.info.AdminDatabaseInfo.available_actions", "value": "available_actions: list[str] | None"}} +{"id": "astrapy.info.AdminDatabaseInfo.cost", "text": "", "metadata": {"kind": "attribute", "name": "cost", "path": "astrapy.info.AdminDatabaseInfo.cost", "value": "cost: dict[str, Any]"}} +{"id": "astrapy.info.AdminDatabaseInfo.cqlsh_url", "text": "", "metadata": {"kind": "attribute", "name": "cqlsh_url", "path": "astrapy.info.AdminDatabaseInfo.cqlsh_url", "value": "cqlsh_url: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.creation_time", "text": "", "metadata": {"kind": "attribute", "name": "creation_time", "path": "astrapy.info.AdminDatabaseInfo.creation_time", "value": "creation_time: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.data_endpoint_url", "text": "", "metadata": {"kind": "attribute", "name": 
"data_endpoint_url", "path": "astrapy.info.AdminDatabaseInfo.data_endpoint_url", "value": "data_endpoint_url: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.grafana_url", "text": "", "metadata": {"kind": "attribute", "name": "grafana_url", "path": "astrapy.info.AdminDatabaseInfo.grafana_url", "value": "grafana_url: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.graphql_url", "text": "", "metadata": {"kind": "attribute", "name": "graphql_url", "path": "astrapy.info.AdminDatabaseInfo.graphql_url", "value": "graphql_url: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.id", "text": "", "metadata": {"kind": "attribute", "name": "id", "path": "astrapy.info.AdminDatabaseInfo.id", "value": "id: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.last_usage_time", "text": "", "metadata": {"kind": "attribute", "name": "last_usage_time", "path": "astrapy.info.AdminDatabaseInfo.last_usage_time", "value": "last_usage_time: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.metrics", "text": "", "metadata": {"kind": "attribute", "name": "metrics", "path": "astrapy.info.AdminDatabaseInfo.metrics", "value": "metrics: dict[str, Any]"}} +{"id": "astrapy.info.AdminDatabaseInfo.observed_status", "text": "", "metadata": {"kind": "attribute", "name": "observed_status", "path": "astrapy.info.AdminDatabaseInfo.observed_status", "value": "observed_status: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.org_id", "text": "", "metadata": {"kind": "attribute", "name": "org_id", "path": "astrapy.info.AdminDatabaseInfo.org_id", "value": "org_id: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.owner_id", "text": "", "metadata": {"kind": "attribute", "name": "owner_id", "path": "astrapy.info.AdminDatabaseInfo.owner_id", "value": "owner_id: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.status", "text": "", "metadata": {"kind": "attribute", "name": "status", "path": "astrapy.info.AdminDatabaseInfo.status", "value": "status: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.storage", "text": "", "metadata": 
{"kind": "attribute", "name": "storage", "path": "astrapy.info.AdminDatabaseInfo.storage", "value": "storage: dict[str, Any]"}} +{"id": "astrapy.info.AdminDatabaseInfo.termination_time", "text": "", "metadata": {"kind": "attribute", "name": "termination_time", "path": "astrapy.info.AdminDatabaseInfo.termination_time", "value": "termination_time: str"}} +{"id": "astrapy.info.AdminDatabaseInfo.raw_info", "text": "", "metadata": {"kind": "attribute", "name": "raw_info", "path": "astrapy.info.AdminDatabaseInfo.raw_info", "value": "raw_info: dict[str, Any] | None"}} +{"id": "astrapy.info.CollectionInfo", "text": "Represents the identifying information for a collection,\nincluding the information about the database the collection belongs to.", "metadata": {"kind": "class", "name": "CollectionInfo", "path": "astrapy.info.CollectionInfo", "parameters": [{"name": "database_info", "type": "DatabaseInfo"}, {"name": "keyspace", "type": "str"}, {"name": "namespace", "type": "str"}, {"name": "name", "type": "str"}, {"name": "full_name", "type": "str"}], "attributes": [{"name": "database_info", "type": "DatabaseInfo", "description": "a DatabaseInfo instance for the underlying database."}, {"name": "keyspace", "type": "str", "description": "the keyspace where the collection is located."}, {"name": "namespace", "type": "str", "description": "an alias for 'keyspace'. *DEPRECATED*, removal in 2.0"}, {"name": "name", "type": "str", "description": "collection name. 
Unique within a keyspace."}, {"name": "full_name", "type": "str", "description": "identifier for the collection within the database,\nin the form \"keyspace.collection_name\"."}], "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.info.CollectionInfo.database_info", "text": "", "metadata": {"kind": "attribute", "name": "database_info", "path": "astrapy.info.CollectionInfo.database_info", "value": "database_info: DatabaseInfo", "gathered_types": ["astrapy.info.DatabaseInfo"]}} +{"id": "astrapy.info.CollectionInfo.keyspace", "text": "", "metadata": {"kind": "attribute", "name": "keyspace", "path": "astrapy.info.CollectionInfo.keyspace", "value": "keyspace: str"}} +{"id": "astrapy.info.CollectionInfo.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.info.CollectionInfo.namespace", "value": "namespace: str"}} +{"id": "astrapy.info.CollectionInfo.name", "text": "", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.info.CollectionInfo.name", "value": "name: str"}} +{"id": "astrapy.info.CollectionInfo.full_name", "text": "", "metadata": {"kind": "attribute", "name": "full_name", "path": "astrapy.info.CollectionInfo.full_name", "value": "full_name: str"}} +{"id": "astrapy.info.CollectionDefaultIDOptions", "text": "The \"defaultId\" component of the collection options.\nSee the Data API specifications for allowed values.", "metadata": {"kind": "class", "name": "CollectionDefaultIDOptions", "path": "astrapy.info.CollectionDefaultIDOptions", "parameters": [{"name": "default_id_type", "type": "str"}], "attributes": [{"name": "default_id_type", "type": "str", "description": "string such as `objectId`, `uuid6` and so on."}]}} +{"id": "astrapy.info.CollectionDefaultIDOptions.default_id_type", "text": "", "metadata": {"kind": "attribute", "name": "default_id_type", "path": "astrapy.info.CollectionDefaultIDOptions.default_id_type", "value": "default_id_type: str"}} +{"id": 
"astrapy.info.CollectionDefaultIDOptions.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.CollectionDefaultIDOptions.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionDefaultIDOptions.from_dict", "text": "Create an instance of CollectionDefaultIDOptions from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.CollectionDefaultIDOptions.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any] | None"}], "returns": [{"type": "CollectionDefaultIDOptions | None"}], "gathered_types": ["CollectionDefaultIDOptions"]}} +{"id": "astrapy.info.CollectionVectorServiceOptions", "text": "The \"vector.service\" component of the collection options.\nSee the Data API specifications for allowed values.", "metadata": {"kind": "class", "name": "CollectionVectorServiceOptions", "path": "astrapy.info.CollectionVectorServiceOptions", "parameters": [{"name": "provider", "type": "str | None"}, {"name": "model_name", "type": "str | None"}, {"name": "authentication", "default": "None", "type": "dict[str, Any] | None"}, {"name": "parameters", "default": "None", "type": "dict[str, Any] | None"}], "attributes": [{"name": "provider", "type": "str | None", "description": "the name of a service provider for embedding calculation."}, {"name": "model_name", "type": "str | None", "description": "the name of a specific model for use by the service."}, {"name": "authentication", "type": "dict[str, Any] | None", "description": "a key-value dictionary for the \"authentication\" specification,\nif any, in the vector service options."}, {"name": "parameters", "type": "dict[str, Any] | None", "description": "a key-value dictionary for the \"parameters\" specification, if any,\nin the vector service options."}]}} +{"id": "astrapy.info.CollectionVectorServiceOptions.provider", "text": "", "metadata": {"kind": 
"attribute", "name": "provider", "path": "astrapy.info.CollectionVectorServiceOptions.provider", "value": "provider: str | None"}} +{"id": "astrapy.info.CollectionVectorServiceOptions.model_name", "text": "", "metadata": {"kind": "attribute", "name": "model_name", "path": "astrapy.info.CollectionVectorServiceOptions.model_name", "value": "model_name: str | None"}} +{"id": "astrapy.info.CollectionVectorServiceOptions.authentication", "text": "", "metadata": {"kind": "attribute", "name": "authentication", "path": "astrapy.info.CollectionVectorServiceOptions.authentication", "value": "authentication: dict[str, Any] | None = None"}} +{"id": "astrapy.info.CollectionVectorServiceOptions.parameters", "text": "", "metadata": {"kind": "attribute", "name": "parameters", "path": "astrapy.info.CollectionVectorServiceOptions.parameters", "value": "parameters: dict[str, Any] | None = None"}} +{"id": "astrapy.info.CollectionVectorServiceOptions.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.CollectionVectorServiceOptions.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionVectorServiceOptions.from_dict", "text": "Create an instance of CollectionVectorServiceOptions from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.CollectionVectorServiceOptions.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any] | None"}], "returns": [{"type": "CollectionVectorServiceOptions | None"}], "gathered_types": ["astrapy.info.CollectionVectorServiceOptions"]}} +{"id": "astrapy.info.CollectionVectorOptions", "text": "The \"vector\" component of the collection options.\nSee the Data API specifications for allowed values.", "metadata": {"kind": "class", "name": "CollectionVectorOptions", "path": "astrapy.info.CollectionVectorOptions", "parameters": [{"name": "dimension", "type": "int | None"}, 
{"name": "metric", "type": "str | None"}, {"name": "service", "type": "CollectionVectorServiceOptions | None"}], "attributes": [{"name": "dimension", "type": "int | None", "description": "an optional positive integer, the dimensionality of the vector space."}, {"name": "metric", "type": "str | None", "description": "an optional metric among `VectorMetric.DOT_PRODUCT`,\n`VectorMetric.EUCLIDEAN` and `VectorMetric.COSINE`."}, {"name": "service", "type": "CollectionVectorServiceOptions | None", "description": "an optional CollectionVectorServiceOptions object in case a\nservice is configured for the collection."}], "gathered_types": ["astrapy.info.CollectionVectorServiceOptions"]}} +{"id": "astrapy.info.CollectionVectorOptions.dimension", "text": "", "metadata": {"kind": "attribute", "name": "dimension", "path": "astrapy.info.CollectionVectorOptions.dimension", "value": "dimension: int | None"}} +{"id": "astrapy.info.CollectionVectorOptions.metric", "text": "", "metadata": {"kind": "attribute", "name": "metric", "path": "astrapy.info.CollectionVectorOptions.metric", "value": "metric: str | None"}} +{"id": "astrapy.info.CollectionVectorOptions.service", "text": "", "metadata": {"kind": "attribute", "name": "service", "path": "astrapy.info.CollectionVectorOptions.service", "value": "service: CollectionVectorServiceOptions | None", "gathered_types": ["astrapy.info.CollectionVectorServiceOptions"]}} +{"id": "astrapy.info.CollectionVectorOptions.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.CollectionVectorOptions.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionVectorOptions.from_dict", "text": "Create an instance of CollectionVectorOptions from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.CollectionVectorOptions.from_dict", "parameters": [{"name": "raw_dict", "type": 
"dict[str, Any] | None"}], "returns": [{"type": "CollectionVectorOptions | None"}], "gathered_types": ["CollectionVectorOptions"]}} +{"id": "astrapy.info.CollectionOptions", "text": "A structure expressing the options of a collection.\nSee the Data API specifications for detailed specification and allowed values.", "metadata": {"kind": "class", "name": "CollectionOptions", "path": "astrapy.info.CollectionOptions", "parameters": [{"name": "vector", "type": "CollectionVectorOptions | None"}, {"name": "indexing", "type": "dict[str, Any] | None"}, {"name": "default_id", "type": "CollectionDefaultIDOptions | None"}, {"name": "raw_options", "type": "dict[str, Any] | None"}], "attributes": [{"name": "vector", "type": "CollectionVectorOptions | None", "description": "an optional CollectionVectorOptions object."}, {"name": "indexing", "type": "dict[str, Any] | None", "description": "an optional dictionary with the \"indexing\" collection properties."}, {"name": "default_id", "type": "CollectionDefaultIDOptions | None", "description": "an optional CollectionDefaultIDOptions object."}, {"name": "raw_options", "type": "dict[str, Any] | None", "description": "the raw response from the Data API for the collection configuration."}], "gathered_types": ["CollectionVectorOptions", "CollectionDefaultIDOptions"]}} +{"id": "astrapy.info.CollectionOptions.vector", "text": "", "metadata": {"kind": "attribute", "name": "vector", "path": "astrapy.info.CollectionOptions.vector", "value": "vector: CollectionVectorOptions | None", "gathered_types": ["CollectionVectorOptions"]}} +{"id": "astrapy.info.CollectionOptions.indexing", "text": "", "metadata": {"kind": "attribute", "name": "indexing", "path": "astrapy.info.CollectionOptions.indexing", "value": "indexing: dict[str, Any] | None"}} +{"id": "astrapy.info.CollectionOptions.default_id", "text": "", "metadata": {"kind": "attribute", "name": "default_id", "path": "astrapy.info.CollectionOptions.default_id", "value": "default_id: 
CollectionDefaultIDOptions | None", "gathered_types": ["CollectionDefaultIDOptions"]}} +{"id": "astrapy.info.CollectionOptions.raw_options", "text": "", "metadata": {"kind": "attribute", "name": "raw_options", "path": "astrapy.info.CollectionOptions.raw_options", "value": "raw_options: dict[str, Any] | None"}} +{"id": "astrapy.info.CollectionOptions.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.CollectionOptions.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionOptions.flatten", "text": "Recast this object as a flat key-value pair suitable for\nuse as kwargs in a create_collection method call (including recasts).", "metadata": {"kind": "function", "name": "flatten", "path": "astrapy.info.CollectionOptions.flatten", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionOptions.from_dict", "text": "Create an instance of CollectionOptions from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.CollectionOptions.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "CollectionOptions"}], "gathered_types": ["astrapy.info.CollectionOptions"]}} +{"id": "astrapy.info.CollectionDescriptor", "text": "A structure expressing full description of a collection as the Data API\nreturns it, i.e. 
its name and its `options` sub-structure.", "metadata": {"kind": "class", "name": "CollectionDescriptor", "path": "astrapy.info.CollectionDescriptor", "parameters": [{"name": "name", "type": "str"}, {"name": "options", "type": "CollectionOptions"}, {"name": "raw_descriptor", "type": "dict[str, Any] | None"}], "attributes": [{"name": "name", "type": "str", "description": "the name of the collection."}, {"name": "options", "type": "CollectionOptions", "description": "a CollectionOptions instance."}, {"name": "raw_descriptor", "type": "dict[str, Any] | None", "description": "the raw response from the Data API."}], "gathered_types": ["astrapy.info.CollectionOptions"]}} +{"id": "astrapy.info.CollectionDescriptor.name", "text": "", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.info.CollectionDescriptor.name", "value": "name: str"}} +{"id": "astrapy.info.CollectionDescriptor.options", "text": "", "metadata": {"kind": "attribute", "name": "options", "path": "astrapy.info.CollectionDescriptor.options", "value": "options: CollectionOptions", "gathered_types": ["astrapy.info.CollectionOptions"]}} +{"id": "astrapy.info.CollectionDescriptor.raw_descriptor", "text": "", "metadata": {"kind": "attribute", "name": "raw_descriptor", "path": "astrapy.info.CollectionDescriptor.raw_descriptor", "value": "raw_descriptor: dict[str, Any] | None"}} +{"id": "astrapy.info.CollectionDescriptor.as_dict", "text": "Recast this object into a dictionary.\nEmpty `options` will not be returned at all.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.CollectionDescriptor.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.CollectionDescriptor.flatten", "text": "Recast this object as a flat key-value pair suitable for\nuse as kwargs in a create_collection method call (including recasts).", "metadata": {"kind": "function", "name": "flatten", "path": "astrapy.info.CollectionDescriptor.flatten", "returns": [{"type": "dict[str, 
Any]"}]}} +{"id": "astrapy.info.CollectionDescriptor.from_dict", "text": "Create an instance of CollectionDescriptor from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.CollectionDescriptor.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "CollectionDescriptor"}], "gathered_types": ["astrapy.info.CollectionDescriptor"]}} +{"id": "astrapy.info.EmbeddingProviderParameter", "text": "A representation of a parameter as returned by the 'findEmbeddingProviders'\nData API endpoint.", "metadata": {"kind": "class", "name": "EmbeddingProviderParameter", "path": "astrapy.info.EmbeddingProviderParameter", "parameters": [{"name": "default_value", "type": "Any"}, {"name": "display_name", "type": "str | None"}, {"name": "help", "type": "str | None"}, {"name": "hint", "type": "str | None"}, {"name": "name", "type": "str"}, {"name": "required", "type": "bool"}, {"name": "parameter_type", "type": "str"}, {"name": "validation", "type": "dict[str, Any]"}], "attributes": [{"name": "default_value", "type": "Any", "description": "the default value for the parameter."}, {"name": "help", "type": "str | None", "description": "a textual description of the parameter."}, {"name": "name", "type": "str", "description": "the name to use when passing the parameter for vectorize operations."}, {"name": "required", "type": "bool", "description": "whether the parameter is required or not."}, {"name": "parameter_type", "type": "str", "description": "a textual description of the data type for the parameter."}, {"name": "validation", "type": "dict[str, Any]", "description": "a dictionary describing a parameter-specific validation policy."}]}} +{"id": "astrapy.info.EmbeddingProviderParameter.default_value", "text": "", "metadata": {"kind": "attribute", "name": "default_value", "path": "astrapy.info.EmbeddingProviderParameter.default_value", "value": "default_value: Any"}} +{"id": 
"astrapy.info.EmbeddingProviderParameter.display_name", "text": "", "metadata": {"kind": "attribute", "name": "display_name", "path": "astrapy.info.EmbeddingProviderParameter.display_name", "value": "display_name: str | None"}} +{"id": "astrapy.info.EmbeddingProviderParameter.help", "text": "", "metadata": {"kind": "attribute", "name": "help", "path": "astrapy.info.EmbeddingProviderParameter.help", "value": "help: str | None"}} +{"id": "astrapy.info.EmbeddingProviderParameter.hint", "text": "", "metadata": {"kind": "attribute", "name": "hint", "path": "astrapy.info.EmbeddingProviderParameter.hint", "value": "hint: str | None"}} +{"id": "astrapy.info.EmbeddingProviderParameter.name", "text": "", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.info.EmbeddingProviderParameter.name", "value": "name: str"}} +{"id": "astrapy.info.EmbeddingProviderParameter.required", "text": "", "metadata": {"kind": "attribute", "name": "required", "path": "astrapy.info.EmbeddingProviderParameter.required", "value": "required: bool"}} +{"id": "astrapy.info.EmbeddingProviderParameter.parameter_type", "text": "", "metadata": {"kind": "attribute", "name": "parameter_type", "path": "astrapy.info.EmbeddingProviderParameter.parameter_type", "value": "parameter_type: str"}} +{"id": "astrapy.info.EmbeddingProviderParameter.validation", "text": "", "metadata": {"kind": "attribute", "name": "validation", "path": "astrapy.info.EmbeddingProviderParameter.validation", "value": "validation: dict[str, Any]"}} +{"id": "astrapy.info.EmbeddingProviderParameter.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.EmbeddingProviderParameter.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.EmbeddingProviderParameter.from_dict", "text": "Create an instance of EmbeddingProviderParameter from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": 
"from_dict", "path": "astrapy.info.EmbeddingProviderParameter.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "EmbeddingProviderParameter"}], "gathered_types": ["EmbeddingProviderParameter"]}} +{"id": "astrapy.info.EmbeddingProviderModel", "text": "A representation of an embedding model as returned by the 'findEmbeddingProviders'\nData API endpoint.", "metadata": {"kind": "class", "name": "EmbeddingProviderModel", "path": "astrapy.info.EmbeddingProviderModel", "parameters": [{"name": "name", "type": "str"}, {"name": "parameters", "type": "list[EmbeddingProviderParameter]"}, {"name": "vector_dimension", "type": "int | None"}], "attributes": [{"name": "name", "type": "str", "description": "the model name as must be passed when issuing\nvectorize operations to the API."}, {"name": "parameters", "type": "list[EmbeddingProviderParameter]", "description": "a list of the `EmbeddingProviderParameter` objects the model admits."}, {"name": "vector_dimension", "type": "int | None", "description": "an integer for the dimensionality of the embedding model.\nif this is None, the dimension can assume multiple values as specified\nby a corresponding parameter listed with the model."}], "gathered_types": ["EmbeddingProviderParameter"]}} +{"id": "astrapy.info.EmbeddingProviderModel.name", "text": "", "metadata": {"kind": "attribute", "name": "name", "path": "astrapy.info.EmbeddingProviderModel.name", "value": "name: str"}} +{"id": "astrapy.info.EmbeddingProviderModel.parameters", "text": "", "metadata": {"kind": "attribute", "name": "parameters", "path": "astrapy.info.EmbeddingProviderModel.parameters", "value": "parameters: list[EmbeddingProviderParameter]", "gathered_types": ["EmbeddingProviderParameter"]}} +{"id": "astrapy.info.EmbeddingProviderModel.vector_dimension", "text": "", "metadata": {"kind": "attribute", "name": "vector_dimension", "path": "astrapy.info.EmbeddingProviderModel.vector_dimension", "value": "vector_dimension: 
int | None"}} +{"id": "astrapy.info.EmbeddingProviderModel.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.EmbeddingProviderModel.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.EmbeddingProviderModel.from_dict", "text": "Create an instance of EmbeddingProviderModel from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.EmbeddingProviderModel.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "EmbeddingProviderModel"}], "gathered_types": ["EmbeddingProviderModel"]}} +{"id": "astrapy.info.EmbeddingProviderToken", "text": "A representation of a \"token\", that is a specific secret string, needed by\nan embedding model; this models a part of the response from the\n'findEmbeddingProviders' Data API endpoint.", "metadata": {"kind": "class", "name": "EmbeddingProviderToken", "path": "astrapy.info.EmbeddingProviderToken", "parameters": [{"name": "accepted", "type": "str"}, {"name": "forwarded", "type": "str"}], "attributes": [{"name": "accepted", "type": "str", "description": "the name of this \"token\" as seen by the Data API. This is the\nname that should be used in the clients when supplying the secret,\nwhether as header or by shared-secret."}, {"name": "forwarded", "type": "str", "description": "the name used by the API when issuing the embedding request\nto the embedding provider. 
This is of no direct interest for the Data API user."}]}} +{"id": "astrapy.info.EmbeddingProviderToken.accepted", "text": "", "metadata": {"kind": "attribute", "name": "accepted", "path": "astrapy.info.EmbeddingProviderToken.accepted", "value": "accepted: str"}} +{"id": "astrapy.info.EmbeddingProviderToken.forwarded", "text": "", "metadata": {"kind": "attribute", "name": "forwarded", "path": "astrapy.info.EmbeddingProviderToken.forwarded", "value": "forwarded: str"}} +{"id": "astrapy.info.EmbeddingProviderToken.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.EmbeddingProviderToken.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.EmbeddingProviderToken.from_dict", "text": "Create an instance of EmbeddingProviderToken from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.EmbeddingProviderToken.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "EmbeddingProviderToken"}], "gathered_types": ["EmbeddingProviderToken"]}} +{"id": "astrapy.info.EmbeddingProviderAuthentication", "text": "A representation of an authentication mode for using an embedding model,\nmodeling the corresponding part of the response returned by the\n'findEmbeddingProviders' Data API endpoint (namely \"supportedAuthentication\").", "metadata": {"kind": "class", "name": "EmbeddingProviderAuthentication", "path": "astrapy.info.EmbeddingProviderAuthentication", "parameters": [{"name": "enabled", "type": "bool"}, {"name": "tokens", "type": "list[EmbeddingProviderToken]"}], "attributes": [{"name": "enabled", "type": "bool", "description": "whether this authentication mode is available for a given model."}, {"name": "tokens", "type": "list[EmbeddingProviderToken]", "description": "a list of `EmbeddingProviderToken` objects,\ndetailing the secrets required for the authentication 
mode."}], "gathered_types": ["EmbeddingProviderToken"]}} +{"id": "astrapy.info.EmbeddingProviderAuthentication.enabled", "text": "", "metadata": {"kind": "attribute", "name": "enabled", "path": "astrapy.info.EmbeddingProviderAuthentication.enabled", "value": "enabled: bool"}} +{"id": "astrapy.info.EmbeddingProviderAuthentication.tokens", "text": "", "metadata": {"kind": "attribute", "name": "tokens", "path": "astrapy.info.EmbeddingProviderAuthentication.tokens", "value": "tokens: list[EmbeddingProviderToken]", "gathered_types": ["EmbeddingProviderToken"]}} +{"id": "astrapy.info.EmbeddingProviderAuthentication.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.EmbeddingProviderAuthentication.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.EmbeddingProviderAuthentication.from_dict", "text": "Create an instance of EmbeddingProviderAuthentication from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.EmbeddingProviderAuthentication.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "EmbeddingProviderAuthentication"}], "gathered_types": ["EmbeddingProviderAuthentication"]}} +{"id": "astrapy.info.EmbeddingProvider", "text": "A representation of an embedding provider, as returned by the 'findEmbeddingProviders'\nData API endpoint.", "metadata": {"kind": "class", "name": "EmbeddingProvider", "path": "astrapy.info.EmbeddingProvider", "parameters": [{"name": "display_name", "type": "str | None"}, {"name": "models", "type": "list[EmbeddingProviderModel]"}, {"name": "parameters", "type": "list[EmbeddingProviderParameter]"}, {"name": "supported_authentication", "type": "dict[str, EmbeddingProviderAuthentication]"}, {"name": "url", "type": "str | None"}], "attributes": [{"name": "display_name", "type": "str | None", "description": "a version of the 
provider name for display and pretty printing.\nNot to be used when issuing vectorize API requests (for the latter, it is\nthe key in the providers dictionary that is required)."}, {"name": "models", "type": "list[EmbeddingProviderModel]", "description": "a list of `EmbeddingProviderModel` objects pertaining to the provider."}, {"name": "parameters", "type": "list[EmbeddingProviderParameter]", "description": "a list of `EmbeddingProviderParameter` objects common to all models\nfor this provider."}, {"name": "supported_authentication", "type": "dict[str, EmbeddingProviderAuthentication]", "description": "a dictionary of the authentication modes for\nthis provider. Note that disabled modes may still appear in this map,\nalbeit with the `enabled` property set to False."}, {"name": "url", "type": "str | None", "description": "a string template for the URL used by the Data API when issuing the request\ntoward the embedding provider. This is of no direct concern to the Data API user."}], "gathered_types": ["EmbeddingProviderAuthentication", "EmbeddingProviderModel", "EmbeddingProviderParameter"]}} +{"id": "astrapy.info.EmbeddingProvider.display_name", "text": "", "metadata": {"kind": "attribute", "name": "display_name", "path": "astrapy.info.EmbeddingProvider.display_name", "value": "display_name: str | None"}} +{"id": "astrapy.info.EmbeddingProvider.models", "text": "", "metadata": {"kind": "attribute", "name": "models", "path": "astrapy.info.EmbeddingProvider.models", "value": "models: list[EmbeddingProviderModel]", "gathered_types": ["EmbeddingProviderModel"]}} +{"id": "astrapy.info.EmbeddingProvider.parameters", "text": "", "metadata": {"kind": "attribute", "name": "parameters", "path": "astrapy.info.EmbeddingProvider.parameters", "value": "parameters: list[EmbeddingProviderParameter]", "gathered_types": ["EmbeddingProviderParameter"]}} +{"id": "astrapy.info.EmbeddingProvider.supported_authentication", "text": "", "metadata": {"kind": "attribute", "name": 
"supported_authentication", "path": "astrapy.info.EmbeddingProvider.supported_authentication", "value": "supported_authentication: dict[str, EmbeddingProviderAuthentication]", "gathered_types": ["EmbeddingProviderAuthentication"]}} +{"id": "astrapy.info.EmbeddingProvider.url", "text": "", "metadata": {"kind": "attribute", "name": "url", "path": "astrapy.info.EmbeddingProvider.url", "value": "url: str | None"}} +{"id": "astrapy.info.EmbeddingProvider.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.EmbeddingProvider.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.EmbeddingProvider.from_dict", "text": "Create an instance of EmbeddingProvider from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.EmbeddingProvider.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "EmbeddingProvider"}], "gathered_types": ["EmbeddingProvider"]}} +{"id": "astrapy.info.FindEmbeddingProvidersResult", "text": "A representation of the whole response from the 'findEmbeddingProviders'\nData API endpoint.", "metadata": {"kind": "class", "name": "FindEmbeddingProvidersResult", "path": "astrapy.info.FindEmbeddingProvidersResult", "parameters": [{"name": "embedding_providers", "type": "dict[str, EmbeddingProvider]"}, {"name": "raw_info", "type": "dict[str, Any] | None"}], "attributes": [{"name": "embedding_providers", "type": "dict[str, EmbeddingProvider]", "description": "a dictionary of provider names to EmbeddingProvider objects."}, {"name": "raw_info", "type": "dict[str, Any] | None", "description": "a (nested) dictionary containing the original full response from the endpoint."}], "gathered_types": ["EmbeddingProvider"]}} +{"id": "astrapy.info.FindEmbeddingProvidersResult.embedding_providers", "text": "", "metadata": {"kind": "attribute", "name": 
"embedding_providers", "path": "astrapy.info.FindEmbeddingProvidersResult.embedding_providers", "value": "embedding_providers: dict[str, EmbeddingProvider]", "gathered_types": ["EmbeddingProvider"]}} +{"id": "astrapy.info.FindEmbeddingProvidersResult.raw_info", "text": "", "metadata": {"kind": "attribute", "name": "raw_info", "path": "astrapy.info.FindEmbeddingProvidersResult.raw_info", "value": "raw_info: dict[str, Any] | None"}} +{"id": "astrapy.info.FindEmbeddingProvidersResult.as_dict", "text": "Recast this object into a dictionary.", "metadata": {"kind": "function", "name": "as_dict", "path": "astrapy.info.FindEmbeddingProvidersResult.as_dict", "returns": [{"type": "dict[str, Any]"}]}} +{"id": "astrapy.info.FindEmbeddingProvidersResult.from_dict", "text": "Create an instance of FindEmbeddingProvidersResult from a dictionary\nsuch as one from the Data API.", "metadata": {"kind": "function", "name": "from_dict", "path": "astrapy.info.FindEmbeddingProvidersResult.from_dict", "parameters": [{"name": "raw_dict", "type": "dict[str, Any]"}], "returns": [{"type": "FindEmbeddingProvidersResult"}], "gathered_types": ["astrapy.info.FindEmbeddingProvidersResult"]}} +{"id": "astrapy.request_tools", "text": "", "metadata": {"kind": "module", "name": "request_tools", "path": "astrapy.request_tools", "imports": {"annotations": "__future__.annotations", "logging": "logging", "Any": "typing.Any", "TypedDict": "typing.TypedDict", "Union": "typing.Union", "httpx": "httpx", "DEFAULT_REQUEST_TIMEOUT_MS": "astrapy.defaults.DEFAULT_REQUEST_TIMEOUT_MS"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.request_tools.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.request_tools.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.request_tools.log_httpx_request", "text": 
"Log the details of an HTTP request for debugging purposes.", "metadata": {"kind": "function", "name": "log_httpx_request", "path": "astrapy.request_tools.log_httpx_request", "parameters": [{"name": "http_method", "type": "str", "description": "the HTTP verb of the request (e.g. \"POST\").", "default": null}, {"name": "full_url", "type": "str", "description": "the URL of the request (e.g. \"https://domain.com/full/path\").", "default": null}, {"name": "request_params", "type": "dict[str, Any] | None", "description": "parameters of the request.", "default": null}, {"name": "redacted_request_headers", "type": "dict[str, str]", "description": "caution, as these will be logged as they are.", "default": null}, {"name": "payload", "type": "dict[str, Any] | None", "description": "The payload sent with the request, if any.", "default": null}], "returns": [{"type": "None"}]}} +{"id": "astrapy.request_tools.log_httpx_response", "text": "Log the details of an httpx.Response.", "metadata": {"kind": "function", "name": "log_httpx_response", "path": "astrapy.request_tools.log_httpx_response", "parameters": [{"name": "response", "type": "httpx.Response", "description": "the httpx.Response object to log.", "default": null}], "returns": [{"type": "None"}], "gathered_types": ["Response"]}} +{"id": "astrapy.request_tools.HttpMethod", "text": "", "metadata": {"kind": "class", "name": "HttpMethod", "path": "astrapy.request_tools.HttpMethod"}} +{"id": "astrapy.request_tools.HttpMethod.GET", "text": "", "metadata": {"kind": "attribute", "name": "GET", "path": "astrapy.request_tools.HttpMethod.GET", "value": "GET = 'GET'"}} +{"id": "astrapy.request_tools.HttpMethod.POST", "text": "", "metadata": {"kind": "attribute", "name": "POST", "path": "astrapy.request_tools.HttpMethod.POST", "value": "POST = 'POST'"}} +{"id": "astrapy.request_tools.HttpMethod.PUT", "text": "", "metadata": {"kind": "attribute", "name": "PUT", "path": "astrapy.request_tools.HttpMethod.PUT", "value": "PUT = 'PUT'"}} 
+{"id": "astrapy.request_tools.HttpMethod.PATCH", "text": "", "metadata": {"kind": "attribute", "name": "PATCH", "path": "astrapy.request_tools.HttpMethod.PATCH", "value": "PATCH = 'PATCH'"}} +{"id": "astrapy.request_tools.HttpMethod.DELETE", "text": "", "metadata": {"kind": "attribute", "name": "DELETE", "path": "astrapy.request_tools.HttpMethod.DELETE", "value": "DELETE = 'DELETE'"}} +{"id": "astrapy.request_tools.TimeoutInfo", "text": "", "metadata": {"kind": "class", "name": "TimeoutInfo", "path": "astrapy.request_tools.TimeoutInfo", "bases": ["TypedDict"]}} +{"id": "astrapy.request_tools.TimeoutInfo.read", "text": "", "metadata": {"kind": "attribute", "name": "read", "path": "astrapy.request_tools.TimeoutInfo.read", "value": "read: float"}} +{"id": "astrapy.request_tools.TimeoutInfo.write", "text": "", "metadata": {"kind": "attribute", "name": "write", "path": "astrapy.request_tools.TimeoutInfo.write", "value": "write: float"}} +{"id": "astrapy.request_tools.TimeoutInfo.base", "text": "", "metadata": {"kind": "attribute", "name": "base", "path": "astrapy.request_tools.TimeoutInfo.base", "value": "base: float"}} +{"id": "astrapy.request_tools.TimeoutInfoWideType", "text": "", "metadata": {"kind": "attribute", "name": "TimeoutInfoWideType", "path": "astrapy.request_tools.TimeoutInfoWideType", "value": "TimeoutInfoWideType = Union[TimeoutInfo, float, None]", "gathered_types": ["astrapy.request_tools.TimeoutInfo"]}} +{"id": "astrapy.request_tools.to_httpx_timeout", "text": "", "metadata": {"kind": "function", "name": "to_httpx_timeout", "path": "astrapy.request_tools.to_httpx_timeout", "parameters": [{"name": "timeout_info", "type": "TimeoutInfoWideType"}], "returns": [{"type": "httpx.Timeout | None"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "Timeout"]}} +{"id": "astrapy.meta", "text": "", "metadata": {"kind": "module", "name": "meta", "path": "astrapy.meta", "imports": {"annotations": "__future__.annotations", "warnings": "warnings", "Any": 
"typing.Any", "Sequence": "typing.Sequence", "DeprecatedWarning": "deprecation.DeprecatedWarning", "CallerType": "astrapy.constants.CallerType", "CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS": "astrapy.defaults.CALLER_NAME_VERSION_DEPRECATION_NOTICE_DETAILS", "NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_DETAILS", "NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_NS_SUBJECT", "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_DETAILS", "NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT": "astrapy.defaults.NAMESPACE_DEPRECATION_NOTICE_UPDATEDBNS_SUBJECT"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.meta.check_deprecated_vector_ize", "text": "", "metadata": {"kind": "function", "name": "check_deprecated_vector_ize", "path": "astrapy.meta.check_deprecated_vector_ize", "parameters": [{"name": "vector", "type": "Any"}, {"name": "vectors", "type": "Any"}, {"name": "vectorize", "type": "Any"}, {"name": "kind", "type": "str"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.meta.check_caller_parameters", "text": "", "metadata": {"kind": "function", "name": "check_caller_parameters", "path": "astrapy.meta.check_caller_parameters", "parameters": [{"name": "callers", "type": "Sequence[CallerType]"}, {"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}], "returns": [{"type": "Sequence[CallerType]"}], "gathered_types": ["astrapy.constants.CallerType"]}} +{"id": "astrapy.meta.check_deprecated_id_region", "text": "", "metadata": {"kind": "function", "name": "check_deprecated_id_region", "path": "astrapy.meta.check_deprecated_id_region", "parameters": [{"name": "id", "type": "str | None"}, {"name": "region", "type": "str | None"}], "returns": [{"type": "None"}]}} +{"id": 
"astrapy.meta.check_namespace_keyspace", "text": "", "metadata": {"kind": "function", "name": "check_namespace_keyspace", "path": "astrapy.meta.check_namespace_keyspace", "parameters": [{"name": "keyspace", "type": "str | None"}, {"name": "namespace", "type": "str | None"}], "returns": [{"type": "str | None"}]}} +{"id": "astrapy.meta.check_update_db_namespace_keyspace", "text": "", "metadata": {"kind": "function", "name": "check_update_db_namespace_keyspace", "path": "astrapy.meta.check_update_db_namespace_keyspace", "parameters": [{"name": "update_db_keyspace", "type": "bool | None"}, {"name": "update_db_namespace", "type": "bool | None"}], "returns": [{"type": "bool | None"}]}} +{"id": "astrapy.core", "text": "", "metadata": {"kind": "module", "name": "core", "path": "astrapy.core", "imports": {"annotations": "__future__.annotations", "inspect": "inspect", "warnings": "warnings", "DeprecatedWarning": "deprecation.DeprecatedWarning"}, "properties": {"is_init_module": true, "is_package": false, "is_subpackage": true, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.DEPRECATED_MODULE_PATHS", "text": "", "metadata": {"kind": "attribute", "name": "DEPRECATED_MODULE_PATHS", "path": "astrapy.core.DEPRECATED_MODULE_PATHS", "value": "DEPRECATED_MODULE_PATHS = {'astrapy.api', 'astrapy.core', 'astrapy.db', 'astrapy.ops'}"}} +{"id": "astrapy.core.issue_deprecation_warning", "text": "Issue a DeprecatedWarning (subclass of DeprecationWarning).\n\nIn order to correctly refer to user code, this function needs to\ndynamically adjust the `stacklevel` parameter.", "metadata": {"kind": "function", "name": "issue_deprecation_warning", "path": "astrapy.core.issue_deprecation_warning", "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db", "text": "", "metadata": {"kind": "module", "name": "db", "path": "astrapy.core.db", "imports": {"annotations": "__future__.annotations", "asyncio": "asyncio", "json": "json", "logging": "logging", "queue": 
"queue", "threading": "threading", "weakref": "weakref", "AsyncGenerator": "collections.abc.AsyncGenerator", "AsyncIterator": "collections.abc.AsyncIterator", "ThreadPoolExecutor": "concurrent.futures.ThreadPoolExecutor", "partial": "functools.partial", "TracebackType": "types.TracebackType", "Any": "typing.Any", "Callable": "typing.Callable", "Iterator": "typing.Iterator", "List": "typing.List", "Union": "typing.Union", "cast": "typing.cast", "httpx": "httpx", "APIRequestError": "astrapy.core.api.APIRequestError", "api_request": "astrapy.core.api.api_request", "async_api_request": "astrapy.core.api.async_api_request", "API_DOC": "astrapy.core.core_types.API_DOC", "API_RESPONSE": "astrapy.core.core_types.API_RESPONSE", "AsyncPaginableRequestMethod": "astrapy.core.core_types.AsyncPaginableRequestMethod", "PaginableRequestMethod": "astrapy.core.core_types.PaginableRequestMethod", "DEFAULT_AUTH_HEADER": "astrapy.core.defaults.DEFAULT_AUTH_HEADER", "DEFAULT_INSERT_NUM_DOCUMENTS": "astrapy.core.defaults.DEFAULT_INSERT_NUM_DOCUMENTS", "DEFAULT_JSON_API_PATH": "astrapy.core.defaults.DEFAULT_JSON_API_PATH", "DEFAULT_JSON_API_VERSION": "astrapy.core.defaults.DEFAULT_JSON_API_VERSION", "DEFAULT_KEYSPACE_NAME": "astrapy.core.defaults.DEFAULT_KEYSPACE_NAME", "TimeoutInfoWideType": "astrapy.core.utils.TimeoutInfoWideType", "convert_vector_to_floats": "astrapy.core.utils.convert_vector_to_floats", "http_methods": "astrapy.core.utils.http_methods", "make_payload": "astrapy.core.utils.make_payload", "normalize_for_api": "astrapy.core.utils.normalize_for_api", "restore_from_api": "astrapy.core.utils.restore_from_api", "to_httpx_timeout": "astrapy.core.utils.to_httpx_timeout"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.db.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.core.db.logger", "value": "logger = 
logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.core.db._PrefetchIterator.queue", "text": "", "metadata": {"kind": "attribute", "name": "queue", "path": "astrapy.core.db._PrefetchIterator.queue", "value": "queue: queue.Queue[API_DOC | None] = queue.Queue(prefetched)", "gathered_types": ["Queue", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db._PrefetchIterator.request_method", "text": "", "metadata": {"kind": "attribute", "name": "request_method", "path": "astrapy.core.db._PrefetchIterator.request_method", "value": "request_method = request_method"}} +{"id": "astrapy.core.db._PrefetchIterator.options", "text": "", "metadata": {"kind": "attribute", "name": "options", "path": "astrapy.core.db._PrefetchIterator.options", "value": "options = options"}} +{"id": "astrapy.core.db._PrefetchIterator.raw_response_callback", "text": "", "metadata": {"kind": "attribute", "name": "raw_response_callback", "path": "astrapy.core.db._PrefetchIterator.raw_response_callback", "value": "raw_response_callback = raw_response_callback"}} +{"id": "astrapy.core.db._PrefetchIterator.initialised", "text": "", "metadata": {"kind": "attribute", "name": "initialised", "path": "astrapy.core.db._PrefetchIterator.initialised", "value": "initialised = threading.Event()", "gathered_types": ["Event"]}} +{"id": "astrapy.core.db._PrefetchIterator.stop", "text": "", "metadata": {"kind": "attribute", "name": "stop", "path": "astrapy.core.db._PrefetchIterator.stop", "value": "stop = threading.Event()", "gathered_types": ["Event"]}} +{"id": "astrapy.core.db._PrefetchIterator.thread", "text": "", "metadata": {"kind": "attribute", "name": "thread", "path": "astrapy.core.db._PrefetchIterator.thread", "value": "thread = threading.Thread(target=_PrefetchIterator.queued_paginate, args=(weakref.proxy(self),))", "gathered_types": ["Thread", "_PrefetchIterator"]}} +{"id": "astrapy.core.db._PrefetchIterator.queue_put", "text": "", "metadata": {"kind": "function", "name": 
"queue_put", "path": "astrapy.core.db._PrefetchIterator.queue_put", "parameters": [{"name": "q", "type": "queue.Queue[API_DOC | None]"}, {"name": "item", "type": "API_DOC | None"}, {"name": "stop", "type": "threading.Event"}], "returns": [{"type": "None"}], "gathered_types": ["Queue", "Event", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db._PrefetchIterator.queued_paginate", "text": "", "metadata": {"kind": "function", "name": "queued_paginate", "path": "astrapy.core.db._PrefetchIterator.queued_paginate", "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db.AstraDBCollection", "text": "", "metadata": {"kind": "class", "name": "AstraDBCollection", "path": "astrapy.core.db.AstraDBCollection", "parameters": [{"name": "collection_name", "type": "str"}, {"name": "astra_db", "default": "None", "type": "AstraDB | None"}, {"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}, {"name": "additional_headers", "default": "{}", "type": "dict[str, str]"}], "gathered_types": ["astrapy.core.db.AstraDB"]}} +{"id": "astrapy.core.db.AstraDBCollection.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.core.db.AstraDBCollection.client", "value": "client = httpx.Client()", "gathered_types": ["Client"]}} +{"id": "astrapy.core.db.AstraDBCollection.astra_db", "text": "", "metadata": {"kind": "attribute", "name": "astra_db", "path": "astrapy.core.db.AstraDBCollection.astra_db", "value": "astra_db = astra_db"}} +{"id": "astrapy.core.db.AstraDBCollection.caller_name", "text": "", "metadata": {"kind": "attribute", "name": "caller_name", "path": "astrapy.core.db.AstraDBCollection.caller_name", "value": "caller_name: str | None = self.astra_db.caller_name"}} +{"id": 
"astrapy.core.db.AstraDBCollection.caller_version", "text": "", "metadata": {"kind": "attribute", "name": "caller_version", "path": "astrapy.core.db.AstraDBCollection.caller_version", "value": "caller_version: str | None = self.astra_db.caller_version"}} +{"id": "astrapy.core.db.AstraDBCollection.additional_headers", "text": "", "metadata": {"kind": "attribute", "name": "additional_headers", "path": "astrapy.core.db.AstraDBCollection.additional_headers", "value": "additional_headers = additional_headers"}} +{"id": "astrapy.core.db.AstraDBCollection.collection_name", "text": "", "metadata": {"kind": "attribute", "name": "collection_name", "path": "astrapy.core.db.AstraDBCollection.collection_name", "value": "collection_name = collection_name"}} +{"id": "astrapy.core.db.AstraDBCollection.base_path", "text": "", "metadata": {"kind": "attribute", "name": "base_path", "path": "astrapy.core.db.AstraDBCollection.base_path", "value": "base_path: str = f'{self.astra_db.base_path}/{self.collection_name}'"}} +{"id": "astrapy.core.db.AstraDBCollection.copy", "text": "", "metadata": {"kind": "function", "name": "copy", "path": "astrapy.core.db.AstraDBCollection.copy", "parameters": [{"name": "collection_name", "default": "None", "type": "str | None"}, {"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}, {"name": "additional_headers", "default": "None", "type": "dict[str, str] | None"}], "returns": [{"type": "AstraDBCollection"}], "gathered_types": ["astrapy.core.db.AstraDBCollection"]}} +{"id": "astrapy.core.db.AstraDBCollection.to_async", "text": "", "metadata": {"kind": "function", 
"name": "to_async", "path": "astrapy.core.db.AstraDBCollection.to_async", "returns": [{"type": "AsyncAstraDBCollection"}], "gathered_types": ["astrapy.core.db.AsyncAstraDBCollection"]}} +{"id": "astrapy.core.db.AstraDBCollection.set_caller", "text": "", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.core.db.AstraDBCollection.set_caller", "parameters": [{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db.AstraDBCollection.post_raw_request", "text": "", "metadata": {"kind": "function", "name": "post_raw_request", "path": "astrapy.core.db.AstraDBCollection.post_raw_request", "parameters": [{"name": "body", "type": "dict[str, Any]"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.get", "text": "Retrieve a document from the collection by its path.", "metadata": {"kind": "function", "name": "get", "path": "astrapy.core.db.AstraDBCollection.get", "parameters": [{"name": "path", "type": "str", "description": "The path of the document to retrieve.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE | None", "description": "The retrieved document."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.find", "text": "Find documents in the collection that match the given filter.", 
"metadata": {"kind": "function", "name": "find", "path": "astrapy.core.db.AstraDBCollection.find", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return matching documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The query response containing matched documents."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.vector_find", "text": "Perform a vector-based search in the collection.", "metadata": {"kind": "function", "name": "vector_find", "path": "astrapy.core.db.AstraDBCollection.vector_find", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "limit", "type": "int", "description": "The maximum number of documents to return.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool", "description": "Whether to include similarity score in the result.", "value": "True", "default": "True"}, 
{"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "list[API_DOC]", "description": "A list of documents matching the vector search criteria."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.paginate", "text": "Generate paginated results for a given database query method.", "metadata": {"kind": "function", "name": "paginate", "path": "astrapy.core.db.AstraDBCollection.paginate", "parameters": [{"name": "request_method", "type": "function", "description": "The database query method to paginate.", "default": null}, {"name": "options", "type": "dict", "description": "Options for the database query.", "default": null}, {"name": "prefetched", "type": "int", "description": "Number of pre-fetched documents.", "default": null}, {"name": "raw_response_callback", "type": "Callable[[dict[str, Any]], None] | None", "description": "an optional callback invoked at each new\nresponse coming from the API. 
The only argument is the raw\nAPI response and the callback must return None.", "value": "None", "default": "None"}], "returns": [{"type": "Iterator[API_DOC]"}], "yields": [{"name": "dict", "type": "API_DOC", "description": "The next document in the paginated result set."}], "gathered_types": ["astrapy.core.core_types.PaginableRequestMethod", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.paginated_find", "text": "Perform a paginated search in the collection.", "metadata": {"kind": "function", "name": "paginated_find", "path": "astrapy.core.db.AstraDBCollection.paginated_find", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return matching documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "None", "default": "None"}, {"name": "prefetched", "type": "int", "description": "Number of pre-fetched documents.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each\nsingle HTTP request.\nThis is a paginated method, that issues several requests as it\nneeds more data. This parameter controls a single request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}, {"name": "raw_response_callback", "type": "Callable[[dict[str, Any]], None] | None", "description": "an optional callback invoked at each new\nresponse coming from the API. 
The only argument is the raw\nAPI response and the callback must return None.", "value": "None", "default": "None"}], "returns": [{"name": "generator", "type": "Iterator[API_DOC]", "description": "A generator yielding documents in the paginated result set."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.pop", "text": "Pop the last data in the tags array", "metadata": {"kind": "function", "name": "pop", "path": "astrapy.core.db.AstraDBCollection.pop", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "pop", "type": "dict", "description": "The pop to apply to the tags.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the update operation.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The original document before the update."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.push", "text": "Push new data to the tags array", "metadata": {"kind": "function", "name": "push", "path": "astrapy.core.db.AstraDBCollection.push", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "push", "type": "dict", "description": "The push to apply to the tags.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the update operation.", "default": null}, {"name": "timeout_info", "type": 
"TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.find_one_and_replace", "text": "Find a single document and replace it.", "metadata": {"kind": "function", "name": "find_one_and_replace", "path": "astrapy.core.db.AstraDBCollection.find_one_and_replace", "parameters": [{"name": "replacement", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to find the document.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the find and replace operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.vector_find_one_and_replace", "text": "Perform a vector-based search and replace the first matched document.", "metadata": {"kind": "function", "name": "vector_find_one_and_replace", "path": 
"astrapy.core.db.AstraDBCollection.vector_find_one_and_replace", "parameters": [{"name": "vector", "type": "dict", "description": "The vector to search with.", "default": null}, {"name": "replacement", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the result.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: either the matched document or None if nothing found"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.find_one_and_update", "text": "Find a single document and update it.", "metadata": {"kind": "function", "name": "find_one_and_update", "path": "astrapy.core.db.AstraDBCollection.find_one_and_update", "parameters": [{"name": "update", "type": "dict", "description": "The update to apply to the document.", "default": null}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to find the document.", "value": "{}", "default": "{}"}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the 
action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the find and update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.vector_find_one_and_update", "text": "Perform a vector-based search and update the first matched document.", "metadata": {"kind": "function", "name": "vector_find_one_and_update", "path": "astrapy.core.db.AstraDBCollection.vector_find_one_and_update", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the matched document.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents before applying the vector search.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the updated document.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: The result of the vector-based find and\nupdate operation, or None if nothing found"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.find_one_and_delete", "text": "Find a single document and delete it.", "metadata": {"kind": "function", "name": "find_one_and_delete", "path": "astrapy.core.db.AstraDBCollection.find_one_and_delete", "parameters": [{"name": "sort", "type": "dict", 
"description": "Specifies the order in which to find the document.", "value": "{}", "default": "{}"}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the find and delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.count_documents", "text": "Count documents matching a given predicate (expressed as filter).", "metadata": {"kind": "function", "name": "count_documents", "path": "astrapy.core.db.AstraDBCollection.count_documents", "parameters": [{"name": "filter", "type": "dict, defaults to {}", "description": "Criteria to filter documents.", "value": "{}", "default": "{}"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "the response, either\n{\"status\": {\"count\": <NUMBER> }}"}, {"type": "API_RESPONSE", "description": "or\n{\"errors\": [...]}"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.find_one", "text": "Find a single document in the collection.", "metadata": {"kind": "function", 
"name": "find_one", "path": "astrapy.core.db.AstraDBCollection.find_one", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "{}", "default": "{}"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "{}", "default": "{}"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return the document.", "value": "{}", "default": "{}"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "{}", "default": "{}"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "the response, either\n{\"data\": {\"document\": <DOCUMENT> }}"}, {"type": "API_RESPONSE", "description": "or\n{\"data\": {\"document\": None}}"}, {"type": "API_RESPONSE", "description": "depending on whether a matching document is found or not."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.vector_find_one", "text": "Perform a vector-based search to find a single document in the collection.", "metadata": {"kind": "function", "name": "vector_find_one", "path": "astrapy.core.db.AstraDBCollection.vector_find_one", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "filter", "type": "dict", "description": "Additional criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the result.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool", 
"description": "Whether to include similarity score in the result.", "value": "True", "default": "True"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: The found document or None if no matching document is found."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.insert_one", "text": "Insert a single document into the collection.", "metadata": {"kind": "function", "name": "insert_one", "path": "astrapy.core.db.AstraDBCollection.insert_one", "parameters": [{"name": "document", "type": "dict", "description": "The document to insert.", "default": null}, {"name": "failures_allowed", "type": "bool", "description": "Whether to allow failures in the insert operation.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the insert operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.insert_many", "text": "Insert multiple documents into the collection.", "metadata": {"kind": "function", "name": "insert_many", "path": "astrapy.core.db.AstraDBCollection.insert_many", "parameters": [{"name": "documents", "type": "list", "description": "A list of documents to 
insert.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the insert operation.", "value": "None", "default": "None"}, {"name": "partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures through the insertion (i.e. on some documents).", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the insert operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.chunked_insert_many", "text": "Insert multiple documents into the collection, handling chunking and\noptionally with concurrent insertions.", "metadata": {"kind": "function", "name": "chunked_insert_many", "path": "astrapy.core.db.AstraDBCollection.chunked_insert_many", "parameters": [{"name": "documents", "type": "list", "description": "A list of documents to insert.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the insert operation.", "value": "None", "default": "None"}, {"name": "partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures in the chunk. 
Should be used combined with\noptions={\"ordered\": False} in most cases.", "value": "False", "default": "False"}, {"name": "chunk_size", "type": "int", "description": "Override the default insertion chunk size.", "value": "DEFAULT_INSERT_NUM_DOCUMENTS", "default": "DEFAULT_INSERT_NUM_DOCUMENTS"}, {"name": "concurrency", "type": "int", "description": "The number of concurrent chunk insertions.\nDefault is no concurrency.", "value": "1", "default": "1"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each single HTTP request.\nThis method runs a number of HTTP requests as it works on chunked\ndata. The timeout refers to each individual such request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "list[API_RESPONSE | Exception]", "description": "The responses from the database after the chunked insert operation.\nThis is a list of individual responses from the API: the caller\nwill need to inspect them all, e.g. 
to collate the inserted IDs."}], "gathered_types": ["astrapy.core.core_types.API_DOC", "astrapy.core.defaults.DEFAULT_INSERT_NUM_DOCUMENTS", "astrapy.core.core_types.API_RESPONSE", "astrapy.core.utils.TimeoutInfoWideType", "Exception"]}} +{"id": "astrapy.core.db.AstraDBCollection.update_one", "text": "Update a single document in the collection.", "metadata": {"kind": "function", "name": "update_one", "path": "astrapy.core.db.AstraDBCollection.update_one", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the document.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.update_many", "text": "Updates multiple documents in the collection.", "metadata": {"kind": "function", "name": "update_many", "path": "astrapy.core.db.AstraDBCollection.update_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the document.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the 
action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.replace", "text": "Replace a document in the collection.", "metadata": {"kind": "function", "name": "replace", "path": "astrapy.core.db.AstraDBCollection.replace", "parameters": [{"name": "path", "type": "str", "description": "The path to the document to replace.", "default": null}, {"name": "document", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the replace operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.delete_one", "text": "Delete a single document from the collection based on its ID.", "metadata": {"kind": "function", "name": "delete_one", "path": "astrapy.core.db.AstraDBCollection.delete_one", "parameters": [{"name": "id", "type": "str", "description": "The ID of the document to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": 
"None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.delete_one_by_predicate", "text": "Delete a single document from the collection based on a filter clause", "metadata": {"kind": "function", "name": "delete_one_by_predicate", "path": "astrapy.core.db.AstraDBCollection.delete_one_by_predicate", "parameters": [{"name": "filter", "type": "dict[str, Any]", "description": "any filter dictionary", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.delete_many", "text": "Delete many documents from the collection based on a filter condition", "metadata": {"kind": "function", "name": "delete_many", "path": "astrapy.core.db.AstraDBCollection.delete_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the documents to delete.", "default": null}, {"name": "skip_error_check", "type": "bool", "description": "whether to ignore the check for API error\nand return the response untouched. 
Default is False.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.chunked_delete_many", "text": "Delete many documents from the collection based on a filter condition,\nchaining several API calls until exhaustion of the documents to delete.", "metadata": {"kind": "function", "name": "chunked_delete_many", "path": "astrapy.core.db.AstraDBCollection.chunked_delete_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the documents to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each single HTTP request.\nThis method runs a number of HTTP requests as it works on a\npagination basis. 
The timeout refers to each individual such request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "list[API_RESPONSE]", "description": "List[dict]: The responses from the database from all the calls"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.clear", "text": "Clear the collection, deleting all documents", "metadata": {"kind": "function", "name": "clear", "path": "astrapy.core.db.AstraDBCollection.clear", "parameters": [{"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.delete_subdocument", "text": "Delete a subdocument or field from a document in the collection.", "metadata": {"kind": "function", "name": "delete_subdocument", "path": "astrapy.core.db.AstraDBCollection.delete_subdocument", "parameters": [{"name": "id", "type": "str", "description": "The ID of the document containing the subdocument.", "default": null}, {"name": "subdoc", "type": "str", "description": "The key of the subdocument or field to remove.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": 
"dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDBCollection.upsert_one", "text": "Emulate an upsert operation for a single document in the collection.\n\nThis method attempts to insert the document.\nIf a document with the same _id exists, it updates the existing document.", "metadata": {"kind": "function", "name": "upsert_one", "path": "astrapy.core.db.AstraDBCollection.upsert_one", "parameters": [{"name": "document", "type": "dict", "description": "The document to insert or update.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP requests.\nThis method may issue one or two requests, depending on what\nis detected on DB. This timeout controls each HTTP request individually.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The _id of the inserted or updated document."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AstraDBCollection.upsert_many", "text": "Emulate an upsert operation for multiple documents in the collection.\n\nThis method attempts to insert the documents.\nIf a document with the same _id exists, it updates the existing document.", "metadata": {"kind": "function", "name": "upsert_many", "path": "astrapy.core.db.AstraDBCollection.upsert_many", "parameters": [{"name": "documents", "type": "List[dict]", "description": "The documents to insert or update.", "default": null}, {"name": "concurrency", "type": "int", "description": "The number of concurrent upserts.", "value": "1", "default": "1"}, {"name": 
"partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures in the batch.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each HTTP request.\nThis method issues a separate HTTP request for each document to\ninsert: the timeout controls each such request individually.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "list[str | Exception]", "description": "List[Union[str, Exception]]: A list of \"_id\"s of the inserted or updated documents."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "Exception"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection", "text": "", "metadata": {"kind": "class", "name": "AsyncAstraDBCollection", "path": "astrapy.core.db.AsyncAstraDBCollection", "parameters": [{"name": "collection_name", "type": "str"}, {"name": "astra_db", "default": "None", "type": "AsyncAstraDB | None"}, {"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}, {"name": "additional_headers", "default": "{}", "type": "dict[str, str]"}], "gathered_types": ["astrapy.core.db.AsyncAstraDB"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.astra_db", "text": "", "metadata": {"kind": "attribute", "name": "astra_db", "path": "astrapy.core.db.AsyncAstraDBCollection.astra_db", "value": "astra_db: AsyncAstraDB = astra_db", "gathered_types": ["astrapy.core.db.AsyncAstraDB"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.caller_name", "text": "", "metadata": {"kind": "attribute", "name": 
"caller_name", "path": "astrapy.core.db.AsyncAstraDBCollection.caller_name", "value": "caller_name: str | None = self.astra_db.caller_name"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.caller_version", "text": "", "metadata": {"kind": "attribute", "name": "caller_version", "path": "astrapy.core.db.AsyncAstraDBCollection.caller_version", "value": "caller_version: str | None = self.astra_db.caller_version"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.additional_headers", "text": "", "metadata": {"kind": "attribute", "name": "additional_headers", "path": "astrapy.core.db.AsyncAstraDBCollection.additional_headers", "value": "additional_headers = additional_headers"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.core.db.AsyncAstraDBCollection.client", "value": "client = astra_db.client"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.collection_name", "text": "", "metadata": {"kind": "attribute", "name": "collection_name", "path": "astrapy.core.db.AsyncAstraDBCollection.collection_name", "value": "collection_name = collection_name"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.base_path", "text": "", "metadata": {"kind": "attribute", "name": "base_path", "path": "astrapy.core.db.AsyncAstraDBCollection.base_path", "value": "base_path: str = f'{self.astra_db.base_path}/{self.collection_name}'"}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.copy", "text": "", "metadata": {"kind": "function", "name": "copy", "path": "astrapy.core.db.AsyncAstraDBCollection.copy", "parameters": [{"name": "collection_name", "default": "None", "type": "str | None"}, {"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, 
{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}, {"name": "additional_headers", "default": "None", "type": "dict[str, str] | None"}], "returns": [{"type": "AsyncAstraDBCollection"}], "gathered_types": ["astrapy.core.db.AsyncAstraDBCollection"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.set_caller", "text": "", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.core.db.AsyncAstraDBCollection.set_caller", "parameters": [{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.to_sync", "text": "", "metadata": {"kind": "function", "name": "to_sync", "path": "astrapy.core.db.AsyncAstraDBCollection.to_sync", "returns": [{"type": "AstraDBCollection"}], "gathered_types": ["astrapy.core.db.AstraDBCollection"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.post_raw_request", "text": "", "metadata": {"kind": "function", "name": "post_raw_request", "path": "astrapy.core.db.AsyncAstraDBCollection.post_raw_request", "parameters": [{"name": "body", "type": "dict[str, Any]"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.get", "text": "Retrieve a document from the collection by its path.", "metadata": {"kind": "function", "name": "get", "path": "astrapy.core.db.AsyncAstraDBCollection.get", "parameters": [{"name": "path", "type": "str", "description": "The path of the document to retrieve.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not 
block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE | None", "description": "The retrieved document."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.find", "text": "Find documents in the collection that match the given filter.", "metadata": {"kind": "function", "name": "find", "path": "astrapy.core.db.AsyncAstraDBCollection.find", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return matching documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The query response containing matched documents."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.vector_find", "text": "Perform a vector-based search in the collection.", "metadata": {"kind": "function", "name": "vector_find", "path": "astrapy.core.db.AsyncAstraDBCollection.vector_find", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "limit", "type": "int", 
"description": "The maximum number of documents to return.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool", "description": "Whether to include similarity score in the result.", "value": "True", "default": "True"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "list[API_DOC]", "description": "A list of documents matching the vector search criteria."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.paginate", "text": "Generate paginated results for a given database query method.", "metadata": {"kind": "function", "name": "paginate", "path": "astrapy.core.db.AsyncAstraDBCollection.paginate", "parameters": [{"name": "request_method", "type": "function", "description": "The database query method to paginate.", "default": null}, {"name": "options", "type": "dict", "description": "Options for the database query.", "default": null}, {"name": "prefetched", "type": "int", "description": "Number of pre-fetched documents.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}, {"name": "raw_response_callback", "type": "Callable[[dict[str, Any]], None] | None", "description": "an 
optional callback invoked at each new\nresponse coming from the API. The only argument is the raw\nAPI response and the callback must return None.", "value": "None", "default": "None"}], "returns": [{"type": "AsyncGenerator[API_DOC, None]"}], "yields": [{"name": "dict", "type": "AsyncGenerator[API_DOC, None]", "description": "The next document in the paginated result set."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.AsyncPaginableRequestMethod"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.paginated_find", "text": "Perform a paginated search in the collection.", "metadata": {"kind": "function", "name": "paginated_find", "path": "astrapy.core.db.AsyncAstraDBCollection.paginated_find", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return matching documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "None", "default": "None"}, {"name": "prefetched", "type": "int", "description": "Number of pre-fetched documents", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each\nsingle HTTP request.\nThis is a paginated method, that issues several requests as it\nneeds more data. 
This parameter controls a single request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}, {"name": "raw_response_callback", "type": "Callable[[dict[str, Any]], None] | None", "description": "an optional callback invoked at each new\nresponse coming from the API. The only argument is the raw\nAPI response and the callback must return None.", "value": "None", "default": "None"}], "returns": [{"name": "generator", "type": "AsyncIterator[API_DOC]", "description": "A generator yielding documents in the paginated result set."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.pop", "text": "Pop the last data in the tags array", "metadata": {"kind": "function", "name": "pop", "path": "astrapy.core.db.AsyncAstraDBCollection.pop", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "pop", "type": "dict", "description": "The pop to apply to the tags.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the update operation.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The original document before the update."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.push", "text": "Push new data to the tags array", "metadata": {"kind": "function", "name": "push", "path": 
"astrapy.core.db.AsyncAstraDBCollection.push", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "push", "type": "dict", "description": "The push to apply to the tags.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the update operation.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_replace", "text": "Find a single document and replace it.", "metadata": {"kind": "function", "name": "find_one_and_replace", "path": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_replace", "parameters": [{"name": "replacement", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to find the document.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", 
"type": "API_RESPONSE", "description": "The result of the find and replace operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one_and_replace", "text": "Perform a vector-based search and replace the first matched document.", "metadata": {"kind": "function", "name": "vector_find_one_and_replace", "path": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one_and_replace", "parameters": [{"name": "vector", "type": "dict", "description": "The vector to search with.", "default": null}, {"name": "replacement", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the result.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: either the matched document or None if nothing found"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_update", "text": "Find a single document and update it.", "metadata": {"kind": "function", "name": "find_one_and_update", "path": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_update", "parameters": [{"name": "sort", "type": "dict", "description": "Specifies the order in which to find the document.", "value": "{}", "default": "{}"}, {"name": "update", "type": "dict", "description": "The update to apply to the 
document.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the find and update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one_and_update", "text": "Perform a vector-based search and update the first matched document.", "metadata": {"kind": "function", "name": "vector_find_one_and_update", "path": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one_and_update", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the matched document.", "default": null}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents before applying the vector search.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the updated document.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: The result of 
the vector-based find and\nupdate operation, or None if nothing found"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_delete", "text": "Find a single document and delete it.", "metadata": {"kind": "function", "name": "find_one_and_delete", "path": "astrapy.core.db.AsyncAstraDBCollection.find_one_and_delete", "parameters": [{"name": "sort", "type": "dict", "description": "Specifies the order in which to find the document.", "value": "{}", "default": "{}"}, {"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "None", "default": "None"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The result of the find and delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.count_documents", "text": "Count documents matching a given predicate (expressed as filter).", "metadata": {"kind": "function", "name": "count_documents", "path": "astrapy.core.db.AsyncAstraDBCollection.count_documents", "parameters": [{"name": "filter", "type": "dict, defaults to {}", "description": "Criteria to filter documents.", "value": "{}", "default": "{}"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request 
already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "the response, either\n{\"status\": {\"count\": <NUMBER> }}"}, {"type": "API_RESPONSE", "description": "or\n{\"errors\": [...]}"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.find_one", "text": "Find a single document in the collection.", "metadata": {"kind": "function", "name": "find_one", "path": "astrapy.core.db.AsyncAstraDBCollection.find_one", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to filter documents.", "value": "{}", "default": "{}"}, {"name": "projection", "type": "dict", "description": "Specifies the fields to return.", "value": "{}", "default": "{}"}, {"name": "sort", "type": "dict", "description": "Specifies the order in which to return the document.", "value": "{}", "default": "{}"}, {"name": "options", "type": "dict", "description": "Additional options for the query.", "value": "{}", "default": "{}"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "the response, either\n{\"data\": {\"document\": <DOCUMENT> }}"}, {"type": "API_RESPONSE", "description": "or\n{\"data\": {\"document\": None}}"}, {"type": "API_RESPONSE", "description": "depending on whether a matching document is found or not."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one", "text": "Perform a vector-based search to find a single document in the collection.", "metadata": {"kind": "function", "name": 
"vector_find_one", "path": "astrapy.core.db.AsyncAstraDBCollection.vector_find_one", "parameters": [{"name": "vector", "type": "list", "description": "The vector to search with.", "default": null}, {"name": "filter", "type": "dict", "description": "Additional criteria to filter documents.", "value": "None", "default": "None"}, {"name": "fields", "type": "list", "description": "Specifies the fields to return in the result.", "value": "None", "default": "None"}, {"name": "include_similarity", "type": "bool", "description": "Whether to include similarity score in the result.", "value": "True", "default": "True"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "API_DOC | None", "description": "dict or None: The found document or None if no matching document is found."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.insert_one", "text": "Insert a single document into the collection.", "metadata": {"kind": "function", "name": "insert_one", "path": "astrapy.core.db.AsyncAstraDBCollection.insert_one", "parameters": [{"name": "document", "type": "dict", "description": "The document to insert.", "default": null}, {"name": "failures_allowed", "type": "bool", "description": "Whether to allow failures in the insert operation.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": 
"The response from the database after the insert operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.insert_many", "text": "Insert multiple documents into the collection.", "metadata": {"kind": "function", "name": "insert_many", "path": "astrapy.core.db.AsyncAstraDBCollection.insert_many", "parameters": [{"name": "documents", "type": "list", "description": "A list of documents to insert.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the insert operation.", "value": "None", "default": "None"}, {"name": "partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures through the insertion (i.e. on some documents).", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the insert operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.chunked_insert_many", "text": "Insert multiple documents into the collection, handling chunking and\noptionally with concurrent insertions.", "metadata": {"kind": "function", "name": "chunked_insert_many", "path": "astrapy.core.db.AsyncAstraDBCollection.chunked_insert_many", "parameters": [{"name": "documents", "type": "list", "description": "A list of documents to insert.", "default": null}, {"name": "options", "type": "dict", "description": "Additional options for the insert operation.", 
"value": "None", "default": "None"}, {"name": "partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures in the chunk. Should be used combined with\noptions={\"ordered\": False} in most cases.", "value": "False", "default": "False"}, {"name": "chunk_size", "type": "int", "description": "Override the default insertion chunk size.", "value": "DEFAULT_INSERT_NUM_DOCUMENTS", "default": "DEFAULT_INSERT_NUM_DOCUMENTS"}, {"name": "concurrency", "type": "int", "description": "The number of concurrent chunk insertions.\nDefault is no concurrency.", "value": "1", "default": "1"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each single HTTP request.\nThis method runs a number of HTTP requests as it works on chunked\ndata. The timeout refers to each individual such request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "list[API_RESPONSE | Exception]", "description": "The responses from the database after the chunked insert operation.\nThis is a list of individual responses from the API: the caller\nwill need to inspect them all, e.g. 
to collate the inserted IDs."}], "gathered_types": ["astrapy.core.core_types.API_DOC", "astrapy.core.defaults.DEFAULT_INSERT_NUM_DOCUMENTS", "astrapy.core.core_types.API_RESPONSE", "astrapy.core.utils.TimeoutInfoWideType", "Exception"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.update_one", "text": "Update a single document in the collection.", "metadata": {"kind": "function", "name": "update_one", "path": "astrapy.core.db.AsyncAstraDBCollection.update_one", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the document.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}, {"name": "options", "type": "dict", "description": "Additional options for the operation.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.update_many", "text": "Updates multiple documents in the collection.", "metadata": {"kind": "function", "name": "update_many", "path": "astrapy.core.db.AsyncAstraDBCollection.update_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the document to update.", "default": null}, {"name": "update", "type": "dict", "description": "The update to apply to the document.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event 
will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.replace", "text": "Replace a document in the collection.", "metadata": {"kind": "function", "name": "replace", "path": "astrapy.core.db.AsyncAstraDBCollection.replace", "parameters": [{"name": "path", "type": "str", "description": "The path to the document to replace.", "default": null}, {"name": "document", "type": "dict", "description": "The new document to replace the existing one.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the replace operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.delete_one", "text": "Delete a single document from the collection based on its ID.", "metadata": {"kind": "function", "name": "delete_one", "path": "astrapy.core.db.AsyncAstraDBCollection.delete_one", "parameters": [{"name": "id", "type": "str", "description": "The ID of the document to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request 
already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.delete_one_by_predicate", "text": "Delete a single document from the collection based on a filter clause", "metadata": {"kind": "function", "name": "delete_one_by_predicate", "path": "astrapy.core.db.AsyncAstraDBCollection.delete_one_by_predicate", "parameters": [{"name": "filter", "type": "dict[str, Any]", "description": "any filter dictionary", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.delete_many", "text": "Delete many documents from the collection based on a filter condition", "metadata": {"kind": "function", "name": "delete_many", "path": "astrapy.core.db.AsyncAstraDBCollection.delete_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the documents to delete.", "default": null}, {"name": "skip_error_check", "type": "bool", "description": "whether to ignore the check for API error\nand return the response untouched. 
Default is False.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the delete operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.chunked_delete_many", "text": "Delete many documents from the collection based on a filter condition,\nchaining several API calls until exhaustion of the documents to delete.", "metadata": {"kind": "function", "name": "chunked_delete_many", "path": "astrapy.core.db.AsyncAstraDBCollection.chunked_delete_many", "parameters": [{"name": "filter", "type": "dict", "description": "Criteria to identify the documents to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each single HTTP request.\nThis method runs a number of HTTP requests as it works on a\npagination basis. 
The timeout refers to each individual such request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "list[API_RESPONSE]", "description": "List[dict]: The responses from the database from all the calls"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.clear", "text": "Clear the collection, deleting all documents", "metadata": {"kind": "function", "name": "clear", "path": "astrapy.core.db.AsyncAstraDBCollection.clear", "parameters": [{"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.delete_subdocument", "text": "Delete a subdocument or field from a document in the collection.", "metadata": {"kind": "function", "name": "delete_subdocument", "path": "astrapy.core.db.AsyncAstraDBCollection.delete_subdocument", "parameters": [{"name": "id", "type": "str", "description": "The ID of the document containing the subdocument.", "default": null}, {"name": "subdoc", "type": "str", "description": "The key of the subdocument or field to remove.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], 
"returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database after the update operation."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.upsert_one", "text": "Emulate an upsert operation for a single document in the collection.\n\nThis method attempts to insert the document.\nIf a document with the same _id exists, it updates the existing document.", "metadata": {"kind": "function", "name": "upsert_one", "path": "astrapy.core.db.AsyncAstraDBCollection.upsert_one", "parameters": [{"name": "document", "type": "dict", "description": "The document to insert or update.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP requests.\nThis method may issue one or two requests, depending on what\nis detected on DB. This timeout controls each HTTP request individually.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The _id of the inserted or updated document."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC"]}} +{"id": "astrapy.core.db.AsyncAstraDBCollection.upsert_many", "text": "Emulate an upsert operation for multiple documents in the collection.\nThis method attempts to insert the documents.\nIf a document with the same _id exists, it updates the existing document.", "metadata": {"kind": "function", "name": "upsert_many", "path": "astrapy.core.db.AsyncAstraDBCollection.upsert_many", "parameters": [{"name": "documents", "type": "List[dict]", "description": "The documents to insert or update.", "default": null}, {"name": "concurrency", "type": "int", "description": "The number of concurrent upserts.", "value": "1", "default": 
"1"}, {"name": "partial_failures_allowed", "type": "bool", "description": "Whether to allow partial\nfailures in the batch.", "value": "False", "default": "False"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for each HTTP request.\nThis method issues a separate HTTP request for each document to\ninsert: the timeout controls each such request individually.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"type": "list[str | Exception]", "description": "List[Union[str, Exception]]: A list of \"_id\"s of the inserted or updated documents."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_DOC", "Exception"]}} +{"id": "astrapy.core.db.AstraDB", "text": "", "metadata": {"kind": "class", "name": "AstraDB", "path": "astrapy.core.db.AstraDB", "parameters": [{"name": "token", "type": "str | None"}, {"name": "api_endpoint", "type": "str"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}]}} +{"id": "astrapy.core.db.AstraDB.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.core.db.AstraDB.client", "value": "client = httpx.Client()", "gathered_types": ["Client"]}} +{"id": "astrapy.core.db.AstraDB.caller_name", "text": "", "metadata": {"kind": "attribute", "name": "caller_name", "path": "astrapy.core.db.AstraDB.caller_name", "value": "caller_name = caller_name"}} +{"id": "astrapy.core.db.AstraDB.caller_version", "text": "", "metadata": {"kind": "attribute", "name": "caller_version", "path": "astrapy.core.db.AstraDB.caller_version", 
"value": "caller_version = caller_version"}} +{"id": "astrapy.core.db.AstraDB.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.core.db.AstraDB.token", "value": "token = token"}} +{"id": "astrapy.core.db.AstraDB.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.core.db.AstraDB.api_endpoint", "value": "api_endpoint = api_endpoint"}} +{"id": "astrapy.core.db.AstraDB.base_url", "text": "", "metadata": {"kind": "attribute", "name": "base_url", "path": "astrapy.core.db.AstraDB.base_url", "value": "base_url = self.api_endpoint.strip('/')"}} +{"id": "astrapy.core.db.AstraDB.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.core.db.AstraDB.api_path", "value": "api_path = DEFAULT_JSON_API_PATH if api_path is None else api_path.strip('/')", "gathered_types": ["astrapy.core.defaults.DEFAULT_JSON_API_PATH"]}} +{"id": "astrapy.core.db.AstraDB.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": "astrapy.core.db.AstraDB.api_version", "value": "api_version = DEFAULT_JSON_API_VERSION if api_version is None else api_version.strip('/')", "gathered_types": ["astrapy.core.defaults.DEFAULT_JSON_API_VERSION"]}} +{"id": "astrapy.core.db.AstraDB.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.core.db.AstraDB.namespace", "value": "namespace = namespace"}} +{"id": "astrapy.core.db.AstraDB.base_path", "text": "", "metadata": {"kind": "attribute", "name": "base_path", "path": "astrapy.core.db.AstraDB.base_path", "value": "base_path: str = f'/{'/'.join(base_path_components)}'"}} +{"id": "astrapy.core.db.AstraDB.copy", "text": "", "metadata": {"kind": "function", "name": "copy", "path": "astrapy.core.db.AstraDB.copy", "parameters": [{"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "api_path", 
"default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "AstraDB"}], "gathered_types": ["astrapy.core.db.AstraDB"]}} +{"id": "astrapy.core.db.AstraDB.to_async", "text": "", "metadata": {"kind": "function", "name": "to_async", "path": "astrapy.core.db.AstraDB.to_async", "returns": [{"type": "AsyncAstraDB"}], "gathered_types": ["astrapy.core.db.AsyncAstraDB"]}} +{"id": "astrapy.core.db.AstraDB.set_caller", "text": "", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.core.db.AstraDB.set_caller", "parameters": [{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db.AstraDB.post_raw_request", "text": "", "metadata": {"kind": "function", "name": "post_raw_request", "path": "astrapy.core.db.AstraDB.post_raw_request", "parameters": [{"name": "body", "type": "dict[str, Any]"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDB.collection", "text": "Retrieve a collection from the database.", "metadata": {"kind": "function", "name": "collection", "path": "astrapy.core.db.AstraDB.collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to retrieve.", "default": null}], "returns": [{"name": "AstraDBCollection", "type": "AstraDBCollection", "description": "The collection object."}], "gathered_types": ["astrapy.core.db.AstraDBCollection"]}} +{"id": "astrapy.core.db.AstraDB.get_collections", "text": 
"Retrieve a list of collections from the database.", "metadata": {"kind": "function", "name": "get_collections", "path": "astrapy.core.db.AstraDB.get_collections", "parameters": [{"name": "options", "type": "dict", "description": "Options to get the collection list", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "An object containing the list of collections in the database:\n{\"status\": {\"collections\": [...]}}"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AstraDB.create_collection", "text": "Create a new collection in the database.", "metadata": {"kind": "function", "name": "create_collection", "path": "astrapy.core.db.AstraDB.create_collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to create.", "default": null}, {"name": "options", "type": "dict", "description": "Options for the collection.", "value": "None", "default": "None"}, {"name": "dimension", "type": "int", "description": "Dimension for vector search.", "value": "None", "default": "None"}, {"name": "metric", "type": "str", "description": "Metric choice for vector search.", "value": "None", "default": "None"}, {"name": "service_dict", "type": "dict", "description": "a definition for the $vectorize service", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", 
"default": "None"}], "returns": [{"name": "AstraDBCollection", "type": "AstraDBCollection", "description": "The created collection object."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.db.AstraDBCollection"]}} +{"id": "astrapy.core.db.AstraDB.delete_collection", "text": "Delete a collection from the database.", "metadata": {"kind": "function", "name": "delete_collection", "path": "astrapy.core.db.AstraDB.delete_collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDB", "text": "", "metadata": {"kind": "class", "name": "AsyncAstraDB", "path": "astrapy.core.db.AsyncAstraDB", "parameters": [{"name": "token", "type": "str | None"}, {"name": "api_endpoint", "type": "str"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}]}} +{"id": "astrapy.core.db.AsyncAstraDB.caller_name", "text": "", "metadata": {"kind": "attribute", "name": "caller_name", "path": "astrapy.core.db.AsyncAstraDB.caller_name", "value": "caller_name = caller_name"}} +{"id": "astrapy.core.db.AsyncAstraDB.caller_version", "text": "", "metadata": {"kind": "attribute", "name": 
"caller_version", "path": "astrapy.core.db.AsyncAstraDB.caller_version", "value": "caller_version = caller_version"}} +{"id": "astrapy.core.db.AsyncAstraDB.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.core.db.AsyncAstraDB.client", "value": "client = httpx.AsyncClient()", "gathered_types": ["AsyncClient"]}} +{"id": "astrapy.core.db.AsyncAstraDB.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.core.db.AsyncAstraDB.token", "value": "token = token"}} +{"id": "astrapy.core.db.AsyncAstraDB.api_endpoint", "text": "", "metadata": {"kind": "attribute", "name": "api_endpoint", "path": "astrapy.core.db.AsyncAstraDB.api_endpoint", "value": "api_endpoint = api_endpoint"}} +{"id": "astrapy.core.db.AsyncAstraDB.base_url", "text": "", "metadata": {"kind": "attribute", "name": "base_url", "path": "astrapy.core.db.AsyncAstraDB.base_url", "value": "base_url = self.api_endpoint.strip('/')"}} +{"id": "astrapy.core.db.AsyncAstraDB.api_path", "text": "", "metadata": {"kind": "attribute", "name": "api_path", "path": "astrapy.core.db.AsyncAstraDB.api_path", "value": "api_path = DEFAULT_JSON_API_PATH if api_path is None else api_path.strip('/')", "gathered_types": ["astrapy.core.defaults.DEFAULT_JSON_API_PATH"]}} +{"id": "astrapy.core.db.AsyncAstraDB.api_version", "text": "", "metadata": {"kind": "attribute", "name": "api_version", "path": "astrapy.core.db.AsyncAstraDB.api_version", "value": "api_version = DEFAULT_JSON_API_VERSION if api_version is None else api_version.strip('/')", "gathered_types": ["astrapy.core.defaults.DEFAULT_JSON_API_VERSION"]}} +{"id": "astrapy.core.db.AsyncAstraDB.namespace", "text": "", "metadata": {"kind": "attribute", "name": "namespace", "path": "astrapy.core.db.AsyncAstraDB.namespace", "value": "namespace = namespace"}} +{"id": "astrapy.core.db.AsyncAstraDB.base_path", "text": "", "metadata": {"kind": "attribute", "name": "base_path", "path": 
"astrapy.core.db.AsyncAstraDB.base_path", "value": "base_path: str = f'/{'/'.join(base_path_components)}'"}} +{"id": "astrapy.core.db.AsyncAstraDB.copy", "text": "", "metadata": {"kind": "function", "name": "copy", "path": "astrapy.core.db.AsyncAstraDB.copy", "parameters": [{"name": "token", "default": "None", "type": "str | None"}, {"name": "api_endpoint", "default": "None", "type": "str | None"}, {"name": "api_path", "default": "None", "type": "str | None"}, {"name": "api_version", "default": "None", "type": "str | None"}, {"name": "namespace", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "AsyncAstraDB"}], "gathered_types": ["astrapy.core.db.AsyncAstraDB"]}} +{"id": "astrapy.core.db.AsyncAstraDB.to_sync", "text": "", "metadata": {"kind": "function", "name": "to_sync", "path": "astrapy.core.db.AsyncAstraDB.to_sync", "returns": [{"type": "AstraDB"}], "gathered_types": ["astrapy.core.db.AstraDB"]}} +{"id": "astrapy.core.db.AsyncAstraDB.set_caller", "text": "", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.core.db.AsyncAstraDB.set_caller", "parameters": [{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.db.AsyncAstraDB.post_raw_request", "text": "", "metadata": {"kind": "function", "name": "post_raw_request", "path": "astrapy.core.db.AsyncAstraDB.post_raw_request", "parameters": [{"name": "body", "type": "dict[str, Any]"}, {"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDB.collection", "text": "Retrieve a collection from the database.", "metadata": 
{"kind": "function", "name": "collection", "path": "astrapy.core.db.AsyncAstraDB.collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to retrieve.", "default": null}, {"name": "timeout_info", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "default": null}], "returns": [{"name": "AstraDBCollection", "type": "AsyncAstraDBCollection", "description": "The collection object."}], "gathered_types": ["astrapy.core.db.AsyncAstraDBCollection"]}} +{"id": "astrapy.core.db.AsyncAstraDB.get_collections", "text": "Retrieve a list of collections from the database.", "metadata": {"kind": "function", "name": "get_collections", "path": "astrapy.core.db.AsyncAstraDB.get_collections", "parameters": [{"name": "options", "type": "dict", "description": "Options to get the collection list", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "An object containing the list of collections in the database:\n{\"status\": {\"collections\": [...]}}"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.db.AsyncAstraDB.create_collection", "text": "Create a new collection in the database.", "metadata": {"kind": "function", "name": "create_collection", "path": "astrapy.core.db.AsyncAstraDB.create_collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to create.", "default": null}, {"name": "options", "type": "dict", 
"description": "Options for the collection.", "value": "None", "default": "None"}, {"name": "dimension", "type": "int", "description": "Dimension for vector search.", "value": "None", "default": "None"}, {"name": "metric", "type": "str", "description": "Metric choice for vector search.", "value": "None", "default": "None"}, {"name": "service_dict", "type": "dict", "description": "a definition for the $vectorize service", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "AsyncAstraDBCollection", "type": "AsyncAstraDBCollection", "description": "The created collection object."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.db.AsyncAstraDBCollection"]}} +{"id": "astrapy.core.db.AsyncAstraDB.delete_collection", "text": "Delete a collection from the database.", "metadata": {"kind": "function", "name": "delete_collection", "path": "astrapy.core.db.AsyncAstraDB.delete_collection", "parameters": [{"name": "collection_name", "type": "str", "description": "The name of the collection to delete.", "default": null}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "a float, or a TimeoutInfo dict, for the HTTP request.\nNote that a 'read' timeout event will not block the action taken\nby the API server if it has received the request already.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "The response from the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.api", "text": "", "metadata": {"kind": "module", "name": "api", "path": "astrapy.core.api", "imports": {"annotations": 
"__future__.annotations", "logging": "logging", "Any": "typing.Any", "cast": "typing.cast", "httpx": "httpx", "API_RESPONSE": "astrapy.core.core_types.API_RESPONSE", "amake_request": "astrapy.core.utils.amake_request", "make_request": "astrapy.core.utils.make_request"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.api.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.core.api.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.core.api.APIRequestError", "text": "", "metadata": {"kind": "class", "name": "APIRequestError", "path": "astrapy.core.api.APIRequestError", "parameters": [{"name": "response", "type": "httpx.Response"}, {"name": "payload", "type": "dict[str, Any] | None"}], "bases": ["ValueError"], "gathered_types": ["Response", "ValueError"]}} +{"id": "astrapy.core.api.APIRequestError.response", "text": "", "metadata": {"kind": "attribute", "name": "response", "path": "astrapy.core.api.APIRequestError.response", "value": "response = response"}} +{"id": "astrapy.core.api.APIRequestError.payload", "text": "", "metadata": {"kind": "attribute", "name": "payload", "path": "astrapy.core.api.APIRequestError.payload", "value": "payload = payload"}} +{"id": "astrapy.core.api.raw_api_request", "text": "", "metadata": {"kind": "function", "name": "raw_api_request", "path": "astrapy.core.api.raw_api_request", "parameters": [{"name": "client", "type": "httpx.Client"}, {"name": "base_url", "type": "str"}, {"name": "auth_header", "type": "str"}, {"name": "token", "type": "str | None"}, {"name": "method", "type": "str"}, {"name": "json_data", "type": "dict[str, Any] | None"}, {"name": "url_params", "type": "dict[str, Any] | None"}, {"name": "path", "type": "str | None"}, {"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": 
"str | None"}, {"name": "timeout", "type": "httpx.Timeout | float | None"}, {"name": "additional_headers", "type": "dict[str, str]"}], "returns": [{"type": "httpx.Response"}], "gathered_types": ["Client", "Response", "Timeout"]}} +{"id": "astrapy.core.api.process_raw_api_response", "text": "", "metadata": {"kind": "function", "name": "process_raw_api_response", "path": "astrapy.core.api.process_raw_api_response", "parameters": [{"name": "raw_response", "type": "httpx.Response"}, {"name": "skip_error_check", "type": "bool"}, {"name": "json_data", "type": "dict[str, Any] | None"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.core_types.API_RESPONSE", "Response"]}} +{"id": "astrapy.core.api.api_request", "text": "", "metadata": {"kind": "function", "name": "api_request", "path": "astrapy.core.api.api_request", "parameters": [{"name": "client", "type": "httpx.Client"}, {"name": "base_url", "type": "str"}, {"name": "auth_header", "type": "str"}, {"name": "token", "type": "str | None"}, {"name": "method", "type": "str"}, {"name": "json_data", "type": "dict[str, Any] | None"}, {"name": "url_params", "type": "dict[str, Any] | None"}, {"name": "path", "type": "str | None"}, {"name": "skip_error_check", "type": "bool"}, {"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}, {"name": "timeout", "type": "httpx.Timeout | float | None"}, {"name": "additional_headers", "type": "dict[str, str]"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["Client", "astrapy.core.core_types.API_RESPONSE", "Timeout"]}} +{"id": "astrapy.core.api.async_raw_api_request", "text": "", "metadata": {"kind": "function", "name": "async_raw_api_request", "path": "astrapy.core.api.async_raw_api_request", "parameters": [{"name": "client", "type": "httpx.AsyncClient"}, {"name": "base_url", "type": "str"}, {"name": "auth_header", "type": "str"}, {"name": "token", "type": "str | None"}, {"name": "method", "type": "str"}, {"name": 
"json_data", "type": "dict[str, Any] | None"}, {"name": "url_params", "type": "dict[str, Any] | None"}, {"name": "path", "type": "str | None"}, {"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}, {"name": "timeout", "type": "httpx.Timeout | float | None"}, {"name": "additional_headers", "type": "dict[str, str]"}], "returns": [{"type": "httpx.Response"}], "gathered_types": ["AsyncClient", "Response", "Timeout"]}} +{"id": "astrapy.core.api.async_process_raw_api_response", "text": "", "metadata": {"kind": "function", "name": "async_process_raw_api_response", "path": "astrapy.core.api.async_process_raw_api_response", "parameters": [{"name": "raw_response", "type": "httpx.Response"}, {"name": "skip_error_check", "type": "bool"}, {"name": "json_data", "type": "dict[str, Any] | None"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.core_types.API_RESPONSE", "Response"]}} +{"id": "astrapy.core.api.async_api_request", "text": "", "metadata": {"kind": "function", "name": "async_api_request", "path": "astrapy.core.api.async_api_request", "parameters": [{"name": "client", "type": "httpx.AsyncClient"}, {"name": "base_url", "type": "str"}, {"name": "auth_header", "type": "str"}, {"name": "token", "type": "str | None"}, {"name": "method", "type": "str"}, {"name": "json_data", "type": "dict[str, Any] | None"}, {"name": "url_params", "type": "dict[str, Any] | None"}, {"name": "path", "type": "str | None"}, {"name": "skip_error_check", "type": "bool"}, {"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}, {"name": "timeout", "type": "httpx.Timeout | float | None"}, {"name": "additional_headers", "type": "dict[str, str]"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["AsyncClient", "astrapy.core.core_types.API_RESPONSE", "Timeout"]}} +{"id": "astrapy.core.ops", "text": "", "metadata": {"kind": "module", "name": "ops", "path": "astrapy.core.ops", "imports": 
{"annotations": "__future__.annotations", "logging": "logging", "Any": "typing.Any", "TypedDict": "typing.TypedDict", "cast": "typing.cast", "httpx": "httpx", "APIRequestError": "astrapy.core.api.APIRequestError", "api_request": "astrapy.core.api.api_request", "async_api_request": "astrapy.core.api.async_api_request", "async_raw_api_request": "astrapy.core.api.async_raw_api_request", "raw_api_request": "astrapy.core.api.raw_api_request", "API_RESPONSE": "astrapy.core.core_types.API_RESPONSE", "OPS_API_RESPONSE": "astrapy.core.core_types.OPS_API_RESPONSE", "DEFAULT_DEV_OPS_API_VERSION": "astrapy.core.defaults.DEFAULT_DEV_OPS_API_VERSION", "DEFAULT_DEV_OPS_AUTH_HEADER": "astrapy.core.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "DEFAULT_DEV_OPS_URL": "astrapy.core.defaults.DEFAULT_DEV_OPS_URL", "TimeoutInfoWideType": "astrapy.core.utils.TimeoutInfoWideType", "http_methods": "astrapy.core.utils.http_methods", "to_httpx_timeout": "astrapy.core.utils.to_httpx_timeout"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.ops.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.core.ops.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": ["__name__"]}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams", "text": "", "metadata": {"kind": "class", "name": "AstraDBOpsConstructorParams", "path": "astrapy.core.ops.AstraDBOpsConstructorParams", "bases": ["TypedDict"]}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.core.ops.AstraDBOpsConstructorParams.token", "value": "token: str | None"}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams.dev_ops_url", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_url", "path": "astrapy.core.ops.AstraDBOpsConstructorParams.dev_ops_url", "value": 
"dev_ops_url: str | None"}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams.dev_ops_api_version", "text": "", "metadata": {"kind": "attribute", "name": "dev_ops_api_version", "path": "astrapy.core.ops.AstraDBOpsConstructorParams.dev_ops_api_version", "value": "dev_ops_api_version: str | None"}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams.caller_name", "text": "", "metadata": {"kind": "attribute", "name": "caller_name", "path": "astrapy.core.ops.AstraDBOpsConstructorParams.caller_name", "value": "caller_name: str | None"}} +{"id": "astrapy.core.ops.AstraDBOpsConstructorParams.caller_version", "text": "", "metadata": {"kind": "attribute", "name": "caller_version", "path": "astrapy.core.ops.AstraDBOpsConstructorParams.caller_version", "value": "caller_version: str | None"}} +{"id": "astrapy.core.ops.AstraDBOps", "text": "", "metadata": {"kind": "class", "name": "AstraDBOps", "path": "astrapy.core.ops.AstraDBOps", "parameters": [{"name": "token", "type": "str | None"}, {"name": "dev_ops_url", "default": "None", "type": "str | None"}, {"name": "dev_ops_api_version", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}]}} +{"id": "astrapy.core.ops.AstraDBOps.client", "text": "", "metadata": {"kind": "attribute", "name": "client", "path": "astrapy.core.ops.AstraDBOps.client", "value": "client = httpx.Client()", "gathered_types": ["Client"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_client", "text": "", "metadata": {"kind": "attribute", "name": "async_client", "path": "astrapy.core.ops.AstraDBOps.async_client", "value": "async_client = httpx.AsyncClient()", "gathered_types": ["AsyncClient"]}} +{"id": "astrapy.core.ops.AstraDBOps.caller_name", "text": "", "metadata": {"kind": "attribute", "name": "caller_name", "path": "astrapy.core.ops.AstraDBOps.caller_name", "value": "caller_name = caller_name"}} +{"id": 
"astrapy.core.ops.AstraDBOps.caller_version", "text": "", "metadata": {"kind": "attribute", "name": "caller_version", "path": "astrapy.core.ops.AstraDBOps.caller_version", "value": "caller_version = caller_version"}} +{"id": "astrapy.core.ops.AstraDBOps.constructor_params", "text": "", "metadata": {"kind": "attribute", "name": "constructor_params", "path": "astrapy.core.ops.AstraDBOps.constructor_params", "value": "constructor_params: AstraDBOpsConstructorParams = {'token': token, 'dev_ops_url': dev_ops_url, 'dev_ops_api_version': dev_ops_api_version, 'caller_name': caller_name, 'caller_version': caller_version}", "gathered_types": ["AstraDBOpsConstructorParams"]}} +{"id": "astrapy.core.ops.AstraDBOps.token", "text": "", "metadata": {"kind": "attribute", "name": "token", "path": "astrapy.core.ops.AstraDBOps.token", "value": "token: str | None"}} +{"id": "astrapy.core.ops.AstraDBOps.base_url", "text": "", "metadata": {"kind": "attribute", "name": "base_url", "path": "astrapy.core.ops.AstraDBOps.base_url", "value": "base_url = f'{dev_ops_url}/{dev_ops_api_version}'"}} +{"id": "astrapy.core.ops.AstraDBOps.copy", "text": "", "metadata": {"kind": "function", "name": "copy", "path": "astrapy.core.ops.AstraDBOps.copy", "parameters": [{"name": "token", "default": "None", "type": "str | None"}, {"name": "dev_ops_url", "default": "None", "type": "str | None"}, {"name": "dev_ops_api_version", "default": "None", "type": "str | None"}, {"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], "returns": [{"type": "AstraDBOps"}], "gathered_types": ["astrapy.core.ops.AstraDBOps"]}} +{"id": "astrapy.core.ops.AstraDBOps.set_caller", "text": "", "metadata": {"kind": "function", "name": "set_caller", "path": "astrapy.core.ops.AstraDBOps.set_caller", "parameters": [{"name": "caller_name", "default": "None", "type": "str | None"}, {"name": "caller_version", "default": "None", "type": "str | None"}], 
"returns": [{"type": "None"}]}} +{"id": "astrapy.core.ops.AstraDBOps.get_databases", "text": "Retrieve a list of databases.", "metadata": {"kind": "function", "name": "get_databases", "path": "astrapy.core.ops.AstraDBOps.get_databases", "parameters": [{"name": "options", "type": "dict", "description": "Additional options for the request.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "OPS_API_RESPONSE", "description": "a JSON list of dictionaries, one per database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_get_databases", "text": "Retrieve a list of databases - async version of the method.", "metadata": {"kind": "function", "name": "async_get_databases", "path": "astrapy.core.ops.AstraDBOps.async_get_databases", "parameters": [{"name": "options", "type": "dict", "description": "Additional options for the request.", "value": "None", "default": "None"}], "returns": [{"name": "list", "type": "OPS_API_RESPONSE", "description": "a JSON list of dictionaries, one per database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_database", "text": "Create a new database.", "metadata": {"kind": "function", "name": "create_database", "path": "astrapy.core.ops.AstraDBOps.create_database", "parameters": [{"name": "database_definition", "type": "dict", "description": "A dictionary defining the properties of the database to be created.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "dict[str, str]", "description": "A dictionary such as: {\"id\": the ID of the created database}"}, {"type": "dict[str, str]", "description": "Raises an error if not 
successful."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_create_database", "text": "Create a new database - async version of the method.", "metadata": {"kind": "function", "name": "async_create_database", "path": "astrapy.core.ops.AstraDBOps.async_create_database", "parameters": [{"name": "database_definition", "type": "dict", "description": "A dictionary defining the properties of the database to be created.", "value": "None", "default": "None"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "dict[str, str]", "description": "A dictionary such as: {\"id\": the ID of the created database}"}, {"type": "dict[str, str]", "description": "Raises an error if not successful."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.terminate_database", "text": "Terminate an existing database.", "metadata": {"kind": "function", "name": "terminate_database", "path": "astrapy.core.ops.AstraDBOps.terminate_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to terminate.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The identifier of the terminated database, or None if termination was unsuccessful."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_terminate_database", "text": "Terminate an existing database - async version of the method.", "metadata": {"kind": "function", "name": "async_terminate_database", "path": "astrapy.core.ops.AstraDBOps.async_terminate_database", "parameters": 
[{"name": "database", "type": "str", "description": "The identifier of the database to terminate.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The identifier of the terminated database, or None if termination was unsuccessful."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_database", "text": "Retrieve details of a specific database.", "metadata": {"kind": "function", "name": "get_database", "path": "astrapy.core.ops.AstraDBOps.get_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to retrieve.", "value": "''", "default": "''"}, {"name": "options", "type": "dict", "description": "Additional options for the request.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "A JSON response containing the details of the specified database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_get_database", "text": "Retrieve details of a specific database - async version of the method.", "metadata": {"kind": "function", "name": "async_get_database", "path": "astrapy.core.ops.AstraDBOps.async_get_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to retrieve.", "value": "''", "default": "''"}, {"name": "options", "type": "dict", "description": "Additional options for the request.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "API_RESPONSE", "description": "A JSON response containing the details of the specified database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", 
"astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_keyspace", "text": "Create a keyspace in a specified database.", "metadata": {"kind": "function", "name": "create_keyspace", "path": "astrapy.core.ops.AstraDBOps.create_keyspace", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database where the keyspace will be created.", "value": "''", "default": "''"}, {"name": "keyspace", "type": "str", "description": "The name of the keyspace to create.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, str]", "description": "{\"ok\": 1} if successful. Raises errors otherwise."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_create_keyspace", "text": "Create a keyspace in a specified database - async version of the method.", "metadata": {"kind": "function", "name": "async_create_keyspace", "path": "astrapy.core.ops.AstraDBOps.async_create_keyspace", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database where the keyspace will be created.", "value": "''", "default": "''"}, {"name": "keyspace", "type": "str", "description": "The name of the keyspace to create.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"type": "dict[str, str]", "description": "{\"ok\": 1} if successful. 
Raises errors otherwise."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.delete_keyspace", "text": "Delete a keyspace from a database", "metadata": {"kind": "function", "name": "delete_keyspace", "path": "astrapy.core.ops.AstraDBOps.delete_keyspace", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to terminate.", "value": "''", "default": "''"}, {"name": "keyspace", "type": "str", "description": "The name of the keyspace to create.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The identifier of the deleted keyspace. Otherwise raises an error."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.async_delete_keyspace", "text": "Delete a keyspace from a database - async version of the method.", "metadata": {"kind": "function", "name": "async_delete_keyspace", "path": "astrapy.core.ops.AstraDBOps.async_delete_keyspace", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to terminate.", "value": "''", "default": "''"}, {"name": "keyspace", "type": "str", "description": "The name of the keyspace to create.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "str", "type": "str", "description": "The identifier of the deleted keyspace. 
Otherwise raises an error."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.park_database", "text": "Park a specific database, making it inactive.", "metadata": {"kind": "function", "name": "park_database", "path": "astrapy.core.ops.AstraDBOps.park_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to park.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after parking the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.unpark_database", "text": "Unpark a specific database, making it active again.", "metadata": {"kind": "function", "name": "unpark_database", "path": "astrapy.core.ops.AstraDBOps.unpark_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to unpark.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after unparking the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.resize_database", "text": "Resize a specific database according to provided options.", "metadata": {"kind": "function", "name": "resize_database", "path": "astrapy.core.ops.AstraDBOps.resize_database", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database to resize.", "value": "''", "default": "''"}, {"name": "options", "type": "dict", "description": "The specifications for the resize operation.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after the resize operation."}], "gathered_types": 
["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.reset_database_password", "text": "Reset the password for a specific database.", "metadata": {"kind": "function", "name": "reset_database_password", "path": "astrapy.core.ops.AstraDBOps.reset_database_password", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to reset the password.", "value": "''", "default": "''"}, {"name": "options", "type": "dict", "description": "Additional options for the password reset.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after resetting the password."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_secure_bundle", "text": "Retrieve a secure bundle URL for a specific database.", "metadata": {"kind": "function", "name": "get_secure_bundle", "path": "astrapy.core.ops.AstraDBOps.get_secure_bundle", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to get the secure bundle.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The secure bundle URL and related information."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_datacenters", "text": "Get a list of datacenters associated with a specific database.", "metadata": {"kind": "function", "name": "get_datacenters", "path": "astrapy.core.ops.AstraDBOps.get_datacenters", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to list datacenters.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", 
"description": "A list of datacenters and their details."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_datacenter", "text": "Create a new datacenter for a specific database.", "metadata": {"kind": "function", "name": "create_datacenter", "path": "astrapy.core.ops.AstraDBOps.create_datacenter", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to create the datacenter.", "value": "''", "default": "''"}, {"name": "options", "type": "dict", "description": "Specifications for the new datacenter.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the datacenter."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.terminate_datacenter", "text": "Terminate a specific datacenter in a database.", "metadata": {"kind": "function", "name": "terminate_datacenter", "path": "astrapy.core.ops.AstraDBOps.terminate_datacenter", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database containing the datacenter.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter to terminate.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after terminating the datacenter."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_access_list", "text": "Retrieve the access list for a specific database.", "metadata": {"kind": "function", "name": "get_access_list", "path": "astrapy.core.ops.AstraDBOps.get_access_list", "parameters": [{"name": 
"database", "type": "str", "description": "The identifier of the database for which to get the access list.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The current access list for the database."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.replace_access_list", "text": "Replace the entire access list for a specific database.", "metadata": {"kind": "function", "name": "replace_access_list", "path": "astrapy.core.ops.AstraDBOps.replace_access_list", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to replace the access list.", "value": "''", "default": "''"}, {"name": "access_list", "type": "dict", "description": "The new access list to be set.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after replacing the access list."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.update_access_list", "text": "Update the access list for a specific database.", "metadata": {"kind": "function", "name": "update_access_list", "path": "astrapy.core.ops.AstraDBOps.update_access_list", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to update the access list.", "value": "''", "default": "''"}, {"name": "access_list", "type": "dict", "description": "The updates to be applied to the access list.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after updating the access list."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": 
"astrapy.core.ops.AstraDBOps.add_access_list_address", "text": "Add a new address to the access list for a specific database.", "metadata": {"kind": "function", "name": "add_access_list_address", "path": "astrapy.core.ops.AstraDBOps.add_access_list_address", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to add the address.", "value": "''", "default": "''"}, {"name": "address", "type": "dict", "description": "The address details to add to the access list.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after adding the address."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.delete_access_list", "text": "Delete the access list for a specific database.", "metadata": {"kind": "function", "name": "delete_access_list", "path": "astrapy.core.ops.AstraDBOps.delete_access_list", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database for which to delete the access list.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after deleting the access list."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_private_link", "text": "Retrieve the private link information for a specified database.", "metadata": {"kind": "function", "name": "get_private_link", "path": "astrapy.core.ops.AstraDBOps.get_private_link", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The private link information for the database."}], "gathered_types": 
["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_datacenter_private_link", "text": "Retrieve the private link information for a specific datacenter in a database.", "metadata": {"kind": "function", "name": "get_datacenter_private_link", "path": "astrapy.core.ops.AstraDBOps.get_datacenter_private_link", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The private link information for the specified datacenter."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_datacenter_private_link", "text": "Create a private link for a specific datacenter in a database.", "metadata": {"kind": "function", "name": "create_datacenter_private_link", "path": "astrapy.core.ops.AstraDBOps.create_datacenter_private_link", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}, {"name": "private_link", "type": "dict", "description": "The private link configuration details.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the private link."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_datacenter_endpoint", "text": "Create an endpoint for a specific datacenter in a database.", "metadata": {"kind": "function", "name": 
"create_datacenter_endpoint", "path": "astrapy.core.ops.AstraDBOps.create_datacenter_endpoint", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}, {"name": "endpoint", "type": "dict", "description": "The endpoint configuration details.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the endpoint."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.update_datacenter_endpoint", "text": "Update an existing endpoint for a specific datacenter in a database.", "metadata": {"kind": "function", "name": "update_datacenter_endpoint", "path": "astrapy.core.ops.AstraDBOps.update_datacenter_endpoint", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}, {"name": "endpoint", "type": "dict", "description": "The updated endpoint configuration details.", "value": "{}", "default": "{}"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after updating the endpoint."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_datacenter_endpoint", "text": "Retrieve information about a specific endpoint in a datacenter of a database.", "metadata": {"kind": "function", "name": "get_datacenter_endpoint", "path": "astrapy.core.ops.AstraDBOps.get_datacenter_endpoint", "parameters": [{"name": "database", "type": "str", "description": "The 
identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}, {"name": "endpoint", "type": "str", "description": "The identifier of the endpoint.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The endpoint information for the specified datacenter."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.delete_datacenter_endpoint", "text": "Delete a specific endpoint in a datacenter of a database.", "metadata": {"kind": "function", "name": "delete_datacenter_endpoint", "path": "astrapy.core.ops.AstraDBOps.delete_datacenter_endpoint", "parameters": [{"name": "database", "type": "str", "description": "The identifier of the database.", "value": "''", "default": "''"}, {"name": "datacenter", "type": "str", "description": "The identifier of the datacenter.", "value": "''", "default": "''"}, {"name": "endpoint", "type": "str", "description": "The identifier of the endpoint to delete.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after deleting the endpoint."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_available_classic_regions", "text": "Retrieve a list of available classic regions.", "metadata": {"kind": "function", "name": "get_available_classic_regions", "path": "astrapy.core.ops.AstraDBOps.get_available_classic_regions", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of available classic regions."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", 
"astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_available_regions", "text": "Retrieve a list of available regions for serverless deployment.", "metadata": {"kind": "function", "name": "get_available_regions", "path": "astrapy.core.ops.AstraDBOps.get_available_regions", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of available regions for serverless deployment."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_roles", "text": "Retrieve a list of roles within the organization.", "metadata": {"kind": "function", "name": "get_roles", "path": "astrapy.core.ops.AstraDBOps.get_roles", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of roles within the organization."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_role", "text": "Create a new role within the organization.", "metadata": {"kind": "function", "name": "create_role", "path": "astrapy.core.ops.AstraDBOps.create_role", "parameters": [{"name": "role_definition", "type": "dict", "description": "The definition of the role to be created.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the role."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_role", "text": "Retrieve details of a specific role within the organization.", "metadata": {"kind": "function", "name": "get_role", "path": "astrapy.core.ops.AstraDBOps.get_role", 
"parameters": [{"name": "role", "type": "str", "description": "The identifier of the role.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The details of the specified role."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.update_role", "text": "Update the definition of an existing role within the organization.", "metadata": {"kind": "function", "name": "update_role", "path": "astrapy.core.ops.AstraDBOps.update_role", "parameters": [{"name": "role", "type": "str", "description": "The identifier of the role to update.", "value": "''", "default": "''"}, {"name": "role_definition", "type": "dict", "description": "The new definition of the role.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after updating the role."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.delete_role", "text": "Delete a specific role from the organization.", "metadata": {"kind": "function", "name": "delete_role", "path": "astrapy.core.ops.AstraDBOps.delete_role", "parameters": [{"name": "role", "type": "str", "description": "The identifier of the role to delete.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after deleting the role."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.invite_user", "text": "Invite a new user to the organization.", "metadata": {"kind": "function", "name": "invite_user", "path": "astrapy.core.ops.AstraDBOps.invite_user", "parameters": [{"name": "user_definition", "type": "dict", "description": "The definition of the user to be 
invited.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after inviting the user."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_users", "text": "Retrieve a list of users within the organization.", "metadata": {"kind": "function", "name": "get_users", "path": "astrapy.core.ops.AstraDBOps.get_users", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of users within the organization."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_user", "text": "Retrieve details of a specific user within the organization.", "metadata": {"kind": "function", "name": "get_user", "path": "astrapy.core.ops.AstraDBOps.get_user", "parameters": [{"name": "user", "type": "str", "description": "The identifier of the user.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The details of the specified user."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.remove_user", "text": "Remove a user from the organization.", "metadata": {"kind": "function", "name": "remove_user", "path": "astrapy.core.ops.AstraDBOps.remove_user", "parameters": [{"name": "user", "type": "str", "description": "The identifier of the user to remove.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after removing the user."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": 
"astrapy.core.ops.AstraDBOps.update_user_roles", "text": "Update the roles assigned to a specific user within the organization.", "metadata": {"kind": "function", "name": "update_user_roles", "path": "astrapy.core.ops.AstraDBOps.update_user_roles", "parameters": [{"name": "user", "type": "str", "description": "The identifier of the user.", "value": "''", "default": "''"}, {"name": "roles", "type": "list", "description": "The list of new roles to assign to the user.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after updating the user's roles."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_clients", "text": "Retrieve a list of client IDs and secrets associated with the organization.", "metadata": {"kind": "function", "name": "get_clients", "path": "astrapy.core.ops.AstraDBOps.get_clients", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of client IDs and their associated secrets."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_token", "text": "Create a new token with specific roles.", "metadata": {"kind": "function", "name": "create_token", "path": "astrapy.core.ops.AstraDBOps.create_token", "parameters": [{"name": "roles", "type": "dict", "description": "The roles to associate with the token:\n{\"roles\": [\"<roleId>\"]}", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the token."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": 
"astrapy.core.ops.AstraDBOps.delete_token", "text": "Delete a specific token.", "metadata": {"kind": "function", "name": "delete_token", "path": "astrapy.core.ops.AstraDBOps.delete_token", "parameters": [{"name": "token", "type": "str", "description": "The identifier of the token to delete.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after deleting the token."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_organization", "text": "Retrieve details of the current organization.", "metadata": {"kind": "function", "name": "get_organization", "path": "astrapy.core.ops.AstraDBOps.get_organization", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The details of the organization."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_access_lists", "text": "Retrieve a list of access lists for the organization.", "metadata": {"kind": "function", "name": "get_access_lists", "path": "astrapy.core.ops.AstraDBOps.get_access_lists", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of access lists."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_access_list_template", "text": "Retrieve a template for creating an access list.", "metadata": {"kind": "function", "name": "get_access_list_template", "path": "astrapy.core.ops.AstraDBOps.get_access_list_template", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], 
"returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "An access list template."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.validate_access_list", "text": "Validate the configuration of the access list.", "metadata": {"kind": "function", "name": "validate_access_list", "path": "astrapy.core.ops.AstraDBOps.validate_access_list", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The validation result of the access list configuration."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_private_links", "text": "Retrieve a list of private link connections for the organization.", "metadata": {"kind": "function", "name": "get_private_links", "path": "astrapy.core.ops.AstraDBOps.get_private_links", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of private link connections."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_streaming_providers", "text": "Retrieve a list of streaming service providers.", "metadata": {"kind": "function", "name": "get_streaming_providers", "path": "astrapy.core.ops.AstraDBOps.get_streaming_providers", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of available streaming service providers."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_streaming_tenants", "text": 
"Retrieve a list of streaming tenants.", "metadata": {"kind": "function", "name": "get_streaming_tenants", "path": "astrapy.core.ops.AstraDBOps.get_streaming_tenants", "parameters": [{"name": "timeout_info", "default": "None", "type": "TimeoutInfoWideType"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "A list of streaming tenants and their details."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.create_streaming_tenant", "text": "Create a new streaming tenant.", "metadata": {"kind": "function", "name": "create_streaming_tenant", "path": "astrapy.core.ops.AstraDBOps.create_streaming_tenant", "parameters": [{"name": "tenant", "type": "dict", "description": "The configuration details for the new streaming tenant.", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "The response from the server after creating the streaming tenant."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.ops.AstraDBOps.delete_streaming_tenant", "text": "Delete a specific streaming tenant from a cluster.", "metadata": {"kind": "function", "name": "delete_streaming_tenant", "path": "astrapy.core.ops.AstraDBOps.delete_streaming_tenant", "parameters": [{"name": "tenant", "type": "str", "description": "The identifier of the tenant to delete.", "value": "''", "default": "''"}, {"name": "cluster", "type": "str", "description": "The identifier of the cluster from which the tenant is to be deleted.", "value": "''", "default": "''"}, {"name": "timeout_info", "type": "TimeoutInfoWideType", "description": "either a float (seconds) or a TimeoutInfo dict (see)", "value": "None", "default": "None"}], "returns": [{"name": "dict", "type": "None", "description": "The response from the server after deleting the streaming tenant."}], 
"gathered_types": ["astrapy.core.utils.TimeoutInfoWideType"]}} +{"id": "astrapy.core.ops.AstraDBOps.get_streaming_tenant", "text": "Retrieve information about the limits and usage of a specific streaming tenant.", "metadata": {"kind": "function", "name": "get_streaming_tenant", "path": "astrapy.core.ops.AstraDBOps.get_streaming_tenant", "parameters": [{"name": "tenant", "type": "str", "description": "The identifier of the streaming tenant.", "value": "''", "default": "''"}], "returns": [{"name": "dict", "type": "OPS_API_RESPONSE", "description": "Details of the specified streaming tenant, including limits and current usage."}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "astrapy.core.core_types.OPS_API_RESPONSE"]}} +{"id": "astrapy.core.core_types", "text": "", "metadata": {"kind": "module", "name": "core_types", "path": "astrapy.core.core_types", "imports": {"annotations": "__future__.annotations", "Any": "typing.Any", "Dict": "typing.Dict", "List": "typing.List", "Protocol": "typing.Protocol", "Union": "typing.Union"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.core_types.API_RESPONSE", "text": "", "metadata": {"kind": "attribute", "name": "API_RESPONSE", "path": "astrapy.core.core_types.API_RESPONSE", "value": "API_RESPONSE = Dict[str, Any]"}} +{"id": "astrapy.core.core_types.OPS_API_RESPONSE", "text": "", "metadata": {"kind": "attribute", "name": "OPS_API_RESPONSE", "path": "astrapy.core.core_types.OPS_API_RESPONSE", "value": "OPS_API_RESPONSE = Union[API_RESPONSE, List[Any]]", "gathered_types": ["astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.core_types.API_DOC", "text": "", "metadata": {"kind": "attribute", "name": "API_DOC", "path": "astrapy.core.core_types.API_DOC", "value": "API_DOC = Dict[str, Any]"}} +{"id": "astrapy.core.core_types.PaginableRequestMethod", "text": "", "metadata": {"kind": 
"class", "name": "PaginableRequestMethod", "path": "astrapy.core.core_types.PaginableRequestMethod", "bases": ["Protocol"]}} +{"id": "astrapy.core.core_types.AsyncPaginableRequestMethod", "text": "", "metadata": {"kind": "class", "name": "AsyncPaginableRequestMethod", "path": "astrapy.core.core_types.AsyncPaginableRequestMethod", "bases": ["Protocol"]}} +{"id": "astrapy.core.utils", "text": "", "metadata": {"kind": "module", "name": "utils", "path": "astrapy.core.utils", "imports": {"annotations": "__future__.annotations", "datetime": "datetime", "json": "json", "logging": "logging", "time": "time", "Any": "typing.Any", "Dict": "typing.Dict", "Iterable": "typing.Iterable", "TypedDict": "typing.TypedDict", "Union": "typing.Union", "cast": "typing.cast", "httpx": "httpx", "__version__": "astrapy.__version__", "API_RESPONSE": "astrapy.core.core_types.API_RESPONSE", "DEFAULT_REDACTED_HEADERS": "astrapy.core.defaults.DEFAULT_REDACTED_HEADERS", "DEFAULT_TIMEOUT": "astrapy.core.defaults.DEFAULT_TIMEOUT", "UUID": "astrapy.core.ids.UUID", "ObjectId": "astrapy.core.ids.ObjectId"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.utils.CustomLogger", "text": "", "metadata": {"kind": "class", "name": "CustomLogger", "path": "astrapy.core.utils.CustomLogger", "bases": ["logging.Logger"], "gathered_types": ["Logger"]}} +{"id": "astrapy.core.utils.CustomLogger.trace", "text": "", "metadata": {"kind": "function", "name": "trace", "path": "astrapy.core.utils.CustomLogger.trace", "parameters": [{"name": "msg", "type": "str"}, {"name": "args", "default": "()", "type": "Any"}, {"name": "kwargs", "default": "{}", "type": "Any"}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.utils.logger", "text": "", "metadata": {"kind": "attribute", "name": "logger", "path": "astrapy.core.utils.logger", "value": "logger = logging.getLogger(__name__)", "gathered_types": 
["__name__"]}} +{"id": "astrapy.core.utils.http_methods", "text": "", "metadata": {"kind": "class", "name": "http_methods", "path": "astrapy.core.utils.http_methods"}} +{"id": "astrapy.core.utils.http_methods.GET", "text": "", "metadata": {"kind": "attribute", "name": "GET", "path": "astrapy.core.utils.http_methods.GET", "value": "GET = 'GET'"}} +{"id": "astrapy.core.utils.http_methods.POST", "text": "", "metadata": {"kind": "attribute", "name": "POST", "path": "astrapy.core.utils.http_methods.POST", "value": "POST = 'POST'"}} +{"id": "astrapy.core.utils.http_methods.PUT", "text": "", "metadata": {"kind": "attribute", "name": "PUT", "path": "astrapy.core.utils.http_methods.PUT", "value": "PUT = 'PUT'"}} +{"id": "astrapy.core.utils.http_methods.PATCH", "text": "", "metadata": {"kind": "attribute", "name": "PATCH", "path": "astrapy.core.utils.http_methods.PATCH", "value": "PATCH = 'PATCH'"}} +{"id": "astrapy.core.utils.http_methods.DELETE", "text": "", "metadata": {"kind": "attribute", "name": "DELETE", "path": "astrapy.core.utils.http_methods.DELETE", "value": "DELETE = 'DELETE'"}} +{"id": "astrapy.core.utils.package_name", "text": "", "metadata": {"kind": "attribute", "name": "package_name", "path": "astrapy.core.utils.package_name", "value": "package_name = __name__.split('.')[0]", "gathered_types": ["__name__"]}} +{"id": "astrapy.core.utils.user_agent_astrapy", "text": "", "metadata": {"kind": "attribute", "name": "user_agent_astrapy", "path": "astrapy.core.utils.user_agent_astrapy", "value": "user_agent_astrapy = f'{package_name}/{__version__}'", "gathered_types": ["astrapy.__version__"]}} +{"id": "astrapy.core.utils.detect_ragstack_user_agent", "text": "", "metadata": {"kind": "function", "name": "detect_ragstack_user_agent", "path": "astrapy.core.utils.detect_ragstack_user_agent", "returns": [{"type": "str | None"}]}} +{"id": "astrapy.core.utils.user_agent_rs", "text": "", "metadata": {"kind": "attribute", "name": "user_agent_rs", "path": 
"astrapy.core.utils.user_agent_rs", "value": "user_agent_rs = detect_ragstack_user_agent()"}} +{"id": "astrapy.core.utils.log_request", "text": "Log the details of an HTTP request for debugging purposes.", "metadata": {"kind": "function", "name": "log_request", "path": "astrapy.core.utils.log_request", "parameters": [{"name": "json_data", "type": "dict or None", "description": "The JSON payload sent with the request, if any.", "default": null}], "returns": [{"type": "None"}]}} +{"id": "astrapy.core.utils.log_response", "text": "Log the details of an HTTP response for debugging purposes.", "metadata": {"kind": "function", "name": "log_response", "path": "astrapy.core.utils.log_response", "parameters": [{"name": "r", "type": "requests.Response", "description": "The response object from the HTTP request.", "default": null}], "returns": [{"type": "None"}], "gathered_types": ["Response"]}} +{"id": "astrapy.core.utils.user_agent_string", "text": "", "metadata": {"kind": "function", "name": "user_agent_string", "path": "astrapy.core.utils.user_agent_string", "parameters": [{"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}], "returns": [{"type": "str | None"}]}} +{"id": "astrapy.core.utils.compose_user_agent", "text": "", "metadata": {"kind": "function", "name": "compose_user_agent", "path": "astrapy.core.utils.compose_user_agent", "parameters": [{"name": "caller_name", "type": "str | None"}, {"name": "caller_version", "type": "str | None"}], "returns": [{"type": "str"}]}} +{"id": "astrapy.core.utils.TimeoutInfo", "text": "", "metadata": {"kind": "class", "name": "TimeoutInfo", "path": "astrapy.core.utils.TimeoutInfo", "bases": ["TypedDict"]}} +{"id": "astrapy.core.utils.TimeoutInfo.read", "text": "", "metadata": {"kind": "attribute", "name": "read", "path": "astrapy.core.utils.TimeoutInfo.read", "value": "read: float"}} +{"id": "astrapy.core.utils.TimeoutInfo.write", "text": "", "metadata": {"kind": "attribute", "name": 
"write", "path": "astrapy.core.utils.TimeoutInfo.write", "value": "write: float"}} +{"id": "astrapy.core.utils.TimeoutInfo.base", "text": "", "metadata": {"kind": "attribute", "name": "base", "path": "astrapy.core.utils.TimeoutInfo.base", "value": "base: float"}} +{"id": "astrapy.core.utils.TimeoutInfoWideType", "text": "", "metadata": {"kind": "attribute", "name": "TimeoutInfoWideType", "path": "astrapy.core.utils.TimeoutInfoWideType", "value": "TimeoutInfoWideType = Union[TimeoutInfo, float, None]", "gathered_types": ["astrapy.request_tools.TimeoutInfo"]}} +{"id": "astrapy.core.utils.to_httpx_timeout", "text": "", "metadata": {"kind": "function", "name": "to_httpx_timeout", "path": "astrapy.core.utils.to_httpx_timeout", "parameters": [{"name": "timeout_info", "type": "TimeoutInfoWideType"}], "returns": [{"type": "httpx.Timeout | None"}], "gathered_types": ["astrapy.core.utils.TimeoutInfoWideType", "Timeout"]}} +{"id": "astrapy.core.utils.make_request", "text": "Make an HTTP request to a specified URL.", "metadata": {"kind": "function", "name": "make_request", "path": "astrapy.core.utils.make_request", "parameters": [{"name": "client", "type": "httpx", "description": "The httpx client for the request.", "default": null}, {"name": "base_url", "type": "str", "description": "The base URL for the request.", "default": null}, {"name": "auth_header", "type": "str", "description": "The authentication header key.", "default": null}, {"name": "token", "type": "str", "description": "The token used for authentication.", "default": null}, {"name": "method", "type": "str", "description": "The HTTP method to use for the request. 
Default is POST.", "default": null}, {"name": "path", "type": "str", "description": "The specific path to append to the base URL.", "default": null}, {"name": "json_data", "type": "dict", "description": "JSON payload to be sent with the request.", "default": null}, {"name": "url_params", "type": "dict", "description": "URL parameters to be sent with the request.", "default": null}], "returns": [{"type": "httpx.Response", "description": "requests.Response: The response from the HTTP request."}], "gathered_types": ["Client", "Response", "Timeout"]}} +{"id": "astrapy.core.utils.amake_request", "text": "Make an HTTP request to a specified URL.", "metadata": {"kind": "function", "name": "amake_request", "path": "astrapy.core.utils.amake_request", "parameters": [{"name": "client", "type": "httpx", "description": "The httpx client for the request.", "default": null}, {"name": "base_url", "type": "str", "description": "The base URL for the request.", "default": null}, {"name": "auth_header", "type": "str", "description": "The authentication header key.", "default": null}, {"name": "token", "type": "str", "description": "The token used for authentication.", "default": null}, {"name": "method", "type": "str", "description": "The HTTP method to use for the request. 
Default is POST.", "default": null}, {"name": "path", "type": "str", "description": "The specific path to append to the base URL.", "default": null}, {"name": "json_data", "type": "dict", "description": "JSON payload to be sent with the request.", "default": null}, {"name": "url_params", "type": "dict", "description": "URL parameters to be sent with the request.", "default": null}], "returns": [{"type": "httpx.Response", "description": "requests.Response: The response from the HTTP request."}], "gathered_types": ["AsyncClient", "Response", "Timeout"]}} +{"id": "astrapy.core.utils.make_payload", "text": "Construct a JSON payload for an HTTP request with a specified top-level key.", "metadata": {"kind": "function", "name": "make_payload", "path": "astrapy.core.utils.make_payload", "parameters": [{"name": "top_level", "type": "str", "description": "The top-level key for the JSON payload.", "default": null}, {"name": "**kwargs", "type": "Any", "description": "Arbitrary keyword arguments representing other keys and their values to be included in the payload.", "value": "{}", "default": "{}"}], "returns": [{"name": "dict", "type": "dict[str, Any]", "description": "The constructed JSON payload."}]}} +{"id": "astrapy.core.utils.convert_vector_to_floats", "text": "Convert a vector of strings to a vector of floats.", "metadata": {"kind": "function", "name": "convert_vector_to_floats", "path": "astrapy.core.utils.convert_vector_to_floats", "parameters": [{"name": "vector", "type": "list", "description": "A vector of objects.", "default": null}], "returns": [{"name": "list", "type": "list[float]", "description": "A vector of floats."}]}} +{"id": "astrapy.core.utils.is_list_of_floats", "text": "Safely determine if it's a list of floats.\nAssumption: if list, and first item is float, then all items are.", "metadata": {"kind": "function", "name": "is_list_of_floats", "path": "astrapy.core.utils.is_list_of_floats", "parameters": [{"name": "vector", "type": "Iterable[Any]"}], 
"returns": [{"type": "bool"}]}} +{"id": "astrapy.core.utils.convert_to_ejson_date_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_date_object", "path": "astrapy.core.utils.convert_to_ejson_date_object", "parameters": [{"name": "date_value", "type": "datetime.date | datetime.datetime"}], "returns": [{"type": "dict[str, int]"}]}} +{"id": "astrapy.core.utils.convert_to_ejson_uuid_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_uuid_object", "path": "astrapy.core.utils.convert_to_ejson_uuid_object", "parameters": [{"name": "uuid_value", "type": "UUID"}], "returns": [{"type": "dict[str, str]"}], "gathered_types": ["UUID"]}} +{"id": "astrapy.core.utils.convert_to_ejson_objectid_object", "text": "", "metadata": {"kind": "function", "name": "convert_to_ejson_objectid_object", "path": "astrapy.core.utils.convert_to_ejson_objectid_object", "parameters": [{"name": "objectid_value", "type": "ObjectId"}], "returns": [{"type": "dict[str, str]"}], "gathered_types": ["ObjectId"]}} +{"id": "astrapy.core.utils.convert_ejson_date_object_to_datetime", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_date_object_to_datetime", "path": "astrapy.core.utils.convert_ejson_date_object_to_datetime", "parameters": [{"name": "date_object", "type": "dict[str, int]"}], "returns": [{"type": "datetime.datetime"}]}} +{"id": "astrapy.core.utils.convert_ejson_uuid_object_to_uuid", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_uuid_object_to_uuid", "path": "astrapy.core.utils.convert_ejson_uuid_object_to_uuid", "parameters": [{"name": "uuid_object", "type": "dict[str, str]"}], "returns": [{"type": "UUID"}], "gathered_types": ["UUID"]}} +{"id": "astrapy.core.utils.convert_ejson_objectid_object_to_objectid", "text": "", "metadata": {"kind": "function", "name": "convert_ejson_objectid_object_to_objectid", "path": "astrapy.core.utils.convert_ejson_objectid_object_to_objectid", "parameters": [{"name": 
"objectid_object", "type": "dict[str, str]"}], "returns": [{"type": "ObjectId"}], "gathered_types": ["ObjectId"]}} +{"id": "astrapy.core.utils.normalize_for_api", "text": "Normalize a payload for API calls.\nThis includes e.g. ensuring values for \"$vector\" key\nare made into plain lists of floats.", "metadata": {"kind": "function", "name": "normalize_for_api", "path": "astrapy.core.utils.normalize_for_api", "parameters": [{"name": "payload", "type": "Dict[str, Any]", "description": "A dict expressing a payload for an API call", "default": null}], "returns": [{"type": "dict[str, Any] | None", "description": "Dict[str, Any]: a \"normalized\" payload dict"}]}} +{"id": "astrapy.core.utils.restore_from_api", "text": "Process a dictionary just returned from the API.\nThis is the place where e.g. `{\"$date\": 123}` is\nconverted back into a datetime object.", "metadata": {"kind": "function", "name": "restore_from_api", "path": "astrapy.core.utils.restore_from_api", "parameters": [{"name": "response", "type": "API_RESPONSE"}], "returns": [{"type": "API_RESPONSE"}], "gathered_types": ["astrapy.core.core_types.API_RESPONSE"]}} +{"id": "astrapy.core.defaults", "text": "", "metadata": {"kind": "module", "name": "defaults", "path": "astrapy.core.defaults", "imports": {"annotations": "__future__.annotations"}, "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.core.defaults.DEFAULT_AUTH_PATH", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_AUTH_PATH", "path": "astrapy.core.defaults.DEFAULT_AUTH_PATH", "value": "DEFAULT_AUTH_PATH = '/api/rest/v1/auth'"}} +{"id": "astrapy.core.defaults.DEFAULT_JSON_API_PATH", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_JSON_API_PATH", "path": "astrapy.core.defaults.DEFAULT_JSON_API_PATH", "value": "DEFAULT_JSON_API_PATH = '/api/json'"}} +{"id": "astrapy.core.defaults.DEFAULT_JSON_API_VERSION", 
"text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_JSON_API_VERSION", "path": "astrapy.core.defaults.DEFAULT_JSON_API_VERSION", "value": "DEFAULT_JSON_API_VERSION = 'v1'"}} +{"id": "astrapy.core.defaults.DEFAULT_DEV_OPS_URL", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DEV_OPS_URL", "path": "astrapy.core.defaults.DEFAULT_DEV_OPS_URL", "value": "DEFAULT_DEV_OPS_URL = 'https://api.astra.datastax.com'"}} +{"id": "astrapy.core.defaults.DEFAULT_DEV_OPS_API_VERSION", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DEV_OPS_API_VERSION", "path": "astrapy.core.defaults.DEFAULT_DEV_OPS_API_VERSION", "value": "DEFAULT_DEV_OPS_API_VERSION = 'v2'"}} +{"id": "astrapy.core.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_DEV_OPS_AUTH_HEADER", "path": "astrapy.core.defaults.DEFAULT_DEV_OPS_AUTH_HEADER", "value": "DEFAULT_DEV_OPS_AUTH_HEADER = 'Authorization'"}} +{"id": "astrapy.core.defaults.DEFAULT_TIMEOUT", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_TIMEOUT", "path": "astrapy.core.defaults.DEFAULT_TIMEOUT", "value": "DEFAULT_TIMEOUT = 30000"}} +{"id": "astrapy.core.defaults.DEFAULT_AUTH_HEADER", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_AUTH_HEADER", "path": "astrapy.core.defaults.DEFAULT_AUTH_HEADER", "value": "DEFAULT_AUTH_HEADER = 'Token'"}} +{"id": "astrapy.core.defaults.DEFAULT_KEYSPACE_NAME", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_KEYSPACE_NAME", "path": "astrapy.core.defaults.DEFAULT_KEYSPACE_NAME", "value": "DEFAULT_KEYSPACE_NAME = 'default_keyspace'"}} +{"id": "astrapy.core.defaults.DEFAULT_REGION", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_REGION", "path": "astrapy.core.defaults.DEFAULT_REGION", "value": "DEFAULT_REGION = 'us-east1'"}} +{"id": "astrapy.core.defaults.DEFAULT_INSERT_NUM_DOCUMENTS", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_INSERT_NUM_DOCUMENTS", 
"path": "astrapy.core.defaults.DEFAULT_INSERT_NUM_DOCUMENTS", "value": "DEFAULT_INSERT_NUM_DOCUMENTS = 50"}} +{"id": "astrapy.core.defaults.DEFAULT_REDACTED_HEADERS", "text": "", "metadata": {"kind": "attribute", "name": "DEFAULT_REDACTED_HEADERS", "path": "astrapy.core.defaults.DEFAULT_REDACTED_HEADERS", "value": "DEFAULT_REDACTED_HEADERS = {DEFAULT_DEV_OPS_AUTH_HEADER, DEFAULT_AUTH_HEADER, 'X-Embedding-Api-Key', 'X-Embedding-Access-Id', 'X-Embedding-Secret-Id'}", "gathered_types": ["astrapy.core.defaults.DEFAULT_AUTH_HEADER", "astrapy.core.defaults.DEFAULT_DEV_OPS_AUTH_HEADER"]}} +{"id": "astrapy.core.ids", "text": "", "metadata": {"kind": "module", "name": "ids", "path": "astrapy.core.ids", "imports": {"annotations": "__future__.annotations", "UUID": "uuid.UUID", "uuid1": "uuid.uuid1", "uuid3": "uuid.uuid3", "uuid4": "uuid.uuid4", "uuid5": "uuid.uuid5", "ObjectId": "bson.objectid.ObjectId", "uuid6": "uuid6.uuid6", "uuid7": "uuid6.uuid7", "uuid8": "uuid6.uuid8"}, "exports": ["ObjectId", "uuid1", "uuid3", "uuid4", "uuid5", "uuid6", "uuid7", "uuid8", "UUID"], "properties": {"is_init_module": false, "is_package": false, "is_subpackage": false, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.db", "text": "Core \"db\" subpackage, exported here to preserve import patterns.", "metadata": {"kind": "module", "name": "db", "path": "astrapy.db", "imports": {"annotations": "__future__.annotations", "AstraDB": "astrapy.core.db.AstraDB", "AstraDBCollection": "astrapy.core.db.AstraDBCollection", "AsyncAstraDB": "astrapy.core.db.AsyncAstraDB", "AsyncAstraDBCollection": "astrapy.core.db.AsyncAstraDBCollection", "logger": "astrapy.core.db.logger"}, "exports": ["AsyncAstraDBCollection", "AsyncAstraDB", "logger", "AstraDBCollection", "AstraDB"], "properties": {"is_init_module": true, "is_package": false, "is_subpackage": true, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.api", "text": "Core \"api\" 
subpackage, exported here to preserve import patterns.", "metadata": {"kind": "module", "name": "api", "path": "astrapy.api", "imports": {"annotations": "__future__.annotations", "APIRequestError": "astrapy.core.api.APIRequestError"}, "exports": ["APIRequestError"], "properties": {"is_init_module": true, "is_package": false, "is_subpackage": true, "is_namespace_package": false, "is_namespace_subpackage": false}}} +{"id": "astrapy.ops", "text": "Core \"ops\" subpackage, exported here to preserve import patterns.", "metadata": {"kind": "module", "name": "ops", "path": "astrapy.ops", "imports": {"annotations": "__future__.annotations", "AstraDBOps": "astrapy.core.ops.AstraDBOps"}, "exports": ["AstraDBOps"], "properties": {"is_init_module": true, "is_package": false, "is_subpackage": true, "is_namespace_package": false, "is_namespace_subpackage": false}}} diff --git a/data/para_with_hyperlink_short.jsonl b/data/para_with_hyperlink_short.jsonl new file mode 100644 index 00000000..4f2bfc60 --- /dev/null +++ b/data/para_with_hyperlink_short.jsonl @@ -0,0 +1,1000 @@ +{"id": "17888798", "title": "The Circle (Wipers album)", "sentences": ["The Circle is the sixth studio album by punk rock band Wipers, released on Restless in 1988.", "The album received positive reviews.", "\"The Rough Guide to Rock\" wrote that \"jazzy distorted riffing hadn't sounded this invigorating since Hendrix or Robert Fripp had their heyday.\""], "mentions": [{"id": 0, "start": 40, "end": 49, "ref_url": "Punk_rock", "ref_ids": ["23037"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 61, "ref_url": "Wipers", "ref_ids": ["939352"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 83, "ref_url": "Restless_Records", "ref_ids": ["1864203"], "sent_idx": 0}]} +{"id": "17888807", "title": "Urgand", "sentences": ["Urgand is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 
23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888822", "title": "Urup, Afghanistan", "sentences": ["Urup is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 10, "end": 17, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 21, "end": 40, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 58, "end": 69, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888850", "title": "WMIA (AM)", "sentences": ["\"For the Miami, Florida radio station, see WMIA-FM\"\nWMIA (1070 AM) is a radio station licensed to serve Arecibo, Puerto Rico.", "The station is owned by Wifredo G. Blanco Pi and it is part of the WAPA Radio News Network.", "It airs a News/Talk format and features programming from CNN Radio.", "The station is shared with translator station W227DY 93.3 FM also in Arecibo.", "The station was assigned the WMIA call letters by the Federal Communications Commission."], "mentions": [{"id": 0, "start": 9, "end": 23, "ref_url": "Miami,_Florida", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 43, "end": 50, "ref_url": "WMIA-FM", "ref_ids": ["5511324"], "sent_idx": 0}, {"id": 2, "start": 63, "end": 65, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 3, "start": 24, "end": 37, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 86, "end": 94, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 5, "start": 104, "end": 124, "ref_url": "Arecibo,_Puerto_Rico", "ref_ids": ["219279"], "sent_idx": 0}, {"id": 6, "start": 67, "end": 77, "ref_url": "WAPA_(AM)", "ref_ids": ["14136064"], "sent_idx": 1}, {"id": 7, "start": 10, "end": 14, "ref_url": "News_radio", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 15, 
"end": 19, "ref_url": "Talk_radio", "ref_ids": ["30077"], "sent_idx": 2}, {"id": 9, "start": 57, "end": 66, "ref_url": "CNN_Radio", "ref_ids": null, "sent_idx": 2}, {"id": 10, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 4}]} +{"id": "17888858", "title": "Guido of Acqui", "sentences": ["Saint Guido of Acqui( also Wido)( c. 1004 – 12 June 1070) was Bishop of Acqui( now Acqui Terme) in north- west Italy from 1034 until his death.", "He was born around 1004 to a noble family of the area of Acqui, the Counts of Acquesana, in Melazzo where the family's wealth was concentrated.", "He completed his education, by now an orphan, in Bologna.", "Elected bishop of Acqui in March 1034, his career was marked by reform in the areas of liturgy, spirituality and morality.", "He was generous in donating his own money and possessions to the diocese, in part to remove the economic pressure which had led to widespread corruption, and in part to support new projects.", "The latter included the promotion of the education of young women and the foundation of the nunnery of Santa Maria De Campis.", "Under his government, too, Acqui Cathedral was erected, dedicated to the Madonna Assunta and consecrated on 13 November 1067.", "Guido died on 12 June 1070.", "His remains are preserved in the cathedral which he founded.", "His feast day is recorded in the\" Martyrologium Romanum\" as 12 June, the anniversary of his death.", "In Acqui, however, it is celebrated on the second Sunday of July."], "mentions": [{"id": 0, "start": 62, "end": 77, "ref_url": "Bishop_of_Acqui", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 83, "end": 94, "ref_url": "Acqui_Terme", "ref_ids": ["2623130"], "sent_idx": 0}, {"id": 2, "start": 111, "end": 116, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 3, "start": 68, "end": 87, "ref_url": "Counts_of_Acquesana", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 92, "end": 99, "ref_url": 
"Melazzo", "ref_ids": ["6560440"], "sent_idx": 1}, {"id": 5, "start": 49, "end": 56, "ref_url": "Bologna", "ref_ids": ["21069333"], "sent_idx": 2}, {"id": 6, "start": 27, "end": 42, "ref_url": "Acqui_Cathedral", "ref_ids": ["21646064"], "sent_idx": 6}, {"id": 7, "start": 73, "end": 88, "ref_url": "Madonna_Assunta", "ref_ids": null, "sent_idx": 6}, {"id": 8, "start": 34, "end": 55, "ref_url": "Martyrologium_Romanum", "ref_ids": null, "sent_idx": 9}]} +{"id": "17888863", "title": "Ushkan", "sentences": ["Ushkan is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888887", "title": "International Partnership for Energy Efficiency Cooperation", "sentences": ["The Partnership for Energy Efficiency Cooperation (IPEEC) is a high-level international forum which includes developed and developing countries.", "Its purpose is to enhance global cooperation in the field of energy efficiency and to facilitate policies that yield energy efficiency gains across all sectors globally.", "IPEEC provides information to decision-makers in major economies, facilitating candid discussions for exchanging ideas and experiences and helping countries undertake joint projects to develop and implement energy efficiency policies and measures at a global scale.", "It is also a forum for member and non-member economies to share information about various bilateral and multilateral initiatives.", "IPEEC supported initiatives are open to both member and non-member nations as well as the private sector.", "IPEEC is coordinating the implementation of the Group of 20 (G20) Energy Efficiency Action Plan, which was agreed by G20 leaders in late 2014 as a practical 
approach to strengthening voluntary international energy efficiency collaboration.", "Since July 2016, IPEEC is coordinating the Energy Efficiency Leading Programme (EELP) that provides the basis for a ‘comprehensive, flexible, and adequately-resourced’ framework for strengthened voluntary collaboration on energy efficiency among G20 members and beyond.", "The IPEEC Secretariat is governed by two core committees: the Executive Committee and the Policy Committee.", "Both these committees meet twice a year to determine the forward work program and discuss results of current and previous projects."], "mentions": [{"id": 0, "start": 222, "end": 239, "ref_url": "Efficient_energy_use", "ref_ids": ["11944078"], "sent_idx": 6}, {"id": 1, "start": 48, "end": 65, "ref_url": "G20", "ref_ids": ["2206608"], "sent_idx": 5}]} +{"id": "17888888", "title": "Marine Corps Recruit Depot", "sentences": ["Marine Corps Recruit Depot may refer to:"], "mentions": []} +{"id": "17888890", "title": "Zamira Amirova", "sentences": ["Zamira Amirova (born 11 June 1979) is a retired Uzbekistani middle distance runner who specialized in the 800 metres.", "Her personal best time is 2:02.30 minutes, achieved in June 2002 in Tashkent."], "mentions": [{"id": 0, "start": 48, "end": 58, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 0}, {"id": 1, "start": 106, "end": 116, "ref_url": "800_metres", "ref_ids": ["585165"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 76, "ref_url": "Tashkent", "ref_ids": ["57533"], "sent_idx": 1}]} +{"id": "17888908", "title": "Vark, Afghanistan", "sentences": ["Vark is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 10, "end": 17, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 21, "end": 40, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 58, "end": 69, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888943", 
"title": "Vazit", "sentences": ["Vazit is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 11, "end": 18, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 41, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 70, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888956", "title": "Vod Ab", "sentences": ["Vod Ab is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888959", "title": "TVR 390SE", "sentences": ["The TVR 390SE is a sports car designed and built by TVR.", "It was introduced in October 1984.", "It featured many novelties not found in other 'Wedge' TVRs before and was also the most powerful one yet.", "It never received British Type Approval, so technically speaking the 390SE was just a 350i with special equipment fitted."], "mentions": [{"id": 0, "start": 54, "end": 57, "ref_url": "TVR", "ref_ids": ["188987"], "sent_idx": 2}, {"id": 1, "start": 46, "end": 53, "ref_url": "TVR_Wedges", "ref_ids": ["17888712"], "sent_idx": 2}, {"id": 2, "start": 26, "end": 39, "ref_url": "Homologation", "ref_ids": ["744579"], "sent_idx": 3}, {"id": 3, "start": 86, "end": 90, "ref_url": "TVR_350i", "ref_ids": ["28910875"], "sent_idx": 3}]} +{"id": "17888975", "title": "Good Girl (Sherman Chung album)", "sentences": ["Good Girl? 
(乖女仔) is Sherman Chung's debut album, published by Music Plus."], "mentions": [{"id": 0, "start": 20, "end": 33, "ref_url": "Sherman_Chung", "ref_ids": ["11461598"], "sent_idx": 0}, {"id": 1, "start": 62, "end": 72, "ref_url": "Emperor_Entertainment_Group", "ref_ids": null, "sent_idx": 0}]} +{"id": "17888978", "title": "Wandian", "sentences": ["Wandian is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 13, "end": 20, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 24, "end": 43, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 61, "end": 72, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17888989", "title": "Lyubov Perepelova", "sentences": ["Lyubov Perepelova (\"\"; born 26 February 1979) is an Uzbekistani sprinter who specializes in the 100 and 200 metres.", "From July 2005 to July 2007 she was suspended due to a doping offense."], "mentions": [{"id": 0, "start": 52, "end": 62, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 0}, {"id": 1, "start": 96, "end": 99, "ref_url": "100_metres", "ref_ids": ["1231316"], "sent_idx": 0}, {"id": 2, "start": 104, "end": 114, "ref_url": "200_metres", "ref_ids": ["1833981"], "sent_idx": 0}, {"id": 3, "start": 55, "end": 61, "ref_url": "Doping_(sport)", "ref_ids": null, "sent_idx": 1}]} +{"id": "17888992", "title": "Woring", "sentences": ["Woring is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889000", "title": "Wusan", "sentences": ["Wusan is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, 
"start": 11, "end": 18, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 41, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 70, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889004", "title": "Trilogija 1: Nevinost bez zaštite", "sentences": ["Trilogija 1: Nevinost bez zaštite (trans.", "\"Trilogy 1: Virginity Without Protection\") is the first EP from Serbian and former Yugoslav rock band Riblja Čorba.", "It is the first part of the Riblja Čorba trilogy released during 2005 and 2006.", "The band considers EPs \"Trilogija 1: Nevinost bez zaštite\", \"\" and \"\" three parts of the studio album titled \"Trilogija\", although all three were released separately.", "All the songs from three EPs were released on the compilation album \"Trilogija\".", "The EP features Marija Mihajlović and Marija Dokmanović on backing vocals."], "mentions": [{"id": 0, "start": 64, "end": 70, "ref_url": "Serbia", "ref_ids": ["29265"], "sent_idx": 1}, {"id": 1, "start": 76, "end": 91, "ref_url": "Socialist_Federal_Republic_of_Yugoslavia", "ref_ids": ["297809"], "sent_idx": 1}, {"id": 2, "start": 92, "end": 96, "ref_url": "Rock_music", "ref_ids": ["25423"], "sent_idx": 1}, {"id": 3, "start": 28, "end": 40, "ref_url": "Riblja_Čorba", "ref_ids": ["13269687"], "sent_idx": 2}, {"id": 4, "start": 69, "end": 78, "ref_url": "Trilogija", "ref_ids": ["20255766"], "sent_idx": 4}, {"id": 5, "start": 16, "end": 33, "ref_url": "Marija_Mihajlović", "ref_ids": null, "sent_idx": 5}, {"id": 6, "start": 38, "end": 55, "ref_url": "Abonos", "ref_ids": ["20678197"], "sent_idx": 5}]} +{"id": "17889009", "title": "Yalur", "sentences": ["Yalur is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 11, "end": 18, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 41, "ref_url": "Badakhshan_Province", 
"ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 70, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889017", "title": "Yardar", "sentences": ["Yardar is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889027", "title": "Ponto Chique", "sentences": ["Ponto Chique is a municipality in the north of the Brazilian state of Minas Gerais.", "As of 2007 the population was 4,046 in a total area of 602 km².", "It became a municipality in 1997."], "mentions": [{"id": 0, "start": 12, "end": 24, "ref_url": "Municipalities_of_Brazil", "ref_ids": ["214437"], "sent_idx": 2}, {"id": 1, "start": 70, "end": 82, "ref_url": "Minas_Gerais", "ref_ids": ["222651"], "sent_idx": 0}]} +{"id": "17889033", "title": "Valdis Muižnieks", "sentences": ["Valdis Muižnieks (February 22, 1935 – November 29, 2013) was a Latvian basketball player.", "Muižnieks was born in Riga.", "He played for Rīgas ASK and won 3 Euroleague titles (1958, 1959, 1960) and 4 Soviet national championships (1955, 1956, 1957, 1958).", "Honoured Master of Sport of the USSR (1959).", "Playing for Soviet national team, Muižnieks won 3 gold medals at Eurobasket 1957, Eurobasket 1959, Eurobasket 1961, and 3 silver medals at the Olympic Games (1956, 1960, 1964)."], "mentions": [{"id": 0, "start": 63, "end": 69, "ref_url": "Latvia", "ref_ids": ["17514"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 81, "ref_url": "Basketball", "ref_ids": ["3921"], "sent_idx": 0}, {"id": 2, "start": 22, "end": 26, "ref_url": "Riga", "ref_ids": ["25508"], "sent_idx": 1}, {"id": 3, "start": 14, "end": 23, "ref_url": "Rīgas_ASK", "ref_ids": ["55516070"], "sent_idx": 2}, 
{"id": 4, "start": 34, "end": 44, "ref_url": "Euroleague", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 77, "end": 106, "ref_url": "USSR_Premier_Basketball_League", "ref_ids": ["39649138"], "sent_idx": 2}, {"id": 6, "start": 12, "end": 32, "ref_url": "Soviet_Union_national_basketball_team", "ref_ids": ["3146594"], "sent_idx": 4}, {"id": 7, "start": 65, "end": 80, "ref_url": "Eurobasket_1957", "ref_ids": null, "sent_idx": 4}, {"id": 8, "start": 82, "end": 97, "ref_url": "Eurobasket_1959", "ref_ids": null, "sent_idx": 4}, {"id": 9, "start": 99, "end": 114, "ref_url": "Eurobasket_1961", "ref_ids": null, "sent_idx": 4}, {"id": 10, "start": 143, "end": 156, "ref_url": "Basketball_at_the_Summer_Olympics", "ref_ids": ["2695055"], "sent_idx": 4}, {"id": 11, "start": 158, "end": 162, "ref_url": "Basketball_at_the_1956_Summer_Olympics", "ref_ids": ["1864500"], "sent_idx": 4}, {"id": 12, "start": 164, "end": 168, "ref_url": "Basketball_at_the_1960_Summer_Olympics", "ref_ids": ["1864693"], "sent_idx": 4}, {"id": 13, "start": 170, "end": 174, "ref_url": "Basketball_at_the_1964_Summer_Olympics", "ref_ids": ["2301926"], "sent_idx": 4}]} +{"id": "17889041", "title": "Yasich", "sentences": ["Yasich is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 12, "end": 19, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 42, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 71, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889043", "title": "Yvonne McGregor", "sentences": ["Yvonne McGregor MBE (born 9 April 1961) is a female English former professional cyclist from Wibsey.", "She was made an MBE, for services to cycling, in the 2002 New Year Honours."], "mentions": [{"id": 0, "start": 16, "end": 19, "ref_url": "MBE", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 93, "end": 99, "ref_url": "Wibsey", "ref_ids": 
["2819199"], "sent_idx": 0}, {"id": 2, "start": 16, "end": 19, "ref_url": "MBE", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 53, "end": 74, "ref_url": "2002_New_Year_Honours", "ref_ids": ["18830825"], "sent_idx": 1}]} +{"id": "17889052", "title": "Yasif", "sentences": ["Yasif is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 11, "end": 18, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 41, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 70, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889071", "title": "Yavarzan", "sentences": ["Yavarzan is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 14, "end": 21, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 44, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889078", "title": "Guzel Khubbieva", "sentences": ["Guzel Khubbieva (born 2 May 1976) is an Uzbekistani sprinter who specializes in the 100 and 200 metres.", "Khubbieva represented Uzbekistan at the 2008 Summer Olympics in Beijing competing at the 100 metres sprint.", "In her first round heat she placed third behind Muna Lee and Anita Pistone in a time of 11.44 to advance to the second round.", "There she failed to qualify for the semi finals as her time of 11.49 was only the seventh time of her heat, causing elimination.", "She won the silver medal at the 2010 Asian Games.", "She also competed at the 2012 Summer Olympics.", "She didn't qualify for the second round."], "mentions": [{"id": 0, "start": 22, "end": 32, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 1}, {"id": 1, "start": 89, "end": 92, "ref_url": "100_metres", "ref_ids": ["1231316"], 
"sent_idx": 1}, {"id": 2, "start": 92, "end": 102, "ref_url": "200_metres", "ref_ids": ["1833981"], "sent_idx": 0}, {"id": 3, "start": 40, "end": 60, "ref_url": "2008_Summer_Olympics", "ref_ids": ["77745"], "sent_idx": 1}, {"id": 4, "start": 64, "end": 71, "ref_url": "Beijing", "ref_ids": ["18603746"], "sent_idx": 1}, {"id": 5, "start": 89, "end": 99, "ref_url": "100_metres", "ref_ids": ["1231316"], "sent_idx": 1}, {"id": 6, "start": 48, "end": 56, "ref_url": "Muna_Lee_(athlete)", "ref_ids": ["2891460"], "sent_idx": 2}, {"id": 7, "start": 61, "end": 74, "ref_url": "Anita_Pistone", "ref_ids": ["19078490"], "sent_idx": 2}, {"id": 8, "start": 25, "end": 45, "ref_url": "2012_Summer_Olympics", "ref_ids": ["2176142"], "sent_idx": 5}]} +{"id": "17889089", "title": "Yeylaq-e Amurn", "sentences": ["Yeylaq-e Amurn is a village in Badakhshan Province in north-eastern Afghanistan.", "It is located on the border with Tajikistan"], "mentions": [{"id": 0, "start": 20, "end": 27, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 50, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 79, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}, {"id": 3, "start": 33, "end": 43, "ref_url": "Tajikistan", "ref_ids": ["30108"], "sent_idx": 1}]} +{"id": "17889101", "title": "Rietveld joint", "sentences": ["A Rietveld joint, also called a Cartesian node in furniture-making, is an overlapping joint of three battens in the three orthogonal directions.", "It was a prominent feature in the Red and Blue Chair that was designed by Gerrit Rietveld.", "Rietveld joints are inextricably linked with the early 20th century Dutch artistic movement called De Stijl (of which Gerrit Rietveld was a member), a movement whose aims included ultimate simplicity and abstraction.", "This led to the movement's three-dimensional works having vertical and horizontal lines that are positioned in layers or planes that 
do not intersect, thereby allowing each element to exist independently and unobstructed by other elements and giving a piece a visually raw and simplified look.", "In Gerrit Rietveld's furniture, many of these joints were doweled, meaning that the adjoining faces were connected with glued wooden pins.", "The first two connections were made by boring a hole about 1 mm deeper than the dowel length, but the third connection was made with\na longer dowel, boring through a batten, leaving a circular mark that had to be painted over."], "mentions": [{"id": 0, "start": 32, "end": 41, "ref_url": "Cartesian_coordinate_system", "ref_ids": ["7706"], "sent_idx": 0}, {"id": 1, "start": 166, "end": 172, "ref_url": "Batten", "ref_ids": ["2637151"], "sent_idx": 5}, {"id": 2, "start": 34, "end": 52, "ref_url": "Red_and_Blue_Chair", "ref_ids": ["3305703"], "sent_idx": 1}, {"id": 3, "start": 3, "end": 18, "ref_url": "Gerrit_Rietveld", "ref_ids": ["12648"], "sent_idx": 4}, {"id": 4, "start": 99, "end": 107, "ref_url": "De_Stijl", "ref_ids": ["226375"], "sent_idx": 2}]} +{"id": "17889105", "title": "Lyudmila Dmitriadi", "sentences": ["Lyudmila Dmitriadi (born 24 September 1969) is an Uzbekistani sprinter who specialized in the 100 metres.", "She competed in the women's 100 metres at the 1996 Summer Olympics.", "Her personal best time is 11.44 seconds, achieved in June 2001 in Bishkek."], "mentions": [{"id": 0, "start": 50, "end": 60, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 0}, {"id": 1, "start": 28, "end": 38, "ref_url": "100_metres", "ref_ids": ["1231316"], "sent_idx": 1}, {"id": 2, "start": 20, "end": 38, "ref_url": "Athletics_at_the_1996_Summer_Olympics_–_Women's_100_metres", "ref_ids": ["25220229"], "sent_idx": 1}, {"id": 3, "start": 46, "end": 66, "ref_url": "1996_Summer_Olympics", "ref_ids": ["72311"], "sent_idx": 1}, {"id": 4, "start": 66, "end": 73, "ref_url": "Bishkek", "ref_ids": ["4554"], "sent_idx": 2}]} +{"id": "17889108", "title": "Zangerya", 
"sentences": ["Zangerya is a village in Badakhshan Province in north-eastern Afghanistan.", "It is located on the border with Tajikistan."], "mentions": [{"id": 0, "start": 14, "end": 21, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 44, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}, {"id": 3, "start": 33, "end": 43, "ref_url": "Tajikistan", "ref_ids": ["30108"], "sent_idx": 1}]} +{"id": "17889109", "title": "Marton, Ryedale", "sentences": ["Marton is a village and civil parish in the Ryedale district of North Yorkshire, England.", "It is situated approximately west of the market town of Pickering on the River Seven."], "mentions": [{"id": 0, "start": 24, "end": 36, "ref_url": "Civil_parish", "ref_ids": ["644233"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 51, "ref_url": "Ryedale", "ref_ids": ["442185"], "sent_idx": 0}, {"id": 2, "start": 64, "end": 79, "ref_url": "North_Yorkshire", "ref_ids": ["143759"], "sent_idx": 0}, {"id": 3, "start": 41, "end": 52, "ref_url": "Market_town", "ref_ids": ["145965"], "sent_idx": 1}, {"id": 4, "start": 56, "end": 65, "ref_url": "Pickering,_North_Yorkshire", "ref_ids": ["947327"], "sent_idx": 1}, {"id": 5, "start": 73, "end": 84, "ref_url": "River_Seven", "ref_ids": ["16302489"], "sent_idx": 1}]} +{"id": "17889144", "title": "Zar Khan", "sentences": ["Zar Khan is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 14, "end": 21, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 44, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889186", "title": "Ziak", "sentences": ["Ziak is a village in the Badakhshan Province of north-eastern Afghanistan."], 
"mentions": [{"id": 0, "start": 10, "end": 17, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 44, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889199", "title": "Michael Wayne Richard", "sentences": ["Michael Wayne Richard (August 24, 1959 – September 25, 2007) was convicted of rape and murder whose execution gained notoriety due to controversies regarding procedural problems related to the timing of the execution.", "Richard admitted he was involved in the murder and offered to help find the murder weapon.", "Police found the weapon and testing revealed it to be the gun that fired the fatal shot.", "On 18 August 1986, in Hockley, Texas, while on parole for motor vehicle theft, Richard entered the home of Marguerite Lucille Dixon, stole two television sets, raped Dixon, fatally shot her, and then stole her van.", "In the wake of the Supreme Court of the United States expressing interest in the question of the constitutionality of lethal injections, on the same day that the execution was scheduled, Richard's lawyers sought a stay of execution.", "The stay request had to be filed with the Texas Court of Criminal Appeals in Austin.", "The execution was scheduled for 6 PM, but the court's clerks office, where motions are usually filed, was scheduled to close at 5 PM, and refused to remain open beyond then to allow a later filing.", "Richard's lawyers claimed that, because of a computer failure, they did not reach the Court of Criminal Appeals until about 5:20 PM.", "Although there was a judge on call to receive emergency stay motions, and although Texas law would have allowed the stay application to be filed directly with a judge of the court, the lawyers did not attempt to contact any of them.", "However, Richard's legal team did call the court to ask for a short extension of time to file a 
motion based on a case, Baze v.", "Rees, that had just been granted certiorari by the United States Supreme Court earlier that day.", "Judge Sharon Keller responded with four words: \"We close at 5.\"", "Richard was subsequently executed at 8:23 PM on September 25, 2007 at Huntsville, Texas.", "On February 19, 2009, the Texas State Commission on Judicial Conduct brought seven charges against Judge Sharon Keller, alleging ethical improprieties in Keller's handling of the case.", "A Special Master who had previously been a judge in the Court of Criminal Appeals was assigned to the case.", "He found the Texas Defender Service to be primarily at fault in the failure, and although he found that Keller's actions were inadequate, he concluded that her conduct was not \"so egregious\" as to warrant further punishment.", "The Special Master cast doubt on a number of the reported issues in the case.", "He concluded that there was no evidence that the Texas Defender Service suffered any \"major\" computer failure; although news reports had mentioned multiple crashes, the only claim that the TDS repeated during the hearings was that there had been some problems with an internal email service, and no documentation of those problems was produced.", "Contrary to one of Richard's lawyers' earlier comments about the court refusing to stay open \"20 minutes\", the Special Master found the filings were not ready until 5:56 p.m., with the execution authorized for any time after 6:00 p.m.", "He also found fault with the attorneys for assigning only a junior attorney to prepare the documents; for delaying two hours past the US Supreme Court's grant of certiorari in Baze v.", "Rees earlier that day, which opened a new avenue for appeal, before even discussing preparing a motion in Richard's case; and for relying on paralegals to contact the clerk's office about the filing, without any attorneys attempting to directly contact a judge or the Court of Criminal Appeal's General Counsel.", 
"The Special Master criticized the TDS for \"causing a public uproar against Judge Keller, much of which was unwarranted\"."], "mentions": [{"id": 0, "start": 160, "end": 164, "ref_url": "Rape", "ref_ids": ["7980471"], "sent_idx": 3}, {"id": 1, "start": 40, "end": 46, "ref_url": "Murder", "ref_ids": ["20171"], "sent_idx": 1}, {"id": 2, "start": 185, "end": 194, "ref_url": "Capital_punishment", "ref_ids": ["5902"], "sent_idx": 18}, {"id": 3, "start": 22, "end": 36, "ref_url": "Hockley,_Texas", "ref_ids": ["5056177"], "sent_idx": 3}, {"id": 4, "start": 47, "end": 53, "ref_url": "Parole", "ref_ids": ["342305"], "sent_idx": 3}, {"id": 5, "start": 58, "end": 77, "ref_url": "Motor_vehicle_theft", "ref_ids": ["214348"], "sent_idx": 3}, {"id": 6, "start": 19, "end": 53, "ref_url": "Supreme_Court_of_the_United_States", "ref_ids": ["31737"], "sent_idx": 4}, {"id": 7, "start": 97, "end": 114, "ref_url": "Eighth_Amendment_to_the_United_States_Constitution", "ref_ids": ["31660"], "sent_idx": 4}, {"id": 8, "start": 118, "end": 134, "ref_url": "Lethal_injection", "ref_ids": ["62745"], "sent_idx": 4}, {"id": 9, "start": 42, "end": 73, "ref_url": "Texas_Court_of_Criminal_Appeals", "ref_ids": ["4383757"], "sent_idx": 5}, {"id": 10, "start": 77, "end": 83, "ref_url": "Austin,_Texas", "ref_ids": ["1998"], "sent_idx": 5}, {"id": 11, "start": 42, "end": 44, "ref_url": "12-hour_clock", "ref_ids": ["241267"], "sent_idx": 12}, {"id": 13, "start": 105, "end": 118, "ref_url": "Sharon_Keller", "ref_ids": ["14243133"], "sent_idx": 13}, {"id": 14, "start": 70, "end": 87, "ref_url": "Huntsville,_Texas", "ref_ids": ["136748"], "sent_idx": 12}, {"id": 15, "start": 4, "end": 18, "ref_url": "Special_Master", "ref_ids": null, "sent_idx": 21}, {"id": 16, "start": 134, "end": 150, "ref_url": "SCOTUS", "ref_ids": null, "sent_idx": 19}]} +{"id": "17889220", "title": "Zin, Afghanistan", "sentences": ["Zin is a village in Badakhshan Province in northeastern Afghanistan."], "mentions": [{"id": 0, "start": 
9, "end": 16, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 39, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 56, "end": 67, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889250", "title": "Zinjaren", "sentences": ["Zinjaren is a village in Badakhshan Province in north-eastern Afghanistan."], "mentions": [{"id": 0, "start": 14, "end": 21, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 44, "ref_url": "Badakhshan_Province", "ref_ids": ["1282198"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}]} +{"id": "17889317", "title": "Gennadiy Belkov", "sentences": ["Gennadiy Belkov (born 24 June 1955) is a retired Uzbekistani high jumper who represented the Soviet Union.", "He won the silver medal at the 1979 European Indoor Championships and finished eighteenth at the 1980 European Indoor Championships.", "His personal best jump was 2.32 metres, achieved May 1982 in Tashkent."], "mentions": [{"id": 0, "start": 49, "end": 59, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 0}, {"id": 1, "start": 61, "end": 70, "ref_url": "High_jump", "ref_ids": ["13791"], "sent_idx": 0}, {"id": 2, "start": 93, "end": 105, "ref_url": "Soviet_Union", "ref_ids": ["26779"], "sent_idx": 0}, {"id": 3, "start": 31, "end": 65, "ref_url": "1979_European_Indoor_Championships_in_Athletics", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 97, "end": 131, "ref_url": "1980_European_Indoor_Championships_in_Athletics", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 61, "end": 69, "ref_url": "Tashkent", "ref_ids": ["57533"], "sent_idx": 2}]} +{"id": "17889320", "title": "Ioan Zalomit", "sentences": ["Ioan Zalomit (1823–1885) was a Romanian philosopher, professor and rector of the University of Bucharest."], "mentions": [{"id": 0, "start": 31, "end": 38, "ref_url": 
"Romania", "ref_ids": ["25445"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 73, "ref_url": "Rector_(academia)", "ref_ids": ["66772"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 104, "ref_url": "University_of_Bucharest", "ref_ids": ["553982"], "sent_idx": 0}]} +{"id": "17889329", "title": "Gireogi appa", "sentences": ["A gireogi appa (Korean: 기러기 아빠, literally \"goose dad\") is a South Korean man who works in Korea while his wife and children stay in an English-speaking country for the sake of the children's education.", "The term is inspired by the fact that geese are a species that migrate, just as the \"gireogi appa\" father must travel a great distance to see his family.", "Estimates of the number of \"gireogi appa\" in South Korea range as high as 200,000 men.", "The word 'gireogi appa' was included in the report '2002 New Word' by the National Academy of Korean Language."], "mentions": [{"id": 0, "start": 94, "end": 100, "ref_url": "Korean_language", "ref_ids": ["16756"], "sent_idx": 3}, {"id": 1, "start": 60, "end": 72, "ref_url": "South_Korea", "ref_ids": ["27019"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 95, "ref_url": "Economy_of_South_Korea", "ref_ids": ["27024"], "sent_idx": 0}, {"id": 3, "start": 135, "end": 159, "ref_url": "English-speaking_country", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 191, "end": 200, "ref_url": "Education_in_South_Korea", "ref_ids": ["1495964"], "sent_idx": 0}, {"id": 5, "start": 38, "end": 43, "ref_url": "Geese", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 63, "end": 70, "ref_url": "Bird_migration", "ref_ids": ["201943"], "sent_idx": 1}]} +{"id": "17889332", "title": "Paducah micropolitan area", "sentences": ["The Paducah, KY-IL Micropolitan Statistical Area, as defined by the United States Census Bureau, is an area consisting of four counties – two in the Jackson Purchase region of Kentucky, a third Kentucky county bordering the Purchase, and one in southern Illinois – anchored by the city of 
Paducah, Kentucky.", "As of the 2000 census, the μSA had a population of 98,765 (though a July 1, 2009 estimate placed the population at 98,609)."], "mentions": [{"id": 0, "start": 68, "end": 95, "ref_url": "United_States_Census_Bureau", "ref_ids": ["57070"], "sent_idx": 0}, {"id": 1, "start": 149, "end": 165, "ref_url": "Jackson_Purchase", "ref_ids": ["440239"], "sent_idx": 0}, {"id": 2, "start": 176, "end": 184, "ref_url": "Kentucky", "ref_ids": ["16846"], "sent_idx": 0}, {"id": 3, "start": 245, "end": 253, "ref_url": "Southern_Illinois", "ref_ids": ["473630"], "sent_idx": 0}, {"id": 4, "start": 254, "end": 262, "ref_url": "Illinois", "ref_ids": ["14849"], "sent_idx": 0}, {"id": 5, "start": 289, "end": 306, "ref_url": "Paducah,_Kentucky", "ref_ids": ["115346"], "sent_idx": 0}, {"id": 6, "start": 10, "end": 21, "ref_url": "2000_United_States_Census", "ref_ids": ["432383"], "sent_idx": 1}, {"id": 7, "start": 27, "end": 30, "ref_url": "Micropolitan_Statistical_Area", "ref_ids": null, "sent_idx": 1}]} +{"id": "17889339", "title": "Château de Neuilly", "sentences": ["The château de Neuilly is a former château in Neuilly-sur-Seine, France.", "Its estate covered a vast 170-hectare park called \"parc de Neuilly\" which comprised all of Neuilly that is today to be found between avenue du Roule and the town of Levallois-Perret.", "The castle was built in 1751, and was largely destroyed in 1848.", "One wing of the former castle remains, and was integrated into a new convent building in 1907."], "mentions": [{"id": 0, "start": 4, "end": 11, "ref_url": "Château", "ref_ids": ["61914"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 63, "ref_url": "Neuilly-sur-Seine", "ref_ids": ["234647"], "sent_idx": 0}, {"id": 2, "start": 65, "end": 71, "ref_url": "France", "ref_ids": ["5843419"], "sent_idx": 0}, {"id": 3, "start": 165, "end": 181, "ref_url": "Levallois-Perret", "ref_ids": ["1257063"], "sent_idx": 1}, {"id": 4, "start": 69, "end": 76, "ref_url": "Convent", "ref_ids": 
["133806"], "sent_idx": 3}]} +{"id": "17889348", "title": "Maigonis Valdmanis", "sentences": ["Maigonis Valdmanis (September 8, 1933 – October 30, 1999) was a Latvian basketball player.", "He was born in Riga.", "He played for Rīgas ASK and won three Euroleague titles (1958, 1959, 1960) and four Soviet national league championships (1955, 1956, 1957, 1958).", "Playing for the USSR national basketball team Maigonis Valdmanis won three gold medals at Eurobasket 1957, Eurobasket 1959, Eurobasket 1961, and silver medals at the 1952, 1956 and 1960 Olympic Games."], "mentions": [{"id": 0, "start": 64, "end": 70, "ref_url": "Latvia", "ref_ids": ["17514"], "sent_idx": 0}, {"id": 1, "start": 15, "end": 19, "ref_url": "Riga", "ref_ids": ["25508"], "sent_idx": 1}, {"id": 2, "start": 14, "end": 23, "ref_url": "Rīgas_ASK", "ref_ids": ["55516070"], "sent_idx": 2}, {"id": 3, "start": 84, "end": 106, "ref_url": "USSR_Premier_Basketball_League", "ref_ids": ["39649138"], "sent_idx": 2}, {"id": 4, "start": 16, "end": 45, "ref_url": "USSR_national_basketball_team", "ref_ids": null, "sent_idx": 3}, {"id": 5, "start": 90, "end": 105, "ref_url": "Eurobasket_1957", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 107, "end": 122, "ref_url": "Eurobasket_1959", "ref_ids": null, "sent_idx": 3}, {"id": 7, "start": 124, "end": 139, "ref_url": "Eurobasket_1961", "ref_ids": null, "sent_idx": 3}]} +{"id": "17889361", "title": "Pimelea microcephala", "sentences": ["Pimelea microcephala (mallee riceflower or shrubby riceflower) is a dioecious shrub in the family Thymelaeaceae, native to Australia.", "It grows up to 4 metres high and produces greenish-yellow flowerheads.", "The male flowerheads have 13 to 100 flowers while the female flowerheads have 7 to 12.", "The leaves are 7 to 40 mm long and 1 to 4 mm wide.", "The species is toxic to stock."], "mentions": [{"id": 0, "start": 68, "end": 77, "ref_url": "Dioecious", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 98, "end": 111, "ref_url": 
"Thymelaeaceae", "ref_ids": ["1895669"], "sent_idx": 0}, {"id": 2, "start": 123, "end": 132, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 0}]} +{"id": "17889377", "title": "Humboldt Falls", "sentences": ["Humboldt Falls is a waterfall located in the Hollyford Valley in the Fiordland district of New Zealand.", "They fall 275 metres in three drops; the largest of the three drops is 134 metres high."], "mentions": [{"id": 0, "start": 20, "end": 29, "ref_url": "Waterfall", "ref_ids": ["69442"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 61, "ref_url": "Hollyford_River", "ref_ids": ["1103669"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 78, "ref_url": "Fiordland", "ref_ids": ["336481"], "sent_idx": 0}, {"id": 3, "start": 91, "end": 102, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 0}]} +{"id": "17889394", "title": "Svetlana Radzivil", "sentences": ["Svetlana Mikhaylovna Radzivil (; born 17 January 1987 in Tashkent, Uzbek SSR) is an Uzbekistani high jumper.", "She is among Asia's top female high jumpers.", "She won the Asian Games title three times running (2010 to 2018) and was the Asian champion in 2015.", "She has also won two silver and one bronze medal at the Asian Athletics Championships.", "She is a two-time champion at the Asian Indoor Athletics Championships (2006 and 2014).", "She is a three-time Olympian (2008, 2012 and 2016).", "She was born in Tashkent.", "She finished ninth at the 2003 World Youth Championships and thirteenth at the 2004 World Junior Championships.", "In 2006, she won the 2006 World Junior Championships, in a new personal best jump of 1.91 metres, and finished seventh at the 2006 Asian Games.", "She jumped 1.91 again in 2007.", "In 2008, she competed at the Olympic Games.", "She won the gold medal at the 17th Asian Games with a jump of 1.94 metres ahead of China's Xingjuan Zheng who claimed silver with her season's best jump of 1.92 m and Uzbekistan's Nadiya Dusanova who took home the bronze with a best 
leap of 1.89 m.", "She won gold medal in Asian Games 2018, with games record of 1.96 meters beating Nadiya Dusanova 1.94 m.", "Her personal best jump is 1.98 metres, achieved in May 2008 in Tashkent.", "This is the Asian record, although she shares it with two athletes: Nadejda Dusanova and Yekaterina Yevseyeva."], "mentions": [{"id": 0, "start": 63, "end": 71, "ref_url": "Tashkent", "ref_ids": ["57533"], "sent_idx": 13}, {"id": 1, "start": 67, "end": 76, "ref_url": "Uzbek_SSR", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 167, "end": 177, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 11}, {"id": 3, "start": 31, "end": 40, "ref_url": "High_jump", "ref_ids": ["13791"], "sent_idx": 1}, {"id": 4, "start": 22, "end": 33, "ref_url": "Asian_Games", "ref_ids": ["39205"], "sent_idx": 12}, {"id": 5, "start": 95, "end": 99, "ref_url": "2015_Asian_Athletics_Championships", "ref_ids": ["46882483"], "sent_idx": 2}, {"id": 6, "start": 56, "end": 85, "ref_url": "Asian_Athletics_Championships", "ref_ids": ["12479703"], "sent_idx": 3}, {"id": 7, "start": 34, "end": 70, "ref_url": "Asian_Indoor_Athletics_Championships", "ref_ids": ["25015727"], "sent_idx": 4}, {"id": 8, "start": 63, "end": 71, "ref_url": "Tashkent", "ref_ids": ["57533"], "sent_idx": 13}, {"id": 9, "start": 26, "end": 56, "ref_url": "2003_World_Youth_Championships_in_Athletics", "ref_ids": ["9352262"], "sent_idx": 7}, {"id": 10, "start": 79, "end": 110, "ref_url": "2004_World_Junior_Championships_in_Athletics", "ref_ids": ["6154841"], "sent_idx": 7}, {"id": 11, "start": 21, "end": 52, "ref_url": "2006_World_Junior_Championships_in_Athletics", "ref_ids": ["6630609"], "sent_idx": 8}, {"id": 12, "start": 126, "end": 142, "ref_url": "Athletics_at_the_2006_Asian_Games", "ref_ids": ["8080921"], "sent_idx": 8}, {"id": 13, "start": 29, "end": 42, "ref_url": "Athletics_at_the_2008_Summer_Olympics_–_Women's_high_jump", "ref_ids": ["18578964"], "sent_idx": 10}, {"id": 14, "start": 81, "end": 96, "ref_url": 
"Nadiya_Dusanova", "ref_ids": ["22282005"], "sent_idx": 12}, {"id": 15, "start": 63, "end": 71, "ref_url": "Tashkent", "ref_ids": ["57533"], "sent_idx": 13}, {"id": 16, "start": 68, "end": 84, "ref_url": "Nadejda_Dusanova", "ref_ids": null, "sent_idx": 14}, {"id": 17, "start": 89, "end": 109, "ref_url": "Yekaterina_Yevseyeva", "ref_ids": ["22281987"], "sent_idx": 14}]} +{"id": "17889423", "title": "Doctor Slaughter", "sentences": ["Doctor Slaughter is a 1984 novel by Paul Theroux.", "The main character is a young woman living in near poverty in London.", "She has a Ph.D. and works at a research institute.", "She turns to prostitution.", "In 1986, the story was made into the film \"Half Moon Street\", starring Sigourney Weaver."], "mentions": [{"id": 0, "start": 36, "end": 48, "ref_url": "Paul_Theroux", "ref_ids": ["172064"], "sent_idx": 0}, {"id": 1, "start": 10, "end": 15, "ref_url": "Ph.D.", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 43, "end": 59, "ref_url": "Half_Moon_Street_(film)", "ref_ids": ["12555559"], "sent_idx": 4}, {"id": 3, "start": 71, "end": 87, "ref_url": "Sigourney_Weaver", "ref_ids": ["64965"], "sent_idx": 4}]} +{"id": "17889436", "title": "Queen's Own West Kent Yeomanry", "sentences": ["The Queen's Own West Kent Yeomanry was a British Army regiment formed in 1794.", "It served in the Second Boer War and the First World War.", "It amalgamated with the Royal East Kent (The Duke of Connaught's Own) Yeomanry (Mounted Rifles) to form the Kent Yeomanry in 1920."], "mentions": [{"id": 0, "start": 41, "end": 53, "ref_url": "British_Army", "ref_ids": ["4887"], "sent_idx": 0}, {"id": 1, "start": 17, "end": 32, "ref_url": "Second_Boer_War", "ref_ids": ["42720"], "sent_idx": 1}, {"id": 2, "start": 41, "end": 56, "ref_url": "World_War_I", "ref_ids": ["4764461"], "sent_idx": 1}, {"id": 3, "start": 24, "end": 95, "ref_url": "Royal_East_Kent_Yeomanry", "ref_ids": ["17888194"], "sent_idx": 2}, {"id": 4, "start": 108, "end": 121, "ref_url": 
"Kent_Yeomanry", "ref_ids": ["42697962"], "sent_idx": 2}]} +{"id": "17889440", "title": "Île de la Jatte", "sentences": ["The Île de la Jatte or Île de la Grande Jatte is an island in the river Seine, located in the department of Hauts-de-Seine, and shared between the two communes of Neuilly-sur-Seine and Levallois.", "It is situated at the very gates of Paris, being 7 km distant (in a straight line) from the towers of Notre Dame and 3 km from the Place de l'Étoile.", "The island, which has about 4,000 inhabitants, is nearly 2 km long and almost 200 m wide at its widest point.", "Its name translates as \"Island of the Bowl\" or \"Island of the Big Bowl\".", "It is best known as the setting for Georges Seurat's pointillist oil painting, \"Un Dimanche après-midi à l'Île de la Grande Jatte\" (\"A Sunday Afternoon on the Island of La Grande Jatte\") (1884-1886 and 1889), and also for the Stephen Sondheim/James Lapine musical, \"Sunday in the Park with George\"."], "mentions": [{"id": 0, "start": 4, "end": 10, "ref_url": "Island", "ref_ids": ["14587"], "sent_idx": 2}, {"id": 1, "start": 72, "end": 77, "ref_url": "Seine", "ref_ids": ["54006"], "sent_idx": 0}, {"id": 2, "start": 108, "end": 122, "ref_url": "Hauts-de-Seine", "ref_ids": ["59133"], "sent_idx": 0}, {"id": 3, "start": 151, "end": 159, "ref_url": "Communes_of_France", "ref_ids": ["410219"], "sent_idx": 0}, {"id": 4, "start": 163, "end": 180, "ref_url": "Neuilly-sur-Seine", "ref_ids": ["234647"], "sent_idx": 0}, {"id": 5, "start": 185, "end": 194, "ref_url": "Levallois-Perret", "ref_ids": ["1257063"], "sent_idx": 0}, {"id": 6, "start": 36, "end": 41, "ref_url": "Paris", "ref_ids": ["22989"], "sent_idx": 1}, {"id": 7, "start": 102, "end": 112, "ref_url": "Notre_Dame_de_Paris", "ref_ids": null, "sent_idx": 1}, {"id": 8, "start": 131, "end": 148, "ref_url": "Place_de_l'Étoile", "ref_ids": null, "sent_idx": 1}, {"id": 9, "start": 36, "end": 50, "ref_url": "Georges_Seurat", "ref_ids": ["62031"], "sent_idx": 4}, 
{"id": 10, "start": 53, "end": 64, "ref_url": "Pointillism", "ref_ids": ["165439"], "sent_idx": 4}, {"id": 11, "start": 133, "end": 184, "ref_url": "A_Sunday_Afternoon_on_the_Island_of_La_Grande_Jatte", "ref_ids": ["518691"], "sent_idx": 4}, {"id": 12, "start": 226, "end": 242, "ref_url": "Stephen_Sondheim", "ref_ids": ["29268"], "sent_idx": 4}, {"id": 13, "start": 243, "end": 255, "ref_url": "James_Lapine", "ref_ids": ["691097"], "sent_idx": 4}, {"id": 14, "start": 266, "end": 296, "ref_url": "Sunday_in_the_Park_with_George", "ref_ids": ["23410276"], "sent_idx": 4}]} +{"id": "17889446", "title": "Chłapowski", "sentences": ["Chłapowski (; feminine: Chłapowska; plural: Chłapowscy) is the surname of:"], "mentions": []} +{"id": "17889451", "title": "Anna Ustinova", "sentences": ["Anna Ustinova (born 8 December 1985) is a Kazakhstani high jumper.", "She finished seventh at the 2004 World Junior Championships and the 2005 Universiade, won the bronze medal at the 2005 Asian Championships and finished sixth at the 2006 Asian Games.", "In 2007, she won bronze medals at the Asian Championships and the Universiade, before competing at the 2007 World Championships without reaching the final.", "She won the silver medal at the 2010 Asian Indoor Athletics Championships behind compatriot Marina Aitova.", "Her personal best jump is 1.92 metres, achieved in July 2006 in Almaty."], "mentions": [{"id": 0, "start": 42, "end": 52, "ref_url": "Kazakhstan", "ref_ids": ["16642"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 63, "ref_url": "High_jump", "ref_ids": ["13791"], "sent_idx": 0}, {"id": 2, "start": 28, "end": 59, "ref_url": "2004_World_Junior_Championships_in_Athletics", "ref_ids": ["6154841"], "sent_idx": 1}, {"id": 3, "start": 68, "end": 84, "ref_url": "Athletics_at_the_2005_Summer_Universiade", "ref_ids": ["11744335"], "sent_idx": 1}, {"id": 4, "start": 114, "end": 138, "ref_url": "2005_Asian_Championships_in_Athletics", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 
165, "end": 181, "ref_url": "Athletics_at_the_2006_Asian_Games", "ref_ids": ["8080921"], "sent_idx": 1}, {"id": 6, "start": 38, "end": 57, "ref_url": "2007_Asian_Championships_in_Athletics", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 66, "end": 77, "ref_url": "Athletics_at_the_2007_Summer_Universiade", "ref_ids": ["12656357"], "sent_idx": 2}, {"id": 8, "start": 103, "end": 127, "ref_url": "2007_World_Championships_in_Athletics_–_Women's_high_jump", "ref_ids": ["13073685"], "sent_idx": 2}, {"id": 9, "start": 32, "end": 73, "ref_url": "2010_Asian_Indoor_Athletics_Championships", "ref_ids": ["26258887"], "sent_idx": 3}, {"id": 10, "start": 92, "end": 105, "ref_url": "Marina_Aitova", "ref_ids": ["9574571"], "sent_idx": 3}, {"id": 11, "start": 64, "end": 70, "ref_url": "Almaty", "ref_ids": ["159577"], "sent_idx": 4}]} +{"id": "17889463", "title": "Stuttgart Declaration of Guilt", "sentences": ["The Stuttgart Declaration of Guilt () was a declaration issued on October 19, 1945, by the Council of the Evangelical Church in Germany (\"\", EKD), in which it confessed guilt for its inadequacies in opposition to the Nazis and the Third Reich."], "mentions": [{"id": 0, "start": 106, "end": 135, "ref_url": "Evangelical_Church_in_Germany", "ref_ids": ["213018"], "sent_idx": 0}, {"id": 1, "start": 217, "end": 222, "ref_url": "Nazis", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 231, "end": 242, "ref_url": "Third_Reich", "ref_ids": null, "sent_idx": 0}]} +{"id": "17889472", "title": "The Light of the World (Sullivan)", "sentences": ["The Light of the World is an oratorio composed in 1873 by Arthur Sullivan.", "Sullivan wrote the libretto with the assistance of George Grove, based on the New Testament.", "The work was inspired by William Holman Hunt's popular 1853–54 painting, \"The Light of the World\".", "The story of the oratorio follows the whole life of Christ, told mostly in the first person, focusing on his deeds on Earth as preacher, healer and prophet.", 
"The work was first performed at the Birmingham Festival on 27 August 1873 and was the composer's second oratorio, the first being \"The Prodigal Son\" (1869)."], "mentions": [{"id": 0, "start": 104, "end": 112, "ref_url": "Oratorio", "ref_ids": ["79515"], "sent_idx": 4}, {"id": 1, "start": 58, "end": 73, "ref_url": "Arthur_Sullivan", "ref_ids": ["45280"], "sent_idx": 0}, {"id": 2, "start": 19, "end": 27, "ref_url": "Libretto", "ref_ids": ["46950"], "sent_idx": 1}, {"id": 3, "start": 51, "end": 63, "ref_url": "George_Grove", "ref_ids": ["251053"], "sent_idx": 1}, {"id": 4, "start": 78, "end": 91, "ref_url": "New_Testament", "ref_ids": ["21433"], "sent_idx": 1}, {"id": 5, "start": 25, "end": 44, "ref_url": "William_Holman_Hunt", "ref_ids": ["118449"], "sent_idx": 2}, {"id": 6, "start": 74, "end": 96, "ref_url": "The_Light_of_the_World_(painting)", "ref_ids": ["5164211"], "sent_idx": 2}, {"id": 7, "start": 52, "end": 58, "ref_url": "Christ", "ref_ids": null, "sent_idx": 3}, {"id": 8, "start": 36, "end": 55, "ref_url": "Birmingham_Festival", "ref_ids": null, "sent_idx": 4}, {"id": 9, "start": 131, "end": 147, "ref_url": "The_Prodigal_Son_(Sullivan)", "ref_ids": ["14586493"], "sent_idx": 4}]} +{"id": "17889473", "title": "Marton, Harrogate", "sentences": ["Marton is a village in the Harrogate district of North Yorkshire, England.", "It is situated approximately north-west of the city of York and north-east of the market town of Knaresborough.", "The village is joined with Grafton and it forms the civil parish of Marton cum Grafton."], "mentions": [{"id": 0, "start": 27, "end": 36, "ref_url": "Harrogate", "ref_ids": ["266660"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 64, "ref_url": "North_Yorkshire", "ref_ids": ["143759"], "sent_idx": 0}, {"id": 2, "start": 47, "end": 51, "ref_url": "City_status_in_the_United_Kingdom", "ref_ids": ["70841"], "sent_idx": 1}, {"id": 3, "start": 55, "end": 59, "ref_url": "York", "ref_ids": ["34361"], "sent_idx": 1}, {"id": 4, "start": 
82, "end": 93, "ref_url": "Market_town", "ref_ids": ["145965"], "sent_idx": 1}, {"id": 5, "start": 97, "end": 110, "ref_url": "Knaresborough", "ref_ids": ["350983"], "sent_idx": 1}, {"id": 6, "start": 27, "end": 34, "ref_url": "Grafton,_North_Yorkshire", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 52, "end": 64, "ref_url": "Civil_parish", "ref_ids": ["644233"], "sent_idx": 2}, {"id": 8, "start": 68, "end": 86, "ref_url": "Marton_cum_Grafton", "ref_ids": null, "sent_idx": 2}]} +{"id": "17889505", "title": "Mariya Sokova", "sentences": ["Mariya Sokova (; born 2 September 1970) is an Uzbekistani triple jumper.", "She originally represented Russia.", "She finished fifth at the 1995 World Indoor Championships.", "Her personal best jump was 14.50 metres, achieved in June 1999 in Moscow.", "She had 14.54 metres on the indoor track, achieved in February 1995 in Volgograd.", "She later married Uzbekistani triple jumper Vasiliy Sokov, and became an Uzbekistani citizen herself.", "She won the silver medal at the 2002 Asian Championships.", "Both she and her husband represented Russia at the 2015 World Masters Athletics Championships, where she won the silver medal in the W40 division."], "mentions": [{"id": 0, "start": 18, "end": 28, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 5}, {"id": 1, "start": 30, "end": 41, "ref_url": "Triple_jump", "ref_ids": ["30697"], "sent_idx": 5}, {"id": 2, "start": 37, "end": 43, "ref_url": "Russia", "ref_ids": ["25391"], "sent_idx": 7}, {"id": 3, "start": 26, "end": 57, "ref_url": "1995_IAAF_World_Indoor_Championships", "ref_ids": ["2554739"], "sent_idx": 2}, {"id": 4, "start": 66, "end": 72, "ref_url": "Moscow", "ref_ids": ["19004"], "sent_idx": 3}, {"id": 5, "start": 71, "end": 80, "ref_url": "Volgograd", "ref_ids": ["32585"], "sent_idx": 4}, {"id": 6, "start": 18, "end": 28, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 5}, {"id": 7, "start": 44, "end": 57, "ref_url": "Vasiliy_Sokov", "ref_ids": 
["5912075"], "sent_idx": 5}, {"id": 8, "start": 32, "end": 56, "ref_url": "2002_Asian_Championships_in_Athletics", "ref_ids": null, "sent_idx": 6}, {"id": 9, "start": 51, "end": 93, "ref_url": "2015_World_Masters_Athletics_Championships", "ref_ids": ["47546673"], "sent_idx": 7}]} +{"id": "17889517", "title": "Cassidy (given name)", "sentences": ["Cassidy is a given name derived from an Irish surname and ultimately from the Goidelic given name \"Caiside\", meaning \"clever\" or \"curly-haired.\"", "The name \"Caiside\" comes from the Irish word element \"cas\", according to \"Behindthename.com\"", "Cassidy was the 244th most popular name for girls born in the United States in 2009.", "It was most popular in 1999, when it was the 99th most popular name for American girls.", "It first appeared among the 1,000 most popular names for American girls in 1981.", "Kassidy, an alternate spelling, was ranked as the 443rd most popular name for girls in 2009.", "The name Cassidy was a known last name.", "Cassidy may have become a first name due to baby-boomer parents naming their children after the Grateful Dead song, \"Cassidy\"."], "mentions": [{"id": 0, "start": 34, "end": 39, "ref_url": "Irish_language", "ref_ids": ["19872429"], "sent_idx": 1}, {"id": 1, "start": 78, "end": 86, "ref_url": "Goidelic_languages", "ref_ids": ["12469"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 75, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 2}, {"id": 3, "start": 96, "end": 109, "ref_url": "Grateful_Dead", "ref_ids": ["12542"], "sent_idx": 7}, {"id": 4, "start": 0, "end": 7, "ref_url": "Cassidy_(song)", "ref_ids": ["5908060"], "sent_idx": 7}]} +{"id": "17889520", "title": "Wendy Scarfe", "sentences": ["Wendy Elizabeth Scarfe (born 21 November 1933) is an Australian novelist, biographer and poet.", "Born in Adelaide, South Australia, Scarfe gained qualifications from both the University of Melbourne and the Associated Teachers' Training College.", "After living in England 
for some time, she moved, with husband, Allan Scarfe, to India.", "She has taught in Australia, England and India and has published over 20 books including a number written or co-edited with her husband.", "In retirement she completed a B.Litt. in Classical and Near Eastern Studies.", "She is currently based in Warrnambool, Victoria."], "mentions": [{"id": 0, "start": 53, "end": 63, "ref_url": "Australian", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 8, "end": 16, "ref_url": "Adelaide", "ref_ids": ["1148"], "sent_idx": 1}, {"id": 2, "start": 18, "end": 33, "ref_url": "South_Australia", "ref_ids": ["26716"], "sent_idx": 1}, {"id": 3, "start": 78, "end": 101, "ref_url": "University_of_Melbourne", "ref_ids": ["363594"], "sent_idx": 1}, {"id": 4, "start": 26, "end": 47, "ref_url": "Warrnambool,_Victoria", "ref_ids": null, "sent_idx": 5}]} +{"id": "17889546", "title": "Tatyana Borisova", "sentences": ["Tatyana Borisova (born 3 June 1975) is a Kyrgyzstani middle distance runner who specializes in the 1500 metres.", "She represented her country at the 2004 Summer Olympics, where she ran in the qualifiers of the 1500 m.", "Borisova has competed at the IAAF World Cross Country Championships twice (in 2002 and 2003), and also in the IAAF World Championships in Athletics twice, running in the 1500 m in 2001 and 2003.", "She later moved on to longer distances, winning the Austin Marathon in 2004 and 2005.", "She also ran at the Pune Half Marathon in 2004, finishing in third position.", "She enjoyed a resurgence at the 2010 Asian Indoor Athletics Championships, winning a silver medal in the 800 metres and 1500 m bronze."], "mentions": [{"id": 0, "start": 41, "end": 51, "ref_url": "Kyrgyzstan", "ref_ids": ["170131"], "sent_idx": 0}, {"id": 1, "start": 99, "end": 110, "ref_url": "1500_metres", "ref_ids": ["2707254"], "sent_idx": 0}, {"id": 2, "start": 35, "end": 55, "ref_url": "2004_Summer_Olympics", "ref_ids": ["77741"], "sent_idx": 1}, {"id": 3, "start": 29, "end": 67, 
"ref_url": "IAAF_World_Cross_Country_Championships", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 110, "end": 147, "ref_url": "IAAF_World_Championships_in_Athletics", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 52, "end": 67, "ref_url": "Austin_Marathon", "ref_ids": ["4172738"], "sent_idx": 3}, {"id": 6, "start": 20, "end": 38, "ref_url": "Pune_Half_Marathon", "ref_ids": null, "sent_idx": 4}, {"id": 7, "start": 32, "end": 73, "ref_url": "2010_Asian_Indoor_Athletics_Championships", "ref_ids": ["26258887"], "sent_idx": 5}, {"id": 8, "start": 105, "end": 115, "ref_url": "800_metres", "ref_ids": ["585165"], "sent_idx": 5}]} +{"id": "17889548", "title": "Thazhvaram", "sentences": ["Thazhvaram is a 1990 Indian Malayalam- language drama thriller film directed by Bharathan and written by M. T. Vasudevan Nair.", "It stars Mohanlal, Salim Ghouse, Sumalatha, Anju and Sankaradi.", "It tells the story of Balan( Mohanlal), who's on the lookout for Raju( Salim Ghouse) for taking revenge for murdering his wife Raji( Anju).", "Bharathan also composed the only song featured in the film, the background score was provided by Johnson.", "Set in the backdrop of a remote village across a valley in Palakkad, the film is made in the style of a spaghetti western film, and has achieved a cult status in Kerala since its release."], "mentions": [{"id": 0, "start": 28, "end": 37, "ref_url": "Malayalam", "ref_ids": ["37299"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 53, "ref_url": "Drama_(film_and_television)", "ref_ids": ["25584621"], "sent_idx": 0}, {"id": 2, "start": 54, "end": 62, "ref_url": "Thriller_film", "ref_ids": ["28148882"], "sent_idx": 0}, {"id": 3, "start": 80, "end": 89, "ref_url": "Bharathan", "ref_ids": ["497602"], "sent_idx": 0}, {"id": 4, "start": 105, "end": 125, "ref_url": "M._T._Vasudevan_Nair", "ref_ids": ["872828"], "sent_idx": 0}, {"id": 5, "start": 9, "end": 17, "ref_url": "Mohanlal", "ref_ids": ["478225"], "sent_idx": 1}, {"id": 6, "start": 19, 
"end": 31, "ref_url": "Salim_Ghouse", "ref_ids": ["25166452"], "sent_idx": 1}, {"id": 7, "start": 33, "end": 42, "ref_url": "Sumalatha", "ref_ids": ["11268740"], "sent_idx": 1}, {"id": 8, "start": 44, "end": 48, "ref_url": "Anju_(actress)", "ref_ids": ["40458344"], "sent_idx": 1}, {"id": 9, "start": 53, "end": 62, "ref_url": "Sankaradi", "ref_ids": ["20615238"], "sent_idx": 1}, {"id": 10, "start": 97, "end": 104, "ref_url": "Johnson_(composer)", "ref_ids": ["2327783"], "sent_idx": 3}, {"id": 11, "start": 59, "end": 67, "ref_url": "Palakkad_district", "ref_ids": ["746129"], "sent_idx": 4}, {"id": 12, "start": 104, "end": 121, "ref_url": "Spaghetti_western", "ref_ids": null, "sent_idx": 4}, {"id": 13, "start": 147, "end": 151, "ref_url": "Cult_film", "ref_ids": ["5645"], "sent_idx": 4}]} +{"id": "17889576", "title": "Jamaica Rural Police Force", "sentences": ["The Jamaica Rural Police Force, popularly known as the District Constable (D.C.), is an auxiliary to the Jamaica Constabulary Force.", "District constables are appointed by the Commissioner of Police and attached to a particular Police Station.", "Their powers and authority, like the regular police, extend to all parts of the Island.", "Appointments of district constables are published in the Jamaica Gazette and Force Orders.", "Men and women can be appointed as district constables."], "mentions": [{"id": 0, "start": 105, "end": 131, "ref_url": "Jamaica_Constabulary_Force", "ref_ids": ["7796506"], "sent_idx": 0}]} +{"id": "17889583", "title": "Xin Kegui", "sentences": ["Professor Chen Kegui (Chinese: 辛克贵; Pinyin: Xīn Kègùi) (October 6, 1950 – August 30, 2012) was the head of Department of Civil Engineering at Tsinghua University and Deputy Dean of School of Civil Engineering at Tsinghua University from 2005 until his death in 2012.", "Pekenyo\nXin was born October 6, 1950 in Sichuan Province, China.", "He began his college life when he was 23, but he made progress fast.", "He received his B.Sc. in 1977, M.Sc. 
in 1983 form Tsinghua University, supervised by professor Yu-qiu Long (a member of the Chinese Academy of Engineering).", "He later earned a Ph.D. from The Hong Kong Polytechnic University.", "Xin served as deputy dean in School of Civil Engineering until his death.", "He was author and co-author of numerous scientific publications, an editor, and was on the editorial board of the Journal of Engineering Mechanics.", "He served on the editorial board of the Journal of Engineering Mechanics from 1997 and contributed to numerous professional activities in structural engineering."], "mentions": [{"id": 0, "start": 124, "end": 131, "ref_url": "Chinese_character", "ref_ids": null, "sent_idx": 3}, {"id": 1, "start": 36, "end": 42, "ref_url": "Pinyin", "ref_ids": ["23588"], "sent_idx": 0}, {"id": 2, "start": 58, "end": 63, "ref_url": "China", "ref_ids": ["5405"], "sent_idx": 1}]} +{"id": "17889585", "title": "Malcolm Thompson (footballer)", "sentences": ["Malcolm George Thompson (19 October 1946 – 24 October 2014) was an English footballer.", "Whilst with Scarborough, he scored the winning goal in the 1973 FA Trophy Final, in extra-time against Wigan Athletic.", "He played for Goole Town, Hartlepool United, Corby Town and Scarborough.", "He made 46 appearances for Hartlepool in the Football League, scoring 9 goals."], "mentions": [{"id": 0, "start": 75, "end": 85, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 1, "start": 60, "end": 71, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 2}, {"id": 2, "start": 59, "end": 79, "ref_url": "1973_FA_Trophy_Final", "ref_ids": ["38999563"], "sent_idx": 1}, {"id": 3, "start": 103, "end": 117, "ref_url": "Wigan_Athletic_F.C.", "ref_ids": ["367030"], "sent_idx": 1}, {"id": 4, "start": 14, "end": 24, "ref_url": "Goole_Town_F.C.", "ref_ids": ["6375403"], "sent_idx": 2}, {"id": 5, "start": 26, "end": 43, "ref_url": "Hartlepool_United_F.C.", "ref_ids": ["451130"], "sent_idx": 2}, {"id": 6, 
"start": 45, "end": 55, "ref_url": "Corby_Town_F.C.", "ref_ids": ["3174382"], "sent_idx": 2}, {"id": 7, "start": 60, "end": 71, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 2}, {"id": 8, "start": 41, "end": 60, "ref_url": "The_Football_League", "ref_ids": null, "sent_idx": 3}]} +{"id": "17889594", "title": "Geoff Holt (artist)", "sentences": ["Geoff Holt (1942–1998) was a British artist.", "Born in London, Holt was active artistically from 1965–1998, first at Notting Hill and later living and working in Devon until his death.", "Holt started his professional career in Bayswater selling to tourists from the railings at Hyde Park.", "Thus his work spread worldwide; an example is the hand colored print \"The Hunter\", discovered and restored in New Zealand.", "His talent was spotted at 13 with an entry into Camberwell School of Art.", "He left at 18.", "Holt started selling his paintings around the early 1960s on Bayswater Road.", "Many minor drawings and large oil paintings were produced, resulting in small exhibitions.", "He then spent some years at the Portobello market with hand crafted chess set reproductions.", "At the same time the Afro-Caribbean music scene developed and Holt designed some Bob Marley covers produced from his paintings.", "Denis Holt, Geoff Holt's younger brother, assisted with his work until 1980.", "During an independent writing project, which involved identifying a number of important video artists from the mid-1980s, film enthusiast Kevin Hall uncovered no less than nine examples of Geoff's work.", "It is confirmed that these works were commissioned in late 1986 by video software distributor, \"Motion Pictures On Video\" for use on this parent label and its offshoots, Quick Video and Screen In Doors.", "During this period of 'research', Kevin described Geoff's painted works as: \"playing a hugely important role in creating a mood that helped define this unique period in the UK's video history,\" continuing so suggest 
that \"Geoff helped bridge the gap between the classic and the modern with many of his uniquely colourful sleeve designs.\"", "There is little doubt that Geoff had a keen eye for 'stylish' figurative painting; likewise the artist's decidedly brief diversification into designing a number of illustrations which were used as video artworks is seen today as something of a milestone in modern pop culture.", "Some examples of Geoff's video artworks, in association with the London-based Motion Pictures on Video (MPV), are David Sheldon's surreal and creepy \"TANTRUMS\" [AKA \"Devil Times Five\"], Joseph Brooks musical drama \"HEADIN' FOR BROADWAY\", and Curtis Harrinton's psycho-thriller \"THE PSYCHOPATH\" [ AKA \"The Killing Kind\"].\""], "mentions": [{"id": 0, "start": 70, "end": 82, "ref_url": "Notting_Hill", "ref_ids": ["94200"], "sent_idx": 1}, {"id": 1, "start": 115, "end": 120, "ref_url": "Devon", "ref_ids": ["8166"], "sent_idx": 1}, {"id": 2, "start": 61, "end": 70, "ref_url": "Bayswater", "ref_ids": ["93867"], "sent_idx": 6}, {"id": 3, "start": 91, "end": 100, "ref_url": "Hyde_Park,_London", "ref_ids": ["211289"], "sent_idx": 2}, {"id": 4, "start": 110, "end": 121, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 3}, {"id": 5, "start": 48, "end": 72, "ref_url": "Camberwell_School_of_Art", "ref_ids": null, "sent_idx": 4}, {"id": 6, "start": 61, "end": 75, "ref_url": "Bayswater_Road", "ref_ids": ["1329625"], "sent_idx": 6}, {"id": 7, "start": 32, "end": 49, "ref_url": "Portobello_market", "ref_ids": null, "sent_idx": 8}, {"id": 8, "start": 21, "end": 41, "ref_url": "Afro-Caribbean_music", "ref_ids": ["1654043"], "sent_idx": 9}, {"id": 9, "start": 81, "end": 91, "ref_url": "Bob_Marley", "ref_ids": ["8239846"], "sent_idx": 9}, {"id": 10, "start": 96, "end": 120, "ref_url": "Motion_Pictures_On_Video", "ref_ids": null, "sent_idx": 12}]} +{"id": "17889598", "title": "Yuriko Kobayashi", "sentences": ["Yuriko Kobayashi (; born 12 December 1988 in Hyōgo) is a 
Japanese middle- and long-distance runner who specializes in the 1500 metres and 5000 metres.", "She represented Japan at the 2008 Summer Olympics.", "She started out in middle-distance events, running in the 800 metres at the 2004 World Junior Championships and then winning a 1500 m silver medal at the World Youth Championships in Athletics the following year.", "She won the bronze medal in the 1500 m at the 2005 Asian Championships in Athletics and went one better at the 2006 Asian Games by taking the silver medal in Doha.", "Kobayashi moved up to specialise in longer distances and ran the 5000 m at the 2008 Beijing Olympics after becoming the Japanese national champion in the event.", "She gave the best performance by an Asian runner in the women's 5000 m at the 2009 World Championships in Athletics, finishing in eleventh and beating her domestic rival Yurika Nakamura.", "Kobayashi ended her season with a 5000 m gold medal at the 2009 East Asian Games.", "She was selected to represent Asia/Pacific at the 2010 IAAF Continental Cup, but managed only eighth place in the 3000 m.", "The following January, she ran for Hyōgo at the 2011 Women's Inter-Prefectural Ekiden and set a new stage record in her section, although the team finished in ninth position overall.", "She was the bronze medalist in the 5000 m at the 2011 Asian Athletics Championships in Kobe, one place behind her compatriot Hitomi Niiya.", "At the International Chiba Ekiden in November she won the first women's stage for Japan, setting a race record for her leg, and the mixed team went on to finish a close second behind Kenya."], "mentions": [{"id": 0, "start": 35, "end": 40, "ref_url": "Hyōgo", "ref_ids": null, "sent_idx": 8}, {"id": 1, "start": 120, "end": 128, "ref_url": "Japanese_people", "ref_ids": ["186932"], "sent_idx": 4}, {"id": 2, "start": 19, "end": 25, "ref_url": "Middle-distance_running", "ref_ids": ["89280"], "sent_idx": 2}, {"id": 3, "start": 78, "end": 98, "ref_url": 
"Long-distance_runner", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 122, "end": 133, "ref_url": "1500_metres", "ref_ids": ["2707254"], "sent_idx": 0}, {"id": 5, "start": 138, "end": 149, "ref_url": "5000_metres", "ref_ids": ["4371506"], "sent_idx": 0}, {"id": 6, "start": 29, "end": 49, "ref_url": "2008_Summer_Olympics", "ref_ids": ["77745"], "sent_idx": 1}, {"id": 7, "start": 58, "end": 68, "ref_url": "800_metres", "ref_ids": ["585165"], "sent_idx": 2}, {"id": 8, "start": 76, "end": 107, "ref_url": "2004_World_Junior_Championships_in_Athletics", "ref_ids": ["6154841"], "sent_idx": 2}, {"id": 9, "start": 142, "end": 154, "ref_url": "Silver_medal", "ref_ids": ["526647"], "sent_idx": 3}, {"id": 10, "start": 154, "end": 192, "ref_url": "World_Youth_Championships_in_Athletics", "ref_ids": null, "sent_idx": 2}, {"id": 11, "start": 12, "end": 24, "ref_url": "Bronze_medal", "ref_ids": ["526648"], "sent_idx": 9}, {"id": 12, "start": 46, "end": 83, "ref_url": "2005_Asian_Championships_in_Athletics", "ref_ids": null, "sent_idx": 3}, {"id": 13, "start": 111, "end": 127, "ref_url": "2006_Asian_Games", "ref_ids": ["3071193"], "sent_idx": 3}, {"id": 14, "start": 158, "end": 162, "ref_url": "Doha", "ref_ids": ["26214389"], "sent_idx": 3}, {"id": 15, "start": 79, "end": 100, "ref_url": "Athletics_at_the_2008_Summer_Olympics", "ref_ids": ["6374194"], "sent_idx": 4}, {"id": 16, "start": 120, "end": 146, "ref_url": "National_champions_5000_metres_(women)", "ref_ids": null, "sent_idx": 4}, {"id": 17, "start": 78, "end": 115, "ref_url": "2009_World_Championships_in_Athletics", "ref_ids": ["3517978"], "sent_idx": 5}, {"id": 18, "start": 170, "end": 185, "ref_url": "Yurika_Nakamura", "ref_ids": ["9711581"], "sent_idx": 5}, {"id": 19, "start": 59, "end": 80, "ref_url": "Athletics_at_the_2009_East_Asian_Games", "ref_ids": ["25411084"], "sent_idx": 6}, {"id": 20, "start": 50, "end": 75, "ref_url": "2010_IAAF_Continental_Cup", "ref_ids": ["24099434"], "sent_idx": 7}, {"id": 21, 
"start": 53, "end": 85, "ref_url": "Women's_Inter-Prefectural_Ekiden", "ref_ids": null, "sent_idx": 8}, {"id": 22, "start": 49, "end": 83, "ref_url": "2011_Asian_Athletics_Championships", "ref_ids": ["32071392"], "sent_idx": 9}, {"id": 23, "start": 87, "end": 91, "ref_url": "Kobe", "ref_ids": ["59056"], "sent_idx": 9}, {"id": 24, "start": 125, "end": 137, "ref_url": "Hitomi_Niiya", "ref_ids": ["31027097"], "sent_idx": 9}, {"id": 25, "start": 7, "end": 33, "ref_url": "International_Chiba_Ekiden", "ref_ids": ["9711066"], "sent_idx": 10}]} +{"id": "17889631", "title": "Mickler", "sentences": ["Mickler is a surname.", "It may refer to:\nMickler may also refer to:", "See also:"], "mentions": [{"id": 0, "start": 13, "end": 20, "ref_url": "Surname", "ref_ids": ["72243"], "sent_idx": 0}]} +{"id": "17889636", "title": "Comasteridae", "sentences": ["Comasteridae is a family of crinoids.", "This family is now considered obsolete, having been replaced by the family Comatulidae since 2015."], "mentions": [{"id": 0, "start": 28, "end": 35, "ref_url": "Crinoid", "ref_ids": ["62175"], "sent_idx": 0}, {"id": 1, "start": 75, "end": 86, "ref_url": "Comatulidae", "ref_ids": ["48628921"], "sent_idx": 1}]} +{"id": "17889642", "title": "Balavé", "sentences": ["Balavé is a town in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 4700."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889644", "title": "Kennedy (given name)", "sentences": ["Kennedy () is a unisex given name in the English language.", "The name is an Anglicised form of a masculine given name in the Irish language."], "mentions": [{"id": 0, "start": 16, "end": 22, "ref_url": "Unisex_name", 
"ref_ids": ["461344"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 56, "ref_url": "Given_name", "ref_ids": ["247991"], "sent_idx": 1}, {"id": 2, "start": 41, "end": 57, "ref_url": "English_language", "ref_ids": ["8569916"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 25, "ref_url": "Anglicised", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 64, "end": 78, "ref_url": "Irish_language", "ref_ids": ["19872429"], "sent_idx": 1}]} +{"id": "17889665", "title": "List of Danish football transfers summer 2008", "sentences": ["This is a list of Danish football transfers for the 2008 summer transfer window.", "Only moves featuring at least one Danish Superliga club are listed.", "The 2007–08 Danish Superliga season ended on May 24, 2008, with the 2008–09 Danish Superliga season starting on July 19, 2008.", "The summer transfer window opened on 1 July 2008, although a few transfers took place prior to that date; including carry-overs from the winter 2007–08 transfer window.", "The window closed at midnight on 31 August 2008.", "Players without a club may join one at any time, either during or in between transfer windows.", "If need be, clubs may sign a goalkeeper on an emergency loan, if all others are unavailable."], "mentions": [{"id": 0, "start": 12, "end": 28, "ref_url": "Danish_Superliga", "ref_ids": ["1984091"], "sent_idx": 2}, {"id": 1, "start": 4, "end": 28, "ref_url": "2007–08_Danish_Superliga", "ref_ids": ["10794139"], "sent_idx": 2}, {"id": 2, "start": 68, "end": 92, "ref_url": "2008–09_Danish_Superliga", "ref_ids": ["17039232"], "sent_idx": 2}, {"id": 3, "start": 77, "end": 92, "ref_url": "Transfer_window", "ref_ids": ["2607843"], "sent_idx": 5}, {"id": 4, "start": 137, "end": 167, "ref_url": "List_of_Danish_football_transfers_winter_2007–08", "ref_ids": null, "sent_idx": 3}]} +{"id": "17889669", "title": "Chinna Gounder", "sentences": ["Chinna Gounder is a 1992 Indian Tamil- language drama film directed by R. V. 
Udayakumar and starring Vijayakanth, Sukanya, Manorama, Salim Ghouse, Sathyapriya, Goundamani, Senthil and Vadivelu.", "This film was remade into Telugu as\" Chinarayudu\" with Venkatesh and Vijayashanti in the lead.", "It was also remade in Kannada as\" Chikkejamanru\" with Ravichandran and Gautami."], "mentions": [{"id": 0, "start": 32, "end": 37, "ref_url": "Tamil_cinema", "ref_ids": ["10749933"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 58, "ref_url": "Drama_film", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 71, "end": 87, "ref_url": "R._V._Udayakumar", "ref_ids": ["16726005"], "sent_idx": 0}, {"id": 3, "start": 101, "end": 112, "ref_url": "Vijayakanth", "ref_ids": ["1848562"], "sent_idx": 0}, {"id": 4, "start": 114, "end": 121, "ref_url": "Sukanya_(actress)", "ref_ids": ["21241419"], "sent_idx": 0}, {"id": 5, "start": 123, "end": 131, "ref_url": "Manorama_(Tamil_actress)", "ref_ids": ["2727913"], "sent_idx": 0}, {"id": 6, "start": 133, "end": 145, "ref_url": "Salim_Ghouse", "ref_ids": ["25166452"], "sent_idx": 0}, {"id": 7, "start": 147, "end": 158, "ref_url": "Sathyapriya", "ref_ids": ["43613012"], "sent_idx": 0}, {"id": 8, "start": 160, "end": 170, "ref_url": "Goundamani", "ref_ids": ["1690995"], "sent_idx": 0}, {"id": 9, "start": 172, "end": 179, "ref_url": "Senthil", "ref_ids": ["2814802"], "sent_idx": 0}, {"id": 10, "start": 184, "end": 192, "ref_url": "Vadivelu", "ref_ids": ["1866337"], "sent_idx": 0}, {"id": 11, "start": 37, "end": 48, "ref_url": "Chinarayudu", "ref_ids": ["34251609"], "sent_idx": 1}, {"id": 12, "start": 55, "end": 64, "ref_url": "Daggubati_Venkatesh", "ref_ids": null, "sent_idx": 1}, {"id": 13, "start": 69, "end": 81, "ref_url": "Vijayashanti", "ref_ids": ["4398885"], "sent_idx": 1}, {"id": 14, "start": 22, "end": 29, "ref_url": "Kannada", "ref_ids": ["37445"], "sent_idx": 2}, {"id": 15, "start": 34, "end": 47, "ref_url": "Chikkejamanru", "ref_ids": ["41869719"], "sent_idx": 2}, {"id": 16, "start": 71, "end": 78, 
"ref_url": "Gautami", "ref_ids": ["1161168"], "sent_idx": 2}]} +{"id": "17889676", "title": "Badinga", "sentences": ["Badinga is a town in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1948."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889690", "title": "Doga, Burkina Faso", "sentences": ["Doga is a village in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 813."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889691", "title": "Judy Hughes", "sentences": ["Judith Hughes (\"née\" Verhagen, born 28 April 1959 in Subiaco, Western Australia) is an Australian politician.", "She was a Labor member of the Western Australian Legislative Assembly from 2005 to 2008, representing the electorate of Kingsley.", "Following the retirement of Cheryl Edwardes, who had held the seat of Kingsley since 1989 for the Liberal Party, Hughes contested the seat in the 2005 election and narrowly won the seat with a margin of 0.8%.", "A redistribution ahead of the 2008 state election all but eliminated the Labor margin, and Hughes was defeated by Liberal candidate Andrea Mitchell\nIn 2006, Hughes was appointed as Acting Speaker of the Legislative Assembly and as the deputy government whip in the Legislative Assembly."], "mentions": [{"id": 0, "start": 53, "end": 60, "ref_url": "Subiaco,_Western_Australia", "ref_ids": 
["676712"], "sent_idx": 0}, {"id": 1, "start": 30, "end": 47, "ref_url": "Western_Australia", "ref_ids": ["33613"], "sent_idx": 1}, {"id": 2, "start": 38, "end": 47, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 1}, {"id": 3, "start": 73, "end": 78, "ref_url": "Australian_Labor_Party_(Western_Australian_Branch)", "ref_ids": ["58088752"], "sent_idx": 3}, {"id": 4, "start": 30, "end": 69, "ref_url": "Western_Australian_Legislative_Assembly", "ref_ids": ["579448"], "sent_idx": 1}, {"id": 5, "start": 70, "end": 78, "ref_url": "Electoral_district_of_Kingsley", "ref_ids": ["4292191"], "sent_idx": 2}, {"id": 6, "start": 98, "end": 111, "ref_url": "Liberal_Party_of_Australia_(Western_Australian_Division)", "ref_ids": ["57910215"], "sent_idx": 2}, {"id": 7, "start": 146, "end": 159, "ref_url": "2005_Western_Australian_state_election", "ref_ids": ["9648332"], "sent_idx": 2}, {"id": 8, "start": 30, "end": 49, "ref_url": "2008_Western_Australian_state_election", "ref_ids": ["18752637"], "sent_idx": 3}, {"id": 9, "start": 132, "end": 147, "ref_url": "Andrea_Mitchell_(politician)", "ref_ids": ["19836188"], "sent_idx": 3}, {"id": 10, "start": 242, "end": 257, "ref_url": "Government_whip", "ref_ids": null, "sent_idx": 3}]} +{"id": "17889696", "title": "Częstochowa pogrom (1902)", "sentences": ["Częstochowa pogrom refers to an alleged anti-Semitic disturbance that occurred on August 11, 1902, in the town of Chenstokhov, Russian Partition under Nicholas II (modern Częstochowa, Poland).", "According to an official Russian report by the Tsarist Governor of the Piotrków Governorate (residing at the distance), the said pogrom started after an altercation between Jewish shopkeeper and a Catholic woman."], "mentions": [{"id": 0, "start": 40, "end": 52, "ref_url": "Anti-Semitic", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 114, "end": 125, "ref_url": "Chenstokhov", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 127, "end": 144, "ref_url": "Russian_Partition", 
"ref_ids": ["13511741"], "sent_idx": 0}, {"id": 3, "start": 151, "end": 162, "ref_url": "Nicholas_II_of_Russia", "ref_ids": ["30172853"], "sent_idx": 0}, {"id": 4, "start": 71, "end": 91, "ref_url": "Piotrków_Governorate", "ref_ids": ["12165278"], "sent_idx": 1}]} +{"id": "17889698", "title": "Remijia", "sentences": ["Remijia is a genus of flowering plants in the family Rubiaceae.", "Within the family, it is a member of the subfamily Cinchonoideae and the tribe Cinchoneae.", "There are about 36 species in \"Remijia\".", "They are native to Peru and Brazil.", "Some of the species have hollow stems that harbor ants.", "The bark of \"Remijia\" contains 0.5%–2% of quinine, a chemical substance often used as a medicinal drug and flavour additive in alcoholic beverages.", "It is cheaper than the bark of \"Cinchona\", another source of quinine.", "Because of its intense flavor, the bark of \"Remijia\" is used in making tonic water.", "No type species has ever been designated for \"Remijia\".", "In 2005, two species were transferred from \"Remijia\" to a new genus, \"Ciliosemina\"."], "mentions": [{"id": 0, "start": 62, "end": 67, "ref_url": "Genus", "ref_ids": ["38493"], "sent_idx": 9}, {"id": 1, "start": 22, "end": 37, "ref_url": "Flowering_plant", "ref_ids": ["18967"], "sent_idx": 0}, {"id": 2, "start": 11, "end": 17, "ref_url": "Family_(biology)", "ref_ids": ["56276"], "sent_idx": 1}, {"id": 3, "start": 53, "end": 62, "ref_url": "Rubiaceae", "ref_ids": ["54025"], "sent_idx": 0}, {"id": 4, "start": 41, "end": 50, "ref_url": "Subfamily", "ref_ids": ["151983"], "sent_idx": 1}, {"id": 5, "start": 51, "end": 64, "ref_url": "Cinchonoideae", "ref_ids": ["43699126"], "sent_idx": 1}, {"id": 6, "start": 73, "end": 78, "ref_url": "Tribe_(biology)", "ref_ids": ["217858"], "sent_idx": 1}, {"id": 7, "start": 79, "end": 89, "ref_url": "Cinchoneae", "ref_ids": ["35431478"], "sent_idx": 1}, {"id": 8, "start": 13, "end": 20, "ref_url": "Species", "ref_ids": ["21780446"], "sent_idx": 9}, 
{"id": 9, "start": 9, "end": 15, "ref_url": "Indigenous_(ecology)", "ref_ids": ["9019997"], "sent_idx": 3}, {"id": 10, "start": 19, "end": 23, "ref_url": "Peru", "ref_ids": ["170691"], "sent_idx": 3}, {"id": 11, "start": 28, "end": 34, "ref_url": "Brazil", "ref_ids": ["3383"], "sent_idx": 3}, {"id": 12, "start": 50, "end": 53, "ref_url": "Ant", "ref_ids": ["2594"], "sent_idx": 4}, {"id": 13, "start": 61, "end": 68, "ref_url": "Quinine", "ref_ids": ["25297"], "sent_idx": 6}, {"id": 14, "start": 98, "end": 102, "ref_url": "Drug", "ref_ids": ["9311172"], "sent_idx": 5}, {"id": 15, "start": 32, "end": 40, "ref_url": "Cinchona", "ref_ids": ["99803"], "sent_idx": 6}, {"id": 16, "start": 71, "end": 82, "ref_url": "Tonic_water", "ref_ids": ["99809"], "sent_idx": 7}, {"id": 17, "start": 3, "end": 15, "ref_url": "Type_species", "ref_ids": ["532379"], "sent_idx": 8}, {"id": 18, "start": 70, "end": 81, "ref_url": "Ciliosemina", "ref_ids": ["42649324"], "sent_idx": 9}]} +{"id": "17889699", "title": "Meirinkan", "sentences": [], "mentions": []} +{"id": "17889702", "title": "Kunming University of Science and Technology", "sentences": ["Kunming University of Science and Technology (KUST) (昆明理工大学) is in Kunming, the capital city of Yunnan Province, Southwestern China.", "As the goal by 2010, KUST was to become a key university with a strong science and engineering background.", "The university is to be developed into an important provincial training base for advanced technical personnel, a research base for applied fundamental research and hi-tech research, an industrialization base for hi-tech, and a research and consulting center for the national social and economic development.", "Yunnan University Science Park (YNUSP) was established as an experimental site of University Science Park in 1999 and was awarded the status of national university science park in May 2001.", "The park is supported by Yunnan University, Kunming University of Science & Technology and Yunnan Normal 
University."], "mentions": [{"id": 0, "start": 44, "end": 51, "ref_url": "Kunming", "ref_ids": ["197181"], "sent_idx": 4}, {"id": 1, "start": 25, "end": 31, "ref_url": "Yunnan", "ref_ids": ["166410"], "sent_idx": 4}, {"id": 2, "start": 113, "end": 131, "ref_url": "Southwestern_China", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 42, "end": 56, "ref_url": "National_Key_Universities", "ref_ids": ["5483726"], "sent_idx": 1}, {"id": 4, "start": 164, "end": 171, "ref_url": "Hi-tech", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 185, "end": 202, "ref_url": "Industrialization", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 275, "end": 306, "ref_url": "Social_and_economic_development", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 0, "end": 30, "ref_url": "Kunming_High-tech_Industrial_Development_Zone", "ref_ids": ["17883247"], "sent_idx": 3}, {"id": 8, "start": 25, "end": 42, "ref_url": "Yunnan_University", "ref_ids": ["7121931"], "sent_idx": 4}, {"id": 9, "start": 91, "end": 115, "ref_url": "Yunnan_Normal_University", "ref_ids": ["17913756"], "sent_idx": 4}]} +{"id": "17889706", "title": "Gama, Burkina Faso", "sentences": ["Gama is a village in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 735."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889716", "title": "Hasbialaye", "sentences": ["Hasbialaye is a village in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 845."], "mentions": [{"id": 0, "start": 31, "end": 48, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 66, "ref_url": "Banwa_Province", 
"ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889728", "title": "Lago, Burkina Faso", "sentences": ["Lago is a village in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 611."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889739", "title": "Euridice (Caccini)", "sentences": ["Euridice is an opera in a prologue and one act by the Italian composer Giulio Caccini.", "The libretto, by Ottavio Rinuccini, had already been set by Caccini's rival Jacopo Peri in 1600.", "Caccini's version of \"Euridice\" was first performed at the Pitti Palace, Florence on 5 December 1602.", "Caccini hurriedly prepared the score for the press and published it six weeks before Peri's version appeared."], "mentions": [{"id": 0, "start": 15, "end": 20, "ref_url": "Opera", "ref_ids": ["22348"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 61, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 85, "ref_url": "Giulio_Caccini", "ref_ids": ["1141823"], "sent_idx": 0}, {"id": 3, "start": 4, "end": 12, "ref_url": "Libretto", "ref_ids": ["46950"], "sent_idx": 1}, {"id": 4, "start": 17, "end": 34, "ref_url": "Ottavio_Rinuccini", "ref_ids": ["1384269"], "sent_idx": 1}, {"id": 5, "start": 40, "end": 56, "ref_url": "Euridice_(opera)", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 76, "end": 87, "ref_url": "Jacopo_Peri", "ref_ids": ["161918"], "sent_idx": 1}, {"id": 7, "start": 59, "end": 71, "ref_url": "Pitti_Palace", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 73, "end": 81, "ref_url": "Florence", "ref_ids": 
["11525"], "sent_idx": 2}]} +{"id": "17889743", "title": "Raymond Decorte", "sentences": ["Raymond Decorte (Waarschoot, 17 March 1898 — Waarschoot, 30 March 1972) was a Belgian professional road bicycle racer.", "In the 1927 Tour de France, he won two stages and finished 11th in the general classification."], "mentions": [{"id": 0, "start": 17, "end": 27, "ref_url": "Waarschoot", "ref_ids": ["155080"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 85, "ref_url": "Belgium", "ref_ids": ["3343"], "sent_idx": 0}, {"id": 2, "start": 99, "end": 117, "ref_url": "Road_bicycle_racer", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 7, "end": 26, "ref_url": "1927_Tour_de_France", "ref_ids": ["6516497"], "sent_idx": 1}]} +{"id": "17889752", "title": "Tangouna", "sentences": ["Tangouna is a village in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 977."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889755", "title": "RMS Ausonia", "sentences": ["RMS \"Ausonia\", launched in 1921, was one of Cunard's six post-World War I \"A-class\" ocean liners for the Canadian service."], "mentions": [{"id": 0, "start": 44, "end": 50, "ref_url": "Cunard_Line", "ref_ids": ["95374"], "sent_idx": 0}]} +{"id": "17889764", "title": "Yasso, Burkina Faso", "sentences": ["Yasso is a town in the Balavé Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 4,191."], "mentions": [{"id": 0, "start": 23, "end": 40, "ref_url": "Balavé_Department", "ref_ids": ["16848955"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, 
"ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889765", "title": "Francesco Carmelo Salerno", "sentences": ["Francesco Salerno (September 9, 1925 – October 17, 1998) was an Italian politician.", "Lawyer, publicist and member of the Christian Democracy political party, he was appointed Deputy Minister to the Prime Minister office in 1979, during the presidency of Francesco Cossiga.", "In addition to achieving high positions in Italian politics, he was for 22 years president of the Matera football club."], "mentions": [{"id": 0, "start": 169, "end": 186, "ref_url": "Francesco_Cossiga", "ref_ids": ["11809"], "sent_idx": 1}, {"id": 1, "start": 98, "end": 118, "ref_url": "F.C._Matera", "ref_ids": ["6029698"], "sent_idx": 2}]} +{"id": "17889774", "title": "Ultralase", "sentences": ["Ultralase is a healthcare company based in the Leeds, England.", "It that specialises in vision correction through laser eye surgery."], "mentions": [{"id": 0, "start": 15, "end": 25, "ref_url": "Healthcare", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 55, "end": 66, "ref_url": "Eye_surgery", "ref_ids": ["587878"], "sent_idx": 1}]} +{"id": "17889785", "title": "U.S. Route 15 in Pennsylvania", "sentences": ["U.S. 
Route 15 (US 15) is a 791.71 mi (1,274.13 km)-long United States highway, running from Walterboro, South Carolina to Painted Post, New York.", "In Pennsylvania, the highway runs for 194.89 miles (313.65 km), from the Maryland state line just south of Gettysburg, to the New York state line near Lawrenceville."], "mentions": [{"id": 0, "start": 92, "end": 118, "ref_url": "Walterboro,_South_Carolina", "ref_ids": ["134457"], "sent_idx": 0}, {"id": 1, "start": 122, "end": 144, "ref_url": "Painted_Post,_New_York", "ref_ids": ["127227"], "sent_idx": 0}, {"id": 2, "start": 3, "end": 15, "ref_url": "Pennsylvania", "ref_ids": ["23332"], "sent_idx": 1}, {"id": 3, "start": 73, "end": 81, "ref_url": "Maryland", "ref_ids": ["18858"], "sent_idx": 1}, {"id": 4, "start": 107, "end": 117, "ref_url": "Gettysburg,_Pennsylvania", "ref_ids": ["53270"], "sent_idx": 1}, {"id": 5, "start": 151, "end": 164, "ref_url": "Lawrenceville,_Pennsylvania", "ref_ids": ["133908"], "sent_idx": 1}]} +{"id": "17889786", "title": "The Banker", "sentences": ["The Banker is a British English-language monthly international financial affairs publication owned by The Financial Times Ltd. 
and edited in London, United Kingdom.", "The magazine was first published in January 1926 through founding Editor, Brendan Bracken of the \"Financial News\", who went on to become the chairman of the \"Financial Times\" from 1945-1958.", "Since its founding, the magazine has claimed a dedication to the international perspective through features, interviews, multi-media applications, and events.", "The Banker is the world’s premier banking and finance resource, read in over 180 countries and is the key source of data and analysis for the industry.", "It combines in-depth regional and country coverage with reports on global financial markets, regulation and policy, cash management and securities services, commodities and carbon finance, infrastructure and project finance, trading and technology, clearing and settlement, and management and governance issues.", "\"The Banker\" is read most widely in banks, financial institutions, multilateral corporations, central banks, and finance ministries around the world.", "Approximately 60% of its readers are CEO/President and CFO/Treasurers of their organizations."], "mentions": [{"id": 0, "start": 24, "end": 31, "ref_url": "English_language", "ref_ids": ["8569916"], "sent_idx": 0}, {"id": 1, "start": 102, "end": 121, "ref_url": "The_Financial_Times", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 74, "end": 89, "ref_url": "Brendan_Bracken", "ref_ids": ["864291"], "sent_idx": 1}, {"id": 3, "start": 98, "end": 112, "ref_url": "Financial_News_(1884–1945)", "ref_ids": ["23052183"], "sent_idx": 1}, {"id": 4, "start": 158, "end": 173, "ref_url": "Financial_Times", "ref_ids": ["136566"], "sent_idx": 1}]} +{"id": "17889796", "title": "Charters Towers State High School", "sentences": ["Charters Towers State High School (CTSHS) is a public high school in Charters Towers, Queensland, Australia.", "It was established in 1912 and is one of the oldest state secondary schools in Queensland."], "mentions": [{"id": 0, "start": 47, "end": 
53, "ref_url": "State_school", "ref_ids": ["471603"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 65, "ref_url": "Secondary_school", "ref_ids": ["554992"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 15, "ref_url": "Charters_Towers", "ref_ids": ["951125"], "sent_idx": 0}, {"id": 3, "start": 79, "end": 89, "ref_url": "Queensland", "ref_ids": ["59051"], "sent_idx": 1}]} +{"id": "17889798", "title": "2007–08 South China AA season", "sentences": ["The 2007–08 season is South China's 2nd year after giving up the all-Chinese policy.", "This article shows statistics of the club's players in the season, and also lists all matches that the club have and will play in the season."], "mentions": [{"id": 0, "start": 22, "end": 33, "ref_url": "South_China_AA", "ref_ids": ["2261954"], "sent_idx": 0}]} +{"id": "17889800", "title": "Uzbek passport", "sentences": ["The Uzbek passport, being the property of the Republic of Uzbekistan, is issued to the citizens of Uzbekistan for internal use and international travels.", "An ordinary passport is issued to a person at 16 years and must be exchanged each 10 years.", "The passport could also be changed in case of damage or when the bearer of the passport changed his/her name.", "The passport contains 32 pages for special notes and visas.", "It is in the Uzbek and English languages.", "According to bilateral and multilateral agreements, the bearer of the Uzbek passport has the right to travel within Commonwealth of Independent States except neighboring Turkmenistan (Turkmen citizens also required to obtain the Uzbek visa to enter Uzbekistan) and stay up to 90 days without any visa requirements.", "However, for \"leaving\" Uzbekistan, one has to apply for special travel permit in local police.", "This permit is valid for two years and is needed only for \"leaving\" the country—the bearer may stay abroad until the passport is valid.", "Since 2010", "Uzbekistan has been switching gradually to the biometric passport system.", "The old 
(non-biometric) passports are invalid since the beginning of 2016 internationally, but not nationally where are valid till 2018."], "mentions": [{"id": 0, "start": 46, "end": 68, "ref_url": "Republic_of_Uzbekistan", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 0, "end": 10, "ref_url": "Uzbekistan", "ref_ids": ["31853"], "sent_idx": 9}, {"id": 2, "start": 235, "end": 239, "ref_url": "Visa_(document)", "ref_ids": null, "sent_idx": 5}, {"id": 3, "start": 0, "end": 5, "ref_url": "Uzbek_language", "ref_ids": ["71836"], "sent_idx": 9}, {"id": 4, "start": 116, "end": 150, "ref_url": "Commonwealth_of_Independent_States", "ref_ids": ["36870"], "sent_idx": 5}, {"id": 5, "start": 170, "end": 182, "ref_url": "Turkmenistan", "ref_ids": ["198149"], "sent_idx": 5}, {"id": 6, "start": 47, "end": 65, "ref_url": "Biometric_passport", "ref_ids": ["1438860"], "sent_idx": 9}]} +{"id": "17889802", "title": "Archeophone Records", "sentences": ["Archeophone Records is a record company and label founded in 1998 to document the early days of America's recording history.", "It was started by Richard Martin and Meagan Hennessey, a husband and wife who run the company in Champaign, Illinois.", "Archeophone restores and remasters audio from cylinders and discs of jazz, popular music, vaudeville, and spoken word.", "Archeophone has released recordings by Billy Murray, Bert Williams, Guido Deiro, Nora Bayes, Jack Norworth, Eddie Morton, and by jazz ensembles the Six Brown Brothers, the Benson Orchestra of Chicago, and Art Hickman's Orchestra.", "Compilations feature Vess Ossman, Arthur Collins and Byron G. 
Harlan, Henry Burr, Bob Roberts, Ada Jones, Fred Van Eps, Sophie Tucker, Harry Lauder, and the American, Peerless, and Haydn Quartets.", "The company is not affiliated with the Archéophone manufacturer Henri Chamoux."], "mentions": [{"id": 0, "start": 97, "end": 116, "ref_url": "Champaign,_Illinois", "ref_ids": ["110872"], "sent_idx": 1}, {"id": 1, "start": 46, "end": 55, "ref_url": "Phonograph_cylinders", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 60, "end": 65, "ref_url": "Gramophone_records", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 129, "end": 133, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 3}, {"id": 4, "start": 75, "end": 88, "ref_url": "Popular_music", "ref_ids": ["24297671"], "sent_idx": 2}, {"id": 5, "start": 90, "end": 100, "ref_url": "Vaudeville", "ref_ids": ["48235"], "sent_idx": 2}, {"id": 6, "start": 106, "end": 117, "ref_url": "Spoken_word", "ref_ids": ["343430"], "sent_idx": 2}, {"id": 7, "start": 39, "end": 51, "ref_url": "Billy_Murray_(singer)", "ref_ids": ["612808"], "sent_idx": 3}, {"id": 8, "start": 53, "end": 66, "ref_url": "Bert_Williams", "ref_ids": ["187434"], "sent_idx": 3}, {"id": 9, "start": 68, "end": 79, "ref_url": "Guido_Deiro", "ref_ids": ["10540615"], "sent_idx": 3}, {"id": 10, "start": 81, "end": 91, "ref_url": "Nora_Bayes", "ref_ids": ["632193"], "sent_idx": 3}, {"id": 11, "start": 93, "end": 106, "ref_url": "Jack_Norworth", "ref_ids": ["761939"], "sent_idx": 3}, {"id": 12, "start": 108, "end": 120, "ref_url": "Eddie_Morton", "ref_ids": ["35594973"], "sent_idx": 3}, {"id": 13, "start": 148, "end": 166, "ref_url": "Six_Brown_Brothers", "ref_ids": ["8397842"], "sent_idx": 3}, {"id": 14, "start": 172, "end": 199, "ref_url": "Benson_Orchestra_of_Chicago", "ref_ids": ["39615606"], "sent_idx": 3}, {"id": 15, "start": 205, "end": 218, "ref_url": "Art_Hickman", "ref_ids": ["7174264"], "sent_idx": 3}, {"id": 16, "start": 21, "end": 32, "ref_url": "Vess_Ossman", "ref_ids": ["3152159"], "sent_idx": 4}, {"id": 
17, "start": 34, "end": 48, "ref_url": "Arthur_Collins_(singer)", "ref_ids": ["1843909"], "sent_idx": 4}, {"id": 18, "start": 53, "end": 68, "ref_url": "Byron_G._Harlan", "ref_ids": ["1843910"], "sent_idx": 4}, {"id": 19, "start": 70, "end": 80, "ref_url": "Henry_Burr", "ref_ids": ["3424694"], "sent_idx": 4}, {"id": 20, "start": 82, "end": 93, "ref_url": "Bob_Roberts_(folksinger)", "ref_ids": ["15521058"], "sent_idx": 4}, {"id": 21, "start": 95, "end": 104, "ref_url": "Ada_Jones", "ref_ids": ["2686769"], "sent_idx": 4}, {"id": 22, "start": 106, "end": 118, "ref_url": "Fred_Van_Eps", "ref_ids": ["17457246"], "sent_idx": 4}, {"id": 23, "start": 120, "end": 133, "ref_url": "Sophie_Tucker", "ref_ids": ["264719"], "sent_idx": 4}, {"id": 24, "start": 135, "end": 147, "ref_url": "Harry_Lauder", "ref_ids": ["386172"], "sent_idx": 4}, {"id": 25, "start": 157, "end": 165, "ref_url": "American_Quartet_(ensemble)", "ref_ids": ["13024827"], "sent_idx": 4}, {"id": 26, "start": 167, "end": 175, "ref_url": "Peerless_Quartet", "ref_ids": ["2023881"], "sent_idx": 4}, {"id": 27, "start": 181, "end": 195, "ref_url": "The_Haydn_Quartet", "ref_ids": null, "sent_idx": 4}, {"id": 28, "start": 39, "end": 50, "ref_url": "Archéophone", "ref_ids": ["17894029"], "sent_idx": 5}]} +{"id": "17889804", "title": "Bankouma", "sentences": ["Bankouma is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,349."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889805", "title": "Orthosias in Phoenicia", "sentences": ["Orthosias in Phoenicia was a town in the Roman province of Phoenicia Prima, and a bishopric that was a suffragan of Tyre."], "mentions": [{"id": 0, 
"start": 41, "end": 55, "ref_url": "Roman_province", "ref_ids": ["314732"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 74, "ref_url": "Phœnicia", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 103, "end": 112, "ref_url": "Suffragan", "ref_ids": null, "sent_idx": 0}]} +{"id": "17889816", "title": "Pé Verhaegen", "sentences": ["Petrus \"Pé\" Verhaegen (20 February 1902, Tremelo — 5 April 1958, Leuven) was a Belgian professional road bicycle racer."], "mentions": [{"id": 0, "start": 41, "end": 48, "ref_url": "Tremelo", "ref_ids": ["123497"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 71, "ref_url": "Leuven", "ref_ids": ["76931"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 86, "ref_url": "Belgium", "ref_ids": ["3343"], "sent_idx": 0}, {"id": 3, "start": 100, "end": 118, "ref_url": "Road_bicycle_racer", "ref_ids": null, "sent_idx": 0}]} +{"id": "17889819", "title": "Bourawali", "sentences": ["Bourawali is a village in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 417."], "mentions": [{"id": 0, "start": 30, "end": 46, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889834", "title": "Diontala", "sentences": ["Diontala is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 4,191."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889838", "title": "John Woodall (footballer)", "sentences": ["Bertram John Woodall (born 16 January 1949), 
more commonly known as John Woodall, is an English former footballer."], "mentions": [{"id": 0, "start": 103, "end": 113, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}]} +{"id": "17889842", "title": "Fini, Burkina Faso", "sentences": ["Fini is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,009."], "mentions": [{"id": 0, "start": 22, "end": 38, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 56, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 80, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889850", "title": "Lumea ta", "sentences": ["Lumea ta is the third album by DJ Project, a Romanian trance group."], "mentions": [{"id": 0, "start": 31, "end": 41, "ref_url": "DJ_Project", "ref_ids": ["8028887"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 53, "ref_url": "Romanian_language", "ref_ids": ["25534"], "sent_idx": 0}, {"id": 2, "start": 54, "end": 60, "ref_url": "Trance_music", "ref_ids": ["30900"], "sent_idx": 0}]} +{"id": "17889852", "title": "Houna", "sentences": ["Houna is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,483."], "mentions": [{"id": 0, "start": 23, "end": 39, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889863", "title": "Pipes & Flowers", "sentences": ["Pipes & Flowers is the debut album by Italian singer Elisa, released in Italy in 1997 by Sugar Records.", "It followed the single Sleeping in Your Hand.", "In 1998 the album was re-released with the bonus track \"Cure Me\".", "Always in 1997 a promotional single, \"Labyrinth\" 
was released.", "In 1998, another two official singles, \"Mr. Want\" and \"Cure me\" was released, while \"A Feast For Me\" was a solo radio hit."], "mentions": [{"id": 0, "start": 53, "end": 58, "ref_url": "Elisa_(Italian_singer)", "ref_ids": ["2200860"], "sent_idx": 0}, {"id": 1, "start": 10, "end": 14, "ref_url": "1997_in_music", "ref_ids": ["160808"], "sent_idx": 3}, {"id": 2, "start": 23, "end": 44, "ref_url": "Sleeping_in_Your_Hand", "ref_ids": ["18985236"], "sent_idx": 1}, {"id": 3, "start": 3, "end": 7, "ref_url": "1998_in_music", "ref_ids": ["160809"], "sent_idx": 4}]} +{"id": "17889864", "title": "Kouelworo", "sentences": ["Kouelworo is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 923."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889875", "title": "Koulakou", "sentences": ["Koulakou is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,454."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889880", "title": "Gryżyna", "sentences": ["Gryżyna may refer to:"], "mentions": []} +{"id": "17889889", "title": "Are We Not Horses", "sentences": ["Are We Not Horses is an album by Rock Plaza Central.", "Despite being first released independently, the disc made many top ten lists for 2006, including #8 for CMJ Editor-in-Chief Kenny Herzog, Pitchfork staff writer Stephen Deusner 
and Americana-UK lead writer David Cowling.", "Because the album did not receive an official US release through Yep Roc Records until mid-2007, it made several of those year-end lists as well, including \"Magnet\"'s \"10 Great Hidden Treasures of 2007\", calling it \"2007's finest folk/rock find\".", "The album has also recently been taught in a graduate English course at the University of South Alabama, alongside frontman Chris Eaton's first novel, 2003's \"The Inactivist\"."], "mentions": [{"id": 0, "start": 33, "end": 51, "ref_url": "Rock_Plaza_Central", "ref_ids": ["7994704"], "sent_idx": 0}, {"id": 1, "start": 157, "end": 163, "ref_url": "Magnet_(magazine)", "ref_ids": ["5782525"], "sent_idx": 2}, {"id": 2, "start": 76, "end": 103, "ref_url": "University_of_South_Alabama", "ref_ids": ["499499"], "sent_idx": 3}]} +{"id": "17889896", "title": "Kouroumani", "sentences": ["Kouroumani is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,359."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889912", "title": "Liaba", "sentences": ["Liaba is a village in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 541."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889916", "title": "Julien Vervaecke", "sentences": ["Julien Vervaecke (3 November 1899 — May 1940) was a Belgian professional road bicycle 
racer.", "He won Paris–Roubaix, Paris–Brussels, 2 stages in the Tour de France and finished 3rd in the 1927 Tour de France.", "At the start of the Second World War, when an English army detachment wanted to take his house, he refused, and was shot.", "His body was found weeks later, so the exact date of his death is not known.", "Vervaecke was born in , Belgium, and died in Roncq, France.", "Julien's younger brother, Félicien Vervaecke, was also a successful cyclist."], "mentions": [{"id": 0, "start": 52, "end": 59, "ref_url": "Belgian", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 73, "end": 91, "ref_url": "Road_bicycle_racer", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 7, "end": 20, "ref_url": "Paris–Roubaix", "ref_ids": ["291594"], "sent_idx": 1}, {"id": 3, "start": 22, "end": 36, "ref_url": "Paris–Brussels", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 93, "end": 112, "ref_url": "1927_Tour_de_France", "ref_ids": ["6516497"], "sent_idx": 1}, {"id": 5, "start": 20, "end": 36, "ref_url": "Second_World_War", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 45, "end": 50, "ref_url": "Roncq", "ref_ids": ["8040954"], "sent_idx": 4}, {"id": 7, "start": 26, "end": 44, "ref_url": "Félicien_Vervaecke", "ref_ids": ["11957627"], "sent_idx": 5}]} +{"id": "17889933", "title": "Mahouana", "sentences": ["Mahouana is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 7,019."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889938", "title": "SS Ausonia (1956)", "sentences": ["SS \"Ausonia, later known as the SS \"Ivory and Aegean Two while in service with her last owners, Golden Star Cruises, was a cruise liner 
belonging to Louis Cruise Lines operating in the Mediterranean.", "She operated mostly cruise service during her 52 years of life.", "She was the last vintage Italian ocean liner in service when she was retired from service in September 2008 and beached for dismantling in March 2010."], "mentions": [{"id": 0, "start": 96, "end": 115, "ref_url": "Golden_Star_Cruises", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 123, "end": 135, "ref_url": "Cruise_ship", "ref_ids": ["314855"], "sent_idx": 0}, {"id": 2, "start": 149, "end": 167, "ref_url": "Louis_Cruise_Lines", "ref_ids": null, "sent_idx": 0}]} +{"id": "17889954", "title": "Mollé", "sentences": ["Mollé is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,539."], "mentions": [{"id": 0, "start": 23, "end": 39, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889955", "title": "People In The Sky", "sentences": ["People In The Sky is an independent record label based in London, England, that has released music by various artists, including, most notably, Friendly Fires."], "mentions": [{"id": 0, "start": 24, "end": 48, "ref_url": "Independent_record_label", "ref_ids": ["47863329"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 73, "ref_url": "London,_England", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 144, "end": 158, "ref_url": "Friendly_Fires", "ref_ids": ["16748836"], "sent_idx": 0}]} +{"id": "17889961", "title": "Saint-Michel, Burkina Faso", "sentences": ["Saint-Michel is a village in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 667."], "mentions": [{"id": 0, "start": 33, "end": 49, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 
0}, {"id": 1, "start": 53, "end": 67, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889974", "title": "Don Askarian", "sentences": ["Don Askarian (; born 10 July 1949 in Stepanakert, Nagorno-Karabakh Autonomous Oblast, USSR – died 6 October 2018 in Berlin, Germany) was an international film director, producer, photographer and screenwriter of Armenian origin."], "mentions": [{"id": 0, "start": 37, "end": 48, "ref_url": "Stepanakert", "ref_ids": ["914180"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 84, "ref_url": "Nagorno-Karabakh_Autonomous_Oblast", "ref_ids": ["5605323"], "sent_idx": 0}, {"id": 2, "start": 86, "end": 90, "ref_url": "Soviet_Union", "ref_ids": ["26779"], "sent_idx": 0}, {"id": 3, "start": 116, "end": 122, "ref_url": "Berlin", "ref_ids": ["3354"], "sent_idx": 0}, {"id": 4, "start": 124, "end": 131, "ref_url": "Germany", "ref_ids": ["11867"], "sent_idx": 0}, {"id": 5, "start": 154, "end": 167, "ref_url": "Film_director", "ref_ids": ["21554680"], "sent_idx": 0}, {"id": 6, "start": 169, "end": 177, "ref_url": "Film_producer", "ref_ids": ["211405"], "sent_idx": 0}, {"id": 7, "start": 179, "end": 191, "ref_url": "Photographer", "ref_ids": ["84303"], "sent_idx": 0}, {"id": 8, "start": 196, "end": 208, "ref_url": "Screenwriter", "ref_ids": ["53016"], "sent_idx": 0}, {"id": 9, "start": 212, "end": 220, "ref_url": "Armenians", "ref_ids": ["387816"], "sent_idx": 0}]} +{"id": "17889976", "title": "Mishmar Ayalon", "sentences": ["Mishmar Ayalon (, \"lit.\" Ayalon Guard) is a moshav in central Israel.", "Located between Latrun and Ramla on the old Jerusalem-Tel Aviv road, it falls under the jurisdiction of Gezer Regional Council.", "In it had a population of ."], "mentions": [{"id": 0, "start": 44, "end": 50, "ref_url": "Moshav", "ref_ids": ["1597952"], "sent_idx": 0}, {"id": 1, "start": 62, "end": 68, "ref_url": "Israel", 
"ref_ids": ["9282173"], "sent_idx": 0}, {"id": 2, "start": 16, "end": 22, "ref_url": "Latrun", "ref_ids": ["83446"], "sent_idx": 1}, {"id": 3, "start": 27, "end": 32, "ref_url": "Ramla", "ref_ids": ["323256"], "sent_idx": 1}, {"id": 4, "start": 44, "end": 53, "ref_url": "Jerusalem", "ref_ids": ["16043"], "sent_idx": 1}, {"id": 5, "start": 54, "end": 62, "ref_url": "Tel_Aviv", "ref_ids": ["31453"], "sent_idx": 1}, {"id": 6, "start": 104, "end": 126, "ref_url": "Gezer_Regional_Council", "ref_ids": ["17827248"], "sent_idx": 1}]} +{"id": "17889981", "title": "Sallé", "sentences": ["Sallé is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,930."], "mentions": [{"id": 0, "start": 23, "end": 39, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889983", "title": "Hermann Wernicke", "sentences": ["Hermann Wernicke (1851 – 1925) was a German entomologist.", "From 1898 Wernicke was an insect dealer in Dresden.", "He also sold collecting equipment and natural history books.", "In 1899 he wrote \"Anleitung zur Deutschen Normalpräparation der Schmetterlinge\".", "Published in Dresden, this was a manual on collecting, setting and conserving (as specimens) butterflies and moths.", "It was a very popular manual even with non German speakers.", "His private collection of Malay Peninsula butterflies collected between 1883 and 1884, his private collection of world Lepidoptera and his business were sold to Hans Kotzsch."], "mentions": [{"id": 0, "start": 43, "end": 49, "ref_url": "Germany", "ref_ids": ["11867"], "sent_idx": 5}, {"id": 1, "start": 44, "end": 56, "ref_url": "Entomologist", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 13, "end": 20, "ref_url": "Dresden", "ref_ids": 
["37410"], "sent_idx": 4}, {"id": 3, "start": 38, "end": 53, "ref_url": "Natural_history", "ref_ids": ["166380"], "sent_idx": 2}, {"id": 4, "start": 26, "end": 41, "ref_url": "Malay_Peninsula", "ref_ids": ["20403"], "sent_idx": 6}, {"id": 5, "start": 161, "end": 173, "ref_url": "Hans_Kotzsch", "ref_ids": ["17888102"], "sent_idx": 6}]} +{"id": "17889988", "title": "Sama, Burkina Faso", "sentences": ["Sama is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,853."], "mentions": [{"id": 0, "start": 22, "end": 38, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 56, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 80, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17889999", "title": "Monument to the Soviet Army, Sofia", "sentences": ["The Monument to the Soviet Army (, \"Pametnik na Savetskata armia\") is a monument located in Sofia, the capital of Bulgaria.", "There is a large park around the statue and the surrounding areas.", "It is a popular place where many young people gather.", "The monument is located on Tsar Osvoboditel Boulevard, near Orlov Most and the Sofia University.", "It portrays a soldier from the Soviet Army as a freedom fighter, surrounded by a Bulgarian woman, holding her baby, and a Bulgarian man.", "There are other, secondary sculptural composition parts of the memorial complex around the main monument, like the group of soldiers which were used as a canvas by political artists.", "The monument was built in 1954 on the occasion of the 10th anniversary of the liberation by the Soviet Army, which is the Russian interpretation of the complex Military history of Bulgaria during World War II."], "mentions": [{"id": 0, "start": 79, "end": 84, "ref_url": "Sofia", "ref_ids": ["57644"], "sent_idx": 3}, {"id": 1, "start": 180, "end": 188, "ref_url": "Bulgaria", 
"ref_ids": ["3415"], "sent_idx": 6}, {"id": 2, "start": 27, "end": 53, "ref_url": "Tsar_Osvoboditel_Boulevard", "ref_ids": null, "sent_idx": 3}, {"id": 3, "start": 60, "end": 70, "ref_url": "Orlov_Most", "ref_ids": null, "sent_idx": 3}, {"id": 4, "start": 79, "end": 95, "ref_url": "Sofia_University", "ref_ids": ["3516963"], "sent_idx": 3}, {"id": 5, "start": 96, "end": 107, "ref_url": "Soviet_Army", "ref_ids": ["13824438"], "sent_idx": 6}, {"id": 6, "start": 96, "end": 107, "ref_url": "Soviet_Army", "ref_ids": ["13824438"], "sent_idx": 6}, {"id": 7, "start": 160, "end": 208, "ref_url": "Military_history_of_Bulgaria_during_World_War_II", "ref_ids": ["4026196"], "sent_idx": 6}]} +{"id": "17890000", "title": "Sélenkoro", "sentences": ["Sélenkoro is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,111."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890003", "title": "Siwi, Burkina Faso", "sentences": ["Siwi is a town in the Kouka Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 4,383."], "mentions": [{"id": 0, "start": 22, "end": 38, "ref_url": "Kouka_Department", "ref_ids": ["16849067"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 56, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 80, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890004", "title": "Andrey Vavilov", "sentences": ["Andrey Petrovich Vavilov (; born on 10 January 1961 in Perm) is a Russian politician and businessman, senator and a former first Deputy Finance Minister of Russia, and the former Russian Secretary of State."], "mentions": 
[{"id": 0, "start": 55, "end": 59, "ref_url": "Perm", "ref_ids": ["389777"], "sent_idx": 0}, {"id": 1, "start": 66, "end": 72, "ref_url": "Russia", "ref_ids": ["25391"], "sent_idx": 0}, {"id": 2, "start": 102, "end": 109, "ref_url": "Federation_Council_of_Russia", "ref_ids": null, "sent_idx": 0}]} +{"id": "17890020", "title": "WIDA (AM)", "sentences": ["WIDA (1400 AM, \"Vida AM\") is a radio station licensed to serve Carolina, Puerto Rico.", "The station is owned by Primera Iglesia Bautista de Carolina, through licensee Radio Vida Incorporado.", "It airs a Spanish language Christian radio format.", "The station was assigned the WIDA call letters by the Federal Communications Commission on September 1, 1980.", "On September 20, 2017, the station's transmitter site was heavily damaged by Hurricane Maria and on October 10, the diesel generator was stolen.", "The station returned to the air on November 30, 2019, after two years off the air and the transmitter repairs have been completed."], "mentions": [{"id": 0, "start": 11, "end": 13, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 44, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 45, "end": 53, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 63, "end": 84, "ref_url": "Carolina,_Puerto_Rico", "ref_ids": ["195632"], "sent_idx": 0}, {"id": 4, "start": 10, "end": 26, "ref_url": "Spanish_language", "ref_ids": ["26825"], "sent_idx": 2}, {"id": 5, "start": 27, "end": 42, "ref_url": "Christian_radio", "ref_ids": ["159945"], "sent_idx": 2}, {"id": 6, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 3}, {"id": 7, "start": 104, "end": 108, "ref_url": "1980_in_radio", "ref_ids": ["13594268"], "sent_idx": 3}, {"id": 8, "start": 77, "end": 92, "ref_url": "Hurricane_Maria", "ref_ids": ["55262011"], "sent_idx": 4}]} +{"id": "17890021", "title": 
"Abersychan and Talywain railway station", "sentences": ["Abersychan and Talywain railway station served the west of Abersychan village in the Welsh county of Monmouthshire.", "The station was the meeting point for two major pre-grouping railways as they competed for the South Wales coal traffic."], "mentions": [{"id": 0, "start": 0, "end": 10, "ref_url": "Abersychan", "ref_ids": ["2716"], "sent_idx": 0}, {"id": 1, "start": 85, "end": 90, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 2, "start": 101, "end": 114, "ref_url": "Monmouthshire_(historic)", "ref_ids": ["370053"], "sent_idx": 0}, {"id": 3, "start": 95, "end": 106, "ref_url": "South_Wales", "ref_ids": ["832809"], "sent_idx": 1}]} +{"id": "17890022", "title": "Sami, Burkina Faso", "sentences": ["Sami is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 340."], "mentions": [{"id": 0, "start": 25, "end": 40, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890030", "title": "Bonkorowé", "sentences": ["Bonkorowé is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 330."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890048", "title": "Déré", "sentences": ["Déré is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 608."], "mentions": [{"id": 0, "start": 25, "end": 40, "ref_url": "Sami_Department", 
"ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890054", "title": "Angie Pontani", "sentences": ["Angie Pontani is a contemporary burlesque dancer, choreographer, producer, and blogger based in Brooklyn, NY.", "She was crowned Miss Exotic World in 2008."], "mentions": [{"id": 0, "start": 32, "end": 41, "ref_url": "Burlesque", "ref_ids": ["339806"], "sent_idx": 0}, {"id": 1, "start": 96, "end": 108, "ref_url": "Brooklyn,_NY", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 16, "end": 33, "ref_url": "Miss_Exotic_World_Pageant", "ref_ids": ["6381655"], "sent_idx": 1}]} +{"id": "17890057", "title": "Dima, Burkina Faso", "sentences": ["Dima is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 489."], "mentions": [{"id": 0, "start": 25, "end": 40, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890060", "title": "Men's Health (British magazine)", "sentences": ["The British edition of the American magazine Men's Health was launched in February 1995 with a separate editorial team, and is the best-selling monthly men's magazine in the United Kingdom, selling more than \"GQ\" and \"Esquire\" put together.", "The magazine focuses on topics such as fitness, sex, relationships, health, weight loss, nutrition, fashion, technology and style.", "The currently editor-in-chief is Morgan Rees; Toby Wiseman is the featured editor.", "The UK version has maintained the image of the original US version, in particular by promoting the body care, nutrition and all matters relating to the 
male universe.", "The concept of aesthetically-perfect man is an extreme with the presence on the cover of bare-chested muscular male models.", "Because of this, the magazine has often been criticized for promoting an unattainable model of man.", "To strengthen the idea of achievability, the staff of the magazine often try out the health and fitness programmes themselves and write about their experiences alongside pictorial evidence.", "In March 2006, one of the UK writers, Dan Rookwood, appeared on the cover having transformed his body shape while working at the magazine.", "The staff of German \"Men's Health\" have also appeared on their cover, and UK fitness editor Ray Klerck has appeared on the cover and within the pages of the magazine as a model."], "mentions": [{"id": 0, "start": 21, "end": 33, "ref_url": "Men's_Health_(magazine)", "ref_ids": null, "sent_idx": 8}, {"id": 1, "start": 174, "end": 188, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 0}, {"id": 2, "start": 209, "end": 211, "ref_url": "GQ_(magazine)", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 218, "end": 225, "ref_url": "Esquire_(magazine)", "ref_ids": ["244002"], "sent_idx": 0}, {"id": 4, "start": 77, "end": 84, "ref_url": "Physical_fitness", "ref_ids": ["432986"], "sent_idx": 8}, {"id": 5, "start": 48, "end": 51, "ref_url": "Sex", "ref_ids": ["26805"], "sent_idx": 1}, {"id": 6, "start": 53, "end": 65, "ref_url": "Intimate_relationship", "ref_ids": ["1056700"], "sent_idx": 1}, {"id": 7, "start": 85, "end": 91, "ref_url": "Health", "ref_ids": ["80381"], "sent_idx": 6}, {"id": 8, "start": 76, "end": 87, "ref_url": "Weight_loss", "ref_ids": ["400199"], "sent_idx": 1}, {"id": 9, "start": 110, "end": 119, "ref_url": "Nutrition", "ref_ids": ["21525"], "sent_idx": 3}, {"id": 10, "start": 100, "end": 107, "ref_url": "Fashion", "ref_ids": ["11657"], "sent_idx": 1}, {"id": 11, "start": 109, "end": 119, "ref_url": "Technology", "ref_ids": ["29816"], "sent_idx": 1}, {"id": 12, 
"start": 33, "end": 44, "ref_url": "Morgan_Rees", "ref_ids": null, "sent_idx": 2}, {"id": 13, "start": 46, "end": 58, "ref_url": "Toby_Wiseman", "ref_ids": null, "sent_idx": 2}, {"id": 14, "start": 38, "end": 50, "ref_url": "Dan_Rookwood", "ref_ids": null, "sent_idx": 7}, {"id": 15, "start": 92, "end": 102, "ref_url": "Ray_Klerck", "ref_ids": null, "sent_idx": 8}]} +{"id": "17890072", "title": "Dimibo", "sentences": ["Dimibo is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 599."], "mentions": [{"id": 0, "start": 27, "end": 42, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890084", "title": "Priwé", "sentences": ["Priwé is a town in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,632."], "mentions": [{"id": 0, "start": 23, "end": 38, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 56, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 80, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890092", "title": "Sagoéta", "sentences": ["Sagoéta is a village in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 632."], "mentions": [{"id": 0, "start": 28, "end": 43, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890096", "title": "Seindé", "sentences": ["Seindé is a village in the Sami Department of Banwa Province in 
western Burkina Faso.", "In 2005, it had a population of 304."], "mentions": [{"id": 0, "start": 27, "end": 42, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890102", "title": "Musa nagensium", "sentences": ["Musa nagensium is a species of the genus \"Musa\", found in tropical Asia."], "mentions": [{"id": 0, "start": 0, "end": 4, "ref_url": "Musa_(genus)", "ref_ids": ["2786535"], "sent_idx": 0}]} +{"id": "17890105", "title": "Sogodjankoli", "sentences": ["Sogodjankoli is a town in the Sami Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,769."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Sami_Department", "ref_ids": ["16849122"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890108", "title": "TVR Tasmin", "sentences": ["The TVR Tasmin (later known as the TVR 280i) is a sports car designed by TVR and built in the United Kingdom by that company from 1980 to 1987.", "It was the first of TVR's \"Wedge\"-series which formed the basis of its 1980's model range.", "The Tasmin/280i was available as a 2-seater coupé, as a 2+2 coupé and as a 2-seater convertible."], "mentions": [{"id": 0, "start": 20, "end": 23, "ref_url": "TVR", "ref_ids": ["188987"], "sent_idx": 1}, {"id": 1, "start": 26, "end": 40, "ref_url": "TVR_Wedges", "ref_ids": ["17888712"], "sent_idx": 1}, {"id": 2, "start": 44, "end": 49, "ref_url": "Coupé", "ref_ids": ["204658"], "sent_idx": 2}, {"id": 3, "start": 84, "end": 95, "ref_url": "Convertible", "ref_ids": ["188044"], "sent_idx": 2}]} +{"id": "17890114", "title": "Laura Ford", 
"sentences": ["Laura Ford (born 1961, Cardiff, Wales) is a British sculptor."], "mentions": [{"id": 0, "start": 23, "end": 30, "ref_url": "Cardiff", "ref_ids": ["5882"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 60, "ref_url": "Sculptor", "ref_ids": null, "sent_idx": 0}]} +{"id": "17890138", "title": "Bendougou", "sentences": ["Bendougou is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "In 2005 it had a population of 2,864."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890152", "title": "Bérenkuy", "sentences": ["Bérenkuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 435."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890156", "title": "Union Minière", "sentences": ["Union Minière can refer to:"], "mentions": []} +{"id": "17890172", "title": "Dio, Burkina Faso", "sentences": ["Dio is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,107."], "mentions": [{"id": 0, "start": 21, "end": 38, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 56, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 80, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890177", "title": "Founa", "sentences": ["Founa is a town in 
the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,677."], "mentions": [{"id": 0, "start": 23, "end": 40, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890186", "title": "Gombio", "sentences": ["Gombio is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 292."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890188", "title": "Júlio Afrânio Peixoto", "sentences": ["Dr. 
Júlio Afrânio Peixoto (December 17, 1876 – January 12, 1947) was a Brazilian physician, writer, politician, historian, university president, and pioneering eugenicist.", "He held many public offices, including Brazilian congressional representative from Bahia in the federal \"Câmara de Deputados\" (federal congressman) (1924–1930), first the president of the Universidade Federal do Rio de Janeiro, member of the Brazil-United States Cultural Institute, president of the Academia Brasileira de Letras, and honorary doctorates from Coimbra University and the University of Lisbon, Portugal."], "mentions": [{"id": 0, "start": 160, "end": 170, "ref_url": "Eugenetics", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 188, "end": 226, "ref_url": "University_of_Brazil", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 242, "end": 281, "ref_url": "Brazil-United_States_Cultural_Institute", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 300, "end": 329, "ref_url": "Brazilian_Academy_of_Letters", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 360, "end": 378, "ref_url": "Coimbra_University", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 387, "end": 407, "ref_url": "University_of_Lisbon", "ref_ids": ["585749"], "sent_idx": 1}]} +{"id": "17890191", "title": "Vasile Pușcaș", "sentences": ["Vasile Puşcaş (; b. 
Surduc, July 8, 1952) is a Romanian politician, diplomat and International Relations professor."], "mentions": [{"id": 0, "start": 20, "end": 26, "ref_url": "Surduc", "ref_ids": ["17570798"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 54, "ref_url": "Romania", "ref_ids": ["25445"], "sent_idx": 0}]} +{"id": "17890200", "title": "Optus D3", "sentences": ["Optus D3 is an Australian geostationary communications satellite, which is operated by Optus and provides communications services to Australasia.", "D3 was the third Optus-D satellite to be launched.", "It is a satellite, which was constructed by Orbital Sciences Corporation based on the Star-2.4 satellite bus, with the same configuration as the earlier Optus D2 satellite.", "It was launched, along with the Japanese JCSAT-12 satellite, by Arianespace.", "An Ariane 5ECA rocket was used for the launch, which occurred from ELA-3 at the Guiana Space Centre in Kourou, French Guiana.", "The launch took place at 22:09 GMT on 21 August 2009, at the start of a 60-minute launch window.", "Optus D3 separated from its carrier rocket into a geosynchronous transfer orbit, from which it raised itself to geostationary orbit using an IHI -500-N apogee motor.", "It has a design life of fifteen years, and carries thirty two J band transponders (US IEEE Ku band)."], "mentions": [{"id": 0, "start": 15, "end": 24, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 0}, {"id": 1, "start": 40, "end": 64, "ref_url": "Communications_satellite", "ref_ids": ["45207"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 5, "ref_url": "Optus", "ref_ids": ["358933"], "sent_idx": 6}, {"id": 3, "start": 133, "end": 144, "ref_url": "Australasia", "ref_ids": ["21492915"], "sent_idx": 0}, {"id": 4, "start": 17, "end": 24, "ref_url": "Optus_fleet_of_satellites", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 44, "end": 72, "ref_url": "Orbital_Sciences_Corporation", "ref_ids": ["742393"], "sent_idx": 2}, {"id": 6, "start": 86, "end": 94, 
"ref_url": "Star-2.4", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 95, "end": 108, "ref_url": "Satellite_bus", "ref_ids": ["17260611"], "sent_idx": 2}, {"id": 8, "start": 153, "end": 161, "ref_url": "Optus_D2", "ref_ids": null, "sent_idx": 2}, {"id": 9, "start": 32, "end": 37, "ref_url": "Japan", "ref_ids": ["15573"], "sent_idx": 3}, {"id": 10, "start": 41, "end": 49, "ref_url": "JCSAT-12", "ref_ids": null, "sent_idx": 3}, {"id": 11, "start": 64, "end": 75, "ref_url": "Arianespace", "ref_ids": ["3112"], "sent_idx": 3}, {"id": 12, "start": 3, "end": 14, "ref_url": "Ariane_5", "ref_ids": ["3111"], "sent_idx": 4}, {"id": 13, "start": 67, "end": 72, "ref_url": "ELA-3", "ref_ids": ["15927017"], "sent_idx": 4}, {"id": 14, "start": 80, "end": 99, "ref_url": "Guiana_Space_Centre", "ref_ids": ["183717"], "sent_idx": 4}, {"id": 15, "start": 103, "end": 109, "ref_url": "Kourou", "ref_ids": ["63116"], "sent_idx": 4}, {"id": 16, "start": 111, "end": 124, "ref_url": "French_Guiana", "ref_ids": ["21350970"], "sent_idx": 4}, {"id": 17, "start": 82, "end": 95, "ref_url": "Launch_window", "ref_ids": ["155759"], "sent_idx": 5}, {"id": 18, "start": 50, "end": 79, "ref_url": "Geosynchronous_transfer_orbit", "ref_ids": null, "sent_idx": 6}, {"id": 19, "start": 112, "end": 131, "ref_url": "Geostationary_orbit", "ref_ids": ["41210"], "sent_idx": 6}, {"id": 20, "start": 141, "end": 151, "ref_url": "IHI_-500-N", "ref_ids": null, "sent_idx": 6}, {"id": 21, "start": 152, "end": 164, "ref_url": "Apogee_motor", "ref_ids": null, "sent_idx": 6}, {"id": 22, "start": 69, "end": 80, "ref_url": "Transponder", "ref_ids": ["41817"], "sent_idx": 7}]} +{"id": "17890202", "title": "Rama Rama Kya Hai Dramaa?", "sentences": ["Rama Rama, Kya Hai Dramaaa is a Bollywood comedy film directed by Chandrakant Singh, and produced by Surendra Bhatia and Rajan Prakash.", "The film stars Rajpal Yadav, Neha Dhupia, Aashish Chaudhary and Amrita Arora in lead roles.", "It released on 1 February 2008, and received 
generally negative response upon release."], "mentions": [{"id": 0, "start": 32, "end": 41, "ref_url": "Bollywood", "ref_ids": ["4246"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 53, "ref_url": "Comedy_film", "ref_ids": ["5644"], "sent_idx": 0}, {"id": 2, "start": 66, "end": 83, "ref_url": "Chandrakant_Singh", "ref_ids": ["28778484"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 27, "ref_url": "Rajpal_Yadav", "ref_ids": ["41148372"], "sent_idx": 1}, {"id": 4, "start": 29, "end": 40, "ref_url": "Neha_Dhupia", "ref_ids": ["873077"], "sent_idx": 1}, {"id": 5, "start": 42, "end": 59, "ref_url": "Aashish_Chaudhary", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 64, "end": 76, "ref_url": "Amrita_Arora", "ref_ids": ["3136185"], "sent_idx": 1}]} +{"id": "17890206", "title": "Gnoumakuy", "sentences": ["Gnoumakuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 440."], "mentions": [{"id": 0, "start": 30, "end": 47, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 65, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890209", "title": "Transocean Air Lines", "sentences": ["Transocean Air Lines was an Oakland, California-based airline that operated from 1946 until 1960.", "The Transocean name was also used in 1989 by another US-based air carrier, TransOcean Airways, which previously operated as Gulf Air Transport."], "mentions": [{"id": 0, "start": 28, "end": 35, "ref_url": "Oakland", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 37, "end": 47, "ref_url": "California", "ref_ids": ["5407"], "sent_idx": 0}, {"id": 2, "start": 124, "end": 142, "ref_url": "Gulf_Air_Transport", "ref_ids": ["1997116"], "sent_idx": 1}]} +{"id": "17890214", "title": "Koba, Burkina Faso", "sentences": ["Koba is a town in the Sanaba 
Department of Banwa Province in western Burkina Faso.", "It is on the continent of Africa.", "As of 2005 it had a population of 1,207."], "mentions": [{"id": 0, "start": 22, "end": 39, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890217", "title": "Parnassius jacobsoni", "sentences": ["Parnassius jacobsoni is a high-altitude butterfly which is found only in Tajikistan and Afghanistan.", "It is a member of the snow Apollo genus (\"Parnassius\") of the swallowtail family (Papilionidae)."], "mentions": [{"id": 0, "start": 73, "end": 83, "ref_url": "Tajikistan", "ref_ids": ["30108"], "sent_idx": 0}, {"id": 1, "start": 88, "end": 99, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}, {"id": 2, "start": 42, "end": 52, "ref_url": "Parnassius", "ref_ids": ["7712308"], "sent_idx": 1}, {"id": 3, "start": 82, "end": 94, "ref_url": "Papilionidae", "ref_ids": null, "sent_idx": 1}]} +{"id": "17890221", "title": "Kosso, Burkina Faso", "sentences": ["Kosso is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 435."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890230", "title": "Ouarakuy", "sentences": ["Ouarakuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 549."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, 
"ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890234", "title": "Pekuy", "sentences": ["Pekuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 293."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890238", "title": "Sorwa", "sentences": ["Sorwa is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 699."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890242", "title": "Soumakuy", "sentences": ["Soumakuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 370."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890244", "title": "Lipid microdomain", "sentences": ["Lipid microdomains are formed when lipids undergo lateral phase separations yielding stable coexisting lamellar domains.", "These phase separations can be induced by changes in temperature, pressure, ionic strength or by the addition of 
divalent cations or proteins.", "The question of whether such lipid microdomains observed in model lipid systems also exist in biomembranes had motivated considerable research efforts.", "Lipid domains are not readily isolated and examined as unique species, in contrast to the examples of lateral heterogeneity.", "One can disrupt the membrane and demonstrate a heterogeneous range of composition in the population of the resulting vesicles or fragments.", "Electron microscopy can also be used to demonstrate lateral inhomogeneities in biomembranes.", "Often, lateral heterogeneity has been inferred from biophysical techniques where the observed signal indicates multiple populations rather than the expected homogenous population.", "An example of this is the measurement of the diffusion coefficient of a fluorescent lipid analogue in soybean protoplasts.", "Membrane microheterogeneity is sometimes inferred from the behavior of enzymes, where the enzymatic activity does not appear to be correlated with the average lipid physical state exhibited by the bulk of the membrane.", "Often, the methods suggest regions with different lipid fluidity, as would be expected of coexisting gel and liquid crystalline phases within the biomembrane.", "This is also the conclusion of a series of studies where differential effects of perturbation caused by \"cis\" and \"trans\" fatty acids are interpreted in terms of preferential partitioning of the two liquid crystalline and gel-like domains."], "mentions": [{"id": 0, "start": 50, "end": 55, "ref_url": "Lipid", "ref_ids": ["17940"], "sent_idx": 9}, {"id": 1, "start": 128, "end": 133, "ref_url": "Phase_(matter)", "ref_ids": ["23637"], "sent_idx": 9}, {"id": 2, "start": 103, "end": 111, "ref_url": "Lamellar", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 231, "end": 237, "ref_url": "Lipid_bilayer", "ref_ids": ["158011"], "sent_idx": 10}, {"id": 4, "start": 6, "end": 22, "ref_url": "Phase_separation", "ref_ids": ["60373549"], "sent_idx": 
1}, {"id": 5, "start": 53, "end": 64, "ref_url": "Temperature", "ref_ids": ["20647050"], "sent_idx": 1}, {"id": 6, "start": 66, "end": 74, "ref_url": "Pressure", "ref_ids": ["23619"], "sent_idx": 1}, {"id": 7, "start": 76, "end": 90, "ref_url": "Ionic_strength", "ref_ids": ["2619023"], "sent_idx": 1}, {"id": 8, "start": 113, "end": 121, "ref_url": "Divalent", "ref_ids": null, "sent_idx": 1}, {"id": 9, "start": 122, "end": 128, "ref_url": "Cation", "ref_ids": null, "sent_idx": 1}, {"id": 10, "start": 133, "end": 140, "ref_url": "Protein", "ref_ids": ["23634"], "sent_idx": 1}, {"id": 11, "start": 146, "end": 157, "ref_url": "Biomembrane", "ref_ids": null, "sent_idx": 9}, {"id": 12, "start": 14, "end": 27, "ref_url": "Heterogeneity", "ref_ids": null, "sent_idx": 8}, {"id": 13, "start": 70, "end": 81, "ref_url": "Wikt:composition", "ref_ids": null, "sent_idx": 4}, {"id": 14, "start": 120, "end": 130, "ref_url": "Population", "ref_ids": ["22949"], "sent_idx": 6}, {"id": 15, "start": 117, "end": 125, "ref_url": "Vesicle_(biology)", "ref_ids": null, "sent_idx": 4}, {"id": 16, "start": 129, "end": 138, "ref_url": "Fragmentation_(cell_biology)", "ref_ids": ["10319792"], "sent_idx": 4}, {"id": 17, "start": 0, "end": 19, "ref_url": "Electron_microscopy", "ref_ids": null, "sent_idx": 5}, {"id": 18, "start": 45, "end": 66, "ref_url": "Diffusion_coefficient", "ref_ids": null, "sent_idx": 7}, {"id": 19, "start": 72, "end": 83, "ref_url": "Fluorescent", "ref_ids": null, "sent_idx": 7}, {"id": 20, "start": 110, "end": 120, "ref_url": "Protoplast", "ref_ids": ["964229"], "sent_idx": 7}, {"id": 21, "start": 71, "end": 77, "ref_url": "Enzyme", "ref_ids": ["9257"], "sent_idx": 8}, {"id": 22, "start": 151, "end": 158, "ref_url": "Average", "ref_ids": ["60167"], "sent_idx": 8}, {"id": 23, "start": 165, "end": 179, "ref_url": "Physical_state", "ref_ids": null, "sent_idx": 8}, {"id": 24, "start": 56, "end": 64, "ref_url": "Membrane_fluidity", "ref_ids": ["8871770"], "sent_idx": 9}, {"id": 
25, "start": 222, "end": 225, "ref_url": "Gel", "ref_ids": ["41207"], "sent_idx": 10}, {"id": 26, "start": 199, "end": 213, "ref_url": "Liquid_crystal", "ref_ids": ["17973"], "sent_idx": 10}, {"id": 27, "start": 105, "end": 108, "ref_url": "Cis-trans_isomerism", "ref_ids": null, "sent_idx": 10}, {"id": 28, "start": 115, "end": 120, "ref_url": "Cis-trans_isomerism", "ref_ids": null, "sent_idx": 10}, {"id": 29, "start": 122, "end": 132, "ref_url": "Fatty_acid", "ref_ids": ["10975"], "sent_idx": 10}]} +{"id": "17890247", "title": "Timba, Burkina Faso", "sentences": ["Timba is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 241."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890255", "title": "Addis count", "sentences": ["The Addis count is a urine test measuring urinary casts over time.", "It is named for Thomas Addis."], "mentions": [{"id": 0, "start": 21, "end": 31, "ref_url": "Urine_test", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 42, "end": 55, "ref_url": "Urinary_casts", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 16, "end": 28, "ref_url": "Thomas_Addis", "ref_ids": ["3562653"], "sent_idx": 1}]} +{"id": "17890259", "title": "The Sleeping Tiger", "sentences": ["The Sleeping Tiger is a 1954 film noir starring Alexis Smith and Dirk Bogarde.", "It was Joseph Losey's first British feature, which he directed under the pseudonym of Victor Hanbury due to being blacklisted in the McCarthy Era."], "mentions": [{"id": 0, "start": 29, "end": 38, "ref_url": "Film_noir", "ref_ids": ["10802"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 60, "ref_url": "Alexis_Smith", "ref_ids": ["981787"], "sent_idx": 0}, 
{"id": 2, "start": 65, "end": 77, "ref_url": "Dirk_Bogarde", "ref_ids": ["159603"], "sent_idx": 0}, {"id": 3, "start": 7, "end": 21, "ref_url": "Joseph_Losey", "ref_ids": ["145853"], "sent_idx": 1}, {"id": 4, "start": 28, "end": 35, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 1}, {"id": 5, "start": 73, "end": 82, "ref_url": "Pseudonym", "ref_ids": ["40594"], "sent_idx": 1}, {"id": 6, "start": 114, "end": 125, "ref_url": "Hollywood_blacklist", "ref_ids": ["21556126"], "sent_idx": 1}, {"id": 7, "start": 133, "end": 145, "ref_url": "McCarthyism", "ref_ids": ["43805"], "sent_idx": 1}]} +{"id": "17890263", "title": "Mister Wong", "sentences": ["Mister Wong was one of the largest European free social-bookmarking web services."], "mentions": [{"id": 0, "start": 35, "end": 41, "ref_url": "Europe", "ref_ids": ["9239"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 67, "ref_url": "Social_bookmarking", "ref_ids": ["1257581"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 79, "ref_url": "Web_service", "ref_ids": ["93483"], "sent_idx": 0}]} +{"id": "17890265", "title": "Yenkuy", "sentences": ["Yenkuy is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005, it had a population of 278."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890274", "title": "Kounla", "sentences": ["Kounla is a village in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 452."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, 
"start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890278", "title": "1st Kansas Colored Volunteer Infantry Regiment", "sentences": ["The 1st Regiment Kansas Volunteer Infantry (Colored) was an infantry regiment that served in the Union Army during the American Civil War.", "It was the first black regiment to be organized in a northern state and the first black unit to see combat during the Civil War.", "At the Battle of Poison Spring, the regiment lost nearly half its number, and suffered the highest losses of any Kansas regiment during the war."], "mentions": [{"id": 0, "start": 60, "end": 68, "ref_url": "Infantry", "ref_ids": ["15068"], "sent_idx": 0}, {"id": 1, "start": 36, "end": 44, "ref_url": "Regiment", "ref_ids": ["166653"], "sent_idx": 2}, {"id": 2, "start": 97, "end": 107, "ref_url": "Union_Army", "ref_ids": ["360126"], "sent_idx": 0}, {"id": 3, "start": 119, "end": 137, "ref_url": "American_Civil_War", "ref_ids": ["863"], "sent_idx": 0}, {"id": 4, "start": 7, "end": 30, "ref_url": "Battle_of_Poison_Spring", "ref_ids": ["2326906"], "sent_idx": 2}]} +{"id": "17890282", "title": "Kossoba", "sentences": ["Kossoba is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,975."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890285", "title": "2nd Kansas Colored Volunteer Infantry Regiment", "sentences": ["The 2nd Kansas Volunteer Infantry Regiment (Colored) was an infantry regiment that served in the Union Army during the American Civil War."], "mentions": [{"id": 0, "start": 60, "end": 68, "ref_url": "Infantry", "ref_ids": ["15068"], "sent_idx": 0}, {"id": 1, "start": 
69, "end": 77, "ref_url": "Regiment", "ref_ids": ["166653"], "sent_idx": 0}, {"id": 2, "start": 97, "end": 107, "ref_url": "Union_Army", "ref_ids": ["360126"], "sent_idx": 0}, {"id": 3, "start": 119, "end": 137, "ref_url": "American_Civil_War", "ref_ids": ["863"], "sent_idx": 0}]} +{"id": "17890286", "title": "Moussakuy", "sentences": ["Moussakuy is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,139."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890290", "title": "Nemena", "sentences": ["Nemena is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,017."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890295", "title": "Ziga", "sentences": ["Ziga is a town in the Sanaba Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 6,653."], "mentions": [{"id": 0, "start": 22, "end": 39, "ref_url": "Sanaba_Department", "ref_ids": ["16849196"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890304", "title": "Trilogija 2: Devičanska ostrva", "sentences": ["Trilogija 2: Devičanska ostrva (Serbian Cyrillic: Трилогија 2: Девичанска острва, trans.", "\"Trilogy 2: Virgin 
Islands\") is the second EP from Serbian and former Yugoslav rock band Riblja Čorba.", "It is the second part of the Riblja Čorba trilogy released during 2005 and 2006.", "The band considers EPs \"\", \"Trilogija 2: Devičanska ostrva\" and \"\" three parts of the studio album titled \"Trilogija\", although all three were released separately.", "All the songs from three EPs were released on the compilation album \"Trilogija\"."], "mentions": [{"id": 0, "start": 32, "end": 48, "ref_url": "Serbian_Cyrillic", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 12, "end": 26, "ref_url": "Virgin_Islands", "ref_ids": ["32462"], "sent_idx": 1}, {"id": 2, "start": 51, "end": 57, "ref_url": "Serbia", "ref_ids": ["29265"], "sent_idx": 1}, {"id": 3, "start": 63, "end": 78, "ref_url": "Socialist_Federal_Republic_of_Yugoslavia", "ref_ids": ["297809"], "sent_idx": 1}, {"id": 4, "start": 79, "end": 88, "ref_url": "Rock_band", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 29, "end": 41, "ref_url": "Riblja_Čorba", "ref_ids": ["13269687"], "sent_idx": 2}, {"id": 6, "start": 69, "end": 78, "ref_url": "Trilogija", "ref_ids": ["20255766"], "sent_idx": 4}]} +{"id": "17890311", "title": "Bama, Burkina Faso", "sentences": ["Bama is a village in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,080.", "This village should not be confused with the much larger town of Bama, the capital of Bama Department in Houet Province, Hauts-Bassins Region."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}, {"id": 3, "start": 86, "end": 101, "ref_url": "Bama_Department", "ref_ids": ["16982697"], "sent_idx": 2}, {"id": 4, "start": 105, "end": 119, "ref_url": "Houet_Province", 
"ref_ids": ["2874539"], "sent_idx": 2}, {"id": 5, "start": 121, "end": 141, "ref_url": "Hauts-Bassins_Region", "ref_ids": ["2904227"], "sent_idx": 2}]} +{"id": "17890351", "title": "Svartediket", "sentences": ["Svartediket is a lake in the city of Bergen in Hordaland county, Norway.", "The lake lies immediately north of the mountain Ulriken, northeast of the Store Lungegårdsvannet bay.", "Historically, this lake was called \"Ålrekstadsvannet\", but in the late 19th century, a dam was built on the south end of the lake to create a reservoir for the city's drinking water.", "Since then it was called Svartediket, after the water plant built there.", "The lake is the main source of drinking water for the central parts of the city of Bergen.", "There is a pumping station and treatment facility build adjacent to the lake inside the mountain Ulriken.", "Drinking water is collected at a depth in Svartediket.", "Inside the water treatment plant, the water is filtered and irradiated with UV light to kill harmful microorganisms.", "After treatment, the clean drinking water is stored in a large water pool inside the mountain.", "This is the water that is pumped down to the city center for drinking water.", "In 2004, Bergen was hit by a \"Giardia lamblia\" epidemic which had its source in the lake Svartediket.", "The water treatment facility was upgraded in 2007."], "mentions": [{"id": 0, "start": 9, "end": 15, "ref_url": "Bergen", "ref_ids": ["56494"], "sent_idx": 10}, {"id": 1, "start": 47, "end": 56, "ref_url": "Hordaland", "ref_ids": ["171185"], "sent_idx": 0}, {"id": 2, "start": 65, "end": 71, "ref_url": "Norway", "ref_ids": ["21241"], "sent_idx": 0}, {"id": 3, "start": 97, "end": 104, "ref_url": "Ulriken", "ref_ids": ["1514799"], "sent_idx": 5}, {"id": 4, "start": 74, "end": 96, "ref_url": "Store_Lungegårdsvannet", "ref_ids": ["14287926"], "sent_idx": 1}, {"id": 5, "start": 61, "end": 75, "ref_url": "Drinking_water", "ref_ids": ["198725"], "sent_idx": 9}, {"id": 6, 
"start": 97, "end": 104, "ref_url": "Ulriken", "ref_ids": ["1514799"], "sent_idx": 5}, {"id": 7, "start": 76, "end": 84, "ref_url": "UV_light", "ref_ids": null, "sent_idx": 7}, {"id": 8, "start": 30, "end": 45, "ref_url": "Giardia_lamblia", "ref_ids": null, "sent_idx": 10}, {"id": 9, "start": 47, "end": 55, "ref_url": "Epidemic", "ref_ids": ["66981"], "sent_idx": 10}]} +{"id": "17890353", "title": "Rita El Khayat", "sentences": ["Ghita El Khayat, (altern. translit. Rita), (born 1944, Rabat, Morocco) is Moroccan psychiatrist, anthro-psychoanalyst, writer, and anthropologist.", "Thereafter she accepted a residency as a psychiatry student in Casablanca.", "However, she moved to Paris, where she graduated in three fields of medicine: psychiatry, medicine of work & ergonomics, and spatial medicine.", "While in Paris she studied ethnopsychiatry under George Devereux and also studied Classical Arabic at École spéciale des Langues orientales and began to write.", "In 1999 she founded the Association Ainï Bennaï to broaden the culture in Morocco and Maghreb.", "In 2000, the Association became a publishing house.", "She is known for her strong involvement in favour of women's emancipation and social rights.", "She is author of more than 350 articles and 30 books.", "She is professor of anthropology of the knowledge at the Faculty of Letters and Philosophy of the D'Annunzio University of Chieti–Pescara in Italy."], "mentions": [{"id": 0, "start": 55, "end": 69, "ref_url": "Rabat,_Morocco", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 49, "end": 64, "ref_url": "George_Devereux", "ref_ids": ["17454379"], "sent_idx": 3}, {"id": 2, "start": 82, "end": 98, "ref_url": "Classical_Arabic", "ref_ids": ["547964"], "sent_idx": 3}, {"id": 3, "start": 102, "end": 139, "ref_url": "École_spéciale_des_Langues_orientales", "ref_ids": null, "sent_idx": 3}, {"id": 4, "start": 78, "end": 91, "ref_url": "Social_rights", "ref_ids": null, "sent_idx": 6}, {"id": 5, "start": 98, "end": 137, 
"ref_url": "D'Annunzio_University_of_Chieti–Pescara", "ref_ids": ["4946471"], "sent_idx": 8}]} +{"id": "17890354", "title": "Ban, Burkina Faso", "sentences": ["Ban is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 8,776."], "mentions": [{"id": 0, "start": 21, "end": 39, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890356", "title": "Rushan Range", "sentences": ["Rushan Range is a mountain range in south-western Pamir in Tajikistan, trending in the south-westerly direction from Sarez Lake toward Khorog, between Gunt River to the south and Bartang River to the north.", "About 120 km long, it reaches its highest elevation of 6,083 m at Patkhor Peak."], "mentions": [{"id": 0, "start": 18, "end": 32, "ref_url": "Mountain_range", "ref_ids": ["19338"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 55, "ref_url": "Pamir_Mountains", "ref_ids": ["498462"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 69, "ref_url": "Tajikistan", "ref_ids": ["30108"], "sent_idx": 0}, {"id": 3, "start": 117, "end": 127, "ref_url": "Sarez_Lake", "ref_ids": ["2347051"], "sent_idx": 0}, {"id": 4, "start": 135, "end": 141, "ref_url": "Khorog", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 151, "end": 161, "ref_url": "Gunt_River", "ref_ids": ["2552382"], "sent_idx": 0}, {"id": 6, "start": 179, "end": 192, "ref_url": "Bartang_River", "ref_ids": ["2587819"], "sent_idx": 0}, {"id": 7, "start": 66, "end": 78, "ref_url": "Patkhor_Peak", "ref_ids": ["17891332"], "sent_idx": 1}]} +{"id": "17890361", "title": "James Sherard", "sentences": ["James Sherard (1 November 1666 – 12 February 1738) was an English apothecary, botanist, and amateur musician."], "mentions": [{"id": 0, "start": 66, "end": 76, 
"ref_url": "Apothecary", "ref_ids": ["1274598"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 86, "ref_url": "Botany", "ref_ids": ["4183"], "sent_idx": 0}]} +{"id": "17890365", "title": "Bayé", "sentences": ["Bayé is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 5,478."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890375", "title": "Bèna", "sentences": ["Bèna is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "it had a population of 11,963."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890380", "title": "Bialé", "sentences": ["Bialé is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,892."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890389", "title": "Bonza", "sentences": ["Bonza is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 5,209."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 45, 
"end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890391", "title": "Daboura", "sentences": ["Daboura is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 7,285."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890401", "title": "Darsalam, Solenzo", "sentences": ["Darsalam, Banwa is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,114."], "mentions": [{"id": 0, "start": 33, "end": 51, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 69, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890408", "title": "Dèssè", "sentences": ["Dèssè is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005, it had a population of 1,756."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890415", "title": "USS Covington (ID-1409)", "sentences": ["USS \"Covington\" (ID-1409) was a transport for the United States Navy during World War I. 
Prior to the war the ship, built in 1908 in Germany, was SS \"Cincinnati\" of the Hamburg America Line.", "The transport was torpedoed by \"U-86\" on 1 July 1918 and was scuttled the next day with six men killed."], "mentions": [{"id": 0, "start": 4, "end": 13, "ref_url": "Transport", "ref_ids": ["18580879"], "sent_idx": 1}, {"id": 1, "start": 50, "end": 68, "ref_url": "United_States_Navy", "ref_ids": ["20518076"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 87, "ref_url": "World_War_I", "ref_ids": ["4764461"], "sent_idx": 0}, {"id": 3, "start": 169, "end": 189, "ref_url": "Hamburg_America_Line", "ref_ids": ["1906375"], "sent_idx": 0}, {"id": 4, "start": 31, "end": 37, "ref_url": "SM_U-86", "ref_ids": ["17322026"], "sent_idx": 1}]} +{"id": "17890416", "title": "Dinkiéna", "sentences": ["Dinkiéna is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 4,593."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890418", "title": "Protein–lipid interaction", "sentences": ["Protein–lipid interaction is the influence of membrane proteins on the lipid physical state or vice versa.", "The questions which are relevant to understanding of the structure and function of the membrane are: 1) Do intrinsic membrane proteins bind tightly to lipids (see annular lipid shell), and what is the nature of the layer of lipids adjacent to the protein?", "2) Do membrane proteins have long-range effects on the order or dynamics of membrane lipids?", "3) How do the lipids influence the structure and/or function of membrane proteins?", "4) How do peripheral membrane proteins which bind to the layer surface interact with lipids and influence their 
behavior?"], "mentions": [{"id": 0, "start": 21, "end": 37, "ref_url": "Membrane_protein", "ref_ids": ["168369"], "sent_idx": 4}, {"id": 1, "start": 85, "end": 90, "ref_url": "Lipid", "ref_ids": ["17940"], "sent_idx": 4}, {"id": 2, "start": 77, "end": 91, "ref_url": "Physical_state", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 35, "end": 44, "ref_url": "Structure", "ref_ids": ["93545"], "sent_idx": 3}, {"id": 4, "start": 52, "end": 60, "ref_url": "Function_(biology)", "ref_ids": ["10013669"], "sent_idx": 3}, {"id": 5, "start": 163, "end": 182, "ref_url": "Annular_lipid_shell", "ref_ids": ["43114391"], "sent_idx": 1}, {"id": 6, "start": 10, "end": 37, "ref_url": "Peripheral_membrane_protein", "ref_ids": ["168372"], "sent_idx": 4}]} +{"id": "17890419", "title": "Denkoro", "sentences": ["Denkoro is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,490."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890421", "title": "Dira", "sentences": ["Dira is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,209."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890430", "title": "Cloud Chief", "sentences": ["Cloud Chief may refer to:"], "mentions": []} +{"id": "17890431", "title": "Dissankuy", "sentences": ["Dissankuy is a town in the Solenzo Department of Banwa Province in 
western Burkina Faso.", "As of 2005 it had a population of 2,794."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890436", "title": "Gnassoumadougou", "sentences": ["Gnassoumadougou is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,329."], "mentions": [{"id": 0, "start": 33, "end": 51, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 69, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890437", "title": "Manuel Doblado", "sentences": ["Manuel Doblado Partida (12 June 1818 – 19 June 1865) was a prominent Mexican liberal politician and lawyer who served as congressman, Governor of Guanajuato, Minister of Foreign Affairs (1861) in the cabinet of President Juárez and fought in the War of Reform.", "He was born in San Pedro, Piedra Gorda, Guanajuato, retired to the U.S. 
for health reasons, and died in New York City."], "mentions": [{"id": 0, "start": 69, "end": 76, "ref_url": "Mexico", "ref_ids": ["3966054"], "sent_idx": 0}, {"id": 1, "start": 77, "end": 84, "ref_url": "Liberalism_in_Mexico", "ref_ids": ["1181148"], "sent_idx": 0}, {"id": 2, "start": 134, "end": 156, "ref_url": "Governor_of_Guanajuato", "ref_ids": ["1788066"], "sent_idx": 0}, {"id": 3, "start": 221, "end": 227, "ref_url": "Benito_Juárez", "ref_ids": ["103360"], "sent_idx": 0}, {"id": 4, "start": 246, "end": 259, "ref_url": "War_of_Reform", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 40, "end": 50, "ref_url": "Guanajuato", "ref_ids": ["412827"], "sent_idx": 1}, {"id": 6, "start": 104, "end": 117, "ref_url": "New_York_City", "ref_ids": ["645042"], "sent_idx": 1}]} +{"id": "17890447", "title": "Kiè", "sentences": ["Kiè is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 6,212."], "mentions": [{"id": 0, "start": 21, "end": 39, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 57, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 81, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890449", "title": "Riihimäki Cocks", "sentences": ["Riihimäki Cocks () is a Finnish handball club from Riihimäki."], "mentions": [{"id": 0, "start": 24, "end": 31, "ref_url": "Finland", "ref_ids": ["10577"], "sent_idx": 0}, {"id": 1, "start": 32, "end": 40, "ref_url": "Team_handball", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 0, "end": 9, "ref_url": "Riihimäki", "ref_ids": ["784031"], "sent_idx": 0}]} +{"id": "17890455", "title": "Koakoa", "sentences": ["Koakoa is a small town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 881."], "mentions": [{"id": 0, "start": 30, "end": 48, "ref_url": "Solenzo_Department", "ref_ids": 
["16849264"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 66, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890458", "title": "Koma, Burkina Faso", "sentences": ["Koma is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,941."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890471", "title": "Hèrèdougou", "sentences": ["Hèrèdougou is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,205."], "mentions": [{"id": 0, "start": 28, "end": 46, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890474", "title": "WMDD", "sentences": ["WMDD (1480 AM, \"El 1480\") is a radio station licensed to serve Fajardo, Puerto Rico.", "The station is owned by Pan Caribbean Broadcasting de P.R., Inc.", "It airs a Spanish Tropical & news-talk format.", "The station was assigned the WMDD call letters by the Federal Communications Commission on November 26, 1947."], "mentions": [{"id": 0, "start": 11, "end": 13, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 44, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 45, "end": 53, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 63, "end": 83, 
"ref_url": "Fajardo,_Puerto_Rico", "ref_ids": ["149206"], "sent_idx": 0}, {"id": 4, "start": 10, "end": 26, "ref_url": "Spanish_Tropical", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 29, "end": 38, "ref_url": "News-talk", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 3}]} +{"id": "17890475", "title": "Trilogija 3: Ambasadori loše volje", "sentences": ["Trilogija 3: Ambasadori loše volje (Serbian Cyrillic: Трилогија 3: Амбасадори лоше воље, trans.", "\"Trilogy 3: Badwill Ambassadors\") is the third EP from Serbian and former Yugoslav rock band Riblja Čorba.", "It is the third and the final part of the Riblja Čorba trilogy released during 2005 and 2006.", "The band considers EPs \"\", \"\" and \"Trilogija 3: Ambasadori loše volje\" three parts of the studio album titled \"Trilogija\", although all three were released separately.", "All the songs from three EPs were released on the compilation album \"Trilogija\".", "Song \"Prezir\" is used in film \"Uslovna sloboda\"."], "mentions": [{"id": 0, "start": 36, "end": 52, "ref_url": "Serbian_Cyrillic", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 55, "end": 61, "ref_url": "Serbia", "ref_ids": ["29265"], "sent_idx": 1}, {"id": 2, "start": 67, "end": 82, "ref_url": "Socialist_Federal_Republic_of_Yugoslavia", "ref_ids": ["297809"], "sent_idx": 1}, {"id": 3, "start": 83, "end": 92, "ref_url": "Rock_band", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 42, "end": 54, "ref_url": "Riblja_Čorba", "ref_ids": ["13269687"], "sent_idx": 2}, {"id": 5, "start": 69, "end": 78, "ref_url": "Trilogija", "ref_ids": ["20255766"], "sent_idx": 4}]} +{"id": "17890483", "title": "Lanfiéra", "sentences": ["Lanfiéra is a village in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 617."], "mentions": [{"id": 0, "start": 29, "end": 47, "ref_url": "Solenzo_Department", 
"ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 65, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890490", "title": "Montionkuy", "sentences": ["Montionkuy is a village in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005, it had a population of 786."], "mentions": [{"id": 0, "start": 31, "end": 49, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 67, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890491", "title": "The Magician Out of Manchuria", "sentences": ["The Magician Out of Manchuria is a fantasy novel by Charles G. Finney.", "It was first published by itself in 1976 by Panther Books and later in a limited edition of 600 copies from Donald M. Grant, Publisher, Inc. 
which were signed and numbered.", "The novel was previously included in an expanded edition of the Finney's book \"The Unholy City\" in 1968."], "mentions": [{"id": 0, "start": 35, "end": 48, "ref_url": "Fantasy_novel", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 52, "end": 69, "ref_url": "Charles_G._Finney", "ref_ids": ["2095709"], "sent_idx": 0}, {"id": 2, "start": 36, "end": 40, "ref_url": "1976_in_literature", "ref_ids": ["191071"], "sent_idx": 1}, {"id": 3, "start": 44, "end": 57, "ref_url": "Panther_Books", "ref_ids": ["9546824"], "sent_idx": 1}, {"id": 4, "start": 108, "end": 140, "ref_url": "Donald_M._Grant,_Publisher,_Inc.", "ref_ids": null, "sent_idx": 1}]} +{"id": "17890494", "title": "London Seminary", "sentences": ["London Seminary (formerly London Theological Seminary) is an evangelical vocational training college located in Finchley, London, England."], "mentions": [{"id": 0, "start": 61, "end": 72, "ref_url": "Evangelicalism", "ref_ids": ["10370"], "sent_idx": 0}, {"id": 1, "start": 112, "end": 120, "ref_url": "Finchley", "ref_ids": ["94001"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 6, "ref_url": "London", "ref_ids": ["17867"], "sent_idx": 0}, {"id": 3, "start": 130, "end": 137, "ref_url": "England", "ref_ids": ["9316"], "sent_idx": 0}]} +{"id": "17890511", "title": "Lèkoro", "sentences": ["Lèkoro is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,668."], "mentions": [{"id": 0, "start": 24, "end": 42, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890512", "title": "Stephen D. Levene", "sentences": ["Dr. 
Stephen Levene is a biophysicist and professor of bioengineering, molecular biology, and physics at the University of Texas at Dallas."], "mentions": [{"id": 0, "start": 24, "end": 36, "ref_url": "Biophysics", "ref_ids": ["54000"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 68, "ref_url": "Biological_engineering", "ref_ids": ["6074674"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 87, "ref_url": "Molecular_biology", "ref_ids": ["19200"], "sent_idx": 0}, {"id": 3, "start": 93, "end": 100, "ref_url": "Physics", "ref_ids": ["22939"], "sent_idx": 0}, {"id": 4, "start": 108, "end": 137, "ref_url": "University_of_Texas_at_Dallas", "ref_ids": ["537010"], "sent_idx": 0}]} +{"id": "17890514", "title": "Jon Sesso", "sentences": ["Jon C. Sesso( born November 15, 1953) is an American politician of the Democratic Party.", "He is currently the Minority Leader of the Montana Senate, representing District 37 and has been since 2013.", "He was previously a member of the Montana House of Representatives, representing District 76, from 2004 to 2013."], "mentions": [{"id": 0, "start": 44, "end": 52, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 87, "ref_url": "Democratic_Party_(United_States)", "ref_ids": ["5043544"], "sent_idx": 0}, {"id": 2, "start": 43, "end": 57, "ref_url": "Montana_Senate", "ref_ids": ["970686"], "sent_idx": 1}, {"id": 3, "start": 34, "end": 66, "ref_url": "Montana_House_of_Representatives", "ref_ids": ["1688653"], "sent_idx": 2}]} +{"id": "17890515", "title": "Masso", "sentences": ["Masso is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,465."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": 
["3470"], "sent_idx": 0}]} +{"id": "17890518", "title": "Mawé, Burkina Faso", "sentences": ["Mawé is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,329."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890523", "title": "Moussakongo", "sentences": ["Moussakongo is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,310."], "mentions": [{"id": 0, "start": 29, "end": 47, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 65, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890525", "title": "Pouy, Burkina Faso", "sentences": ["Pouy, Burkina Faso is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,059."], "mentions": [{"id": 0, "start": 36, "end": 54, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 72, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 6, "end": 18, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890528", "title": "Sanakuy", "sentences": ["Sanakuy is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,327."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], 
"sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890534", "title": "Siguinonghin", "sentences": ["Siguinonghin is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 3,683."], "mentions": [{"id": 0, "start": 30, "end": 48, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 66, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890537", "title": "Bob Fisher", "sentences": ["Bob Fisher may refer to:"], "mentions": []} +{"id": "17890539", "title": "Toukoro, Banwa", "sentences": ["Toukoro is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 5,277."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890545", "title": "Yèrèssoro", "sentences": ["Yèrèssoro is a town in the Solenzo Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,812."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Solenzo_Department", "ref_ids": ["16849264"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890558", "title": "Downtown Amman", "sentences": ["Downtown Amman (, al-Balad) is the old commercial area of Amman, Jordan."], "mentions": [{"id": 0, "start": 9, "end": 14, "ref_url": "Amman", "ref_ids": ["45007"], 
"sent_idx": 0}, {"id": 1, "start": 65, "end": 71, "ref_url": "Jordan", "ref_ids": ["7515964"], "sent_idx": 0}]} +{"id": "17890562", "title": "Elburz Range forest steppe", "sentences": ["The Elburz Range forest steppe ecoregion is an arid, mountainous 1,000-kilometer arc south of the Caspian Sea, stretching across northern Iran from the Azerbaijan border to near the Turkmenistan border.", "It covers and encompasses the southern and eastern slopes of the Alborz Mountains as well as their summits.", "The Caspian Hyrcanian mixed forests ecoregion, with its lush green mountainsides and plains that receive moisture from the Caspian Sea, forms this ecoregion's northern border.", "The vast Central Persian desert basin ecoregion forms its southern border."], "mentions": [{"id": 0, "start": 38, "end": 47, "ref_url": "Ecoregion", "ref_ids": ["45383"], "sent_idx": 3}, {"id": 1, "start": 123, "end": 134, "ref_url": "Caspian_Sea", "ref_ids": ["19653787"], "sent_idx": 2}, {"id": 2, "start": 138, "end": 142, "ref_url": "Iran", "ref_ids": ["14653"], "sent_idx": 0}, {"id": 3, "start": 152, "end": 162, "ref_url": "Azerbaijan", "ref_ids": ["746"], "sent_idx": 0}, {"id": 4, "start": 182, "end": 194, "ref_url": "Turkmenistan", "ref_ids": ["198149"], "sent_idx": 0}, {"id": 5, "start": 65, "end": 81, "ref_url": "Alborz", "ref_ids": ["30876082"], "sent_idx": 1}, {"id": 6, "start": 4, "end": 35, "ref_url": "Caspian_Hyrcanian_mixed_forests", "ref_ids": ["18066901"], "sent_idx": 2}, {"id": 7, "start": 9, "end": 37, "ref_url": "Central_Persian_desert_basin", "ref_ids": null, "sent_idx": 3}]} +{"id": "17890571", "title": "Parnassius kiritshenkoi", "sentences": ["Parnassius kiritshenkoi is a high-altitude butterfly which is found only in the eastern Pamir Mountains.", "It is a member of the snow Apollo genus \"Parnassius\" of the swallowtail family, Papilionidae.", "For many years, \"P. 
kiritshenkoi\" was regarded as a subspecies of \"Parnassius staudingeri\".", "It is however sympatric with \"P. staudingeri mustagata\" Sarykolsky Mountains and with \"P. staudingeri illustris\" Zaalaisky Mountains."], "mentions": [{"id": 0, "start": 88, "end": 103, "ref_url": "Pamir_Mountains", "ref_ids": ["498462"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 77, "ref_url": "Parnassius", "ref_ids": ["7712308"], "sent_idx": 2}, {"id": 2, "start": 80, "end": 92, "ref_url": "Papilionidae", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 67, "end": 89, "ref_url": "Parnassius_staudingeri", "ref_ids": ["17922564"], "sent_idx": 2}, {"id": 4, "start": 14, "end": 23, "ref_url": "Sympatry", "ref_ids": ["1267220"], "sent_idx": 3}, {"id": 5, "start": 56, "end": 76, "ref_url": "Sarykolsky_Mountains", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 113, "end": 132, "ref_url": "Zaalaisky_Mountains", "ref_ids": null, "sent_idx": 3}]} +{"id": "17890594", "title": "Ben, Burkina Faso", "sentences": ["Ben is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 901.", "It lies near the border with Mali."], "mentions": [{"id": 0, "start": 24, "end": 42, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}, {"id": 3, "start": 29, "end": 33, "ref_url": "Mali", "ref_ids": ["19127"], "sent_idx": 2}]} +{"id": "17890600", "title": "Christopher Freeman", "sentences": ["Christopher Freeman (11 September 1921 – 16 August 2010) was a British economist, the founder and first director of Science Policy Research Unit at the University of Sussex, and one of the most eminent researchers in innovation studies, modern Kondratiev wave and business cycle theorists.", "Freeman contributed substantially to the 
revival of the neo-Schumpeterian tradition focusing on the crucial role of innovation for economic development and of scientific and technological activities for well-being."], "mentions": [{"id": 0, "start": 63, "end": 70, "ref_url": "British_people", "ref_ids": ["19097669"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 80, "ref_url": "Economist", "ref_ids": ["57349"], "sent_idx": 0}, {"id": 2, "start": 116, "end": 144, "ref_url": "SPRU", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 152, "end": 172, "ref_url": "University_of_Sussex", "ref_ids": ["32045"], "sent_idx": 0}, {"id": 4, "start": 244, "end": 259, "ref_url": "Kondratiev_wave", "ref_ids": ["17282"], "sent_idx": 0}, {"id": 5, "start": 264, "end": 278, "ref_url": "Business_cycle", "ref_ids": ["168918"], "sent_idx": 0}, {"id": 6, "start": 56, "end": 73, "ref_url": "Joseph_Schumpeter", "ref_ids": ["15827"], "sent_idx": 1}]} +{"id": "17890609", "title": "Bouan, Burkina Faso", "sentences": ["Bouan is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 682."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890623", "title": "Faso-benkadi", "sentences": ["Faso-benkadi is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 917."], "mentions": [{"id": 0, "start": 33, "end": 51, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 69, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890631", "title": "Hal Jacobson", 
"sentences": ["Hal Jacobson is a Democratic Party member of the Montana House of Representatives, representing District 82 since 2000."], "mentions": [{"id": 0, "start": 18, "end": 34, "ref_url": "Democratic_Party_(United_States)", "ref_ids": ["5043544"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 81, "ref_url": "Montana_House_of_Representatives", "ref_ids": ["1688653"], "sent_idx": 0}]} +{"id": "17890634", "title": "Féléwé", "sentences": ["Féléwé is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 144."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890638", "title": "Tsai Yi-chen", "sentences": ["Tsai Yi-chen (), born 23 August 1987, stage named Wu Xiong (五熊).", "She is a Taiwanese actress who is best known for her minor performance in Taiwanese series, KO One, as Tsai Wu Xiong, and for her lead performance in \"Summer x Summer\" as Xia Ya.", "Her older sister, Tsai Han-tsen (蔡芷紜), also known as \"Han\" (寒), is also an actress."], "mentions": [{"id": 0, "start": 9, "end": 15, "ref_url": "Taiwan", "ref_ids": ["25734"], "sent_idx": 1}, {"id": 1, "start": 92, "end": 98, "ref_url": "KO_One", "ref_ids": ["10863255"], "sent_idx": 1}, {"id": 2, "start": 151, "end": 166, "ref_url": "Summer_x_Summer", "ref_ids": ["10573623"], "sent_idx": 1}, {"id": 3, "start": 18, "end": 31, "ref_url": "Tsai_Han-tsen", "ref_ids": null, "sent_idx": 2}]} +{"id": "17890640", "title": "Gui, Burkina Faso", "sentences": ["Gui is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 666."], "mentions": [{"id": 0, "start": 24, "end": 42, "ref_url": "Tansila_Department", "ref_ids": 
["16849335"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890646", "title": "Kira, Burkina Faso", "sentences": ["Kira is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 671."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890648", "title": "Abersychan Low Level railway station", "sentences": ["Abersychan Low Level railway station served the centre of Abersychan village in the Welsh county of Monmouthshire.", "It was located near the junction of the A4043 and the B4246 at the eastern end of the village."], "mentions": [{"id": 0, "start": 0, "end": 10, "ref_url": "Abersychan", "ref_ids": ["2716"], "sent_idx": 0}, {"id": 1, "start": 84, "end": 89, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 2, "start": 100, "end": 113, "ref_url": "Monmouthshire_(historic)", "ref_ids": ["370053"], "sent_idx": 0}, {"id": 3, "start": 40, "end": 45, "ref_url": "A4043_road_(Great_Britain)", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 54, "end": 59, "ref_url": "B4246_road", "ref_ids": null, "sent_idx": 1}]} +{"id": "17890649", "title": "Korani, Burkina Faso", "sentences": ["Korani is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 767."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, 
{"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890650", "title": "British Cartographic Society", "sentences": ["The British Cartographic Society (BCS) is an association of individuals and organisations dedicated to exploring and developing the world of maps.", "It is a registered charity.", "Membership includes mapping companies, publishers, designers, academics, researchers, map curators, individual cartographers, GIS specialists and ordinary members of the public with an interest in maps.", "The BCS is regarded as one of the world's leading cartographic societies and its main publication, The Cartographic Journal, is recognised internationally.", "Membership of the Society can be useful for making contacts and keeping up with developments.", "The BCS promotes all aspects of cartography to a wide range of potential users."], "mentions": [{"id": 0, "start": 8, "end": 26, "ref_url": "Charitable_organization", "ref_ids": ["1176679"], "sent_idx": 1}, {"id": 1, "start": 126, "end": 129, "ref_url": "GIS", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 20, "end": 23, "ref_url": "Map", "ref_ids": ["19877"], "sent_idx": 2}, {"id": 3, "start": 32, "end": 43, "ref_url": "Cartography", "ref_ids": ["7294"], "sent_idx": 5}]} +{"id": "17890653", "title": "Moara, Burkina Faso", "sentences": ["Moara is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 875."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890660", "title": "Persepolis VC", "sentences": ["Persepolis Volleyball Club () is an Iranian Volleyball club based in Tehran, Iran."], "mentions": [{"id": 0, 
"start": 36, "end": 40, "ref_url": "Iran", "ref_ids": ["14653"], "sent_idx": 0}, {"id": 1, "start": 11, "end": 21, "ref_url": "Volleyball", "ref_ids": ["32558"], "sent_idx": 0}, {"id": 2, "start": 69, "end": 75, "ref_url": "Tehran", "ref_ids": ["57654"], "sent_idx": 0}, {"id": 3, "start": 36, "end": 40, "ref_url": "Iran", "ref_ids": ["14653"], "sent_idx": 0}]} +{"id": "17890661", "title": "Ouléni", "sentences": ["Ouléni is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 326."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 63, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890667", "title": "Tamouga", "sentences": ["Tamouga is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 560."], "mentions": [{"id": 0, "start": 28, "end": 46, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 64, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890674", "title": "Thy, Burkina Faso", "sentences": ["Thy is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 435."], "mentions": [{"id": 0, "start": 24, "end": 42, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890679", "title": "Tillé, Burkina Faso", "sentences": ["Tillé is a village in the 
Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 718."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890680", "title": "Maurice Camyré", "sentences": ["Maurice Camyré (March 10, 1915 – January 15, 2013) was a Canadian boxer who competed in the 1936 Summer Olympics.", "He was born in St. Vital.", "In 1936 he was eliminated in the first round of the welterweight class after losing his fight to Chester Rutecki."], "mentions": [{"id": 0, "start": 57, "end": 65, "ref_url": "Canadians", "ref_ids": ["19851291"], "sent_idx": 0}, {"id": 1, "start": 66, "end": 71, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 92, "end": 112, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 24, "ref_url": "St._Vital", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 52, "end": 70, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 97, "end": 112, "ref_url": "Chester_Rutecki", "ref_ids": ["18418158"], "sent_idx": 2}]} +{"id": "17890687", "title": "Toma Koura", "sentences": ["Toma Koura is a village in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 150."], "mentions": [{"id": 0, "start": 31, "end": 49, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 67, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890694", "title": "Triko", "sentences": ["Triko is a village in the 
Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 687."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890700", "title": "Darsalam, Tansila", "sentences": ["Darsalam is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,107."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890709", "title": "Tansila", "sentences": ["Tansila is a town and seat of the Tansila Department of Banwa Province in western Burkina Faso.", "In 2005 it had a population of 3,876."], "mentions": [{"id": 0, "start": 34, "end": 52, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 70, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890717", "title": "Douma, Burkina Faso", "sentences": ["Douma is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,154."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} 
+{"id": "17890723", "title": "Driko", "sentences": ["Driko is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,050."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890737", "title": "Kéllé", "sentences": ["Kéllé is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,343."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890739", "title": "Modular Neutron Array", "sentences": ["The Modular Neutron Array (MoNA) is a large-area, high efficiency neutron detector that is used in basic research of rare isotopes at Michigan State University's National Superconducting Cyclotron Laboratory (NSCL), a nuclear physics research facility.", "It is specifically designed for detecting neutrons stemming from breakup reactions of fast fragmentation beams."], "mentions": [{"id": 0, "start": 66, "end": 82, "ref_url": "Neutron_detector", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 134, "end": 159, "ref_url": "Michigan_State_University", "ref_ids": ["241128"], "sent_idx": 0}, {"id": 2, "start": 162, "end": 207, "ref_url": "National_Superconducting_Cyclotron_Laboratory", "ref_ids": ["325996"], "sent_idx": 0}, {"id": 3, "start": 42, "end": 50, "ref_url": "Neutrons", "ref_ids": null, "sent_idx": 1}]} +{"id": "17890742", "title": "Nakoleia", "sentences": ["Nakoleia (), Latinized as Nacolia 
or Nacolea, was an ancient and medieval city in Phrygia.", "It corresponds to present-day Seyitgazi, Eskişehir Province in the Central Anatolia region of Turkey."], "mentions": [{"id": 0, "start": 13, "end": 22, "ref_url": "List_of_Latinised_names", "ref_ids": ["9204283"], "sent_idx": 0}, {"id": 1, "start": 82, "end": 89, "ref_url": "Phrygia", "ref_ids": ["21491716"], "sent_idx": 0}, {"id": 2, "start": 30, "end": 39, "ref_url": "Seyitgazi", "ref_ids": ["4116441"], "sent_idx": 1}, {"id": 3, "start": 41, "end": 59, "ref_url": "Eskişehir_Province", "ref_ids": ["886659"], "sent_idx": 1}, {"id": 4, "start": 67, "end": 83, "ref_url": "Central_Anatolia", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 94, "end": 100, "ref_url": "Turkey", "ref_ids": ["11125639"], "sent_idx": 1}]} +{"id": "17890745", "title": "Kokouna", "sentences": ["Kokouna is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,385."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890755", "title": "Kouneni", "sentences": ["Kouneni is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,128."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890757", "title": "C.F. 
Os Belenenses (rugby union)", "sentences": ["Belenenses is a Portuguese rugby union team.", "Apart from winning several titles as a team, they have also provided five players to the National Team that played at the 2007 Rugby World Cup finals.", "Their most recent title was the victory in the National Championship of the 2007/2008 season."], "mentions": [{"id": 0, "start": 27, "end": 38, "ref_url": "Rugby_union", "ref_ids": ["25405"], "sent_idx": 0}, {"id": 1, "start": 122, "end": 142, "ref_url": "2007_Rugby_World_Cup", "ref_ids": ["375446"], "sent_idx": 1}]} +{"id": "17890765", "title": "Nangouna", "sentences": ["Nangouna is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "it had a population of 1,483."], "mentions": [{"id": 0, "start": 26, "end": 44, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 62, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 86, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890776", "title": "Ouorowé", "sentences": ["Ouorowé is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 2,479."], "mentions": [{"id": 0, "start": 25, "end": 43, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 61, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 73, "end": 85, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890781", "title": "Toma, Banwa", "sentences": ["Toma is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,459."], "mentions": [{"id": 0, "start": 22, "end": 40, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 58, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, 
{"id": 2, "start": 70, "end": 82, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890791", "title": "Toula, Burkina Faso", "sentences": ["Toula is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,407."], "mentions": [{"id": 0, "start": 23, "end": 41, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 59, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 83, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890799", "title": "Toungo, Burkina Faso", "sentences": ["Toungo is a town in the Tansila Department of Banwa Province in western Burkina Faso.", "As of 2005 it had a population of 1,350."], "mentions": [{"id": 0, "start": 24, "end": 42, "ref_url": "Tansila_Department", "ref_ids": ["16849335"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 60, "ref_url": "Banwa_Province", "ref_ids": ["2874525"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 84, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17890812", "title": "Imre Mándi", "sentences": ["Imre Mándi (; 22 November 1916 – 1943) was a Hungarian boxer who competed in the 1936 Summer Olympics.", "He was eliminated in the quarterfinals of the welterweight class after losing his fight to the upcoming gold medalist Sten Suvio.", "Next year he won a silver medal at the European championships.", "Mándi was Jewish and died in a Nazi labor camp during World War II."], "mentions": [{"id": 0, "start": 45, "end": 54, "ref_url": "Hungary", "ref_ids": ["13275"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 60, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 101, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 46, "end": 64, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", 
"ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 118, "end": 128, "ref_url": "Sten_Suvio", "ref_ids": ["13843913"], "sent_idx": 1}]} +{"id": "17890816", "title": "Bloxwich United", "sentences": ["Bloxwich United may refer to one of two English association football clubs:"], "mentions": [{"id": 0, "start": 48, "end": 68, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}]} +{"id": "17890892", "title": "Frank Howard (columnist)", "sentences": ["Frank Howard (3 January 1931 – 26 February 2008) was a Canadian journalist and columnist who wrote for the \"Ottawa Citizen\", \"The Globe and Mail\", the \"Montreal Gazette\", the \"Montreal Star\", and the \"Quebec Chronicle-Telegraph\".", "He was born on January 3, 1931 in Montreal, Quebec, Canada to anglophone parents, but grew up in a francophone community attending l'Academie Roussin in Pointe-aux-Trembles.", "As a young man, he also attended Queen's University in Kingston, Ontario, returning to Quebec (the francophone province) in the 1950s and 60s to cover the Quiet Revolution for the anglophone press.", "As a bilingual anglophone writing during the 1960s, he was an influential figure in the Canadian political scene at a time when there was little communication between anglophone and francophone communities.", "According to John Gray of \"The Globe and Mail\", Frank Howard sought to introduce English and French Canada to one another.", "During the Quiet Revolution, nationalist sentiment ran high and the two ethnicities were seen as something like \"Two Solitudes\".", "As an anglophone and a political moderate, Frank Howard was sympathetic to Quebec grievances without supporting separatist goals.", "At the \"Gazette\", and later at \"The Globe and Mail\", Howard broke many important stories in English Canada including the infamous \"Vive le Québec libre\" speech by Charles de Gaulle as well as covering other seminal moments in Quebec history, such as the founding of the Parti Québécois and the 
nationalization of Hydro-Québec.", "He worked with both René Lévesque (who became the first separatist Premier of Quebec) and Pierre Trudeau (who was the Prime Minister of Canada).", "In 1969, Frank Howard was recruited by the Canadian federal government under Trudeau for work in the Department of Communications (he became Director of Information under Eric Kierans).", "There, among other things, he wrote speeches for Kierans during the October crisis.", "He left the civil service in the 1970s and began a daily column on the federal bureaucracy.", "The column, called \"The Bureaucrats\", ran in the \"Ottawa Citizen\" for 20 years.", "He died on February 26, 2008 in Mexico of complications related to lung cancer."], "mentions": [{"id": 0, "start": 64, "end": 74, "ref_url": "Journalist", "ref_ids": ["50100"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 88, "ref_url": "Columnist", "ref_ids": ["437090"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 64, "ref_url": "Ottawa_Citizen", "ref_ids": ["1168864"], "sent_idx": 12}, {"id": 3, "start": 32, "end": 50, "ref_url": "The_Globe_and_Mail", "ref_ids": ["234340"], "sent_idx": 7}, {"id": 4, "start": 152, "end": 168, "ref_url": "Montreal_Gazette", "ref_ids": ["7954911"], "sent_idx": 0}, {"id": 5, "start": 176, "end": 189, "ref_url": "Montreal_Star", "ref_ids": ["4381884"], "sent_idx": 0}, {"id": 6, "start": 201, "end": 227, "ref_url": "Quebec_Chronicle-Telegraph", "ref_ids": ["1481820"], "sent_idx": 0}, {"id": 7, "start": 34, "end": 50, "ref_url": "Montreal", "ref_ids": ["7954681"], "sent_idx": 1}, {"id": 8, "start": 33, "end": 51, "ref_url": "Queen's_University", "ref_ids": ["7955551"], "sent_idx": 2}, {"id": 9, "start": 78, "end": 84, "ref_url": "Quebec", "ref_ids": ["7954867"], "sent_idx": 8}, {"id": 10, "start": 11, "end": 27, "ref_url": "Quiet_Revolution", "ref_ids": ["195163"], "sent_idx": 5}, {"id": 11, "start": 13, "end": 22, "ref_url": "John_Gray_(Canadian_author)", "ref_ids": ["1882141"], "sent_idx": 4}, {"id": 
12, "start": 113, "end": 126, "ref_url": "Two_Solitudes_(Canadian_society)", "ref_ids": ["19245773"], "sent_idx": 5}, {"id": 13, "start": 131, "end": 151, "ref_url": "Vive_le_Québec_libre", "ref_ids": ["856624"], "sent_idx": 7}, {"id": 14, "start": 163, "end": 180, "ref_url": "Charles_de_Gaulle", "ref_ids": ["51255"], "sent_idx": 7}, {"id": 15, "start": 270, "end": 285, "ref_url": "Parti_Québécois", "ref_ids": ["106367"], "sent_idx": 7}, {"id": 16, "start": 313, "end": 325, "ref_url": "Hydro-Québec", "ref_ids": ["484651"], "sent_idx": 7}, {"id": 17, "start": 20, "end": 33, "ref_url": "René_Lévesque", "ref_ids": ["177359"], "sent_idx": 8}, {"id": 18, "start": 90, "end": 104, "ref_url": "Pierre_Trudeau", "ref_ids": ["24507"], "sent_idx": 8}, {"id": 19, "start": 171, "end": 183, "ref_url": "Eric_Kierans", "ref_ids": ["542945"], "sent_idx": 9}, {"id": 20, "start": 68, "end": 82, "ref_url": "October_crisis", "ref_ids": null, "sent_idx": 10}]} +{"id": "17890895", "title": "WLRP", "sentences": ["WLRP (1460 AM, \"Radio Raíces 1460 AM\") is a radio station licensed to serve San Sebastián, Puerto Rico.", "The station is owned by Las Raíces Pepinianas, Inc.", "The station has been on the air since January 6, 1965.", "It airs news, talk shows, and music in Spanish (Spanish Variety).", "The station was assigned the WLRP call letters by the Federal Communications Commission on July 12, 1982."], "mentions": [{"id": 0, "start": 11, "end": 13, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 57, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 58, "end": 66, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 76, "end": 102, "ref_url": "San_Sebastián,_Puerto_Rico", "ref_ids": ["654491"], "sent_idx": 0}, {"id": 4, "start": 48, "end": 63, "ref_url": "Spanish_Variety", "ref_ids": null, "sent_idx": 3}, {"id": 5, "start": 54, "end": 87, "ref_url": 
"Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 4}, {"id": 6, "start": 100, "end": 104, "ref_url": "1982_in_radio", "ref_ids": ["13645275"], "sent_idx": 4}]} +{"id": "17890907", "title": "Yack Arena", "sentences": ["Benjamin F. Yack Arena is a 3,000-seat indoor arena located in Wyandotte, Michigan.", "It is used for ice hockey and ice skating, and was home of the Motor City Metal Jackets of the North American Hockey League.", "The arena has of space for trade shows, conventions, festivals, and other special events.", "One event is the Spring Fling hosted by the Parish of St. Vincent Pallotti."], "mentions": [{"id": 0, "start": 63, "end": 82, "ref_url": "Wyandotte,_Michigan", "ref_ids": ["89518"], "sent_idx": 0}, {"id": 1, "start": 63, "end": 87, "ref_url": "Motor_City_Metal_Jackets", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 95, "end": 123, "ref_url": "North_American_Hockey_League", "ref_ids": ["1061938"], "sent_idx": 1}]} +{"id": "17890924", "title": "Yale (1916 automobile)", "sentences": ["The Yale was a Brass Era car manufactured in Saginaw, Michigan from 1916 to 1918, not to be confused with the Yale from Toledo, Ohio.", "The man behind this Yale was Louis J. 
Lampke, who previously had been with Palmer-Singer and Lion.", "He then created a car he had designed himself, this was the MPM of Mount Pleasant, Michigan.", "In early 1915, Lampke travelled to Saginaw in order to acquire funding to transfer his company there.", "Apparently, the people of Saginaw were not interested in the MPM, but were interested in Lampke himself.", "He therefore stayed in town and created the Saginaw Motor Company in June 1916.", "Various local businessmen were in the executive positions, while Lampke was in charge of product development.", "The plan was to call the automobile the Saginaw, but the Lehr Motor Company across town had already beaten them to the name.", "The Saginaw Motor people thus got together and settled on the Yale name."], "mentions": [{"id": 0, "start": 15, "end": 28, "ref_url": "Brass_Era_car", "ref_ids": ["590457"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 62, "ref_url": "Saginaw,_Michigan", "ref_ids": ["119000"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 66, "ref_url": "Yale_(automobile)", "ref_ids": ["1936055"], "sent_idx": 8}, {"id": 3, "start": 120, "end": 132, "ref_url": "Toledo,_Ohio", "ref_ids": ["30849"], "sent_idx": 0}, {"id": 4, "start": 29, "end": 44, "ref_url": "Louis_J._Lampke", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 75, "end": 88, "ref_url": "Palmer-Singer", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 93, "end": 97, "ref_url": "Lion_(automobile)", "ref_ids": ["11489944"], "sent_idx": 1}, {"id": 7, "start": 61, "end": 64, "ref_url": "MPM_(automobile)", "ref_ids": ["11749471"], "sent_idx": 4}, {"id": 8, "start": 67, "end": 91, "ref_url": "Mount_Pleasant,_Michigan", "ref_ids": ["117747"], "sent_idx": 2}, {"id": 9, "start": 25, "end": 35, "ref_url": "Automobile", "ref_ids": null, "sent_idx": 7}, {"id": 10, "start": 4, "end": 11, "ref_url": "Saginaw_(automobile)", "ref_ids": ["51603149"], "sent_idx": 8}]} +{"id": "17890925", "title": "Umberto Pittori", "sentences": ["Umberto Pittori 
(May 4, 1913 in Trieste, Austria-Hungary – 1965) was an Italian boxer who competed in the 1936 Summer Olympics.", "In 1936 he was eliminated in the first round of the welterweight class after losing his fight to Imre Mándi."], "mentions": [{"id": 0, "start": 32, "end": 39, "ref_url": "Trieste", "ref_ids": ["56092"], "sent_idx": 0}, {"id": 1, "start": 41, "end": 56, "ref_url": "Austria-Hungary", "ref_ids": ["2983"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 79, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 3, "start": 80, "end": 85, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 4, "start": 106, "end": 126, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 5, "start": 52, "end": 70, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 97, "end": 107, "ref_url": "Imre_Mándi", "ref_ids": ["17890812"], "sent_idx": 1}]} +{"id": "17890932", "title": "Coralville Lake", "sentences": ["Coralville Lake is an artificial lake in Johnson County, Iowa, US, formed by the Coralville Dam, a dam built from 1949-1958 on the Iowa River upstream from the city of Coralville, Iowa."], "mentions": [{"id": 0, "start": 22, "end": 37, "ref_url": "Reservoir", "ref_ids": ["3292675"], "sent_idx": 0}, {"id": 1, "start": 41, "end": 55, "ref_url": "Johnson_County,_Iowa", "ref_ids": ["95984"], "sent_idx": 0}, {"id": 2, "start": 57, "end": 61, "ref_url": "Iowa", "ref_ids": ["26810748"], "sent_idx": 0}, {"id": 3, "start": 63, "end": 65, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 4, "start": 131, "end": 141, "ref_url": "Iowa_River", "ref_ids": ["458256"], "sent_idx": 0}, {"id": 5, "start": 168, "end": 184, "ref_url": "Coralville,_Iowa", "ref_ids": ["113931"], "sent_idx": 0}]} +{"id": "17890943", "title": "JAM IP", "sentences": ["JAM IP is a contact centre services organisation, specialising in consulting, professional services, 
software development, systems integration and managed services.", "It trades independently within the KCOM Group of companies.", "Its main office is based in Maidenhead, Berkshire, with an office in Hull, East Riding of Yorkshire.", "JAM IP develops and consults on applications for IP contact centre management such as text to speech, voice authentication, self serve applications, and speech recognition."], "mentions": [{"id": 0, "start": 35, "end": 45, "ref_url": "KCOM_Group", "ref_ids": ["1161482"], "sent_idx": 1}, {"id": 1, "start": 28, "end": 38, "ref_url": "Maidenhead", "ref_ids": ["206470"], "sent_idx": 2}, {"id": 2, "start": 40, "end": 49, "ref_url": "Berkshire", "ref_ids": ["64951"], "sent_idx": 2}, {"id": 3, "start": 69, "end": 73, "ref_url": "Kingston_upon_Hull", "ref_ids": ["56474"], "sent_idx": 2}, {"id": 4, "start": 75, "end": 99, "ref_url": "East_Riding_of_Yorkshire", "ref_ids": ["153612"], "sent_idx": 2}]} +{"id": "17890952", "title": "1999 World Horticultural Exposition", "sentences": ["The 1999 World Horticultural Exposition (昆明世博会) was an A1 category international horticultural exposition recognised by the Bureau International des Expositions (BIE).", "Organised under the auspices of the International Association of Horticultural Producers, the event was held in Kunming, China.", "The theme of the exposition was \"Man and Nature, marching into the 21st century.\"", "The event lasted from April 30 to October 31, 1999, and attracted 9.5 million visitors."], "mentions": [{"id": 0, "start": 81, "end": 105, "ref_url": "Horticultural_exposition", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 124, "end": 160, "ref_url": "Bureau_International_des_Expositions", "ref_ids": ["100488"], "sent_idx": 0}, {"id": 2, "start": 36, "end": 88, "ref_url": "AIPH", "ref_ids": ["30370268"], "sent_idx": 1}, {"id": 3, "start": 112, "end": 119, "ref_url": "Kunming", "ref_ids": ["197181"], "sent_idx": 1}, {"id": 4, "start": 121, "end": 126, "ref_url": "China", 
"ref_ids": ["5405"], "sent_idx": 1}]} +{"id": "17891059", "title": "Opus Magnum", "sentences": ["Opus Magnum is the third studio album by Austrian melodic death metal band Hollenthon, released by Napalm Records in 2008.", "Limited edition digipack contains bonus track, \"The Bazaar\" (originally performed by The Tea Party) and video clip for \"Son of Perdition\"."], "mentions": [{"id": 0, "start": 25, "end": 37, "ref_url": "Studio_album", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 41, "end": 48, "ref_url": "Austria", "ref_ids": ["26964606"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 69, "ref_url": "Melodic_death_metal", "ref_ids": ["633174"], "sent_idx": 0}, {"id": 3, "start": 75, "end": 85, "ref_url": "Hollenthon", "ref_ids": ["1970390"], "sent_idx": 0}, {"id": 4, "start": 99, "end": 113, "ref_url": "Napalm_Records", "ref_ids": ["3818243"], "sent_idx": 0}, {"id": 5, "start": 16, "end": 24, "ref_url": "Digipack", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 85, "end": 98, "ref_url": "The_Tea_Party", "ref_ids": ["288166"], "sent_idx": 1}]} +{"id": "17891078", "title": "Anglican Bishop of Nottingham", "sentences": ["The Anglican Bishop of Nottingham was an episcopal title used by a Church of England suffragan bishop.", "The title took its name after the county town of Nottingham and was first created under the Suffragan Bishops Act 1534.", "Until 1837, Nottingham had been part of the Diocese of York, when it then became part of the Diocese of Lincoln.", "With the creation of the Diocese of Southwell in 1884, Nottingham became part of that diocese, but the then- (and final) bishop remained suffragan to Lincoln.", "Since 2005, Nottingham gives its name to the Diocese of Southwell and Nottingham."], "mentions": [{"id": 0, "start": 41, "end": 50, "ref_url": "Episcopal_polity", "ref_ids": ["10067"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 84, "ref_url": "Church_of_England", "ref_ids": ["5955"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 101, 
"ref_url": "Suffragan_bishop", "ref_ids": ["296341"], "sent_idx": 0}, {"id": 3, "start": 34, "end": 45, "ref_url": "County_town", "ref_ids": ["7407"], "sent_idx": 1}, {"id": 4, "start": 12, "end": 22, "ref_url": "Nottingham", "ref_ids": ["39470"], "sent_idx": 4}, {"id": 5, "start": 92, "end": 118, "ref_url": "Suffragan_Bishops_Act_1534", "ref_ids": ["6457574"], "sent_idx": 1}, {"id": 6, "start": 44, "end": 59, "ref_url": "Diocese_of_York", "ref_ids": ["2321009"], "sent_idx": 2}, {"id": 7, "start": 93, "end": 111, "ref_url": "Diocese_of_Lincoln", "ref_ids": ["1445068"], "sent_idx": 2}, {"id": 8, "start": 45, "end": 65, "ref_url": "Diocese_of_Southwell", "ref_ids": null, "sent_idx": 4}, {"id": 9, "start": 45, "end": 80, "ref_url": "Diocese_of_Southwell_and_Nottingham", "ref_ids": ["3489741"], "sent_idx": 4}]} +{"id": "17891100", "title": "Lords and Counts of Harcourt", "sentences": ["When the Viking chieftain Rollo obtained via the Treaty of Saint-Clair-sur-Epte the territories which would later make up Normandy, he distributed them as estates among his main supporters.", "Among these lands were the seigneurie of Harcourt, near Brionne, and the county of Pont-Audemer, both of which Rollo granted to Bernard the Dane, ancestor of the lords (\"seigneurs\") of Harcourt.", "The first to use Harcourt as a name, however, was Anquetil d'Harcourt at the start of the 11th century."], "mentions": [{"id": 0, "start": 111, "end": 116, "ref_url": "Rollo", "ref_ids": ["42518"], "sent_idx": 1}, {"id": 1, "start": 49, "end": 79, "ref_url": "Treaty_of_Saint-Clair-sur-Epte", "ref_ids": ["1476272"], "sent_idx": 0}, {"id": 2, "start": 122, "end": 130, "ref_url": "Normandy", "ref_ids": ["21724"], "sent_idx": 0}, {"id": 3, "start": 27, "end": 37, "ref_url": "Fiefdom", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 17, "end": 25, "ref_url": "Harcourt,_Eure", "ref_ids": ["1166715"], "sent_idx": 2}, {"id": 5, "start": 56, "end": 63, "ref_url": "Brionne", "ref_ids": ["12822224"], 
"sent_idx": 1}, {"id": 6, "start": 83, "end": 95, "ref_url": "Pont-Audemer", "ref_ids": ["1216806"], "sent_idx": 1}, {"id": 7, "start": 128, "end": 144, "ref_url": "Bernard_the_Dane", "ref_ids": ["15776031"], "sent_idx": 1}, {"id": 8, "start": 50, "end": 69, "ref_url": "Anquetil_d'Harcourt", "ref_ids": null, "sent_idx": 2}]} +{"id": "17891105", "title": "Frith Banbury", "sentences": ["Frederick Harold Frith Banbury MBE (4 May 1912 – 14 May 2008) was a British theatre actor and director.", "Frith Banbury was born in Plymouth, Devon, on 4 May 1912, the son of Rear Admiral Frederick Arthur Frith Banbury and his wife Winifred (née Fink).", "While attending Stowe School, Banbury rejected his father's naval background by refusing to join the Officer Training Corps, later being registered as a conscientious objector, enabling him to continue acting throughout the Second World War.", "He went on to attend Hertford College, Oxford, though he left after one year without obtaining an academic degree.", "He trained for the stage at the Royal Academy of Dramatic Art alongside Joan Littlewood, Rachel Kempson, Robert Morley, and Peter Bull.", "Banbury died on 14 May 2008, at the age of 96."], "mentions": [{"id": 0, "start": 31, "end": 34, "ref_url": "Order_of_the_British_Empire", "ref_ids": ["212182"], "sent_idx": 0}, {"id": 1, "start": 26, "end": 34, "ref_url": "Plymouth", "ref_ids": ["23508"], "sent_idx": 1}, {"id": 2, "start": 36, "end": 41, "ref_url": "Devon", "ref_ids": ["8166"], "sent_idx": 1}, {"id": 3, "start": 69, "end": 81, "ref_url": "Rear_Admiral", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 16, "end": 28, "ref_url": "Stowe_School", "ref_ids": ["1222596"], "sent_idx": 2}, {"id": 5, "start": 101, "end": 123, "ref_url": "Officer_Training_Corps", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 153, "end": 175, "ref_url": "Conscientious_objector", "ref_ids": ["83003"], "sent_idx": 2}, {"id": 7, "start": 224, "end": 240, "ref_url": "Second_World_War", 
"ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 21, "end": 45, "ref_url": "Hertford_College,_Oxford", "ref_ids": ["128344"], "sent_idx": 3}, {"id": 9, "start": 32, "end": 61, "ref_url": "Royal_Academy_of_Dramatic_Art", "ref_ids": ["163026"], "sent_idx": 4}, {"id": 10, "start": 72, "end": 87, "ref_url": "Joan_Littlewood", "ref_ids": ["162997"], "sent_idx": 4}, {"id": 11, "start": 89, "end": 103, "ref_url": "Rachel_Kempson", "ref_ids": ["409305"], "sent_idx": 4}, {"id": 12, "start": 105, "end": 118, "ref_url": "Robert_Morley", "ref_ids": ["319233"], "sent_idx": 4}, {"id": 13, "start": 124, "end": 134, "ref_url": "Peter_Bull", "ref_ids": ["5825016"], "sent_idx": 4}]} +{"id": "17891107", "title": "Thomas Arbuthnott", "sentences": ["Thomas Bone Arbuthnott (29 June 1911 – 20 January 1995) was a New Zealand welterweight boxer.", "He competed at the 1936 Summer Olympics, but was eliminated in his first bout.", "Arbuthnott was born in Glasgow, Scotland, to David Arbuthnott and Mary Robertson, née Bone; he had five siblings.", "Arbuthnott married in 1939 to Phyllis Reeves; they had a daughter, Kay."], "mentions": [{"id": 0, "start": 62, "end": 73, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 39, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 1}, {"id": 2, "start": 67, "end": 77, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891122", "title": "Liber comicus (disambiguation)", "sentences": ["Liber Comicus is the name of the oldest lectionary from the Iberian Peninsula.", "\"Liber comicus\" means \"lectionary\" in Latin.", "Liber comicus may also refer to the following manuscripts:"], "mentions": [{"id": 0, "start": 0, "end": 13, "ref_url": "Liber_Comicus", "ref_ids": ["17886800"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 33, "ref_url": "Lectionary", "ref_ids": ["1520065"], "sent_idx": 1}, {"id": 2, "start": 60, "end": 77, 
"ref_url": "Iberian_Peninsula", "ref_ids": ["14883"], "sent_idx": 0}]} +{"id": "17891130", "title": "Oceania Swimming Championships", "sentences": ["The Oceania Swimming Championships are currently held every 2 years, in even years.", "They are organized by the Oceania Swimming Association, and feature teams representing countries and islands from that region.", "The most recent championships were the 11th edition, held in June 2016."], "mentions": [{"id": 0, "start": 26, "end": 54, "ref_url": "Oceania_Swimming_Association", "ref_ids": ["17801361"], "sent_idx": 1}, {"id": 1, "start": 39, "end": 51, "ref_url": "2016_Oceania_Swimming_Championships", "ref_ids": ["50899309"], "sent_idx": 2}]} +{"id": "17891144", "title": "WKJB", "sentences": ["WKJB (710 AM, \"Radio Isla 710\") is a radio station licensed to serve Mayagüez, Puerto Rico.", "The station is owned by Radio Station WKJB AM-FM, Inc.", "It airs a News/Talk format.", "The station was assigned the WKJB call letters by the Federal Communications Commission."], "mentions": [{"id": 0, "start": 43, "end": 45, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 1}, {"id": 1, "start": 37, "end": 50, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 51, "end": 59, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 69, "end": 90, "ref_url": "Mayagüez,_Puerto_Rico", "ref_ids": ["149133"], "sent_idx": 0}, {"id": 4, "start": 10, "end": 14, "ref_url": "News_radio", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 15, "end": 19, "ref_url": "Talk_radio", "ref_ids": ["30077"], "sent_idx": 2}, {"id": 6, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 3}]} +{"id": "17891149", "title": "Jordan River Foundation", "sentences": ["The \"Jordan River Foundation\" was founded earlier by Queen Nour Al Hussein, the wife of the late king of Jordan Hussein bin Talal, after the death of king Hussein, 
queen Nour left her position as the Chair person of \"Jordan River Foundation\" and the new queen Rania took over the position.", "The Jordan River Foundation is a Nonprofit organization started back in the early nineties( sometime after 1990) Amman, Jordan to empower society, especially women and children, and in turn, improve the quality of life to secure a better future for all Jordanians.", "The foundation was founded by and is chaired by queen Nour Al Hussein and then chaired by Queen Rania Al-Abdullah."], "mentions": [{"id": 0, "start": 33, "end": 55, "ref_url": "Nonprofit_organization", "ref_ids": ["72487"], "sent_idx": 1}, {"id": 1, "start": 113, "end": 118, "ref_url": "Amman", "ref_ids": ["45007"], "sent_idx": 1}, {"id": 2, "start": 4, "end": 10, "ref_url": "Jordan", "ref_ids": ["7515964"], "sent_idx": 1}, {"id": 3, "start": 90, "end": 113, "ref_url": "Queen_Rania_Al-Abdullah", "ref_ids": null, "sent_idx": 2}]} +{"id": "17891163", "title": "Jill Cohenour", "sentences": ["Jill Cohenour is a Democratic Party member of the Montana State Senate, representing District 42 since 2015."], "mentions": [{"id": 0, "start": 19, "end": 35, "ref_url": "Democratic_Party_(United_States)", "ref_ids": ["5043544"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 70, "ref_url": "Montana_State_Senate", "ref_ids": null, "sent_idx": 0}]} +{"id": "17891167", "title": "Je vous aime adieu", "sentences": ["\"Je vous aime adieu\" (English: \"I love you goodbye\") is a 1996 song recorded by the French singer Hélène Ségara.", "It was her second single, and the first from her first album, \"Cœur de verre\".", "Released in April 1996, it was a success in France and Belgium, but did not reach the top ten."], "mentions": [{"id": 0, "start": 84, "end": 90, "ref_url": "France", "ref_ids": ["5843419"], "sent_idx": 0}, {"id": 1, "start": 98, "end": 111, "ref_url": "Hélène_Ségara", "ref_ids": ["5845813"], "sent_idx": 0}, {"id": 2, "start": 63, "end": 76, "ref_url": "Cœur_de_verre", "ref_ids": 
["17822958"], "sent_idx": 1}]} +{"id": "17891170", "title": "2008–09 South China AA season", "sentences": [], "mentions": []} +{"id": "17891181", "title": "Baker Tilly Virchow Krause, LLP", "sentences": ["Baker Tilly Virchow Krause, LLP (doing business as Baker Tilly) is a public accounting and consulting firm headquartered in Chicago, Illinois.", "Formerly known as Virchow, Krause & Company, LLP, the firm is the American member of Baker Tilly International, a global accounting network headquartered in London, United Kingdom.", "Having joined Baker Tilly International in 1999, the firm officially adopted the Baker Tilly name on June 1, 2009.", "Vault Accounting 50 has ranked Baker Tilly Virchow Krause, LLP as the 9th most prestigious accounting firm in their 2019 ranking."], "mentions": [{"id": 0, "start": 69, "end": 86, "ref_url": "Public_accounting", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 91, "end": 106, "ref_url": "Consulting_firm", "ref_ids": ["1804464"], "sent_idx": 0}, {"id": 2, "start": 14, "end": 39, "ref_url": "Baker_Tilly_International", "ref_ids": ["43411856"], "sent_idx": 2}, {"id": 3, "start": 121, "end": 139, "ref_url": "Accounting_network", "ref_ids": ["32933085"], "sent_idx": 1}, {"id": 4, "start": 157, "end": 163, "ref_url": "London", "ref_ids": ["17867"], "sent_idx": 1}, {"id": 5, "start": 165, "end": 179, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 1}]} +{"id": "17891192", "title": "Roberto Meloni (singer)", "sentences": ["Roberto Meloni (born 6 December 1977 in Ardara, Sardinia, Italy), is an Italian singer and television presenter currently residing in Latvia.", "He represented Latvia twice in a row in the Eurovision Song Contest, in 2007 as part of the group Bonaparti.lv and again in 2008 as part of Pirates of the Sea.", "In 2009, he gave the points of Latvia at the 2009 Eurovision Song Contest."], "mentions": [{"id": 0, "start": 40, "end": 46, "ref_url": "Ardara,_Italy", "ref_ids": null, "sent_idx": 0}, {"id": 
1, "start": 48, "end": 56, "ref_url": "Sardinia", "ref_ids": ["29376"], "sent_idx": 0}, {"id": 2, "start": 58, "end": 63, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 3, "start": 72, "end": 79, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 4, "start": 31, "end": 37, "ref_url": "Latvia", "ref_ids": ["17514"], "sent_idx": 2}, {"id": 5, "start": 50, "end": 73, "ref_url": "Eurovision_Song_Contest", "ref_ids": ["9954"], "sent_idx": 2}, {"id": 6, "start": 98, "end": 110, "ref_url": "Bonaparti.lv", "ref_ids": ["9708809"], "sent_idx": 1}, {"id": 7, "start": 140, "end": 158, "ref_url": "Pirates_of_the_Sea", "ref_ids": ["16049373"], "sent_idx": 1}, {"id": 8, "start": 50, "end": 73, "ref_url": "Eurovision_Song_Contest", "ref_ids": ["9954"], "sent_idx": 2}]} +{"id": "17891198", "title": "2006–07 South China AA season", "sentences": [], "mentions": []} +{"id": "17891201", "title": "Type 1 Ho-Ni I", "sentences": ["The\nwas a tank destroyer and self-propelled artillery developed by the Imperial Japanese Army for use during World War II in the Pacific theater."], "mentions": [{"id": 0, "start": 10, "end": 24, "ref_url": "Tank_destroyer", "ref_ids": ["2286"], "sent_idx": 0}, {"id": 1, "start": 29, "end": 53, "ref_url": "Self-propelled_artillery", "ref_ids": ["28566"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 93, "ref_url": "Imperial_Japanese_Army", "ref_ids": ["481708"], "sent_idx": 0}, {"id": 3, "start": 109, "end": 121, "ref_url": "World_War_II", "ref_ids": ["32927"], "sent_idx": 0}, {"id": 4, "start": 129, "end": 144, "ref_url": "Pacific_War", "ref_ids": ["342641"], "sent_idx": 0}]} +{"id": "17891204", "title": "WKFE", "sentences": ["WKFE (1550 AM, \"Radio Café 1550\") is a radio station licensed to serve Yauco, Puerto Rico.", "The station is owned by Media Power Group, Inc.", "WKFE is part of the Radio Isla Network.", "It airs a Spanish language News Talk Information format.", "The station was assigned the WKFE call letters by the 
Federal Communications Commission."], "mentions": [{"id": 0, "start": 11, "end": 13, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 1, "start": 39, "end": 52, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 53, "end": 61, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 71, "end": 89, "ref_url": "Yauco,_Puerto_Rico", "ref_ids": ["654502"], "sent_idx": 0}, {"id": 4, "start": 10, "end": 26, "ref_url": "Spanish_language", "ref_ids": ["26825"], "sent_idx": 3}, {"id": 5, "start": 27, "end": 48, "ref_url": "News_Talk_Information", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 4}]} +{"id": "17891205", "title": "Gujarat State Road Transport Corporation", "sentences": ["Gujarat State Road Transport Corporation is known as GSRTC.", "It is a Govt. State Transport Undertaking of Gujarat for passengers facilitating with Road Public Transport in moffusil / City Services.", "GSRTC operates within the state of Gujarat, India and neighboring states.", "It has a workforce of 40,000."], "mentions": [{"id": 0, "start": 35, "end": 42, "ref_url": "Gujarat", "ref_ids": ["53707"], "sent_idx": 2}, {"id": 1, "start": 44, "end": 49, "ref_url": "India", "ref_ids": ["14533"], "sent_idx": 2}]} +{"id": "17891218", "title": "Superwoman (Alicia Keys song)", "sentences": ["\"Superwoman\" is a song by American R&B-soul singer–songwriter Alicia Keys from her third studio album, \"As I Am\" (2007).", "Written by Keys, Linda Perry, and Steve Mostyn, the track was released as the fourth and final single from the album.", "It earned Keys her second consecutive Grammy Award for Best Female R&B Vocal Performance at the 2009 ceremony, and was also nominated for Outstanding Music Video and Outstanding Song at the 40th NAACP Image Awards."], "mentions": [{"id": 0, "start": 67, "end": 70, "ref_url": 
"Contemporary_R&B", "ref_ids": ["2529904"], "sent_idx": 2}, {"id": 1, "start": 39, "end": 43, "ref_url": "Soul_music", "ref_ids": ["62808"], "sent_idx": 0}, {"id": 2, "start": 62, "end": 73, "ref_url": "Alicia_Keys", "ref_ids": ["59720"], "sent_idx": 0}, {"id": 3, "start": 104, "end": 111, "ref_url": "As_I_Am", "ref_ids": ["11860107"], "sent_idx": 0}, {"id": 4, "start": 17, "end": 28, "ref_url": "Linda_Perry", "ref_ids": ["6746576"], "sent_idx": 1}, {"id": 5, "start": 38, "end": 88, "ref_url": "Grammy_Award_for_Best_Female_R&B_Vocal_Performance", "ref_ids": ["203305"], "sent_idx": 2}, {"id": 6, "start": 96, "end": 109, "ref_url": "51st_Grammy_Awards", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 138, "end": 161, "ref_url": "NAACP_Image_Award_for_Outstanding_Music_Video", "ref_ids": ["476739"], "sent_idx": 2}, {"id": 8, "start": 166, "end": 182, "ref_url": "NAACP_Image_Award_for_Outstanding_Song", "ref_ids": ["476740"], "sent_idx": 2}, {"id": 9, "start": 190, "end": 213, "ref_url": "40th_NAACP_Image_Awards", "ref_ids": ["21459035"], "sent_idx": 2}]} +{"id": "17891221", "title": "Du Chakay Duniya", "sentences": ["Du Chakay Duniya () (Meaning :The World on Two Wheels in English) is a Bengali book written by the first Indian Globe -Trotter Bimal Mukherjee (1903–1987) based on his experiences of traveling through the world on a bicycle.", "In 1926 Bimal Mukherjee went on an epic world tour on a bicycle.", "Already before that he had completed touring India on his bicycle during the period 1921 - 1926.", "Banking on a meager budget and an insatiable thirst of knowing the unknown world he went on his epic adventure of world tour.", "He traveled through Arab, Iran, Turkey, Syria, Britain, Iceland, Norway, Sweden, Finland, Russia, Greece, Egypt, Sudan, Italy, Switzerland, France, Denmark, Germany, United States, Colombia, Ecuador, Peru, Hawaii, Japan, China, Hong Kong, Vietnam, Thailand, Malaysia and many other countries before returning to India again in 1937.", "As 
the first Indian globe trotter he has jotted down all of his amazing and awesome experiences of globe trotting in this book."], "mentions": [{"id": 0, "start": 71, "end": 77, "ref_url": "Bengal", "ref_ids": ["4862"], "sent_idx": 0}, {"id": 1, "start": 13, "end": 19, "ref_url": "Indian_people", "ref_ids": ["7564733"], "sent_idx": 5}, {"id": 2, "start": 8, "end": 23, "ref_url": "Bimal_Mukherjee", "ref_ids": ["36945432"], "sent_idx": 1}, {"id": 3, "start": 20, "end": 24, "ref_url": "Arab", "ref_ids": null, "sent_idx": 4}, {"id": 4, "start": 26, "end": 30, "ref_url": "Iran", "ref_ids": ["14653"], "sent_idx": 4}, {"id": 5, "start": 32, "end": 38, "ref_url": "Turkey", "ref_ids": ["11125639"], "sent_idx": 4}, {"id": 6, "start": 40, "end": 45, "ref_url": "Syria", "ref_ids": ["7515849"], "sent_idx": 4}, {"id": 7, "start": 47, "end": 54, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 4}, {"id": 8, "start": 56, "end": 63, "ref_url": "Iceland", "ref_ids": ["14531"], "sent_idx": 4}, {"id": 9, "start": 81, "end": 88, "ref_url": "Finland", "ref_ids": ["10577"], "sent_idx": 4}, {"id": 10, "start": 98, "end": 104, "ref_url": "Greece", "ref_ids": ["12108"], "sent_idx": 4}, {"id": 11, "start": 106, "end": 111, "ref_url": "Egypt", "ref_ids": ["8087628"], "sent_idx": 4}, {"id": 12, "start": 113, "end": 118, "ref_url": "Sudan", "ref_ids": ["27421"], "sent_idx": 4}, {"id": 13, "start": 181, "end": 189, "ref_url": "Colombia", "ref_ids": ["5222"], "sent_idx": 4}, {"id": 14, "start": 191, "end": 198, "ref_url": "Ecuador", "ref_ids": ["9334"], "sent_idx": 4}, {"id": 15, "start": 200, "end": 204, "ref_url": "Peru", "ref_ids": ["170691"], "sent_idx": 4}, {"id": 16, "start": 206, "end": 212, "ref_url": "Hawaii", "ref_ids": ["13270"], "sent_idx": 4}, {"id": 17, "start": 228, "end": 237, "ref_url": "Hong_Kong", "ref_ids": ["13404"], "sent_idx": 4}, {"id": 18, "start": 239, "end": 246, "ref_url": "Vietnam", "ref_ids": ["202354"], "sent_idx": 4}, {"id": 19, "start": 248, "end": 256, 
"ref_url": "Thailand", "ref_ids": ["30128"], "sent_idx": 4}, {"id": 20, "start": 258, "end": 266, "ref_url": "Malaysia", "ref_ids": ["3607937"], "sent_idx": 4}]} +{"id": "17891225", "title": "Colombo Central Bus Station bombing", "sentences": ["The Colombo Central Bus Station bombing was the car bombing of the central bus terminal of Colombo carried out on April 21, 1987 in Pettah, Colombo, Sri Lanka.", "The bomb killed 113 people and left a crater in the ground."], "mentions": [{"id": 0, "start": 4, "end": 11, "ref_url": "Colombo", "ref_ids": ["56636"], "sent_idx": 0}, {"id": 1, "start": 132, "end": 147, "ref_url": "Pettah,_Colombo", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 149, "end": 158, "ref_url": "Sri_Lanka", "ref_ids": ["26750"], "sent_idx": 0}]} +{"id": "17891229", "title": "Pop art (disambiguation)", "sentences": ["Pop art is a visual art movement that emerged in the 1950s in Britain and the United States.", "Pop art may also refer to:"], "mentions": [{"id": 0, "start": 0, "end": 7, "ref_url": "Pop_art", "ref_ids": ["103895"], "sent_idx": 1}]} +{"id": "17891232", "title": "George Groesback", "sentences": ["George G. 
Groesback is a Democratic Party member of the Montana House of Representatives, representing District 74 since 2004."], "mentions": [{"id": 0, "start": 25, "end": 41, "ref_url": "Democratic_Party_(United_States)", "ref_ids": ["5043544"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 88, "ref_url": "Montana_House_of_Representatives", "ref_ids": ["1688653"], "sent_idx": 0}]} +{"id": "17891239", "title": "New Chitose Airport Station", "sentences": [], "mentions": []} +{"id": "17891242", "title": "Jackpot247", "sentences": ["Jackpot247 (formerly Challenge Jackpot) is an interactive gambling website owned by Betsson Group, which previously had television segments on ITV, except ITV Channel Television, and Vox Africa.", "The \"Challenge Jackpot\" brand was dropped in September 2011 and replaced by \"Jackpot247\" (Jackpot247.com).", "Challenge Jackpot was also a British interactive gaming channel owned by Living TV Group (later British Sky Broadcasting) and operated by NetPlay TV.", "In March 2017, Betsson Group acquired Jackpot247 having purchased Netplay TV for £26 million."], "mentions": [{"id": 0, "start": 58, "end": 66, "ref_url": "Gambling", "ref_ids": ["11921"], "sent_idx": 0}, {"id": 1, "start": 15, "end": 22, "ref_url": "Betsson", "ref_ids": ["8852318"], "sent_idx": 3}, {"id": 2, "start": 143, "end": 146, "ref_url": "ITV_(TV_channel)", "ref_ids": ["212177"], "sent_idx": 0}, {"id": 3, "start": 155, "end": 177, "ref_url": "ITV_Channel_Television", "ref_ids": ["180521"], "sent_idx": 0}, {"id": 4, "start": 29, "end": 36, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 2}, {"id": 5, "start": 15, "end": 22, "ref_url": "Betsson", "ref_ids": ["8852318"], "sent_idx": 3}]} +{"id": "17891252", "title": "John VI, Count of Harcourt", "sentences": ["John VI of Harcourt( or John of Vaudémont)", "( 1 December 1342 – 28 February 1389) was a count of Harcourt.", "He was son of John V of Harcourt and Blanche of Ponthieu who was the sister of Jeanne of Ponthieu.", "He 
succeeded to his father's counties of Aumale and Harcourt and barony of Elbeuf on his execution in 1356 In 1359 John married Catherine( 1342 – 1427), daughter of Peter I, Duke of Bourbon.", "They had the following children:"], "mentions": [{"id": 0, "start": 44, "end": 61, "ref_url": "Count_of_Harcourt", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 14, "end": 32, "ref_url": "John_V_of_Harcourt", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 37, "end": 56, "ref_url": "Blanche_of_Ponthieu", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 79, "end": 97, "ref_url": "Jeanne_of_Ponthieu", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 41, "end": 47, "ref_url": "Aumale", "ref_ids": ["1119478"], "sent_idx": 3}, {"id": 5, "start": 52, "end": 60, "ref_url": "Harcourt,_Eure", "ref_ids": ["1166715"], "sent_idx": 3}, {"id": 6, "start": 75, "end": 81, "ref_url": "Elbeuf", "ref_ids": ["2369974"], "sent_idx": 3}, {"id": 7, "start": 89, "end": 98, "ref_url": "Execution", "ref_ids": null, "sent_idx": 3}, {"id": 8, "start": 165, "end": 189, "ref_url": "Peter_I,_Duke_of_Bourbon", "ref_ids": ["703394"], "sent_idx": 3}]} +{"id": "17891259", "title": "Female Agents", "sentences": ["Female Agents is a 2008 French historical drama film directed by Jean-Paul Salomé and starring Sophie Marceau, Julie Depardieu, Marie Gillain, Déborah François, and Moritz Bleibtreu.", "Written by Salomé and Laurent Vachaud, the film is about female resistance fighters in the Second World War.", "Jean-Paul Salomé, the director, drew inspiration from an obituary in \"The Times\" newspaper of Lise de Baissac (Lise Villameur), from Mauritius (then a British colony), one of the heroines of the SOE, named \"Louise Desfontaines\" in the film and played by Sophie Marceau.", "The film was partly funded by BBC Films."], "mentions": [{"id": 0, "start": 32, "end": 48, "ref_url": "Historical_fiction", "ref_ids": ["42142"], "sent_idx": 0}, {"id": 1, "start": 66, "end": 82, "ref_url": 
"Jean-Paul_Salomé", "ref_ids": ["10958891"], "sent_idx": 0}, {"id": 2, "start": 96, "end": 110, "ref_url": "Sophie_Marceau", "ref_ids": ["42798"], "sent_idx": 0}, {"id": 3, "start": 112, "end": 127, "ref_url": "Julie_Depardieu", "ref_ids": ["6058523"], "sent_idx": 0}, {"id": 4, "start": 129, "end": 142, "ref_url": "Marie_Gillain", "ref_ids": ["1882025"], "sent_idx": 0}, {"id": 5, "start": 144, "end": 160, "ref_url": "Déborah_François", "ref_ids": ["10180682"], "sent_idx": 0}, {"id": 6, "start": 166, "end": 182, "ref_url": "Moritz_Bleibtreu", "ref_ids": ["1098341"], "sent_idx": 0}, {"id": 7, "start": 64, "end": 83, "ref_url": "Resistance_during_World_War_II", "ref_ids": ["2544872"], "sent_idx": 1}, {"id": 8, "start": 91, "end": 107, "ref_url": "Second_World_War", "ref_ids": null, "sent_idx": 1}, {"id": 9, "start": 57, "end": 65, "ref_url": "Obituary", "ref_ids": ["379881"], "sent_idx": 2}, {"id": 10, "start": 70, "end": 79, "ref_url": "The_Times", "ref_ids": ["39127"], "sent_idx": 2}, {"id": 11, "start": 94, "end": 109, "ref_url": "Lise_de_Baissac", "ref_ids": ["15615225"], "sent_idx": 2}, {"id": 12, "start": 133, "end": 142, "ref_url": "Mauritius", "ref_ids": ["19201"], "sent_idx": 2}, {"id": 13, "start": 195, "end": 198, "ref_url": "Special_Operations_Executive", "ref_ids": ["28898"], "sent_idx": 2}, {"id": 14, "start": 30, "end": 39, "ref_url": "BBC_Films", "ref_ids": ["9307584"], "sent_idx": 3}]} +{"id": "17891299", "title": "Mushtaq Khan (economist)", "sentences": ["Mushtaq Husain Khan is a heterodox economist and professor of economics at the School of Oriental and African Studies (SOAS), University of London.", "His work focusses on the economics of poor countries; it includes notable contributions to the field of institutional economics and South Asian development.", "Khan also developed the concept of political settlement, which is a political economy framework that highlights how the distribution of organizational and political power among different 
classes or groups influences policies and institutions in different countries."], "mentions": [{"id": 0, "start": 25, "end": 44, "ref_url": "Heterodox_economics", "ref_ids": ["2399697"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 117, "ref_url": "School_of_Oriental_and_African_Studies", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 126, "end": 146, "ref_url": "University_of_London", "ref_ids": ["60919"], "sent_idx": 0}, {"id": 3, "start": 25, "end": 52, "ref_url": "Development_economics", "ref_ids": ["210183"], "sent_idx": 1}, {"id": 4, "start": 104, "end": 127, "ref_url": "Institutional_economics", "ref_ids": ["859910"], "sent_idx": 1}]} +{"id": "17891305", "title": "World Horti-Expo Garden", "sentences": ["The World Horti-Expo Garden is a botanical garden center in Kunming, China.", "It played host to the October 1999 Kunming International Horticulture Exposition.", "As an international botanical garden, the Expo Garden has become a significant Kunming landmark.", "The World Horti-Expo Garden covers an area of 218 hectares and consists of 5 indoor exhibition halls (China Hall, the Man and Nature Hall, the Green House, the Science and Technology Hall, and the International Hall), 6 theme gardens (the Tree Garden, the Tea Garden, the Bonsai Garden, the Medicinal Herb Garden, the Bamboo Garden, and the Vegetable and Fruit Garden) and 34 outdoor gardens of domestic participants, 34 outdoor gardens for foreign countries and international organizational and 9 outdoor exhibition area for the enterprise participants.", "The World-expo Garden was designed to be in \"perfect harmony\" with the surrounding mountains, waters and forest, showing that it originates from and return to the nature.", "It has hosted a large collection of horticultural and gardening works from different Chinese provinces and municipalities, and also from all over the world."], "mentions": [{"id": 0, "start": 20, "end": 36, "ref_url": "Botanical_garden", "ref_ids": ["69427"], "sent_idx": 
2}, {"id": 1, "start": 79, "end": 86, "ref_url": "Kunming", "ref_ids": ["197181"], "sent_idx": 2}, {"id": 2, "start": 102, "end": 107, "ref_url": "China", "ref_ids": ["5405"], "sent_idx": 3}, {"id": 3, "start": 30, "end": 80, "ref_url": "1999_Kunming_International_Horticulture_Exposition", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891314", "title": "Crash (1984 TV series)", "sentences": ["Crash - Truslen fra det sorte hul (\"Crash - The Menace from the Black Hole\") is a 1984 Danish children's science fiction TV-series which was written, directed and produced by Carsten Overskov and starred Lars Ranthe."], "mentions": [{"id": 0, "start": 87, "end": 93, "ref_url": "Denmark", "ref_ids": ["76972"], "sent_idx": 0}, {"id": 1, "start": 105, "end": 120, "ref_url": "Science_fiction", "ref_ids": ["26787"], "sent_idx": 0}, {"id": 2, "start": 175, "end": 191, "ref_url": "Carsten_Overskov", "ref_ids": ["27902958"], "sent_idx": 0}, {"id": 3, "start": 204, "end": 215, "ref_url": "Lars_Ranthe", "ref_ids": ["27902730"], "sent_idx": 0}]} +{"id": "17891320", "title": "Raúl Rodríguez (boxer)", "sentences": ["Raúl H. 
Rodríguez (born November 26, 1915) is an Argentine boxer who competed in the 1936 Summer Olympics.", "He was born in Córdoba.", "In 1936, Rodríguez was eliminated in the quarterfinals of the welterweight class after losing his fight to the upcoming bronze medalist Gerhard Pedersen."], "mentions": [{"id": 0, "start": 49, "end": 58, "ref_url": "Argentina", "ref_ids": ["18951905"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 64, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 105, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 22, "ref_url": "Córdoba,_Argentina", "ref_ids": ["241269"], "sent_idx": 1}, {"id": 4, "start": 62, "end": 80, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 136, "end": 152, "ref_url": "Gerhard_Pedersen", "ref_ids": ["13845496"], "sent_idx": 2}]} +{"id": "17891332", "title": "Patkhor Peak", "sentences": ["Patkhor Peak (also Patkhur, ) is a mountain in Tajikistan's Gorno-Badakhshan Autonomous Province.", "At it is the highest point in the Rushan Range, a subrange of the Pamir Mountains."], "mentions": [{"id": 0, "start": 47, "end": 59, "ref_url": "Tajikistan", "ref_ids": ["30108"], "sent_idx": 0}, {"id": 1, "start": 60, "end": 96, "ref_url": "Gorno-Badakhshan_Autonomous_Province", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 34, "end": 46, "ref_url": "Rushan_Range", "ref_ids": ["17890356"], "sent_idx": 1}, {"id": 3, "start": 66, "end": 81, "ref_url": "Pamir_Mountains", "ref_ids": ["498462"], "sent_idx": 1}]} +{"id": "17891336", "title": "Maurice Geldhof", "sentences": ["Maurice Geldhof (22 October 1905 in Moorslede – 26 April 1970 in Wevelgem) was a Belgian professional road bicycle racer.", "In the 1927 Tour de France, he won the 19th stage."], "mentions": [{"id": 0, "start": 36, "end": 45, "ref_url": "Moorslede", "ref_ids": ["154380"], "sent_idx": 0}, {"id": 1, 
"start": 65, "end": 73, "ref_url": "Wevelgem", "ref_ids": ["154344"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 88, "ref_url": "Belgium", "ref_ids": ["3343"], "sent_idx": 0}, {"id": 3, "start": 102, "end": 120, "ref_url": "Road_bicycle_racer", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 7, "end": 26, "ref_url": "1927_Tour_de_France", "ref_ids": ["6516497"], "sent_idx": 1}]} +{"id": "17891337", "title": "The Red Necklace", "sentences": ["The Red Necklace is a young adult historical novel by Sally Gardner, published by Orion in 2007.", "It is a story of the French Revolution, interwoven with gypsy magic.", "The audiobook is narrated by Tom Hiddleston.", "\"The Silver Blade\" (Orion, 2008) is a sequel set during the Reign of Terror ().", "US editions were published by Dial Press in 2008 and 2009."], "mentions": [{"id": 0, "start": 34, "end": 50, "ref_url": "Historical_novel", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 54, "end": 67, "ref_url": "Sally_Gardner", "ref_ids": ["6606821"], "sent_idx": 0}, {"id": 2, "start": 21, "end": 38, "ref_url": "French_Revolution", "ref_ids": ["11188"], "sent_idx": 1}, {"id": 3, "start": 29, "end": 43, "ref_url": "Tom_Hiddleston", "ref_ids": ["15003874"], "sent_idx": 2}, {"id": 4, "start": 60, "end": 75, "ref_url": "Reign_of_Terror", "ref_ids": ["25975"], "sent_idx": 3}, {"id": 5, "start": 30, "end": 40, "ref_url": "Dial_Press", "ref_ids": ["3332043"], "sent_idx": 4}]} +{"id": "17891338", "title": "Methylnaphthalene", "sentences": ["Methylnaphthalene may refer to:"], "mentions": []} +{"id": "17891339", "title": "Irene Daye", "sentences": ["Irene Daye (January 17, 1918 in Lawrence, Massachusetts – November 1, 1971 in Greenville, South Carolina) was an American jazz singer.", "Daye began her career at age 17 by singing in Jan Murphy's big band while still in high school in 1935, continuing with Murphy through 1937.", "She then worked briefly with Mal Hallett before beginning work with Gene Krupa, with whose orchestra 
she sang from 1938 to 1941.", "Krupa and Daye recorded 63 titles together, with her biggest hit being \"Drum Boogie\", which was recorded in the last session she did with Krupa.", "After leaving Krupa, Daye retired from music at the age of 23, marrying Corky Cornelius.", "Anita O'Day took her spot in Krupa's orchestra.", "Daye had a daughter in 1943, but Cornelius (then in the Casa Loma Orchestra) died suddenly later that year, after which she returned to her singing career."], "mentions": [{"id": 0, "start": 32, "end": 55, "ref_url": "Lawrence,_Massachusetts", "ref_ids": ["116748"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 104, "ref_url": "Greenville,_South_Carolina", "ref_ids": ["134497"], "sent_idx": 0}, {"id": 2, "start": 113, "end": 121, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 3, "start": 122, "end": 126, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 0}, {"id": 4, "start": 46, "end": 56, "ref_url": "Jan_Murphy_(musician)", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 59, "end": 67, "ref_url": "Big_band", "ref_ids": ["63276"], "sent_idx": 1}, {"id": 6, "start": 29, "end": 40, "ref_url": "Mal_Hallett", "ref_ids": ["17891662"], "sent_idx": 2}, {"id": 7, "start": 68, "end": 78, "ref_url": "Gene_Krupa", "ref_ids": ["196591"], "sent_idx": 2}, {"id": 8, "start": 72, "end": 87, "ref_url": "Corky_Cornelius", "ref_ids": ["17891441"], "sent_idx": 4}, {"id": 9, "start": 0, "end": 11, "ref_url": "Anita_O'Day", "ref_ids": ["56982"], "sent_idx": 5}, {"id": 10, "start": 56, "end": 75, "ref_url": "Casa_Loma_Orchestra", "ref_ids": ["904297"], "sent_idx": 6}]} +{"id": "17891342", "title": "Copper Mountain, British Columbia", "sentences": ["Copper Mountain was an important copper-mining company town in the Similkameen Country of the Southern Interior of British Columbia, Canada, just south of the town of Princeton.", "In 1884 copper ore was discovered by a trapper named James Jameson while out hunting deer.", "This discovery 
of copper led to a rush of miners to the area which gave rise to the town of Copper Mountain.", "The first camps located in the area was \"Volacanic\" Brown's Camp and E. Voight's Camp.", "These two camps merged to create the Granby Company's Copper Mountain operation.", "Copper Mountain mining operation lasted over half a century.", "The Copper Mountain mining operation was officially closed in 1958.", "The town of Copper Mountain was abandoned shortly after."], "mentions": [{"id": 0, "start": 47, "end": 59, "ref_url": "Company_town", "ref_ids": ["870767"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 86, "ref_url": "Similkameen_Country", "ref_ids": ["10761429"], "sent_idx": 0}, {"id": 2, "start": 94, "end": 131, "ref_url": "British_Columbia_Interior", "ref_ids": ["8882484"], "sent_idx": 0}, {"id": 3, "start": 133, "end": 139, "ref_url": "Canada", "ref_ids": ["5042916"], "sent_idx": 0}, {"id": 4, "start": 167, "end": 176, "ref_url": "Princeton,_British_Columbia", "ref_ids": ["1829758"], "sent_idx": 0}]} +{"id": "17891386", "title": "Superwoman (disambiguation)", "sentences": ["Superwoman is a fictional character.", "Superwoman may also refer to:"], "mentions": [{"id": 0, "start": 0, "end": 10, "ref_url": "Superwoman", "ref_ids": ["1266342"], "sent_idx": 1}]} +{"id": "17891424", "title": "Julius Indongo", "sentences": ["Julius Munyelele Indongo (born 12 February 1983) is a Namibian professional boxer.", "He is a former unified light-welterweight world champion, having held the WBA (Unified), IBF, and IBO titles between 2016 and 2017.", "As an amateur, Indongo represented Namibia at the 2008 Olympics, reaching the first round of the lightweight bracket."], "mentions": [{"id": 0, "start": 35, "end": 42, "ref_url": "Namibia", "ref_ids": ["21292"], "sent_idx": 2}, {"id": 1, "start": 63, "end": 81, "ref_url": "Professional_boxer", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 23, "end": 41, "ref_url": "Light-welterweight", "ref_ids": null, "sent_idx": 1}, {"id": 
3, "start": 74, "end": 87, "ref_url": "WBA_(Unified)", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 89, "end": 92, "ref_url": "International_Boxing_Federation", "ref_ids": ["241038"], "sent_idx": 1}, {"id": 5, "start": 98, "end": 101, "ref_url": "International_Boxing_Organization", "ref_ids": ["322610"], "sent_idx": 1}, {"id": 6, "start": 6, "end": 13, "ref_url": "Amateur_boxer", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 50, "end": 63, "ref_url": "2008_Olympics", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 97, "end": 108, "ref_url": "Lightweight", "ref_ids": ["712337"], "sent_idx": 2}]} +{"id": "17891441", "title": "Corky Cornelius", "sentences": ["Edward \"Corky\" Cornelius (December 3, 1914 – August 3, 1943) was an American jazz trumpeter.", "Cornelius's father was a drummer who worked regionally in dance bands in Texas.", "He was born in Indiana and raised in Binghamton, New York, and began his career in the early 1930s, played with Les Brown, Frank Dailey, and Buddy Rogers.", "He joined Benny Goodman's band early in 1939, and went with Gene Krupa when the drummer split off to form his own group.", "While there, Cornelius met singer Irene Daye, whom he married soon after.", "He played with the Casa Loma Orchestra from 1941 until 1943, when he died suddenly of kidney failure."], "mentions": [{"id": 0, "start": 77, "end": 81, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 78, "ref_url": "Texas", "ref_ids": ["29810"], "sent_idx": 1}, {"id": 2, "start": 15, "end": 22, "ref_url": "Indiana", "ref_ids": ["21883857"], "sent_idx": 2}, {"id": 3, "start": 37, "end": 57, "ref_url": "Binghamton,_New_York", "ref_ids": ["59403"], "sent_idx": 2}, {"id": 4, "start": 112, "end": 121, "ref_url": "Les_Brown_(bandleader)", "ref_ids": ["142686"], "sent_idx": 2}, {"id": 5, "start": 123, "end": 135, "ref_url": "Frank_Dailey", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 141, "end": 153, "ref_url": 
"Charles_Rogers_(actor)", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 10, "end": 23, "ref_url": "Benny_Goodman", "ref_ids": ["53855"], "sent_idx": 3}, {"id": 8, "start": 60, "end": 70, "ref_url": "Gene_Krupa", "ref_ids": ["196591"], "sent_idx": 3}, {"id": 9, "start": 34, "end": 44, "ref_url": "Irene_Daye", "ref_ids": ["17891339"], "sent_idx": 4}, {"id": 10, "start": 19, "end": 38, "ref_url": "Casa_Loma_Orchestra", "ref_ids": ["904297"], "sent_idx": 5}, {"id": 11, "start": 86, "end": 100, "ref_url": "Kidney_failure", "ref_ids": ["284027"], "sent_idx": 5}]} +{"id": "17891467", "title": "Abertafol railway station", "sentences": ["Abertafol railway station was a halt located on the north shore of the Dyfi estuary in the old Welsh county of Merionethshire (south Gwynedd)."], "mentions": [{"id": 0, "start": 71, "end": 83, "ref_url": "River_Dyfi", "ref_ids": ["3840407"], "sent_idx": 0}, {"id": 1, "start": 95, "end": 100, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 2, "start": 111, "end": 125, "ref_url": "Merionethshire", "ref_ids": ["341123"], "sent_idx": 0}, {"id": 3, "start": 133, "end": 140, "ref_url": "Gwynedd", "ref_ids": ["52508"], "sent_idx": 0}]} +{"id": "17891478", "title": "Shahid Abbaspour Dam", "sentences": ["The Shahid Abbaspour Dam (Persian: سد شهید عباسپور), formerly known as Great Reza Shah Dam (Persian: سد رضاشاه کبیر) before 1979 Revolution, is a large arch dam providing hydroelectricity from the Karun River; it is located about northeast of Masjed Soleiman, in the province of Khuzestan, Iran, and originally completed in 1976, and formerly named \"Reza Shah Kabir Dam\".", "The dam was the first of a series of dams planned for development on the Karun River.", "The dam is a double-curvature concrete arch dam, high from the foundation rock.", "Its crest width is .", "The arch dam design was chosen for the narrow, rocky gorge where it is located.", "The double-curvature arch design withstands the pressure created by the 
reservoir with a minimum of concrete, because the shape transmits the force of the reservoir downward and laterally, against the rock foundation; this has the effect of strengthening the dam concrete and its foundation by keeping it in compression.", "The dam site houses two power stations, one built in 1976 and another built in 1995.", "Each contains four water turbines connected to electric generators of , for a combined generating capacity of .", "The dam's electrical output is connected to the national electrical grid, with most generation occurring during periods of peak demand for electricity."], "mentions": [{"id": 0, "start": 26, "end": 33, "ref_url": "Persian_language", "ref_ids": ["11600"], "sent_idx": 0}, {"id": 1, "start": 77, "end": 86, "ref_url": "Reza_Shah_Pahlavi", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 26, "end": 33, "ref_url": "Persian_language", "ref_ids": ["11600"], "sent_idx": 0}, {"id": 3, "start": 124, "end": 139, "ref_url": "1979_Revolution", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 4, "end": 12, "ref_url": "Arch_dam", "ref_ids": ["866601"], "sent_idx": 4}, {"id": 5, "start": 171, "end": 187, "ref_url": "Hydroelectricity", "ref_ids": ["381399"], "sent_idx": 0}, {"id": 6, "start": 73, "end": 84, "ref_url": "Karun", "ref_ids": ["2242523"], "sent_idx": 1}, {"id": 7, "start": 243, "end": 258, "ref_url": "Masjed_Soleiman", "ref_ids": null, "sent_idx": 0}, {"id": 8, "start": 279, "end": 288, "ref_url": "Khuzestan", "ref_ids": null, "sent_idx": 0}, {"id": 9, "start": 290, "end": 294, "ref_url": "Iran", "ref_ids": ["14653"], "sent_idx": 0}, {"id": 10, "start": 4, "end": 12, "ref_url": "Arch_dam", "ref_ids": ["866601"], "sent_idx": 4}, {"id": 11, "start": 72, "end": 81, "ref_url": "Reservoir", "ref_ids": ["3292675"], "sent_idx": 5}, {"id": 12, "start": 24, "end": 38, "ref_url": "Power_stations", "ref_ids": null, "sent_idx": 6}, {"id": 13, "start": 19, "end": 32, "ref_url": "Water_turbine", "ref_ids": ["34053"], "sent_idx": 7}, 
{"id": 14, "start": 47, "end": 65, "ref_url": "Electric_generator", "ref_ids": ["82330"], "sent_idx": 7}, {"id": 15, "start": 57, "end": 72, "ref_url": "Electrical_grid", "ref_ids": ["20344155"], "sent_idx": 8}, {"id": 16, "start": 123, "end": 134, "ref_url": "Peak_demand", "ref_ids": ["11738757"], "sent_idx": 8}]} +{"id": "17891483", "title": "Rudolf Andreassen", "sentences": ["Rudolf Andreassen (June 22, 1909 – December 26, 1983) was a Norwegian boxer who competed in the 1936 Summer Olympics.", "In 1936 he was eliminated in the second round of the welterweight class after losing his fight to the upcoming bronze medalist Gerhard Pedersen."], "mentions": [{"id": 0, "start": 60, "end": 69, "ref_url": "Norway", "ref_ids": ["21241"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 75, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 96, "end": 116, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 53, "end": 71, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_welterweight", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 127, "end": 143, "ref_url": "Gerhard_Pedersen", "ref_ids": ["13845496"], "sent_idx": 1}]} +{"id": "17891484", "title": "Lawrence Township School District", "sentences": ["The Lawrence Township School District is a community public school district that serves students in pre-kindergarten through eighth grade from Lawrence Township, in Cumberland County, New Jersey, United States.", "As of the 2014-15 school year, the district and its one school had an enrollment of 618 students and 43.0 classroom teachers (on an FTE basis), for a student–teacher ratio of 14.4:1.", "The district is classified by the New Jersey Department of Education as being in District Factor Group \"CD\", the sixth-highest of eight groupings.", "District Factor Groups organize districts statewide to allow comparison by common socioeconomic characteristics of the local districts.", "From lowest 
socioeconomic status to highest, the categories are A, B, CD, DE, FG, GH, I and J.\nFor ninth through twelfth grades, public school students in Lawrence Township are assigned to one of two school districts based on the location of their residence.", "Students attend high school either in Bridgeton or Millville, based on sending/receiving relationships with the respective school districts, the Bridgeton Public Schools and the Millville Public Schools.", "Students sent to Bridgeton attend Bridgeton High School.", "Students sent to Millville join students from Commercial Township, Maurice River Township and Woodbine and attend Memorial High School for ninth grade and half of the tenth grade and Millville Senior High School for half of the tenth grade through the twelfth grade."], "mentions": [{"id": 0, "start": 129, "end": 135, "ref_url": "State_school", "ref_ids": ["471603"], "sent_idx": 4}, {"id": 1, "start": 123, "end": 138, "ref_url": "School_district", "ref_ids": ["339716"], "sent_idx": 5}, {"id": 2, "start": 100, "end": 116, "ref_url": "Pre-kindergarten", "ref_ids": ["2220265"], "sent_idx": 0}, {"id": 3, "start": 125, "end": 137, "ref_url": "Eighth_grade", "ref_ids": ["1065988"], "sent_idx": 0}, {"id": 4, "start": 155, "end": 172, "ref_url": "Lawrence_Township,_Cumberland_County,_New_Jersey", "ref_ids": ["125144"], "sent_idx": 4}, {"id": 5, "start": 165, "end": 182, "ref_url": "Cumberland_County,_New_Jersey", "ref_ids": ["93447"], "sent_idx": 0}, {"id": 6, "start": 34, "end": 44, "ref_url": "New_Jersey", "ref_ids": ["21648"], "sent_idx": 2}, {"id": 7, "start": 132, "end": 135, "ref_url": "Full-time_equivalent", "ref_ids": ["353891"], "sent_idx": 1}, {"id": 8, "start": 150, "end": 171, "ref_url": "Student–teacher_ratio", "ref_ids": ["5426552"], "sent_idx": 1}, {"id": 9, "start": 34, "end": 68, "ref_url": "New_Jersey_Department_of_Education", "ref_ids": ["3350408"], "sent_idx": 2}, {"id": 10, "start": 0, "end": 21, "ref_url": "District_Factor_Group", "ref_ids": 
["3064445"], "sent_idx": 3}, {"id": 11, "start": 12, "end": 25, "ref_url": "Socioeconomic", "ref_ids": null, "sent_idx": 4}, {"id": 12, "start": 139, "end": 144, "ref_url": "Ninth_grade", "ref_ids": ["1066024"], "sent_idx": 7}, {"id": 13, "start": 252, "end": 265, "ref_url": "Twelfth_grade", "ref_ids": ["1066040"], "sent_idx": 7}, {"id": 14, "start": 17, "end": 26, "ref_url": "Bridgeton,_New_Jersey", "ref_ids": ["125133"], "sent_idx": 6}, {"id": 15, "start": 17, "end": 26, "ref_url": "Millville,_New_Jersey", "ref_ids": ["125146"], "sent_idx": 7}, {"id": 16, "start": 71, "end": 101, "ref_url": "Sending/receiving_relationship", "ref_ids": ["6042521"], "sent_idx": 5}, {"id": 17, "start": 145, "end": 169, "ref_url": "Bridgeton_Public_Schools", "ref_ids": ["5514659"], "sent_idx": 5}, {"id": 18, "start": 178, "end": 202, "ref_url": "Millville_Public_Schools", "ref_ids": ["5528517"], "sent_idx": 5}, {"id": 19, "start": 34, "end": 55, "ref_url": "Bridgeton_High_School", "ref_ids": ["9932101"], "sent_idx": 6}, {"id": 20, "start": 46, "end": 65, "ref_url": "Commercial_Township,_New_Jersey", "ref_ids": ["125135"], "sent_idx": 7}, {"id": 21, "start": 67, "end": 89, "ref_url": "Maurice_River_Township,_New_Jersey", "ref_ids": ["125145"], "sent_idx": 7}, {"id": 22, "start": 94, "end": 102, "ref_url": "Woodbine,_New_Jersey", "ref_ids": ["125131"], "sent_idx": 7}, {"id": 23, "start": 114, "end": 134, "ref_url": "Memorial_High_School_(Millville,_New_Jersey)", "ref_ids": ["6098307"], "sent_idx": 7}, {"id": 24, "start": 183, "end": 211, "ref_url": "Millville_Senior_High_School", "ref_ids": ["8089801"], "sent_idx": 7}]} +{"id": "17891485", "title": "Pseudograpsus setosus", "sentences": ["Pseudograpsus setosus () is a species of edible crab endemic to the coasts of Chile, Ecuador and Peru; it is a benthic predator that lives in the subtidal and intertidal zones in temperate waters from sea levels down to depths of .", "Its diet consists of clams, picorocos and other crabs.", "Its 
geographic distribution ranges from the equator in Ecuador to the Taitao Peninsula at 47° S."], "mentions": [{"id": 0, "start": 48, "end": 52, "ref_url": "Crab", "ref_ids": ["93084"], "sent_idx": 1}, {"id": 1, "start": 53, "end": 60, "ref_url": "Endemism", "ref_ids": ["937971"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 83, "ref_url": "Chile", "ref_ids": ["5489"], "sent_idx": 0}, {"id": 3, "start": 55, "end": 62, "ref_url": "Ecuador", "ref_ids": ["9334"], "sent_idx": 2}, {"id": 4, "start": 97, "end": 101, "ref_url": "Peru", "ref_ids": ["170691"], "sent_idx": 0}, {"id": 5, "start": 111, "end": 118, "ref_url": "Benthic", "ref_ids": null, "sent_idx": 0}, {"id": 6, "start": 119, "end": 127, "ref_url": "Predator", "ref_ids": null, "sent_idx": 0}, {"id": 7, "start": 146, "end": 154, "ref_url": "Subtidal", "ref_ids": null, "sent_idx": 0}, {"id": 8, "start": 159, "end": 169, "ref_url": "Intertidal", "ref_ids": null, "sent_idx": 0}, {"id": 9, "start": 21, "end": 25, "ref_url": "Clam", "ref_ids": ["55528"], "sent_idx": 1}, {"id": 10, "start": 28, "end": 36, "ref_url": "Picoroco", "ref_ids": null, "sent_idx": 1}, {"id": 11, "start": 44, "end": 51, "ref_url": "Equator", "ref_ids": ["20611356"], "sent_idx": 2}, {"id": 12, "start": 55, "end": 62, "ref_url": "Ecuador", "ref_ids": ["9334"], "sent_idx": 2}, {"id": 13, "start": 70, "end": 86, "ref_url": "Taitao_Peninsula", "ref_ids": ["7764329"], "sent_idx": 2}, {"id": 14, "start": 90, "end": 95, "ref_url": "47th_parallel_south", "ref_ids": ["20923549"], "sent_idx": 2}]} +{"id": "17891491", "title": "Raul Rodriguez", "sentences": ["Raul Rodriguez or Raúl Rodríguez may refer to:"], "mentions": []} +{"id": "17891505", "title": "Aerico", "sentences": ["An Aerico ( or ) is a disease demon from Greek folklore.", "It is often believed to normally dwell unseen in the air, though it sometimes takes the form of a human.", "As a disease demon, Aerico are believed to spread disease, such as the plague and malaria."], "mentions": [{"id": 0, 
"start": 13, "end": 18, "ref_url": "Demon", "ref_ids": ["8280"], "sent_idx": 2}, {"id": 1, "start": 41, "end": 46, "ref_url": "Greeks", "ref_ids": ["42056"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 77, "ref_url": "Plague_(disease)", "ref_ids": ["4746"], "sent_idx": 2}, {"id": 3, "start": 82, "end": 89, "ref_url": "Malaria", "ref_ids": ["20423"], "sent_idx": 2}]} +{"id": "17891507", "title": "Xavier Haas", "sentences": ["Xavier Haas, (1907–1950) was a French painter and engraver.", "Though born in Paris of Alsacian descent, he is most associated with Breton nationalist art and design."], "mentions": [{"id": 0, "start": 15, "end": 20, "ref_url": "Paris", "ref_ids": ["22989"], "sent_idx": 1}, {"id": 1, "start": 69, "end": 87, "ref_url": "Breton_nationalist", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891510", "title": "Wet Blanket Policy", "sentences": ["Wet Blanket Policy is the 32nd animated cartoon short subject in the \"Woody Woodpecker\" series.", "Released theatrically on August 20, 1948, the film was produced by Walter Lantz Productions and distributed by United Artists.", "The title is a play-on-words about a type of insurance policy."], "mentions": [{"id": 0, "start": 31, "end": 47, "ref_url": "Animated_cartoon", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 48, "end": 61, "ref_url": "Short_subject", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 70, "end": 86, "ref_url": "Woody_Woodpecker", "ref_ids": ["200953"], "sent_idx": 0}, {"id": 3, "start": 67, "end": 91, "ref_url": "Walter_Lantz_Productions", "ref_ids": ["177425"], "sent_idx": 1}, {"id": 4, "start": 111, "end": 125, "ref_url": "United_Artists", "ref_ids": ["174319"], "sent_idx": 1}]} +{"id": "17891529", "title": "Bradley Brookshire", "sentences": ["Bradley Brookshire (born 1959) is an American-born harpsichordist.", "He earned his undergraduate degree from the University of Michigan School of Music (in Music History and Musicology), his Master of Music from Mannes College in New 
York City (Historical Performance and Conducting), and is a Candidate for the Ph.D. in Musicology at the Graduate Center of the City University of New York, where he is also pursuing a diploma in Interactive Technology and Pedagogy.", "His harpsichord studies were with Edward Parmentier, Andreas Staier, and Arthur Haas.", "In 1997, Brookshire began a traversal of the complete harpsichord works of Bach in a series of eight recitals in various concert halls in New York City.", "He has recorded Purcell, Handel, and Bach.", "In \"The New York Times\", James Oestreich cited his 2001 recording of Bach's French Suites in his Critic's Choice roundup of the best classical recordings of that year.", "He is included on two tracks of Robert Hill's recording of the early version of Bach's Art of the Fugue, and released his own recording of that work in 2007.", "It includes a second disc with which one may view the score of the work while listening to MP3 recordings of his performance."], "mentions": [{"id": 0, "start": 51, "end": 65, "ref_url": "Harpsichordist", "ref_ids": ["2678683"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 66, "ref_url": "University_of_Michigan", "ref_ids": ["31740"], "sent_idx": 1}, {"id": 2, "start": 143, "end": 157, "ref_url": "Mannes_College", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 138, "end": 151, "ref_url": "New_York_City", "ref_ids": ["645042"], "sent_idx": 3}, {"id": 4, "start": 293, "end": 320, "ref_url": "City_University_of_New_York", "ref_ids": ["7541"], "sent_idx": 1}, {"id": 5, "start": 34, "end": 51, "ref_url": "Edward_Parmentier", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 53, "end": 67, "ref_url": "Andreas_Staier", "ref_ids": ["7331148"], "sent_idx": 2}, {"id": 7, "start": 73, "end": 84, "ref_url": "Arthur_Haas", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 80, "end": 84, "ref_url": "Johann_Sebastian_Bach", "ref_ids": ["9906294"], "sent_idx": 6}, {"id": 9, "start": 16, "end": 23, "ref_url": "Henry_Purcell", 
"ref_ids": ["14135"], "sent_idx": 4}, {"id": 10, "start": 25, "end": 31, "ref_url": "Handel", "ref_ids": null, "sent_idx": 4}, {"id": 11, "start": 4, "end": 22, "ref_url": "The_New_York_Times", "ref_ids": ["30680"], "sent_idx": 5}, {"id": 12, "start": 76, "end": 89, "ref_url": "French_Suites", "ref_ids": null, "sent_idx": 5}, {"id": 13, "start": 32, "end": 43, "ref_url": "Robert_Hill_(musician)", "ref_ids": ["28718567"], "sent_idx": 6}, {"id": 14, "start": 87, "end": 103, "ref_url": "Art_of_the_Fugue", "ref_ids": null, "sent_idx": 6}]} +{"id": "17891582", "title": "Gora Prai airstrike", "sentences": ["The Gora Prai airstrike was an airstrike by the United States that resulted in the deaths of 11 paramilitary troops of the Pakistan Army Frontier Corps and 8 Taliban fighters in Pakistan's tribal areas.", "The attack took place late on June 10, 2008, during clashes between US coalition forces and militants from the Pakistani Taliban.", "The airstrike was in retaliation for an attack on troops about inside Afghan territory, originating from a wooded area near the Pakistani border checkpoint at Gorparai in Pakistan's Mohmand Agency."], "mentions": [{"id": 0, "start": 4, "end": 13, "ref_url": "Airstrike", "ref_ids": ["323167"], "sent_idx": 2}, {"id": 1, "start": 96, "end": 108, "ref_url": "Paramilitary", "ref_ids": ["146730"], "sent_idx": 0}, {"id": 2, "start": 123, "end": 136, "ref_url": "Pakistan_Army", "ref_ids": ["1965925"], "sent_idx": 0}, {"id": 3, "start": 137, "end": 151, "ref_url": "Frontier_Corps", "ref_ids": ["10429702"], "sent_idx": 0}, {"id": 4, "start": 121, "end": 128, "ref_url": "Taliban", "ref_ids": ["30635"], "sent_idx": 1}, {"id": 5, "start": 128, "end": 136, "ref_url": "Pakistan", "ref_ids": ["23235"], "sent_idx": 2}, {"id": 6, "start": 189, "end": 201, "ref_url": "Federally_Administered_Tribal_Areas", "ref_ids": ["24760673"], "sent_idx": 0}, {"id": 7, "start": 182, "end": 196, "ref_url": "Mohmand_Agency", "ref_ids": null, "sent_idx": 2}]} +{"id": 
"17891585", "title": "NCAA Season 84", "sentences": ["NCAA Season 84 is the 2008–2009 season of the National Collegiate Athletic Association of the Philippines.", "Mapua Institute of Technology is the host of the 2008–2009 season, with the year's theme being \"To The Fore at 84: Building Bridges Through Sports\".", "NCAA Season 81's commissioner Joe Lipa returns as a commissioner."], "mentions": [{"id": 0, "start": 59, "end": 65, "ref_url": "Season_(sports)", "ref_ids": ["690345"], "sent_idx": 1}, {"id": 1, "start": 46, "end": 86, "ref_url": "National_Collegiate_Athletic_Association_(Philippines)", "ref_ids": ["1929415"], "sent_idx": 0}, {"id": 2, "start": 94, "end": 105, "ref_url": "Philippines", "ref_ids": ["23440"], "sent_idx": 0}, {"id": 3, "start": 0, "end": 29, "ref_url": "Mapúa_Institute_of_Technology", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 0, "end": 14, "ref_url": "NCAA_Season_81", "ref_ids": ["2558867"], "sent_idx": 2}, {"id": 5, "start": 30, "end": 38, "ref_url": "Joe_Lipa", "ref_ids": ["20778271"], "sent_idx": 2}]} +{"id": "17891592", "title": "Full Fathom Five (album)", "sentences": ["Full Fathom Five is a live album by the band Clutch.", "The full name of the album is Full Fathom Five: Audio Field Recordings, differentiating from the accompanying DVD release \"\".", "The DVD and Album differ in that four cities recorded at are on the DVD, but the album has only three cities, being:\nThe Metro Theatre, Sydney, NSW, Australia (tracks 11-15, December 15, 2007);", "The Starland Ballroom, Sayreville, NJ (tracks 4 & 5, December 29, 2007 and tracks 6 & 7, December 28th 2007); and Mr Smalls Theatre, Pittsburgh, PA (tracks 1-3 and 8-10, March 20, 2008).", "The reference to the HiFi Bar as the Sydney concert in some source material is incorrect, as it is in Melbourne, Victoria, Australia (900 kilometers south of Sydney), which is from another live Australian recording, .", "The same owners did open a venue in Sydney in 2012 with the same name."], 
"mentions": [{"id": 0, "start": 22, "end": 32, "ref_url": "Live_album", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 45, "end": 51, "ref_url": "Clutch_(band)", "ref_ids": ["526685"], "sent_idx": 0}, {"id": 2, "start": 36, "end": 42, "ref_url": "Sydney", "ref_ids": ["27862"], "sent_idx": 5}, {"id": 3, "start": 144, "end": 147, "ref_url": "New_South_Wales", "ref_ids": ["21654"], "sent_idx": 2}, {"id": 4, "start": 123, "end": 132, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 4}, {"id": 5, "start": 23, "end": 33, "ref_url": "Sayreville", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 35, "end": 37, "ref_url": "New_Jersey", "ref_ids": ["21648"], "sent_idx": 3}, {"id": 7, "start": 133, "end": 143, "ref_url": "Pittsburgh", "ref_ids": ["25101"], "sent_idx": 3}, {"id": 8, "start": 145, "end": 147, "ref_url": "Pennsylvania", "ref_ids": ["23332"], "sent_idx": 3}, {"id": 9, "start": 102, "end": 111, "ref_url": "Melbourne", "ref_ids": ["17306237"], "sent_idx": 4}, {"id": 10, "start": 113, "end": 121, "ref_url": "Victoria_(Australia)", "ref_ids": ["4689460"], "sent_idx": 4}, {"id": 11, "start": 123, "end": 132, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 4}]} +{"id": "17891593", "title": "Helen Loggie", "sentences": ["Helen Amanda Loggie (1895 in Bellingham, Washington - 1976) was a U.S. 
artist, primarily known for her etchings of trees and coastlines.", "She attended Smith College in Massachusetts.", "Between 1916 and 1924, she studied at the Art Students League of New York.", "Here she began to develop a style which rejected such modernist themes as those trumpeted by the Ashcan School.", "She toured Europe in 1926-27 where she made an extensive body of sketches and paintings.", "Soon before returning home to Washington, she met etcher and printer John Taylor Arms, beginning a 25-year collaboration.", "In 1957 she was elected an Academician of the National Academy of Design.", "In 1930-31 Loggie built a house on the shores of Eastsound, Orcas Island, where she spent most summers creating her small pencil drawings and etchings.", "Her archives, consisting of numerous prints and drawings, reside in the collection at the Western Gallery of Art at Western Washington University in Bellingham, Washington.", "The largest collection of her work and papers is in The Lambiel Museum on Orcas Island."], "mentions": [{"id": 0, "start": 149, "end": 171, "ref_url": "Bellingham,_Washington", "ref_ids": ["39300"], "sent_idx": 8}, {"id": 1, "start": 42, "end": 73, "ref_url": "Art_Students_League_of_New_York", "ref_ids": ["3715459"], "sent_idx": 2}, {"id": 2, "start": 97, "end": 110, "ref_url": "Ashcan_School", "ref_ids": ["768082"], "sent_idx": 3}, {"id": 3, "start": 74, "end": 86, "ref_url": "Orcas_Island", "ref_ids": ["593901"], "sent_idx": 9}, {"id": 4, "start": 116, "end": 145, "ref_url": "Western_Washington_University", "ref_ids": ["177826"], "sent_idx": 8}]} +{"id": "17891609", "title": "Comactinia", "sentences": ["Comactinia is a genus of crinoids, characteristically with 10 arms, belonging to the family Comasteridae.", "There are three described species, two from the western Atlantic and one, \"Comactinia titan\", recently described from the western Pacific."], "mentions": [{"id": 0, "start": 25, "end": 32, "ref_url": "Crinoid", "ref_ids": ["62175"], 
"sent_idx": 0}, {"id": 1, "start": 92, "end": 104, "ref_url": "Comasteridae", "ref_ids": ["17889636"], "sent_idx": 0}, {"id": 2, "start": 56, "end": 64, "ref_url": "Atlantic", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 75, "end": 91, "ref_url": "Comactinia_titan", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 130, "end": 137, "ref_url": "Pacific", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891626", "title": "Boston College Eagles women's basketball", "sentences": ["Boston College Women's Basketball is the NCAA Division I women's basketball program for Boston College.", "Their nickname is the Eagles.", "They are coached by Joanna Bernabei-McNamee, entering her second year."], "mentions": [{"id": 0, "start": 41, "end": 56, "ref_url": "NCAA_Division_I", "ref_ids": ["669931"], "sent_idx": 0}, {"id": 1, "start": 57, "end": 75, "ref_url": "Women's_basketball", "ref_ids": ["667977"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 14, "ref_url": "Boston_College", "ref_ids": ["239811"], "sent_idx": 0}, {"id": 3, "start": 22, "end": 28, "ref_url": "Boston_College_Eagles", "ref_ids": ["2544616"], "sent_idx": 1}, {"id": 4, "start": 20, "end": 43, "ref_url": "Joanna_Bernabei-McNamee", "ref_ids": ["50723921"], "sent_idx": 2}]} +{"id": "17891627", "title": "2000 Supercoppa Italiana", "sentences": ["The 2000 Supercoppa Italiana was a match contested by the 1999–2000 Serie A winners Lazio and the 1999–2000 Coppa Italia runners-up Internazionale.", "Since the Coppa Italia winners were also the newly appointed league champions, the Super Cup spot was given to the runners-up of the Coppa Italia, Internazionale.", "The match resulted in a 4-3 win for Lazio."], "mentions": [{"id": 0, "start": 9, "end": 28, "ref_url": "Supercoppa_Italiana", "ref_ids": ["3035684"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 75, "ref_url": "1999–2000_Serie_A", "ref_ids": ["5495239"], "sent_idx": 0}, {"id": 2, "start": 36, "end": 41, "ref_url": "S.S._Lazio", "ref_ids": ["28984"], 
"sent_idx": 2}, {"id": 3, "start": 98, "end": 120, "ref_url": "1999–2000_Coppa_Italia", "ref_ids": ["42484220"], "sent_idx": 0}, {"id": 4, "start": 147, "end": 161, "ref_url": "F.C._Internazionale_Milano", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891631", "title": "Wakefield High School (Arlington County, Virginia)", "sentences": ["Wakefield High School is one of three public high schools located in Arlington, Virginia, United States, closely bordering Alexandria.", "It has 140 teachers and 2,104 students in grades 912 as of the 20162017 academic year.", "Wakefield's athletes are called the \"Warriors\" and wear the colors kelly green, white, and black.", "The school participates in the Virginia High School League.", "The Warriors are represented by an image of a knight holding a sword, which was preceded by an Indian head until 2003."], "mentions": [{"id": 0, "start": 69, "end": 88, "ref_url": "Arlington_County,_Virginia", "ref_ids": ["91363"], "sent_idx": 0}, {"id": 1, "start": 123, "end": 133, "ref_url": "Alexandria,_Virginia", "ref_ids": ["18940583"], "sent_idx": 0}, {"id": 2, "start": 31, "end": 58, "ref_url": "Virginia_High_School_League", "ref_ids": ["1306252"], "sent_idx": 3}]} +{"id": "17891661", "title": "Special Enrollment Examination", "sentences": ["The Special Enrollment Examination (or SEE) is a test that individuals can take to become an Enrolled Agent in the United States.", "The Enrolled Agent credential is conferred and regulated by the Internal Revenue Service (IRS).", "The exam consists of three parts:\nAll of the questions on the examination are weighted equally, and the IRS grades the test on a bell curve.", "The test results are converted to a scale from 40-130, with 105 representing a passing score.", "Exam results can be seen right after completing the exam.", "Each exam is weighted by section according to the following:\nPart 1 – Individuals\nPart 2 – Businesses\nPart 3 – Representation, Practices and Procedures", "The total time 
allowed for taking the 300 questions on the exam is 10.5 hours (i.e., 3.5 hours for each of the three parts, with each part containing 100 questions).", "Candidates who wish to schedule an exam need a Preparer Tax Identification Number (PTIN).", "To obtain a PTIN, the applicant must complete a W-12 by mail, fax or online at irs.gov.", "Examinations are administered by computer at Prometric testing centers.", "Currently, the Special Enrollment Examination is given at nearly 300 Prometric testing centers located across the United States and internationally.", "Test centers are located in most major metropolitan areas.", "Once the applicant has a PTIN, he or she may register for the exam online at www.prometric.com/irs.", "Each section may be completed at the applicant's convenience.", "The parts do not have to be taken on the same day or on consecutive days.", "Each section can be taken up to four times in a testing window, and the score credit carries over for up to two years from the date of the examination.", "The testing window starts on May 1 every year, and ends at the end of February.", "There are no tests available during the months of March and April, at which time the exams are updated with the latest changes in the laws and the regulations.", "After passing all three parts of the exam, test takers may apply for enrollment to practice before the IRS.", "To apply, Form 23, Application for enrollment to practice before the IRS and check for $30 should be submitted.", "An application may also be submitted on-line at the government payments site: pay.gov."], "mentions": [{"id": 0, "start": 4, "end": 18, "ref_url": "Enrolled_Agent", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 64, "end": 88, "ref_url": "Internal_Revenue_Service", "ref_ids": ["23430752"], "sent_idx": 1}, {"id": 2, "start": 69, "end": 78, "ref_url": "Prometric", "ref_ids": ["2136761"], "sent_idx": 10}]} +{"id": "17891662", "title": "Mal Hallett", "sentences": ["Mal Hallett (born 1893, 
Roxbury, Massachusetts – died November 20, 1952, Boston) was an American jazz violinist and bandleader."], "mentions": [{"id": 0, "start": 24, "end": 46, "ref_url": "Roxbury,_Massachusetts", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 73, "end": 79, "ref_url": "Boston", "ref_ids": ["24437894"], "sent_idx": 0}, {"id": 2, "start": 97, "end": 101, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 0}, {"id": 3, "start": 116, "end": 126, "ref_url": "Bandleader", "ref_ids": ["445398"], "sent_idx": 0}]} +{"id": "17891671", "title": "Hugh White (American football)", "sentences": ["Hugh White (November 7, 1876 – June 11, 1936) was an American football player.", "He played for the University of Michigan from 1898 to 1901, and captained the national championship-winning 1901 team."], "mentions": [{"id": 0, "start": 53, "end": 70, "ref_url": "American_football", "ref_ids": ["18951490"], "sent_idx": 0}, {"id": 1, "start": 18, "end": 40, "ref_url": "University_of_Michigan", "ref_ids": ["31740"], "sent_idx": 1}, {"id": 2, "start": 78, "end": 99, "ref_url": "College_football_national_championships_in_NCAA_Division_I_FBS", "ref_ids": ["17278765"], "sent_idx": 1}, {"id": 3, "start": 108, "end": 117, "ref_url": "1901_Michigan_Wolverines_football_team", "ref_ids": ["14342210"], "sent_idx": 1}]} +{"id": "17891677", "title": "Taras Kermauner", "sentences": ["Taras Kermauner (13 April 1930 – 11 June 2008) was a Slovenian literary historian, critic, philosopher, essayist, playwright and translator."], "mentions": [{"id": 0, "start": 53, "end": 61, "ref_url": "Slovenia", "ref_ids": ["27338"], "sent_idx": 0}, {"id": 1, "start": 63, "end": 81, "ref_url": "Literary_history", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 83, "end": 89, "ref_url": "Critic", "ref_ids": ["161227"], "sent_idx": 0}, {"id": 3, "start": 91, "end": 102, "ref_url": "Philosopher", "ref_ids": ["23276"], "sent_idx": 0}, {"id": 4, "start": 104, "end": 112, "ref_url": "Essayist", "ref_ids": null, 
"sent_idx": 0}, {"id": 5, "start": 114, "end": 124, "ref_url": "Playwright", "ref_ids": ["39262"], "sent_idx": 0}, {"id": 6, "start": 129, "end": 139, "ref_url": "Translator", "ref_ids": null, "sent_idx": 0}]} +{"id": "17891710", "title": "John Hanson (English footballer)", "sentences": ["John Hanson (born 3 December 1962 in Bradford, England) is an English former footballer.", "He played for Bradford City and Scarborough.", "He currently resides in Christchurch, New Zealand."], "mentions": [{"id": 0, "start": 14, "end": 22, "ref_url": "Bradford", "ref_ids": ["23861381"], "sent_idx": 1}, {"id": 1, "start": 77, "end": 87, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 2, "start": 14, "end": 27, "ref_url": "Bradford_City_A.F.C.", "ref_ids": ["431144"], "sent_idx": 1}, {"id": 3, "start": 32, "end": 43, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 1}, {"id": 4, "start": 24, "end": 36, "ref_url": "Christchurch", "ref_ids": ["628782"], "sent_idx": 2}, {"id": 5, "start": 38, "end": 49, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 2}]} +{"id": "17891726", "title": "Norman Fisher (boxer)", "sentences": ["Norman Heaton Fisher (8 March 1916 – 12 February 1991) was a New Zealand boxer.", "He competed as a lightweight in the 1936 Summer Olympics, where he was eliminated in his first bout."], "mentions": [{"id": 0, "start": 61, "end": 72, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 0}, {"id": 1, "start": 36, "end": 56, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 1}, {"id": 2, "start": 89, "end": 99, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891744", "title": "Volleyball at the 2008 Summer Olympics – Men's tournament", "sentences": ["The men's tournament in volleyball at the 2008 Summer Olympics was the 12th edition of the event, organized by the world's governing body, the FIVB, in conjunction 
with the IOC.", "It was held in Beijing, China from 10 to 24 August 2008.", "The twelve competing teams were split equally into two pools of six teams.", "Each team played all other teams in their pool with the winning team gaining 2 points and the losing side 1 point.", "The top four teams from each pool progressed through to the quarterfinals.", "The rest of the tournament was a single-elimination bracket, with a bronze medal match held between the two semifinal losers.", "A total of 38 matches were played: 15 in each group, 4 quarterfinals, 2 semifinals, 1 bronze medal match, and 1 gold medal match."], "mentions": [{"id": 0, "start": 24, "end": 62, "ref_url": "Volleyball_at_the_2008_Summer_Olympics", "ref_ids": ["8014698"], "sent_idx": 0}, {"id": 1, "start": 143, "end": 147, "ref_url": "Fédération_Internationale_de_Volleyball", "ref_ids": ["1462538"], "sent_idx": 0}, {"id": 2, "start": 173, "end": 176, "ref_url": "International_Olympic_Committee", "ref_ids": ["15147"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 22, "ref_url": "Beijing", "ref_ids": ["18603746"], "sent_idx": 1}]} +{"id": "17891752", "title": "Holly Hotchner", "sentences": ["Holly Hotchner was the director of the Museum of Arts and Design, or MAD, (formerly the American Craft Museum), in New York City from 1996 to 2013.", "She was appointed by the museum’s board of governors in 1996.", "Under her leadership, MAD built a new home at 2 Columbus Circle in Manhattan, which opened in September 2008.", "After 16 years as director, she announced in January 2013 that she would step down at the end of April 2013.", "Prior to holding this position, she served as director of the New York Historical Society Museum from 1988-1995.", "There, her responsibilities included restructuring the administration, overseeing a staff of 40, participating in raising more than $40 million for the institution’s collections, and managing the museum's education programs, general operations, and facilities capital improvement 
program.", "From 1984-1988 she was chief conservator at the Historical Society, where she led a new program to enhance the care and cataloguing of the museum’s 1.5 million-object collection.", "Before joining the New York Historical Society, Hotchner was a Conservation fellow at the Metropolitan Museum of Art, and also held positions at The Tate Gallery in London, the Hirshhorn Museum in Washington, D.C., the Museum of Modern Art, New York and the Metropolitan Museum of Art in New York.", "She holds an M.A. in Art History and a certificate of conservation from the Institute of Fine Arts at New York University, and a B.A. in Art History and Studio Art from Trinity College.", "She has served on numerous panels for government funding of the arts, and as a juror for exhibitions and for artists’ awards.", "She also established Holly Hotchner Fine Arts Management, which provided collections management, cataloguing and conservation services to individuals and corporations.", "During her tenure, she has increased the MAD’s operating funds and endowment, while expanding its exhibition programming and outreach.", "She co-organized a number of critically acclaimed exhibitions at the Museum with accompanying catalogues, including Radical Lace & Subversive Knitting; the series on contemporary Native American art, Changing Hands: Art Without Reservation; Ruth Duckworth: Modernist Sculptor; Corporal Identity–", "Body Language; Beatrice Wood: A Centennial Tribute; 4 Acts in Glass; Art & Industry: 20th Century Porcelain from Sèvres; Defining Craft I: Collecting for the New Millennium; and Venetian Glass: 20th Century Italian Glass from the Olnick Spanu Collection."], "mentions": [{"id": 0, "start": 39, "end": 64, "ref_url": "Museum_of_Arts_and_Design", "ref_ids": ["6944308"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 46, "ref_url": "New_York_Historical_Society", "ref_ids": null, "sent_idx": 7}, {"id": 2, "start": 145, "end": 161, "ref_url": "Tate", "ref_ids": ["66101"], 
"sent_idx": 7}, {"id": 3, "start": 177, "end": 193, "ref_url": "Hirshhorn_Museum", "ref_ids": null, "sent_idx": 7}, {"id": 4, "start": 219, "end": 239, "ref_url": "Museum_of_Modern_Art", "ref_ids": ["66107"], "sent_idx": 7}, {"id": 5, "start": 90, "end": 116, "ref_url": "Metropolitan_Museum_of_Art", "ref_ids": ["37535"], "sent_idx": 7}, {"id": 6, "start": 102, "end": 121, "ref_url": "New_York_University", "ref_ids": ["7954455"], "sent_idx": 8}, {"id": 7, "start": 169, "end": 184, "ref_url": "Trinity_College_(Connecticut)", "ref_ids": ["502336"], "sent_idx": 8}]} +{"id": "17891758", "title": "Petrolisthes laevigatus", "sentences": ["Petrolisthes laevigatus is a species of porcelain crab found in Chile and Peru.", "Its carapace width is up to 2.5 cm.", "\"P. laevigatus\" lives under rocks in the middle and lower intertidal area.", "It feeds by filtering zooplankton."], "mentions": [{"id": 0, "start": 29, "end": 36, "ref_url": "Species", "ref_ids": ["21780446"], "sent_idx": 0}, {"id": 1, "start": 40, "end": 54, "ref_url": "Porcelain_crab", "ref_ids": ["1489218"], "sent_idx": 0}, {"id": 2, "start": 64, "end": 69, "ref_url": "Chile", "ref_ids": ["5489"], "sent_idx": 0}, {"id": 3, "start": 74, "end": 78, "ref_url": "Peru", "ref_ids": ["170691"], "sent_idx": 0}, {"id": 4, "start": 4, "end": 12, "ref_url": "Carapace", "ref_ids": ["603510"], "sent_idx": 1}, {"id": 5, "start": 22, "end": 33, "ref_url": "Zooplankton", "ref_ids": ["50558"], "sent_idx": 3}]} +{"id": "17891775", "title": "Ravages du temps", "sentences": ["Ravages du Temps is the first album of Paul Hébert released in 2001."], "mentions": [{"id": 0, "start": 39, "end": 50, "ref_url": "Paul_Hébert_(bluegrass_musician)", "ref_ids": null, "sent_idx": 0}]} +{"id": "17891802", "title": "Marshall Burke", "sentences": ["Marshall Burke (born 26 March 1959) is a Scottish former footballer.", "He played for Burnley, Leeds United, Blackburn Rovers, Lincoln City, Cardiff City, Tranmere Rovers and Scarborough.", "He was part 
of the Colne Dynamoes team which won the 1987–88 FA Vase competition."], "mentions": [{"id": 0, "start": 57, "end": 67, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 1, "start": 14, "end": 21, "ref_url": "Burnley_F.C.", "ref_ids": ["376725"], "sent_idx": 1}, {"id": 2, "start": 23, "end": 35, "ref_url": "Leeds_United_F.C.", "ref_ids": ["7609513"], "sent_idx": 1}, {"id": 3, "start": 37, "end": 53, "ref_url": "Blackburn_Rovers_F.C.", "ref_ids": ["298599"], "sent_idx": 1}, {"id": 4, "start": 55, "end": 67, "ref_url": "Lincoln_City_F.C.", "ref_ids": ["451163"], "sent_idx": 1}, {"id": 5, "start": 69, "end": 81, "ref_url": "Cardiff_City_F.C.", "ref_ids": ["5624949"], "sent_idx": 1}, {"id": 6, "start": 83, "end": 98, "ref_url": "Tranmere_Rovers_F.C.", "ref_ids": ["69219"], "sent_idx": 1}, {"id": 7, "start": 103, "end": 114, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 1}, {"id": 8, "start": 19, "end": 33, "ref_url": "Colne_Dynamoes_F.C.", "ref_ids": ["3674690"], "sent_idx": 2}, {"id": 9, "start": 53, "end": 60, "ref_url": "1987–88_in_English_football", "ref_ids": ["4039531"], "sent_idx": 2}, {"id": 10, "start": 61, "end": 68, "ref_url": "FA_Vase", "ref_ids": ["488393"], "sent_idx": 2}]} +{"id": "17891839", "title": "Ragnar Haugen", "sentences": ["Ragnar \"Sambo\" Haugen (August 25, 1911 – October 8, 1964) was a Norwegian boxer who competed in the 1936 Summer Olympics.", "In 1936 he eliminated in the second round of the lightweight class after losing his fight to Poul Kops."], "mentions": [{"id": 0, "start": 64, "end": 73, "ref_url": "Norway", "ref_ids": ["21241"], "sent_idx": 0}, {"id": 1, "start": 74, "end": 79, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 100, "end": 120, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 49, "end": 66, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 1}, {"id": 
4, "start": 93, "end": 102, "ref_url": "Poul_Kops", "ref_ids": ["18413807"], "sent_idx": 1}]} +{"id": "17891851", "title": "Purton Stoke", "sentences": ["Purton Stoke is a small village in north Wiltshire, England, within the civil parish of Purton.", "The village is located along a side road off the Purton to Cricklade road, approximately north of Purton village.", "A small country lane gives access to the nearby hamlet of Bentham, to the southwest.", "The River Key, a small tributary of the Thames, passes close to the west of the village."], "mentions": [{"id": 0, "start": 41, "end": 50, "ref_url": "Wiltshire", "ref_ids": ["51231"], "sent_idx": 0}, {"id": 1, "start": 72, "end": 84, "ref_url": "Civil_parishes_in_England", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 49, "end": 55, "ref_url": "Purton", "ref_ids": ["1383727"], "sent_idx": 1}, {"id": 3, "start": 59, "end": 68, "ref_url": "Cricklade", "ref_ids": ["1080633"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 13, "ref_url": "River_Key", "ref_ids": ["17723157"], "sent_idx": 3}, {"id": 5, "start": 40, "end": 46, "ref_url": "River_Thames", "ref_ids": ["49031"], "sent_idx": 3}]} +{"id": "17891855", "title": "Hammar Marshes", "sentences": ["The Hammar Marshes () are a large wetland complex in southeastern Iraq that are part of the Mesopotamian Marshes in the Tigris–Euphrates river system.", "Historically, the Hammar Marshes extended up to during seasonal floods.", "They were destroyed during the 1990s by large-scale drainage, dam and dike construction projects.", "Since 2003, they are recovering following reflooding and destruction of dams."], "mentions": [{"id": 0, "start": 34, "end": 41, "ref_url": "Wetland", "ref_ids": ["102024"], "sent_idx": 0}, {"id": 1, "start": 66, "end": 70, "ref_url": "Iraq", "ref_ids": ["7515928"], "sent_idx": 0}, {"id": 2, "start": 92, "end": 112, "ref_url": "Mesopotamian_Marshes", "ref_ids": ["11210192"], "sent_idx": 0}, {"id": 3, "start": 120, "end": 149, "ref_url": 
"Tigris–Euphrates_river_system", "ref_ids": ["209798"], "sent_idx": 0}]} +{"id": "17891862", "title": "David Revere McFadden", "sentences": ["David Revere McFadden is Chief Curator and Vice President for Programs and Collections at the Museum of Arts & Design in New York City.", "He served for two years as Executive Director of the Millicent Rogers Museum of Northern New Mexico in Taos, New Mexico.", "From 1978 to 1995, McFadden served as Curator of Decorative Arts and Assistant Director for Collections and Research at Cooper–Hewitt, National Design Museum, Smithsonian Institution.", "McFadden did his undergraduate and graduate work at the University of Minnesota, and received his graduate degree in the History of Art (Renaissance and Baroque Studies), with a secondary major in Chinese history.", "He served for six years as President of the International Council of Museums Decorative Arts and Design Committee.", "McFadden has organized more than one hundred exhibitions on decorative arts, design and craft, covering developments from the ancient world to the present day.", "Exhibitions highlighting important and sometimes overlooked areas of design include tiles, keys and locks, pottery and porcelain, glass and silver.", "Most of these exhibitions were accompanied by catalogues.", "Thematic exhibitions curated by McFadden include Wine: Celebration and Ceremony, which studied the social and material culture of wine throughout history; L'Art de Vivre: Decorative Arts and Design In France 1789–1989, organized an official manifestation of the bicentennial of the French Revolution, Scandinavian Modern 1880–1980, the first American exhibition to survey modern design from all five Nordic countries over a one-hundred-year period; Hair, a landmark exploration of the visual and design history of human hair; Toward Modern Design: Revival and Reform in Applied Arts 1880–1920; Good Offices and Beyond: The Evolution of the Workplace, a survey of designs for the office in the 
twentieth century; Structure and Style: Modernism in Dutch Applied Arts 1880–1930, the first American exhibition devoted to Dutch applied arts from that half century.", "For the Museum of Arts and Design, McFadden has organized exhibitions that include Defining Craft (2000), Changing Hands: Art Without Reservation—Contemporary Native American Art (with co-curator Ellen Taubman), Radical Lace and Subversive Knitting (2007), Pricked: Extreme Embroidery (2008), Second Lives: Remixing the Ordinary (2008), Read My Pins: The Madeleine Albright Collection, Slash:Paper Under the Knife (2009).", "McFadden's other exhibitions have included such diverse subjects as eighteenth-century European porcelains, English Majolica of the nineteenth century, puppets, American art pottery, and Hungarian jewelry and silver, Art Nouveau ceramics, contemporary art quilts, and jewelry.", "McFadden has published more than 90 books, articles, catalogues, and reviews worldwide, and has delivered more than 200 lectures and papers to national and international audiences.", "He has spoken at such cultural institutions at the Metropolitan Museum of Art in New York, and the École du Louvre in Paris.", "He has served on many international panels and juries, and on professional, civic, governmental, and advisory boards, most notably: The Arts Advisory Board of the American Federation of Arts; the Committee for the Restoration of Gracie Mansion, New York City's Mayoral Residence; the Exhibition Committee of the American-Scandinavian Foundation; and is a member of the Steering Committee of the Smithsonian Institution's Material Culture Forum\nFor his work in cultural affairs, McFadden has been named Knight, First Class, of the Order of the Lion of Finland (1984); Knight 1st Class of the Order of the Polar Star of Sweden by Carl XVI Gustaf of Sweden (1988); and Chevalier de l'Ordre des Arts et des Lettres by the Republic of France (1989).", "Three of McFadden's exhibition projects and/or catalogue were 
awarded the Presidential Design Award for Excellence (1994, 1995, and 1997).", "In August 2013, McFadden announced that he would retire from his position at the Museum of Arts and Design at the end of 2013, planning to pursue independent curatorial and writing projects."], "mentions": [{"id": 0, "start": 94, "end": 117, "ref_url": "Museum_of_Arts_&_Design", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 245, "end": 258, "ref_url": "New_York_City", "ref_ids": ["645042"], "sent_idx": 13}, {"id": 2, "start": 53, "end": 76, "ref_url": "Millicent_Rogers_Museum", "ref_ids": ["31807052"], "sent_idx": 1}, {"id": 3, "start": 103, "end": 119, "ref_url": "Taos,_New_Mexico", "ref_ids": ["53717"], "sent_idx": 1}, {"id": 4, "start": 120, "end": 157, "ref_url": "Cooper–Hewitt,_National_Design_Museum", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 56, "end": 79, "ref_url": "University_of_Minnesota", "ref_ids": ["416813"], "sent_idx": 3}, {"id": 6, "start": 137, "end": 148, "ref_url": "Renaissance", "ref_ids": ["25532"], "sent_idx": 3}, {"id": 7, "start": 153, "end": 160, "ref_url": "Baroque", "ref_ids": ["3957"], "sent_idx": 3}, {"id": 8, "start": 197, "end": 212, "ref_url": "Chinese_history", "ref_ids": null, "sent_idx": 3}, {"id": 9, "start": 44, "end": 76, "ref_url": "International_Council_of_Museums", "ref_ids": ["1444910"], "sent_idx": 4}, {"id": 10, "start": 96, "end": 105, "ref_url": "Porcelain", "ref_ids": ["167718"], "sent_idx": 10}, {"id": 11, "start": 130, "end": 135, "ref_url": "Glass", "ref_ids": ["12581"], "sent_idx": 6}, {"id": 12, "start": 209, "end": 215, "ref_url": "Silver", "ref_ids": ["27119"], "sent_idx": 10}, {"id": 13, "start": 49, "end": 53, "ref_url": "Wine", "ref_ids": ["32961"], "sent_idx": 8}, {"id": 14, "start": 282, "end": 299, "ref_url": "French_Revolution", "ref_ids": ["11188"], "sent_idx": 8}, {"id": 15, "start": 321, "end": 332, "ref_url": "Scandinavia", "ref_ids": ["26740"], "sent_idx": 13}, {"id": 16, "start": 400, "end": 416, 
"ref_url": "Nordic_countries", "ref_ids": ["23711165"], "sent_idx": 8}, {"id": 17, "start": 449, "end": 453, "ref_url": "Hair", "ref_ids": ["14313"], "sent_idx": 8}, {"id": 18, "start": 220, "end": 224, "ref_url": "Lace", "ref_ids": ["89303"], "sent_idx": 9}, {"id": 19, "start": 240, "end": 248, "ref_url": "Knitting", "ref_ids": ["16622"], "sent_idx": 9}, {"id": 20, "start": 274, "end": 284, "ref_url": "Embroidery", "ref_ids": ["9996"], "sent_idx": 9}, {"id": 21, "start": 96, "end": 105, "ref_url": "Porcelain", "ref_ids": ["167718"], "sent_idx": 10}, {"id": 22, "start": 116, "end": 124, "ref_url": "Victorian_majolica", "ref_ids": ["12080970"], "sent_idx": 10}, {"id": 23, "start": 152, "end": 158, "ref_url": "Puppet", "ref_ids": ["21468429"], "sent_idx": 10}, {"id": 24, "start": 161, "end": 181, "ref_url": "American_art_pottery", "ref_ids": ["52874717"], "sent_idx": 10}, {"id": 25, "start": 217, "end": 228, "ref_url": "Art_Nouveau", "ref_ids": ["59551"], "sent_idx": 10}, {"id": 26, "start": 229, "end": 237, "ref_url": "Ceramic_art", "ref_ids": ["47106911"], "sent_idx": 10}, {"id": 27, "start": 252, "end": 262, "ref_url": "Quilt_art", "ref_ids": ["13267615"], "sent_idx": 10}, {"id": 28, "start": 197, "end": 204, "ref_url": "Jewelry", "ref_ids": null, "sent_idx": 10}, {"id": 29, "start": 51, "end": 77, "ref_url": "Metropolitan_Museum_of_Art", "ref_ids": ["37535"], "sent_idx": 12}, {"id": 30, "start": 99, "end": 114, "ref_url": "École_du_Louvre", "ref_ids": ["9794957"], "sent_idx": 12}, {"id": 31, "start": 163, "end": 190, "ref_url": "American_Federation_of_Arts", "ref_ids": ["21104016"], "sent_idx": 13}, {"id": 32, "start": 229, "end": 243, "ref_url": "Gracie_Mansion", "ref_ids": ["1206268"], "sent_idx": 13}, {"id": 33, "start": 395, "end": 418, "ref_url": "Smithsonian_Institution", "ref_ids": ["65828"], "sent_idx": 13}, {"id": 34, "start": 530, "end": 558, "ref_url": "Order_of_the_Lion_of_Finland", "ref_ids": ["1618856"], "sent_idx": 13}, {"id": 35, "start": 591, 
"end": 614, "ref_url": "Order_of_the_Polar_Star", "ref_ids": ["3531131"], "sent_idx": 13}, {"id": 36, "start": 628, "end": 653, "ref_url": "Carl_XVI_Gustaf_of_Sweden", "ref_ids": ["104743"], "sent_idx": 13}]} +{"id": "17891863", "title": "2000 England rugby union tour of South Africa", "sentences": [], "mentions": []} +{"id": "17891865", "title": "Focus (novel)", "sentences": ["Focus is a 1945 novel by Arthur Miller which deals with issues of racism, particularly antisemitism.", "In 2001, a film version, starring William H. Macy, was released."], "mentions": [{"id": 0, "start": 25, "end": 38, "ref_url": "Arthur_Miller", "ref_ids": ["2310"], "sent_idx": 0}, {"id": 1, "start": 87, "end": 99, "ref_url": "Antisemitism", "ref_ids": ["1078"], "sent_idx": 0}, {"id": 2, "start": 11, "end": 23, "ref_url": "Focus_(2001_film)", "ref_ids": ["17124518"], "sent_idx": 1}, {"id": 3, "start": 34, "end": 49, "ref_url": "William_H._Macy", "ref_ids": ["398602"], "sent_idx": 1}]} +{"id": "17891882", "title": "François-Napoléon-Marie Moigno", "sentences": ["Abbé François-Napoléon-Marie Moigno (; 15 April 1804 – 14 July 1884) was a French Catholic priest and one time Jesuit, as well as a physicist and author.", "He considered himself a student of Cauchy."], "mentions": [{"id": 0, "start": 0, "end": 4, "ref_url": "Abbé", "ref_ids": ["1091684"], "sent_idx": 0}, {"id": 1, "start": 82, "end": 97, "ref_url": "Catholic_priest", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 111, "end": 117, "ref_url": "Jesuit", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 132, "end": 141, "ref_url": "Physics", "ref_ids": ["22939"], "sent_idx": 0}, {"id": 4, "start": 35, "end": 41, "ref_url": "Cauchy", "ref_ids": null, "sent_idx": 1}]} +{"id": "17891889", "title": "Birthstone", "sentences": ["A birthstone is a gemstone that represents a person's month of birth.", "Birthstones are often worn as jewelry or as a pendant."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Gemstone", 
"ref_ids": ["12806"], "sent_idx": 0}, {"id": 1, "start": 30, "end": 37, "ref_url": "Jewelry", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 46, "end": 53, "ref_url": "Pendant", "ref_ids": ["247167"], "sent_idx": 1}]} +{"id": "17891904", "title": "Call Me Lightning (song)", "sentences": ["\" Call Me Lightning\" is a 1968 song by British rock band", "The Who, written by guitarist Pete Townshend.", "It appears on their fourth American album and the compilation album\" The Ultimate Collection\".", "In the United States, it was the follow- up single to their first U.S. Top 10 hit\" I Can See for Miles\" and reached No. 40 on the\" Billboard\" Hot 100 on 4 May 1968, their 16th most successful single on the Hot 100.", "A promo video was made for the song, and this video was featured in the 1979 documentary\" The Kids Are Alright\".", "The song was released in the United Kingdom as the B-side of\" Dogs\".", "The B-side,\" Dr. Jekyll and Mr. Hyde\", had been considered as a possible single release, along with\" Call Me Lightning,\" but it was released as the B-side of\" Call Me Lightning\" instead.", "The song received mediocre reception from the Who and their fans, and biographer John Atkins feels that\" Dr. Jekyll and Mr. 
Hyde\" was a better song than\" Call Me Lightning\", even though its horror film imagery was unsuitable for a single.", "The song was behind the naming of the rock group Call Me Lightning."], "mentions": [{"id": 0, "start": 0, "end": 7, "ref_url": "The_Who", "ref_ids": ["36517"], "sent_idx": 1}, {"id": 1, "start": 30, "end": 44, "ref_url": "Pete_Townshend", "ref_ids": ["154228"], "sent_idx": 1}, {"id": 2, "start": 69, "end": 92, "ref_url": "The_Ultimate_Collection_(The_Who_album)", "ref_ids": ["2571157"], "sent_idx": 2}, {"id": 3, "start": 83, "end": 102, "ref_url": "I_Can_See_for_Miles", "ref_ids": ["3030170"], "sent_idx": 3}, {"id": 4, "start": 129, "end": 149, "ref_url": "Billboard_Hot_100", "ref_ids": ["423161"], "sent_idx": 3}, {"id": 5, "start": 90, "end": 110, "ref_url": "The_Kids_Are_Alright_(film)", "ref_ids": ["12192712"], "sent_idx": 4}, {"id": 6, "start": 62, "end": 66, "ref_url": "Dogs_(The_Who_song)", "ref_ids": ["14134109"], "sent_idx": 5}, {"id": 7, "start": 13, "end": 36, "ref_url": "Dr._Jekyll_and_Mr._Hyde_(The_Who_song)", "ref_ids": ["27985958"], "sent_idx": 6}, {"id": 8, "start": 49, "end": 66, "ref_url": "Call_Me_Lightning_(band)", "ref_ids": ["13171732"], "sent_idx": 8}]} +{"id": "17891923", "title": "Les pauvres riches", "sentences": ["Les pauvres riches is the second CD of Paul Hébert released in 2007."], "mentions": [{"id": 0, "start": 39, "end": 50, "ref_url": "Paul_Hébert_(bluegrass_musician)", "ref_ids": null, "sent_idx": 0}]} +{"id": "17891952", "title": "Brecanavir", "sentences": ["Brecanavir (INN; codenamed GW640385) is a protease inhibitor which has been studied for the treatment of HIV.", "On December 2006, its developer, GlaxoSmith", "Kline discontinued further development because of insurmountable issues regarding formulation."], "mentions": [{"id": 0, "start": 12, "end": 15, "ref_url": "International_Nonproprietary_Name", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 42, "end": 60, "ref_url": 
"Protease_inhibitor_(pharmacology)", "ref_ids": ["653278"], "sent_idx": 0}, {"id": 2, "start": 105, "end": 108, "ref_url": "HIV", "ref_ids": ["14170"], "sent_idx": 0}]} +{"id": "17891966", "title": "Europe (anthem)", "sentences": ["\"Europe\" is the national anthem of Kosovo.", "It was composed by Mendi Mengjiqi.", "As with the national anthems of Spain, Bosnia and Herzegovina, and San Marino, it has no official lyrics, but unofficial lyrics have been written for it.", "It was adopted on 11 June 2008.", "It was chosen because it contained no references to any specific ethnic group.", "It was selected by the Assembly of Kosovo, with 72 MPs voting in favor, while 15 voted against and five abstained."], "mentions": [{"id": 0, "start": 12, "end": 27, "ref_url": "National_anthem", "ref_ids": ["234540"], "sent_idx": 2}, {"id": 1, "start": 35, "end": 41, "ref_url": "Kosovo", "ref_ids": ["17391"], "sent_idx": 5}, {"id": 2, "start": 19, "end": 33, "ref_url": "Mendi_Mengjiqi", "ref_ids": ["17953574"], "sent_idx": 1}, {"id": 3, "start": 32, "end": 37, "ref_url": "Marcha_Real", "ref_ids": ["248506"], "sent_idx": 2}, {"id": 4, "start": 39, "end": 61, "ref_url": "National_Anthem_of_Bosnia_and_Herzegovina", "ref_ids": ["251111"], "sent_idx": 2}, {"id": 5, "start": 67, "end": 77, "ref_url": "Inno_Nazionale_della_Repubblica", "ref_ids": ["5018469"], "sent_idx": 2}, {"id": 6, "start": 23, "end": 41, "ref_url": "Assembly_of_Kosovo", "ref_ids": null, "sent_idx": 5}]} +{"id": "17891992", "title": "Speaker of the Chamber of Deputies (Czech Republic)", "sentences": ["The President of the Chamber of Deputies of the Parliament of the Czech Republic (), normally referred to as the Speaker of the Chamber of Deputies, is an elected presiding member of the Chamber of Deputies of the Czech Republic.", "Since 22 November 2017, the speaker has been Radek Vondráček of ANO 2011.", "The speaker shall:"], "mentions": [{"id": 0, "start": 187, "end": 228, "ref_url": 
"Chamber_of_Deputies_of_the_Czech_Republic", "ref_ids": ["3991120"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 60, "ref_url": "Radek_Vondráček", "ref_ids": ["55864236"], "sent_idx": 1}, {"id": 2, "start": 64, "end": 72, "ref_url": "ANO_2011", "ref_ids": ["40808475"], "sent_idx": 1}]} +{"id": "17892000", "title": "Ya krasivaya", "sentences": ["Ya krasivaya (; ) was a Kazakh reality television show competition of non-professional aspiring models broadcast on Hit TV for a contract with the international model agency.", "Its first cycle premiered and finished in 2005.", "18 girls competed with only 17 of them moving into the model house.", "After several weeks Saule Zhunusova made a voluntary exit of the show because she felt homesick and bullied by the other contestants.", "In a live show final the top four battled it out for the win with Altyn Bekanova triumphing over Zhanna Saryeva in the super final.", "Many of the show's alumni went on to represent Kazakhstan in several beauty pageants afterwards.", "For now there are no further plans to continue the show."], "mentions": [{"id": 0, "start": 47, "end": 53, "ref_url": "Kazakhstan", "ref_ids": ["16642"], "sent_idx": 5}, {"id": 1, "start": 31, "end": 49, "ref_url": "Reality_television", "ref_ids": ["38539"], "sent_idx": 0}]} +{"id": "17892005", "title": "Queen's Own Dorset Yeomanry", "sentences": ["The Queen's Own Dorset Yeomanry was a yeomanry regiment of the British Army founded in 1794 as the Dorsetshire Regiment of Volunteer Yeomanry Cavalry in response to the growing threat of invasion during the Napoleonic wars.", "It gained its first royal association in 1833 as The Princess Victoria's Regiment of Dorset Yeomanry Cavalry, and its second, in 1843, as the Queen's Own Regiment of Dorset Yeomanry Cavalry."], "mentions": [{"id": 0, "start": 38, "end": 46, "ref_url": "Yeomanry", "ref_ids": ["365375"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 55, "ref_url": "Regiment", "ref_ids": ["166653"], "sent_idx": 0}, 
{"id": 2, "start": 63, "end": 75, "ref_url": "British_Army", "ref_ids": ["4887"], "sent_idx": 0}, {"id": 3, "start": 207, "end": 222, "ref_url": "Napoleonic_wars", "ref_ids": null, "sent_idx": 0}]} +{"id": "17892012", "title": "Stewart Mell", "sentences": ["Stewart Albert Mell (born 15 October 1957) is an English former professional footballer who scored 30 goals from 145 appearances in the Football League, playing for Doncaster Rovers, Halifax Town and Scarborough.", "He began his football career as a youngster with Hull City, played non-league football for Appleby Frodingham, Burton Albion, Boston United and Goole Town, and was a member of the Scarborough team that won the 1986–87 Football Conference to become the first club automatically promoted to the Football League.", "While a Burton Albion player, he was capped for England at semi-professional level."], "mentions": [{"id": 0, "start": 77, "end": 87, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 1, "start": 289, "end": 308, "ref_url": "The_Football_League", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 165, "end": 181, "ref_url": "Doncaster_Rovers_F.C.", "ref_ids": ["451160"], "sent_idx": 0}, {"id": 3, "start": 183, "end": 195, "ref_url": "Halifax_Town_A.F.C.", "ref_ids": ["451200"], "sent_idx": 0}, {"id": 4, "start": 180, "end": 191, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 1}, {"id": 5, "start": 49, "end": 58, "ref_url": "Hull_City_A.F.C.", "ref_ids": ["451358"], "sent_idx": 1}, {"id": 6, "start": 67, "end": 86, "ref_url": "Non-league_football", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 91, "end": 109, "ref_url": "Appleby_Frodingham_F.C.", "ref_ids": ["3932871"], "sent_idx": 1}, {"id": 8, "start": 8, "end": 21, "ref_url": "Burton_Albion_F.C.", "ref_ids": ["451172"], "sent_idx": 2}, {"id": 9, "start": 126, "end": 139, "ref_url": "Boston_United_F.C.", "ref_ids": ["451155"], "sent_idx": 1}, {"id": 10, "start": 144, "end": 154, 
"ref_url": "Goole_Town_F.C.", "ref_ids": ["6375403"], "sent_idx": 1}, {"id": 11, "start": 210, "end": 237, "ref_url": "1986–87_Football_Conference", "ref_ids": ["15559220"], "sent_idx": 1}, {"id": 12, "start": 48, "end": 82, "ref_url": "England_C_national_football_team", "ref_ids": null, "sent_idx": 2}]} +{"id": "17892013", "title": "Brazil–Cuba relations", "sentences": ["Brazilian-Cuban relations were classified as \"excellent\" in May 2008 following a meeting of foreign ministers.", "After the 1964 coup, the military regime severed diplomatic relations with Cuba, which were only resumed after redemocratization in 1986.", "During a January 2008 state visit to Cuba by Brazilian President Lula da Silva, the Brazilian leader expressed desire for his country to be Cuba's \"number one partner\".", "Since the 2018 Brazilian general election relations between Cuba and Brazil have waivered"], "mentions": [{"id": 0, "start": 92, "end": 109, "ref_url": "Foreign_ministers", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 10, "end": 19, "ref_url": "1964_Brazilian_coup_d'etat", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 25, "end": 40, "ref_url": "Brazilian_military_government", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 60, "end": 64, "ref_url": "Cuba", "ref_ids": ["5042481"], "sent_idx": 3}, {"id": 4, "start": 65, "end": 78, "ref_url": "Luiz_Inácio_Lula_da_Silva", "ref_ids": ["99328"], "sent_idx": 2}, {"id": 5, "start": 10, "end": 41, "ref_url": "2018_Brazilian_general_election", "ref_ids": ["49795012"], "sent_idx": 3}]} +{"id": "17892020", "title": "Karl Schmedes", "sentences": ["Karl Schmedes (November 14, 1908 – May 31, 1981) was a German boxer who competed in the 1936 Summer Olympics.", "He was born in Dortmund.", "In 1936 he was eliminated in the first round of the lightweight class after losing his fight to José Padilla."], "mentions": [{"id": 0, "start": 55, "end": 61, "ref_url": "Germany", "ref_ids": ["11867"], "sent_idx": 0}, {"id": 1, "start": 
62, "end": 67, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 88, "end": 108, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 23, "ref_url": "Dortmund", "ref_ids": ["149349"], "sent_idx": 1}, {"id": 4, "start": 52, "end": 69, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 96, "end": 108, "ref_url": "José_Padilla,_Jr.", "ref_ids": null, "sent_idx": 2}]} +{"id": "17892034", "title": "Maritsa, Rhodes", "sentences": ["Maritsa () is a village situated on west coast of the island of Rhodes, Greece, about 17 km far from the capital, between Kremasti and Psinthos.", "It is a part of the Municipality of Petaloudes.", "This village is renowned for its traditional taverns and active nightlife all year round.", "Near the village at the old international airport, car and motorbike races are held as well as model airplane shows.", "Outside the village, there are two churches: the Agios Georgios church, built in the 15th century and the Agios Nikolaos church, with wall paintings from the 15th century.", "Maritsa is also home to Iraklis AKS, a sports club with senior and junior teams competing at regional level.", "Iraklis football club also has experience playing for a year in the late 90's in the then 4th National division.", "Maritsa is renowned for its hospitable inhabitants and traditional character, it is the only village in Rhodes without a seasonal hotel, though studio apartments are available upon request.", "Every December all the young people organise a celebration for welcoming the new year at the centre square of the village.", "The money needed for the celebration is an offer from the villagers and the owners of the restaurants and bars.", "The ceremony takes place at about 19:00 of 31 December and 3 hrs later the party is \"fired up\" by a group playing local songs and dances performed by instruments like lyra 
and bouzouki.", "At the same period α live crib is opened where represents the birth of Christ with genuine animals, horses and goats.", "The crib is found in the top of Mt. Koymoyli, next to the holily abbey of transformation of Savior Christ.", "At the crib is offered hot tea from locally herbs as well as traditional rusk while the small children can come out photographs with Santa Claus.", "The crib functions from 24 Dec until 6 Jan.", "The village has taken his name from an Italian officer called \"Maritza\" that had the responsibility for the control of the village at the duration of Italian possession.", "Today it numbers about 1800 residents."], "mentions": [{"id": 0, "start": 104, "end": 110, "ref_url": "Rhodes", "ref_ids": ["26773183"], "sent_idx": 7}, {"id": 1, "start": 72, "end": 78, "ref_url": "Greece", "ref_ids": ["12108"], "sent_idx": 0}, {"id": 2, "start": 122, "end": 130, "ref_url": "Kremasti", "ref_ids": ["8775679"], "sent_idx": 0}, {"id": 3, "start": 20, "end": 32, "ref_url": "Communities_and_Municipalities_of_Greece", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 36, "end": 46, "ref_url": "Petaloudes", "ref_ids": ["6160381"], "sent_idx": 1}]} +{"id": "17892043", "title": "Suhaimi Kamaruddin", "sentences": ["Dato' Haji Suhaimi Kamaruddin is a Malaysian politician and a lawyer by profession.", "He was a former chief of UMNO Youth and a former Deputy Minister in the federal government.", "He is a member of the United Malays National Organisation (UMNO), the largest political party in the former ruling Barisan Nasional (BN) coalition.", "He was the Member of Parliament for Sepang in Selangor for three terms from 1974 to 1986 representing BN.", "He was also the former president of the youth association"], "mentions": [{"id": 0, "start": 0, "end": 5, "ref_url": "Malay_titles", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 6, "end": 10, "ref_url": "Hajji", "ref_ids": ["1265835"], "sent_idx": 0}, {"id": 2, "start": 25, "end": 35, "ref_url": 
"UMNO_Youth", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 22, "end": 57, "ref_url": "United_Malays_National_Organisation", "ref_ids": ["366318"], "sent_idx": 2}, {"id": 4, "start": 78, "end": 93, "ref_url": "Political_party", "ref_ids": ["23996"], "sent_idx": 2}, {"id": 5, "start": 115, "end": 131, "ref_url": "Barisan_Nasional", "ref_ids": ["126544"], "sent_idx": 2}, {"id": 6, "start": 11, "end": 31, "ref_url": "Dewan_Rakyat", "ref_ids": ["506017"], "sent_idx": 3}, {"id": 7, "start": 36, "end": 42, "ref_url": "Sepang_(federal_constituency)", "ref_ids": ["50489229"], "sent_idx": 3}]} +{"id": "17892059", "title": "Old Town Hall (Athol, Massachusetts)", "sentences": ["Old Town Hall is an historic town hall on 1307 Main Street in Athol, Massachusetts.", "Built in 1828 as a church, it served as town hall from 1847 to 1957, and now houses the local historical society.", "It is architecturally a good example of Federal period civic/religious architecture of the period.", "It was added to the National Register of Historic Places in 1987."], "mentions": [{"id": 0, "start": 40, "end": 49, "ref_url": "Town_hall", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 62, "end": 82, "ref_url": "Athol,_Massachusetts", "ref_ids": ["259490"], "sent_idx": 0}, {"id": 2, "start": 20, "end": 56, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 3}]} +{"id": "17892073", "title": "Robert Chapman (pastor)", "sentences": ["Robert Cleaver Chapman (4 January 1803 – 12 June 1902), known as the \"apostle of Love\", was a pastor, teacher and evangelist."], "mentions": [{"id": 0, "start": 114, "end": 124, "ref_url": "Evangelism", "ref_ids": ["105796"], "sent_idx": 0}]} +{"id": "17892079", "title": "Joseph Stone House", "sentences": ["The Joseph Stone House, also known as Potter Acres, is an historic First Period house at 35 Stone Street in Auburn, Massachusetts.", "The oldest portion of this 1.5 story timber frame house, its north side, three bays and 
chimney, was built in 1729.", "The southern two bays were added in 1790, and a rear leanto may have been added at the same time.", "In 1886 the rear ell underwent significant alteration, and was again enlarged in 1946.", "The main body of the house has retained its original woodwork and interior decoration.", "Its most notable owner was Joseph Stone, a composer of music for sacred texts, who lived here between 1785 and 1826.", "The house was listed on the National Register of Historic Places in 1986."], "mentions": [{"id": 0, "start": 67, "end": 79, "ref_url": "First_Period", "ref_ids": ["8062182"], "sent_idx": 0}, {"id": 1, "start": 108, "end": 129, "ref_url": "Auburn,_Massachusetts", "ref_ids": ["116921"], "sent_idx": 0}, {"id": 2, "start": 28, "end": 64, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 6}]} +{"id": "17892081", "title": "André Wollscheidt", "sentences": ["André Wollscheidt (April 24, 1914 in Esch-sur-Alzette – January 31, 1995) was a Luxembourgian boxer who competed in the 1936 Summer Olympics.", "In 1936 he was eliminated in the first round of the lightweight class after losing his fight to silver medalist Nikolai Stepulov of Estonia"], "mentions": [{"id": 0, "start": 37, "end": 53, "ref_url": "Esch-sur-Alzette", "ref_ids": ["245366"], "sent_idx": 0}, {"id": 1, "start": 80, "end": 90, "ref_url": "Luxembourg", "ref_ids": ["17515"], "sent_idx": 0}, {"id": 2, "start": 94, "end": 99, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 3, "start": 120, "end": 140, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 4, "start": 52, "end": 69, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 112, "end": 128, "ref_url": "Nikolai_Stepulov", "ref_ids": ["13844072"], "sent_idx": 1}, {"id": 6, "start": 132, "end": 139, "ref_url": "Estonia", "ref_ids": ["28222445"], "sent_idx": 1}]} +{"id": "17892091", "title": 
"Zhang Mengjin", "sentences": ["Zhang Mengjin (), is a former Chinese provincial politician from Zhejiang Province.", "He most recently served as vice-chairman of the Zhejiang People's Congress.", "He was also the executive vice-governor of the Zhejiang Province, and the former party chief of Ningbo."], "mentions": [{"id": 0, "start": 49, "end": 59, "ref_url": "Politician", "ref_ids": ["55959"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 64, "ref_url": "Zhejiang_Province", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 48, "end": 74, "ref_url": "National_People's_Congress", "ref_ids": ["38878740"], "sent_idx": 1}, {"id": 3, "start": 96, "end": 102, "ref_url": "Ningbo", "ref_ids": ["250022"], "sent_idx": 2}]} +{"id": "17892093", "title": "Tuttle Square School", "sentences": ["The Tuttle Square School, now the Auburn Historical Museum, is a historic school building at 41 South Street in Auburn, Massachusetts.", "The single story two-room wood frame building was constructed in 1922 to replace a smaller school on the site, in the face of increasing school population.", "The building's most prominent decorative feature is its Federal-style entry door surround, with pilasters supporting scrolled brackets, topped by a fanlight.", "The school served the Auburn public schools from 1923 to 1933.", "During the Second World War it was outfitted as a hospital in case of emergency, and briefly returned to use as a school between 1948 and 1953.", "Thereafter its primary purpose was to house school administrative office, and was finally vacated by the school administration in 1981.", "It then served as home to the Auburn District Nursing Association until about 1990.", "In 1999 the town voted to lease the building to the Auburn Historical Society for use as a museum.", "The building was listed on the National Register of Historic Places in 2002."], "mentions": [{"id": 0, "start": 112, "end": 133, "ref_url": "Auburn,_Massachusetts", "ref_ids": ["116921"], "sent_idx": 0}, 
{"id": 1, "start": 11, "end": 27, "ref_url": "Second_World_War", "ref_ids": null, "sent_idx": 4}, {"id": 2, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 8}]} +{"id": "17892099", "title": "Ees (place name)", "sentences": ["Ees (plural of \"ee\") is an archaic English term for a piece of land liable to flood, or water meadow.", "It is derived from the Anglo-Saxon \"¯eg\" (or \"¯ieg\") meaning \"'island', also used of a piece of firm land in a fen and of land situated on a stream or between streams\".", "It is still used locally in Greater Manchester to indicate former water meadows and flood basins adjoining the River Mersey: Chorlton Ees, Sale Ees and Stretford Ees.", "The term is also modified to \"eye\" and \"eea\" in the name of Park Eye (or Park Eea)."], "mentions": [{"id": 0, "start": 84, "end": 89, "ref_url": "Flood", "ref_ids": ["50482"], "sent_idx": 2}, {"id": 1, "start": 66, "end": 78, "ref_url": "Wet_meadow", "ref_ids": ["8587280"], "sent_idx": 2}, {"id": 2, "start": 23, "end": 34, "ref_url": "Anglo-Saxon", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 111, "end": 114, "ref_url": "Fen", "ref_ids": ["113530"], "sent_idx": 1}, {"id": 4, "start": 28, "end": 46, "ref_url": "Greater_Manchester", "ref_ids": ["79819"], "sent_idx": 2}, {"id": 5, "start": 111, "end": 123, "ref_url": "River_Mersey", "ref_ids": ["166357"], "sent_idx": 2}, {"id": 6, "start": 125, "end": 137, "ref_url": "Chorlton_Brook", "ref_ids": ["16260218"], "sent_idx": 2}, {"id": 7, "start": 139, "end": 143, "ref_url": "Sale,_Greater_Manchester", "ref_ids": ["367154"], "sent_idx": 2}, {"id": 8, "start": 152, "end": 161, "ref_url": "Stretford", "ref_ids": ["1074030"], "sent_idx": 2}]} +{"id": "17892104", "title": "Giovanni Battista Guglielmini", "sentences": ["Giovanni Battista Guglielmini (; 16 August 1763 – 15 December 1817) was an Italian physicist.", "Guglielmini's experiments take place in the list of experiments by 
scientists (Hooke, Guglielmini, Benzenberg, Reich, Foucault) to demonstrate the Earth's rotation experimentally."], "mentions": [{"id": 0, "start": 79, "end": 84, "ref_url": "Robert_Hooke", "ref_ids": ["49720"], "sent_idx": 1}, {"id": 1, "start": 111, "end": 116, "ref_url": "Ferdinand_Reich", "ref_ids": ["1267644"], "sent_idx": 1}, {"id": 2, "start": 118, "end": 126, "ref_url": "Jean_Bernard_Léon_Foucault", "ref_ids": null, "sent_idx": 1}]} +{"id": "17892112", "title": "Ayer Main Street Historic District", "sentences": ["The Ayer Main Street Historic District encompasses most of the historic central business district of Ayer, Massachusetts.", "It extends along Main Street between Park and Columbia Streets, and was mostly developed between 1872 and 1898 as a hub of railroad and roadway-based economic activity.", "The district was listed on the National Register of Historic Places in 1989."], "mentions": [{"id": 0, "start": 101, "end": 120, "ref_url": "Ayer,_Massachusetts", "ref_ids": ["28033879"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 2}]} +{"id": "17892118", "title": "Aberthaw High Level railway station", "sentences": ["Aberthaw High Level railway station was the Barry Railway station which served Aberthaw, located near the north shore of the Bristol Channel in the former Welsh county of South Glamorgan, and in the current county of Vale of Glamorgan."], "mentions": [{"id": 0, "start": 44, "end": 57, "ref_url": "Barry_Railway", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 0, "end": 8, "ref_url": "Aberthaw", "ref_ids": ["5838208"], "sent_idx": 0}, {"id": 2, "start": 125, "end": 140, "ref_url": "Bristol_Channel", "ref_ids": ["313697"], "sent_idx": 0}, {"id": 3, "start": 155, "end": 160, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 4, "start": 171, "end": 186, "ref_url": "South_Glamorgan", "ref_ids": ["411714"], "sent_idx": 0}, {"id": 5, "start": 
217, "end": 234, "ref_url": "Vale_of_Glamorgan", "ref_ids": ["52519"], "sent_idx": 0}]} +{"id": "17892121", "title": "Lowe Farm", "sentences": ["Lowe Farm is a growing farming community in southern Manitoba, Canada.", "It is located in the Rural Municipality of Morris, 10 minutes west of Morris, Manitoba, on highway #23.", "It was founded in the 1880s, when John Lowe (born February 20, 1824) managed a campaign to attract immigrants, particularly farmers and farm laborers for Manitoba.", "Lowe Farm became a model farm and a testing ground for farming innovations and later developed into a village.", "Like many towns in Southern Manitoba, it has no stop lights, though it does have three churches, a Credit Union, a Recreation Centre, a privately owned grocery store, farm supply, gas station, a Cafe, an elementary/junior high school.", "and community park.", "Lowe Farm has 3 baseball diamonds, 2 in the park and one on the school playground where a baseball program is run.", "The town is laid out in a reverse L-shape.", "Lowe Farm School has 121 students, there are 14 staff members.", "The parents have been developing a Natural Playground over the last 2 years that consists of a toboggan hill and swings.", "The school has been integrating technology by using smartboards, netbooks, desktops, and LCD projectors into their everyday experiences.", "The students are very engaged, participating in leadership activities like buddy reading, computer buddies, gym helpers, canteen helpers, and book order volunteers.", "They also participate in inter-school sports like basketball, volleyball, cross-country, badminton, floor hockey, and softball.", "The student council organizes events like Fall Frolics, Spirit Week, School Newspaper, Talent Show, and Oreo dunking.", "They also organize sales of school clothing.", "Lowe Farm continues to grow with a new influx of immigration.", "The school population has increased in the five months prior to February 2016 by 27%, from 84 students 
to 107."], "mentions": [{"id": 0, "start": 28, "end": 36, "ref_url": "Manitoba", "ref_ids": ["18926"], "sent_idx": 4}, {"id": 1, "start": 63, "end": 69, "ref_url": "Canada", "ref_ids": ["5042916"], "sent_idx": 0}, {"id": 2, "start": 21, "end": 49, "ref_url": "Rural_Municipality_of_Morris", "ref_ids": ["17184527"], "sent_idx": 1}, {"id": 3, "start": 70, "end": 86, "ref_url": "Morris,_Manitoba", "ref_ids": ["2507494"], "sent_idx": 1}, {"id": 4, "start": 19, "end": 36, "ref_url": "Southern_Manitoba", "ref_ids": ["7636116"], "sent_idx": 4}, {"id": 5, "start": 48, "end": 58, "ref_url": "Stop_light", "ref_ids": null, "sent_idx": 4}]} +{"id": "17892128", "title": "Barre Common District", "sentences": ["The Barre Common District is a historic district encompassing the town common of Barre, Massachusetts, and immediately adjacent historic buildings.", "It is bounded roughly by South, Exchange, Main, Pleasant, Broad, School and Grove Streets.", "The Barre common was laid out in 1792, and has been a focus of the town's civic life ever since, and is flanked by a number of high-quality Greek Revival buildings, as well as those in other styles.", "The district was listed on the National Register of Historic Places in 1976."], "mentions": [{"id": 0, "start": 31, "end": 48, "ref_url": "Historic_districts_in_the_United_States", "ref_ids": ["8708862"], "sent_idx": 0}, {"id": 1, "start": 81, "end": 101, "ref_url": "Barre,_Massachusetts", "ref_ids": ["259492"], "sent_idx": 0}, {"id": 2, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 3}]} +{"id": "17892131", "title": "Patricio Pérez", "sentences": ["Patricio Pablo Pérez (born 27 June 1985) is an Argentine footballer that currently plays for Primera B Metropolitana side Club Comunicaciones as midfielder."], "mentions": [{"id": 0, "start": 47, "end": 56, "ref_url": "Argentina", "ref_ids": ["18951905"], "sent_idx": 0}, {"id": 1, "start": 57, "end": 67, "ref_url": 
"Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 2, "start": 93, "end": 116, "ref_url": "Primera_B_Metropolitana", "ref_ids": ["5492018"], "sent_idx": 0}, {"id": 3, "start": 122, "end": 141, "ref_url": "Club_Comunicaciones", "ref_ids": ["5395739"], "sent_idx": 0}, {"id": 4, "start": 145, "end": 155, "ref_url": "Midfielder", "ref_ids": ["548981"], "sent_idx": 0}]} +{"id": "17892148", "title": "No. 4 Schoolhouse", "sentences": ["The No. 4 Schoolhouse is an historic school building on Farrington Road near Sunrise Avenue in Barre, Massachusetts.", "The one-room wood frame schoolhouse was built in 1883, and was at least the second schoolhouse in the town's fourth district.", "The building served as a schoolhouse until 1930, when the town centralized its schools.", "In 1937 the building was purchased by a local community organization dedicated to its preservation.", "It has served as a community center since then.", "The building was listed on the National Register of Historic Places in 1988."], "mentions": [{"id": 0, "start": 95, "end": 115, "ref_url": "Barre,_Massachusetts", "ref_ids": ["259492"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 5}]} +{"id": "17892149", "title": "Telford College", "sentences": ["Telford College is a further education college in Telford, Shropshire, England.", "It operates from one main site and many in-company training sites and community-based courses spread out across Shropshire and the whole of the United Kingdom.", "During 2017 the college improved its Ofsted rating to Grade 3."], "mentions": [{"id": 0, "start": 21, "end": 46, "ref_url": "Further_education_college", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 0, "end": 7, "ref_url": "Telford", "ref_ids": ["199097"], "sent_idx": 0}, {"id": 2, "start": 112, "end": 122, "ref_url": "Shropshire", "ref_ids": ["51611"], "sent_idx": 1}, {"id": 3, "start": 71, "end": 78, 
"ref_url": "England", "ref_ids": ["9316"], "sent_idx": 0}, {"id": 4, "start": 112, "end": 122, "ref_url": "Shropshire", "ref_ids": ["51611"], "sent_idx": 1}, {"id": 5, "start": 144, "end": 158, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 1}]} +{"id": "17892153", "title": "GHLL", "sentences": ["GHLL (originally The Green Hills Literary Lantern) is a literary journal published by Truman State University.", "Founded in 1990 by Jack Smith, a professor of English and Philosophy at North Central Missouri College as an inexpensively-produced outlet for student and faculty work, the annual quickly grew to a regional and national mission.", "Towards the end of its existence as a print publication, the magazine typically consisted of 300 pages of poetry, fiction and nonfiction prose.", "In 2005, due to financial issues, the journal moved to an open-access, web-only format.", "The first digital issue was XVII.", "It was among the earliest of academic literary magazines available exclusively online and quickly collected a 'Best of the Web\" award\nListings in professional directories characterize the editorial policy as open, though with “emphasis on craft.” \"GHLL\" reviews poetry and novels from lesser-known, independent presses\nProse is selected by Adam Brooke Davis, verse by poet and novelist Joe Benevento.", "The editorial board has included Geoffrey Clark, Erin Flanagan, Barry Kitterman, Robert Garner McBrearty, Midge Raymond, Doug Rennie, Jude Russell, Nat Smith, John Talbird, and Mark Wisniewkski.\n\"GHLL\" has a tradition of openness to first-time authors, though a number of writers have made multiple appearances, including fiction writers Karl Harshbarger, William Eisner, Ian MacMillan, DeWitt Henry (founder of Ploughshares), Virgil Suarez and Walter Cummins.", "Regularly contributing poets include Lisa Alexander Baron, Jim Thomas, Joanne Lowery, Lee Rossi, David Lawrence, Mark Belair, Nancy Cherry, Sudie Nostrand, Terry Savoie, Francine Marie 
Tolf, Fredrick Zydek, William Jollif, Lee Slonimsky, Terry Godbey, Rachel Squires Bloom and Yvette Schnoeker-Shorb, as well as the first poet laureate of Missouri, Walter Bargen.", "The journal is indexed by numerous directories, including the \"American Directory of Writer's Guidelines\", \"Writers’ Market\" (various editions), the \"International Directory of Literary and Little Magazines\", \"Index of American Periodical Verse\""], "mentions": [{"id": 0, "start": 86, "end": 109, "ref_url": "Truman_State_University", "ref_ids": ["361870"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 53, "ref_url": "English_studies", "ref_ids": ["18974570"], "sent_idx": 1}, {"id": 2, "start": 58, "end": 68, "ref_url": "Philosophy", "ref_ids": ["13692155"], "sent_idx": 1}, {"id": 3, "start": 262, "end": 268, "ref_url": "Poetry", "ref_ids": ["22926"], "sent_idx": 5}, {"id": 4, "start": 322, "end": 329, "ref_url": "Fiction", "ref_ids": ["18949461"], "sent_idx": 6}, {"id": 5, "start": 137, "end": 142, "ref_url": "Prose", "ref_ids": ["52103"], "sent_idx": 2}, {"id": 6, "start": 338, "end": 354, "ref_url": "Karl_Harshbarger", "ref_ids": null, "sent_idx": 6}, {"id": 7, "start": 356, "end": 370, "ref_url": "William_Eisner", "ref_ids": null, "sent_idx": 6}, {"id": 8, "start": 372, "end": 385, "ref_url": "Ian_MacMillan_(author)", "ref_ids": ["42234122"], "sent_idx": 6}, {"id": 9, "start": 387, "end": 399, "ref_url": "DeWitt_Henry", "ref_ids": ["25037820"], "sent_idx": 6}, {"id": 10, "start": 412, "end": 424, "ref_url": "Ploughshares", "ref_ids": ["2167352"], "sent_idx": 6}, {"id": 11, "start": 427, "end": 440, "ref_url": "Virgil_Suarez", "ref_ids": null, "sent_idx": 6}, {"id": 12, "start": 445, "end": 459, "ref_url": "Walter_Cummins", "ref_ids": null, "sent_idx": 6}, {"id": 13, "start": 37, "end": 57, "ref_url": "Lisa_Alexander_Baron", "ref_ids": null, "sent_idx": 7}, {"id": 14, "start": 59, "end": 69, "ref_url": "Jim_Thomas_(poet)", "ref_ids": ["31363102"], "sent_idx": 7}, {"id": 15, 
"start": 71, "end": 84, "ref_url": "Joanne_Lowery", "ref_ids": null, "sent_idx": 7}, {"id": 16, "start": 86, "end": 95, "ref_url": "Lee_Rossi", "ref_ids": null, "sent_idx": 7}, {"id": 17, "start": 97, "end": 111, "ref_url": "David_Lawrence_(poet)", "ref_ids": null, "sent_idx": 7}, {"id": 18, "start": 113, "end": 124, "ref_url": "Mark_Belair", "ref_ids": null, "sent_idx": 7}, {"id": 19, "start": 126, "end": 138, "ref_url": "Nancy_Cherry", "ref_ids": null, "sent_idx": 7}, {"id": 20, "start": 140, "end": 154, "ref_url": "Sudie_Nostrand", "ref_ids": null, "sent_idx": 7}, {"id": 21, "start": 156, "end": 168, "ref_url": "Terry_Savoie", "ref_ids": null, "sent_idx": 7}, {"id": 22, "start": 170, "end": 189, "ref_url": "Francine_Marie_Tolf", "ref_ids": null, "sent_idx": 7}, {"id": 23, "start": 191, "end": 205, "ref_url": "Fredrick_Zydek", "ref_ids": null, "sent_idx": 7}, {"id": 24, "start": 207, "end": 221, "ref_url": "William_Jollif", "ref_ids": null, "sent_idx": 7}, {"id": 25, "start": 223, "end": 236, "ref_url": "Lee_Slonimsky", "ref_ids": ["57173179"], "sent_idx": 7}, {"id": 26, "start": 238, "end": 250, "ref_url": "Terry_Godbey", "ref_ids": null, "sent_idx": 7}, {"id": 27, "start": 252, "end": 272, "ref_url": "Rachel_Squires_Bloom", "ref_ids": null, "sent_idx": 7}, {"id": 28, "start": 277, "end": 299, "ref_url": "Yvette_Schnoeker-Shorb", "ref_ids": null, "sent_idx": 7}, {"id": 29, "start": 322, "end": 335, "ref_url": "Poet_laureate", "ref_ids": ["53011"], "sent_idx": 7}, {"id": 30, "start": 339, "end": 347, "ref_url": "Missouri", "ref_ids": ["19571"], "sent_idx": 7}, {"id": 31, "start": 349, "end": 362, "ref_url": "Walter_Bargen", "ref_ids": ["33267706"], "sent_idx": 7}]} +{"id": "17892176", "title": "David Bowman (footballer, born 1960)", "sentences": ["David Michael Bowman (born 16 December 1960 in Scarborough, England) is an English former footballer.", "He played for Bridlington Town and Scarborough."], "mentions": [{"id": 0, "start": 35, "end": 46, "ref_url": 
"Scarborough,_North_Yorkshire", "ref_ids": ["381565"], "sent_idx": 1}, {"id": 1, "start": 90, "end": 100, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 2, "start": 14, "end": 30, "ref_url": "Bridlington_Town_A.F.C.", "ref_ids": ["3667449"], "sent_idx": 1}, {"id": 3, "start": 35, "end": 46, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 1}]} +{"id": "17892188", "title": "Pan Burying Ground", "sentences": ["The Pan Burying Ground (also known as the Pan Cemetery, East Burying Ground, and East Cemetery) is an historic cemetery on 477 Main Street in Bolton, Massachusetts.", "Established in 1822, the cemetery was the second in the town.", "It was named for the area known locally as \"The Pan\", which had by then become the second-largest village center in the town.", "The original plot has 400 marked graves, and is presumed to contain further unmarked graves, based on a pattern of marker layout at the rear of this portion of the cemetery.", "The cemetery was enlarged to in the 1960s.", "It is distinctive in Bolton for its seven-chambered group tomb, built in 1839 at the southeast corner of the property.", "The cemetery was listed on the National Register of Historic Places in 2007."], "mentions": [{"id": 0, "start": 4, "end": 12, "ref_url": "Cemetery", "ref_ids": ["63752"], "sent_idx": 6}, {"id": 1, "start": 142, "end": 163, "ref_url": "Bolton,_Massachusetts", "ref_ids": ["116925"], "sent_idx": 0}, {"id": 2, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 6}]} +{"id": "17892215", "title": "JSONP", "sentences": ["JSONP or JSON-P (\"JSON with Padding\") is a JavaScript technique for requesting data by loading a tag.", "It was proposed by Bob Ippolito in 2005.", "JSONP enables sharing of data bypassing same-origin policy, which disallows running JavaScript code to read media Document Object Model (DOM) elements or XMLHttpRequest data fetched from outside the 
page's originating site.", "The originating site is indicated by a combination of URI scheme, host name, and port number."], "mentions": [{"id": 0, "start": 40, "end": 58, "ref_url": "Same-origin_policy", "ref_ids": ["1883276"], "sent_idx": 2}, {"id": 1, "start": 84, "end": 94, "ref_url": "JavaScript", "ref_ids": ["9845"], "sent_idx": 2}, {"id": 2, "start": 114, "end": 135, "ref_url": "Document_Object_Model", "ref_ids": ["8743"], "sent_idx": 2}, {"id": 3, "start": 154, "end": 168, "ref_url": "XMLHttpRequest", "ref_ids": ["1594360"], "sent_idx": 2}, {"id": 4, "start": 54, "end": 92, "ref_url": "Uniform_Resource_Identifier", "ref_ids": ["32146"], "sent_idx": 3}]} +{"id": "17892225", "title": "Sturbridge Common Historic District", "sentences": ["The Sturbridge Common Historic District encompasses the historic center of Sturbridge, Massachusetts.", "Encompassing some , the district is focused around the town common, which was laid out when the town established in 1738.", "The district is roughly linear, extending along Main Street from its junction with Interstate 84 eastward to Hall Road.", "The district also extends for short distances along Haynes, Maple, and Charlton Streets.", "Although most of the properties in the district are residential, the institutional properties, including the 1838 Greek Revival Town Hall, the 1896 Joshua Hyde Library building, and the 1922 Federated Church.", "The oldest building in the district is the c.", "1772 Publick House, a tavern which has dominated the south side of the district since its construction.", "The district was listed on the National Register of Historic Places in 1977."], "mentions": [{"id": 0, "start": 75, "end": 100, "ref_url": "Sturbridge,_Massachusetts", "ref_ids": ["259517"], "sent_idx": 0}, {"id": 1, "start": 83, "end": 96, "ref_url": "Interstate_84_(Pennsylvania–Massachusetts)", "ref_ids": ["1519632"], "sent_idx": 2}, {"id": 2, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": 
["64065"], "sent_idx": 7}]} +{"id": "17892227", "title": "Mark McNairy", "sentences": ["Mark McNairy (born March 8, 1961 in North Carolina) is an American fashion designer.", "He is the former creative director for the Ivy League inspired clothing company \"J. Press\", as well as his own collection simply known as Mark McNairy New Amsterdam."], "mentions": [{"id": 0, "start": 36, "end": 50, "ref_url": "Greensboro", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 58, "end": 66, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 81, "ref_url": "Fashion_design", "ref_ids": ["10799117"], "sent_idx": 0}, {"id": 3, "start": 43, "end": 53, "ref_url": "Ivy_League", "ref_ids": ["14975"], "sent_idx": 1}, {"id": 4, "start": 81, "end": 89, "ref_url": "J._Press", "ref_ids": ["2657290"], "sent_idx": 1}]} +{"id": "17892293", "title": "Winthrop Street Baptist Church", "sentences": ["Winthrop Street Baptist Church is a historic Baptist church located at 39 Winthrop Street in Taunton, Massachusetts, USA.", "The Late Gothic Revival church was built in 1862 and was the second Baptist church built on the site.", "It was added to the National Register of Historic Places in 1984."], "mentions": [{"id": 0, "start": 45, "end": 52, "ref_url": "Baptist", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 93, "end": 115, "ref_url": "Taunton,_Massachusetts", "ref_ids": ["116732"], "sent_idx": 0}, {"id": 2, "start": 20, "end": 56, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 2}]} +{"id": "17892298", "title": "William Fawcett", "sentences": ["William or Bill Fawcett may refer to:"], "mentions": []} +{"id": "17892324", "title": "Westville Congregational Church", "sentences": ["The West Congregational Church of Taunton, formerly the Westville Congregational Church, is a historic church located at 415 Winthrop Street in Taunton, Massachusetts.", "Built in 1792 and moved to its present location in 1824, it is 
the city's oldest church building, and a well-preserved example of Federal period architecture.", "The building was listed on the National Register of Historic Places in 1984."], "mentions": [{"id": 0, "start": 144, "end": 166, "ref_url": "Taunton,_Massachusetts", "ref_ids": ["116732"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 2}]} +{"id": "17892349", "title": "Unitarian Memorial Church", "sentences": ["Unitarian Memorial Church is a historic church on 102 Green Street in Fairhaven, Massachusetts, home to the Unitarian Universalist Society of Fairhaven.", "The congregation was founded as the Washington Street Christian Church in 1819, and called its first minister in 1840.", "The Reverend Jordinn Nelson Long is its currently serving minister, and the Society President is Lawrence DeSalvatore.", "UUSF is a member congregation of the Boston-based Unitarian Universalist Association, and a designated GLBTQA Welcoming Congregation, a UUA Honor Congregation, and a part of the Green Sanctuary movement.", "Services are held in the neo-Gothic sanctuary at 10:30 a.m. from September through mid-June each year.", "The church was added to the National Register of Historic Places in 1996.", "The Unitarian Memorial Church in Fairhaven Massachusetts was built, financed and donated to the Unitarians in 1904 by Henry H. 
Rogers in memory of his mother, Mary Eldredge Huttleston.", "The church was designed by Boston architect Charles Brigham in a Gothic Revival style.", "It is one hundred fourteen feet in height, one hundred feet long in body and fifty-three feet wide.", "The nave is thirty-two feet wide and seventy-one feet long.", "The main aisle is sixty-two feet long and six feet wide.", "The church, parish house and former parsonage (now Harrop Center) of the Unitarian Society are so placed as to form three sides of a quadrangle, set among well-kept lawns and shrubbery.", "Granite (locally quarried) with Indiana limestone decorative carvings dominate the exterior while marble and limestone carvings dominate the interior.", "All stonework artistry was created by forty-five Italian craftsmen brought to Fairhaven by Rogers."], "mentions": [{"id": 0, "start": 70, "end": 94, "ref_url": "Fairhaven,_Massachusetts", "ref_ids": ["116712"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 84, "ref_url": "Unitarian_Universalist_Association", "ref_ids": ["31903"], "sent_idx": 3}, {"id": 2, "start": 28, "end": 64, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 5}, {"id": 3, "start": 118, "end": 133, "ref_url": "Henry_H._Rogers", "ref_ids": null, "sent_idx": 6}, {"id": 4, "start": 44, "end": 59, "ref_url": "Charles_Brigham", "ref_ids": ["19936744"], "sent_idx": 7}, {"id": 5, "start": 32, "end": 49, "ref_url": "Indiana_limestone", "ref_ids": null, "sent_idx": 12}]} +{"id": "17892367", "title": "Air Lanka Flight 512", "sentences": ["Air Lanka Flight 512 was an Air Lanka flight from London Gatwick Airport via Zurich and Dubai to Colombo (Bandaranaike International Airport) and Malé, Maldives (Velana International Airport).", "On 3 May 1986, the Lockheed L-1011 Tristar operating the flight was on the ground in Colombo, about to fly on to Malé, when an explosion ripped the aircraft in two, destroying it.", "Flight 512 carried mainly French, West German, British 
and Japanese tourists; 21 people were killed on the aircraft, including 3 British, 2 West German, 3 French, 2 Japanese, 2 Maldivians and 1 Pakistani; 41 people were injured.", "Boarding of the flight had been delayed due to the aircraft being damaged during cargo / baggage loading.", "During boarding, a bomb, hidden in the aircraft's 'Fly Away Kit' (a collection of small spare parts), exploded.", "The bomb had been timed to detonate mid-flight; the delay likely saved many lives.", "The Sri Lankan government concluded that the bomb was planted by the Liberation Tigers of Tamil Eelam (LTTE) to sabotage peace talks between the LTTE and the Sri Lankan government.", "They reported that a search of the aircraft the next day uncovered a parcel containing uniforms with the insignia of the Black Tigers, the suicide wing of LTTE."], "mentions": [{"id": 0, "start": 0, "end": 9, "ref_url": "Air_Lanka", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 50, "end": 72, "ref_url": "London_Gatwick_Airport", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 77, "end": 83, "ref_url": "Zurich", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 88, "end": 93, "ref_url": "Dubai", "ref_ids": ["211583"], "sent_idx": 0}, {"id": 4, "start": 106, "end": 140, "ref_url": "Bandaranaike_International_Airport", "ref_ids": ["2870845"], "sent_idx": 0}, {"id": 5, "start": 113, "end": 117, "ref_url": "Malé", "ref_ids": ["57040"], "sent_idx": 1}, {"id": 6, "start": 152, "end": 160, "ref_url": "Maldives", "ref_ids": ["19117"], "sent_idx": 0}, {"id": 7, "start": 162, "end": 190, "ref_url": "Velana_International_Airport", "ref_ids": ["2577088"], "sent_idx": 0}, {"id": 8, "start": 19, "end": 42, "ref_url": "Lockheed_L-1011", "ref_ids": null, "sent_idx": 1}, {"id": 9, "start": 4, "end": 25, "ref_url": "Sri_Lankan_government", "ref_ids": null, "sent_idx": 6}, {"id": 10, "start": 69, "end": 101, "ref_url": "Liberation_Tigers_of_Tamil_Eelam", "ref_ids": ["18606"], "sent_idx": 6}, {"id": 11, "start": 
121, "end": 133, "ref_url": "Black_Tigers", "ref_ids": ["2239233"], "sent_idx": 7}]} +{"id": "17892381", "title": "1958 Laotian parliamentary election", "sentences": ["Parliamentary elections were held in Laos on 4 May 1958, in order to elect an additional 21 seats to the enlarged National Assembly.", "The Lao Patriotic Front won the most seats, although the ruling National Progressive Party remained the largest party in the Assembly, holding 26 of the 60 seats.", "Voter turnout was 82.1%."], "mentions": [{"id": 0, "start": 37, "end": 41, "ref_url": "Laos", "ref_ids": ["17752"], "sent_idx": 0}, {"id": 1, "start": 114, "end": 131, "ref_url": "National_Assembly_of_Laos", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 4, "end": 23, "ref_url": "Lao_Patriotic_Front", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 64, "end": 90, "ref_url": "National_Progressive_Party_(Laos)", "ref_ids": ["28506360"], "sent_idx": 1}]} +{"id": "17892395", "title": "Palmer Island Light", "sentences": ["Palmer Island Light Station is a historic lighthouse in New Bedford Harbor in New Bedford, Massachusetts, USA.", "The lighthouse was built in 1849 out of stone rubble.", "It was discontinued when the harbor's hurricane barrier was built in the early 1960s, as its location immediately north of the barrier was no longer an outlying danger and there are lights on either side of the barrier opening.", "From 1888 until 1891 it served, with Fairhaven Bridge Light, as a range light to guide vessels past Butler Flats, a rocky shoal on the west side of the entrance channel.", "It was added to the National Register of Historic Places as Palmer Island Light Station on March 26, 1980."], "mentions": [{"id": 0, "start": 4, "end": 14, "ref_url": "Lighthouse", "ref_ids": ["17725"], "sent_idx": 1}, {"id": 1, "start": 78, "end": 104, "ref_url": "New_Bedford,_Massachusetts", "ref_ids": ["116717"], "sent_idx": 0}, {"id": 2, "start": 106, "end": 109, "ref_url": "United_States", "ref_ids": ["3434750"], 
"sent_idx": 0}, {"id": 3, "start": 37, "end": 59, "ref_url": "Fairhaven_Bridge_Light", "ref_ids": ["24189552"], "sent_idx": 3}, {"id": 4, "start": 66, "end": 77, "ref_url": "Leading_lights", "ref_ids": ["491886"], "sent_idx": 3}, {"id": 5, "start": 20, "end": 56, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 4}]} +{"id": "17892417", "title": "Denka", "sentences": ["Deng, also known as Denka, is a sky, rain, and fertility god in Dinka mythology for the Dinka people of Sudan and South Sudan.", "He is the son of the goddess Abuk.", "Among the Nuer, Deng is considered to be \"a foreign deity\" and \"a bringer of disease.\"", "His daughter is the moon goddess.", "In Dinka religion, he is a storm and fertility god bringing lightning, rain and thunder.", "The word \"deng\" mean \"rain' in Thuɔŋjäŋ.", "Among its followers, Deng is regarded as the intermediary between humans and the supreme being.", "Closely linked with the supreme god Nhialic, he was regarded as the son of god, and sometimes as the son of the goddess Abuk.", "In some areas of Dinka country, Deng and Nhialic are \"regarded as one and the same\"."], "mentions": [{"id": 0, "start": 32, "end": 35, "ref_url": "God", "ref_ids": ["5042765"], "sent_idx": 7}, {"id": 1, "start": 64, "end": 79, "ref_url": "Dinka_mythology", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 88, "end": 100, "ref_url": "Dinka_people", "ref_ids": ["768938"], "sent_idx": 0}, {"id": 3, "start": 104, "end": 109, "ref_url": "Sudan", "ref_ids": ["27421"], "sent_idx": 0}, {"id": 4, "start": 114, "end": 125, "ref_url": "South_Sudan", "ref_ids": ["32350676"], "sent_idx": 0}, {"id": 5, "start": 120, "end": 124, "ref_url": "Abuk", "ref_ids": null, "sent_idx": 7}, {"id": 6, "start": 10, "end": 14, "ref_url": "Nuer_people", "ref_ids": ["855296"], "sent_idx": 2}, {"id": 7, "start": 3, "end": 17, "ref_url": "Dinka_religion", "ref_ids": ["98511"], "sent_idx": 4}, {"id": 8, "start": 31, "end": 39, "ref_url": 
"Dinka_language", "ref_ids": ["2704870"], "sent_idx": 5}]} +{"id": "17892430", "title": "The Cage (ballet)", "sentences": ["The Cage is a ballet made by New York City Ballet ballet master Jerome Robbins to Stravinsky's \"Concerto in D\" for string orchestra, also known as the \"Basel Concerto\", which he was commissioned to compose on the twentieth anniversary of the chamber orchestra Basler Kammerorchester; it notably shifts between D major and minor.", "The premiere took place on Sunday, 10 June 1951 at the City Center of Music and Drama, New York, with décor by Jean Rosenthal, costumes by Ruth Sobatka and lighting by Jennifer Tipton.", "It was danced as part of City Ballet's 1982 Stravinsky Centennial Celebration."], "mentions": [{"id": 0, "start": 29, "end": 49, "ref_url": "New_York_City_Ballet", "ref_ids": ["440878"], "sent_idx": 0}, {"id": 1, "start": 64, "end": 78, "ref_url": "Jerome_Robbins", "ref_ids": ["663551"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Igor_Stravinsky", "ref_ids": ["38172"], "sent_idx": 0}, {"id": 3, "start": 96, "end": 109, "ref_url": "Concerto_in_D_(Stravinsky)", "ref_ids": ["10850462"], "sent_idx": 0}, {"id": 4, "start": 43, "end": 47, "ref_url": "List_of_1951_ballet_premieres", "ref_ids": ["25364035"], "sent_idx": 1}, {"id": 5, "start": 55, "end": 85, "ref_url": "New_York_City_Center", "ref_ids": ["30864126"], "sent_idx": 1}, {"id": 6, "start": 111, "end": 125, "ref_url": "Jean_Rosenthal", "ref_ids": ["2858863"], "sent_idx": 1}, {"id": 7, "start": 139, "end": 151, "ref_url": "Ruth_Sobatka", "ref_ids": null, "sent_idx": 1}, {"id": 8, "start": 168, "end": 183, "ref_url": "Jennifer_Tipton", "ref_ids": ["1477073"], "sent_idx": 1}, {"id": 9, "start": 44, "end": 77, "ref_url": "New_York_City_Ballet", "ref_ids": ["440878"], "sent_idx": 2}]} +{"id": "17892441", "title": "Still River Baptist Church", "sentences": ["Still River Baptist Church (also known as the \"Still River Meetinghouse\") is the home of the Harvard 
Historical Society.", "It is an historic Gothic Revival-style meeting house located at 213 Still River Road in Harvard, Massachusetts.", "The building houses the Harvard Historical Society's museum and archival collections."], "mentions": [{"id": 0, "start": 18, "end": 32, "ref_url": "Gothic_Revival", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 88, "end": 110, "ref_url": "Harvard,_Massachusetts", "ref_ids": ["116938"], "sent_idx": 1}]} +{"id": "17892461", "title": "Brookfield Cemetery", "sentences": ["Brookfield Cemetery is an historic cemetery on Main Street (Massachusetts Route 9) on the west side of Brookfield, Massachusetts.", "Established in 1714, it is the town's only cemetery.", "It consequently holds the burials of many of Brookfield's founders and leading citizens, from the 18th century to the present, including those of neighboring towns that were once a part of Brookfield.", "The cemetery has about 10,000 marked graves.", "It was listed on the National Register of Historic Places in 2003.", "The cemetery was developed in three distinct phases.", "Brookfield once encompassed most of what is now the towns of Warren, West Brookfield, and East Brookfield, with the West Brookfield area being the earliest area of settlement in the 1670s.", "First known as Quaboag Plantation, it was abandoned after a 1675 attack by Native Americans in King Philip's War, and resettlement only began early in the 18th century.", "Burial practices before 1714 are poorly documented.", "In that year local church members set aside the first plot of land for burials; this was confirmed by town officials after Brookfield was incorporated in 1720.", "The cemetery was at first owned as part of church lands, but (the westernmost portion of the modern cemetery) were deeded to the town of Brookfield in 1760, probably as part of the division of the town into three parishes (which later became Brookfield, East Brookfield, and West Brookfield).", "During the 19th century the town 
undertook a number of improvements to the cemetery.", "A stone wall was built around it in the 1850s, and a receiving tomb was added in 1861.", "The large granite entrance gate was built in 1873, the gift of local residents William Banister and Otis Hayden Banister, and its Civil War memorial was dedicated in 1890.", "These were added in the central section of the cemetery, which was laid out in the Victorian rural cemetery style that was then fashionable.", "The easternmost part of the cemetery was developed beginning about 1920, and has been expanded several times, most recently in 1996.", "Its layout reflects the aesthetics of the 20th Century Modern Cemetery movement."], "mentions": [{"id": 0, "start": 28, "end": 36, "ref_url": "Cemetery", "ref_ids": ["63752"], "sent_idx": 15}, {"id": 1, "start": 60, "end": 81, "ref_url": "Massachusetts_Route_9", "ref_ids": ["849593"], "sent_idx": 0}, {"id": 2, "start": 103, "end": 128, "ref_url": "Brookfield,_Massachusetts", "ref_ids": ["116927"], "sent_idx": 0}, {"id": 3, "start": 21, "end": 57, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 4}, {"id": 4, "start": 61, "end": 67, "ref_url": "Warren,_Massachusetts", "ref_ids": ["259519"], "sent_idx": 6}, {"id": 5, "start": 275, "end": 290, "ref_url": "West_Brookfield,_Massachusetts", "ref_ids": ["161295"], "sent_idx": 10}, {"id": 6, "start": 254, "end": 269, "ref_url": "East_Brookfield,_Massachusetts", "ref_ids": ["259496"], "sent_idx": 10}, {"id": 7, "start": 58, "end": 91, "ref_url": "Wheeler's_Surprise", "ref_ids": ["26457462"], "sent_idx": 7}, {"id": 8, "start": 95, "end": 112, "ref_url": "King_Philip's_War", "ref_ids": ["313013"], "sent_idx": 7}, {"id": 9, "start": 130, "end": 139, "ref_url": "American_Civil_War", "ref_ids": ["863"], "sent_idx": 13}]} +{"id": "17892478", "title": "William Fawcett (botanist)", "sentences": ["William Fawcett (1851–1926) was a British botanist and coauthor of the \"Flora of Jamaica\".", "Fawcett was 
Director of Public Gardens and Plantations in Jamaica from 1887 to 1908.", "He then returned to Britain where he worked with Alfred Barton Rendle to produce the first few volumes of the \"Flora of Jamaica\",\n(illustrated by Beatrice O. Corfe and Helen Adelaide Wood)."], "mentions": [{"id": 0, "start": 34, "end": 41, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 50, "ref_url": "Botanist", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 120, "end": 127, "ref_url": "Jamaica", "ref_ids": ["15660"], "sent_idx": 2}, {"id": 3, "start": 49, "end": 69, "ref_url": "Alfred_Barton_Rendle", "ref_ids": ["17796003"], "sent_idx": 2}, {"id": 4, "start": 146, "end": 163, "ref_url": "Beatrice_O._Corfe", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 168, "end": 187, "ref_url": "Helen_Adelaide_Wood", "ref_ids": ["59779375"], "sent_idx": 2}]} +{"id": "17892558", "title": "Gary Brook", "sentences": ["Gary Brook (born 9 May 1964) is an English former footballer.", "He played for Frickley Athletic, Newport County, Scarborough, Blackpool, Notts County and Ossett Town", "There were some famous victories under his stewardship including a 4-0 away win at Harrogate Town and a superb 5-2 home victory over Radcliffe Borough."], "mentions": [{"id": 0, "start": 50, "end": 60, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 1, "start": 14, "end": 31, "ref_url": "Frickley_Athletic_F.C.", "ref_ids": ["3666646"], "sent_idx": 1}, {"id": 2, "start": 33, "end": 47, "ref_url": "Newport_County_A.F.C.", "ref_ids": ["285880"], "sent_idx": 1}, {"id": 3, "start": 49, "end": 60, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 1}, {"id": 4, "start": 62, "end": 71, "ref_url": "Blackpool_F.C.", "ref_ids": ["245357"], "sent_idx": 1}, {"id": 5, "start": 73, "end": 85, "ref_url": "Notts_County_F.C.", "ref_ids": ["448599"], "sent_idx": 1}, {"id": 6, "start": 90, "end": 101, "ref_url": "Ossett_Town_F.C.", 
"ref_ids": null, "sent_idx": 1}]} +{"id": "17892575", "title": "Barakat, Inc.", "sentences": ["Barakat, Inc. is a non-governmental organization doing humanitarian work in Afghanistan, Pakistan, and India.", "With a small office in the United States and a number of contacts on the ground in the aforementioned three countries, Barakat holds to the mission of bringing progressive social change to South and Central Asian communities even when that change is locally controversial."], "mentions": [{"id": 0, "start": 19, "end": 48, "ref_url": "Non-governmental_organization", "ref_ids": ["46539"], "sent_idx": 0}, {"id": 1, "start": 76, "end": 87, "ref_url": "Afghanistan", "ref_ids": ["737"], "sent_idx": 0}, {"id": 2, "start": 89, "end": 97, "ref_url": "Pakistan", "ref_ids": ["23235"], "sent_idx": 0}, {"id": 3, "start": 103, "end": 108, "ref_url": "India", "ref_ids": ["14533"], "sent_idx": 0}, {"id": 4, "start": 27, "end": 40, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 1}, {"id": 5, "start": 189, "end": 194, "ref_url": "South_Asia", "ref_ids": ["21566765"], "sent_idx": 1}, {"id": 6, "start": 199, "end": 211, "ref_url": "Central_Asia", "ref_ids": ["6742"], "sent_idx": 1}]} +{"id": "17892590", "title": "Menti nostrae", "sentences": ["Menti nostrae is an apostolic exhortation of Pope Pius XII on the sanctity of priestly life, given in Rome at St. 
Peter's on September 23, 1950, in the 12th year of his pontificate.", "\"Menti nostrae\" has four parts, addressing the sanctity of priestly life, the sanctity of priestly service, practical regulations and special difficulties of priestly life.", "Priestly life means first of all the imitation of the life of Christ, according to the Pontiff.", "This is especially important in a modern world, which so many are confused by conflicting even anti-Christian messages.", "Imitation of Christ means an inner relation with Christ, full observance of celibacy and a separation from earthly goods.", "The priest participates in Holy Mass on the sacrifice of Christ and in his mystical death and resurrection.", "Great emphasis is put on the prayer life of the priests.", "The Liturgy of the hours is of prime importance as is daily contemplation, private prayers, his frequent confession and spiritual guidance through an experienced priest of his confidence.", "The sanctity of priestly service is reflected in his role as giver of divine graces.", "He should be motivated by apostolic fervor and be a pastor reflecting the love and goodness of Christ.", "Constantly, he should improve his knowledge not only of divine but also temporal things.", "Pope Pius with warm words praises priests in service for the spiritual life of their fellow priests.", "Special efforts have to be undertaken to recruit young persons to the priesthood.", "Every priest has to participate in this task first of all by setting an example but also by encouraging young men.", "Candidates have to be very carefully screened to ensure that they are capable of bearing the pressures of the office.", "The seminaries, while fostering spiritual formation, should not encourage a separation from the world, since priests live and work in this world.", "Seminarians have to be trained in obedience and chastity.", "Candidates with celibacy problems should not be permitted to stay.", "There has to be a special efforts for newly 
ordained priests to help them in their first years.", "Priests need to undergo continuous education for which libraries should be established in every dioceses with reading rooms and a good balance of theological spiritual and practical resources for priests.", "In these changing times, priests need to learn to discern and differentiate.", "Hanging on to all old traditions is equally dangerous as blind acceptance of everything new.", "Regarding communism and capitalism, the Church has pointed the shortcomings of both, branding the misuse of private property and exploitation as well as the activities of communists, trying to destroy faith and promising material well-being.", "The priest has to be open for the poor and the workers and all those who are in need and misery, not few of them from the middle class.", "The social teachings of the Church combines the demands of justice and charity and thus promotes a social order which does not oppress the individual or foster blind egoism.", "However, with all his social engagement, the priest is not to forget he overall mission and context.", "Lay persons are mainly called to the practical apostolate and priests are only to assist if necessary.", "In his last point, Pope Pius addresses the economic needs of the clergy in some regions and countries.", "After the war, there was much suffering, but bishops and clergy generously shared with those badly affected by the consequences of war.", "Such problems cannot be solved in the long run, if he faithful are not included in a solution.", "They have to be convinced, that priests need a material basis to exist and to work for them."], "mentions": [{"id": 0, "start": 20, "end": 41, "ref_url": "Apostolic_exhortation", "ref_ids": ["21986911"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 58, "ref_url": "Pope_Pius_XII", "ref_ids": ["23808"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 72, "ref_url": "Priesthood_(Catholic_Church)", "ref_ids": null, "sent_idx": 1}, {"id": 3, 
"start": 102, "end": 106, "ref_url": "Rome", "ref_ids": ["25458"], "sent_idx": 0}, {"id": 4, "start": 110, "end": 121, "ref_url": "St._Peter's_Basilica", "ref_ids": ["73188"], "sent_idx": 0}, {"id": 5, "start": 16, "end": 24, "ref_url": "Celibacy", "ref_ids": ["6035"], "sent_idx": 17}, {"id": 6, "start": 27, "end": 36, "ref_url": "Holy_Mass", "ref_ids": null, "sent_idx": 5}, {"id": 7, "start": 95, "end": 101, "ref_url": "Christ", "ref_ids": null, "sent_idx": 9}, {"id": 8, "start": 4, "end": 24, "ref_url": "Liturgy_of_the_hours", "ref_ids": null, "sent_idx": 7}, {"id": 9, "start": 70, "end": 82, "ref_url": "Divine_grace", "ref_ids": ["26556255"], "sent_idx": 8}, {"id": 10, "start": 52, "end": 58, "ref_url": "Pastor", "ref_ids": ["215153"], "sent_idx": 9}, {"id": 11, "start": 4, "end": 14, "ref_url": "List_of_Roman_Catholic_seminaries", "ref_ids": null, "sent_idx": 15}, {"id": 12, "start": 0, "end": 10, "ref_url": "Seminarian", "ref_ids": null, "sent_idx": 16}, {"id": 13, "start": 44, "end": 52, "ref_url": "Ordination", "ref_ids": ["217115"], "sent_idx": 18}, {"id": 14, "start": 10, "end": 19, "ref_url": "Communism", "ref_ids": ["9209651"], "sent_idx": 22}, {"id": 15, "start": 24, "end": 34, "ref_url": "Capitalism", "ref_ids": ["5416"], "sent_idx": 22}, {"id": 16, "start": 122, "end": 134, "ref_url": "Middle_class", "ref_ids": ["251534"], "sent_idx": 23}]} +{"id": "17892593", "title": "Edsel and Eleanor Ford House", "sentences": ["The Edsel and Eleanor Ford House is a mansion located at 1100 Lake Shore Drive in Grosse Pointe Shores, northeast of Detroit, Michigan; it stands on the site known as \"Gaukler Point\", on the shore of Lake St. 
Clair.", "The house became the new residence of the Edsel and Eleanor Ford family in 1928.", "Edsel Ford was the son of Henry Ford and an executive at Ford Motor Company.", "The estate's buildings were designed by architect Albert Kahn, its site plan and gardens by renowned landscape designer Jens Jensen.", "The property was listed on the National Register of Historic Places in 1979, and was designated a National Historic Landmark in 2016."], "mentions": [{"id": 0, "start": 82, "end": 102, "ref_url": "Grosse_Pointe_Shores,_Michigan", "ref_ids": ["150556"], "sent_idx": 0}, {"id": 1, "start": 117, "end": 124, "ref_url": "Detroit", "ref_ids": ["8687"], "sent_idx": 0}, {"id": 2, "start": 126, "end": 134, "ref_url": "Michigan", "ref_ids": ["18859"], "sent_idx": 0}, {"id": 3, "start": 200, "end": 214, "ref_url": "Lake_St._Clair", "ref_ids": ["484931"], "sent_idx": 0}, {"id": 4, "start": 0, "end": 10, "ref_url": "Edsel_Ford", "ref_ids": ["214885"], "sent_idx": 2}, {"id": 5, "start": 26, "end": 36, "ref_url": "Henry_Ford", "ref_ids": ["13371"], "sent_idx": 2}, {"id": 6, "start": 57, "end": 75, "ref_url": "Ford_Motor_Company", "ref_ids": ["30433662"], "sent_idx": 2}, {"id": 7, "start": 50, "end": 61, "ref_url": "Albert_Kahn_(architect)", "ref_ids": ["394770"], "sent_idx": 3}, {"id": 8, "start": 101, "end": 119, "ref_url": "Landscape_designer", "ref_ids": null, "sent_idx": 3}, {"id": 9, "start": 120, "end": 131, "ref_url": "Jens_Jensen_(landscape_architect)", "ref_ids": ["1364207"], "sent_idx": 3}, {"id": 10, "start": 31, "end": 67, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 4}, {"id": 11, "start": 98, "end": 124, "ref_url": "National_Historic_Landmark", "ref_ids": ["404013"], "sent_idx": 4}]} +{"id": "17892598", "title": "Ringsbury Camp", "sentences": ["Ringsbury Camp is an Iron Age hill fort, thought to date from approximately the year 50BC.", "It is located in the civil parish of Purton in Wiltshire."], "mentions": [{"id": 0, 
"start": 21, "end": 29, "ref_url": "Iron_Age", "ref_ids": ["14711"], "sent_idx": 0}, {"id": 1, "start": 30, "end": 39, "ref_url": "Hill_fort", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 87, "end": 89, "ref_url": "Before_Christ", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 21, "end": 33, "ref_url": "Civil_parish", "ref_ids": ["644233"], "sent_idx": 1}, {"id": 4, "start": 37, "end": 43, "ref_url": "Purton", "ref_ids": ["1383727"], "sent_idx": 1}, {"id": 5, "start": 47, "end": 56, "ref_url": "Wiltshire", "ref_ids": ["51231"], "sent_idx": 1}]} +{"id": "17892608", "title": "North Vietnamese invasion of Laos", "sentences": ["North Vietnam supported the Pathet Lao to fight against the Kingdom of Laos between 1958–1959.", "Control over Laos allowed for the eventual construction of the Ho Chi Minh Trail that would serve as the main supply route (MSR) for enhanced NLF (the National Liberation Front, the Vietcong) and NVA (North Vietnamese Army) activities in the Republic of Vietnam.", "As such, the support for Pathet Lao to fight against Kingdom of Laos by North Vietnam would prove decisive in the eventual communist victory over South Vietnam in 1975 as the South Vietnamese and American forces could have prevented any NVA and NLF deployment and resupply if these only happened over the 17th Parallel, also known as the Demilitarized Zone (DMZ), a narrow strip of land between North and South Vietnam that was closely guarded by both sides.", "It also helped the Pathet Lao win the Kingdom of Laos, although the Kingdom of Laos had American support."], "mentions": [{"id": 0, "start": 72, "end": 85, "ref_url": "North_Vietnam", "ref_ids": ["23550210"], "sent_idx": 2}, {"id": 1, "start": 19, "end": 29, "ref_url": "Pathet_Lao", "ref_ids": ["204758"], "sent_idx": 3}, {"id": 2, "start": 38, "end": 53, "ref_url": "Kingdom_of_Laos", "ref_ids": ["203670"], "sent_idx": 3}, {"id": 3, "start": 63, "end": 80, "ref_url": "Ho_Chi_Minh_Trail", "ref_ids": null, "sent_idx": 1}]} 
+{"id": "17892614", "title": "Nethakani", "sentences": ["Nethakani, also known as Netkani, are a Maratha and Telugu caste of cotton weavers and labourers.", "These people were migrated from Maharashtra and spread over middle India and Northern and southern parts of Indian.", "In Maharashtra Nethakanis are known as Netkani.", "Traditionally they have been associated with the occupation of weaving (\"netha neyuta\" means \"weaving\" in Telugu), but they have now largely moved to cultivation and agricultural labour; with a few of them being small land-owners.", "Most of the people related to this caste are located in the northern region of the Indian state of Telangana in the districts of Adilabad, Karimnagar, Nizamabad, Khammam and Warangal.", "Nethakanis also migrated to Maharashtra and distributed throughout, they speak Marathi in Maharashtra & Adilabad bordering MH.", "Based on the financial status in the society, Nethekanis are classified as other backward class (OBC) in northern parts of India and scheduled caste (SC) in some parts of India.", "This people are part of Hindu Shiva and Vishnu Sampradaya.", "They are unique caste present in India.", "They speak Marati and Telugu and Kannada and Gujarati And Tulu\nTheir roots from Brigu Maharshi and Markandeya sage and Bhawana rishi Nethakani's are among the smaller SC castes in Telangana; a study estimated their population to be around 80,000, constituting about 1% of the state's total SC population.", "The Nethakanis are distinguished by their custom of tying their head cloth in a roughly square shape, and by their loin cloths, which are worn very loose and not knotted."], "mentions": [{"id": 0, "start": 40, "end": 47, "ref_url": "Indian_people", "ref_ids": ["7564733"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 28, "ref_url": "Telugu_people", "ref_ids": ["3569761"], "sent_idx": 9}, {"id": 2, "start": 170, "end": 175, "ref_url": "Caste", "ref_ids": ["7257"], "sent_idx": 9}, {"id": 3, "start": 63, "end": 70, 
"ref_url": "Weaving", "ref_ids": ["51970"], "sent_idx": 3}, {"id": 4, "start": 22, "end": 28, "ref_url": "Telugu_language", "ref_ids": ["39202"], "sent_idx": 9}, {"id": 5, "start": 33, "end": 38, "ref_url": "India", "ref_ids": ["14533"], "sent_idx": 8}, {"id": 6, "start": 180, "end": 189, "ref_url": "Telangana", "ref_ids": ["990267"], "sent_idx": 9}, {"id": 7, "start": 104, "end": 112, "ref_url": "Adilabad_district", "ref_ids": ["38235198"], "sent_idx": 5}, {"id": 8, "start": 139, "end": 149, "ref_url": "Karimnagar_district", "ref_ids": ["3360217"], "sent_idx": 4}, {"id": 9, "start": 151, "end": 160, "ref_url": "Nizamabad_district", "ref_ids": ["3360231"], "sent_idx": 4}, {"id": 10, "start": 162, "end": 169, "ref_url": "Khammam_district", "ref_ids": ["3360118"], "sent_idx": 4}, {"id": 11, "start": 174, "end": 182, "ref_url": "Warangal_district", "ref_ids": null, "sent_idx": 4}, {"id": 12, "start": 133, "end": 148, "ref_url": "Scheduled_caste", "ref_ids": null, "sent_idx": 6}, {"id": 13, "start": 115, "end": 126, "ref_url": "Dhoti", "ref_ids": ["480528"], "sent_idx": 10}]} +{"id": "17892642", "title": "WVJP (AM)", "sentences": ["WVJP (1110 AM, VJP 1110) is a radio station licensed to serve Caguas, Puerto Rico.", "The station is owned by Borinquen Broadcasting Company.", "It airs a Talk/Personality format, and also simulcasts the Dimensión 103 network.", "The station was assigned the WVJP call letters by the Federal Communications Commission in 1947."], "mentions": [{"id": 0, "start": 11, "end": 13, "ref_url": "AM_broadcasting", "ref_ids": ["113509"], "sent_idx": 0}, {"id": 1, "start": 30, "end": 43, "ref_url": "Radio_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 44, "end": 52, "ref_url": "City_of_license", "ref_ids": ["584935"], "sent_idx": 0}, {"id": 3, "start": 62, "end": 81, "ref_url": "Caguas,_Puerto_Rico", "ref_ids": ["83709"], "sent_idx": 0}, {"id": 4, "start": 10, "end": 26, "ref_url": "Talk/Personality", "ref_ids": null, "sent_idx": 2}, 
{"id": 5, "start": 54, "end": 87, "ref_url": "Federal_Communications_Commission", "ref_ids": ["55974"], "sent_idx": 3}]} +{"id": "17892648", "title": "Lori Ehrlich", "sentences": ["Lori Ehrlich (born June 9, 1963) is the State Representative for the Massachusetts 8th Essex District.", "Ehrlich won her seat on a March 4, 2008 special election after her predecessor, Doug Petersen, resigned.", "Peterson was appointed Commissioner of Agriculture by Governor Deval Patrick.", "After completing Petersen's term, Ehrlich went on to win re-election in November 2008.", "She is Chairwoman of the Joint Committee on Export Development."], "mentions": [{"id": 0, "start": 69, "end": 82, "ref_url": "Massachusetts", "ref_ids": ["1645518"], "sent_idx": 0}, {"id": 1, "start": 87, "end": 92, "ref_url": "Essex,_Massachusetts", "ref_ids": ["233774"], "sent_idx": 0}, {"id": 2, "start": 80, "end": 93, "ref_url": "Douglas_W._Petersen", "ref_ids": ["10396894"], "sent_idx": 1}, {"id": 3, "start": 63, "end": 76, "ref_url": "Deval_Patrick", "ref_ids": ["2086606"], "sent_idx": 2}]} +{"id": "17892665", "title": "Hunyadi László (opera)", "sentences": ["Hunyadi László (\"László Hunyadi\") is an opera in three acts by the Hungarian composer Ferenc Erkel.", "The libretto, by Béni Egressy, is based on a play by Lőrinc Tóth.", "The opera was first performed at the Pesti Nemzeti Magyar Szinház, Budapest on 27 January 1844.", "\"Hunyadi László\" is considered to be the first important Hungarian opera and Erkel's musical style draws on folk influences, particularly the dance known as the verbunkos."], "mentions": [{"id": 0, "start": 67, "end": 72, "ref_url": "Opera", "ref_ids": ["22348"], "sent_idx": 3}, {"id": 1, "start": 57, "end": 66, "ref_url": "Hungary", "ref_ids": ["13275"], "sent_idx": 3}, {"id": 2, "start": 86, "end": 98, "ref_url": "Ferenc_Erkel", "ref_ids": ["779494"], "sent_idx": 0}, {"id": 3, "start": 4, "end": 12, "ref_url": "Libretto", "ref_ids": ["46950"], "sent_idx": 1}, {"id": 4, "start": 
17, "end": 29, "ref_url": "Béni_Egressy", "ref_ids": ["5694773"], "sent_idx": 1}, {"id": 5, "start": 67, "end": 75, "ref_url": "Budapest", "ref_ids": ["36787"], "sent_idx": 2}, {"id": 6, "start": 57, "end": 72, "ref_url": "Hungarian_opera", "ref_ids": ["2753827"], "sent_idx": 3}, {"id": 7, "start": 161, "end": 170, "ref_url": "Verbunkos", "ref_ids": ["407278"], "sent_idx": 3}]} +{"id": "17892675", "title": "Aberthaw Low Level railway station", "sentences": ["Aberthaw Low Level railway station was the Taff Vale Railway station which served East Aberthaw, located near the north shore of the Bristol Channel in the Welsh county of Glamorgan."], "mentions": [{"id": 0, "start": 43, "end": 60, "ref_url": "Taff_Vale_Railway", "ref_ids": ["350267"], "sent_idx": 0}, {"id": 1, "start": 82, "end": 95, "ref_url": "East_Aberthaw", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 133, "end": 148, "ref_url": "Bristol_Channel", "ref_ids": ["313697"], "sent_idx": 0}, {"id": 3, "start": 156, "end": 161, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 4, "start": 172, "end": 181, "ref_url": "Glamorgan", "ref_ids": ["52520"], "sent_idx": 0}]} +{"id": "17892678", "title": "James Cockle (surgeon)", "sentences": ["James Cockle (17 July 1782 – 8 December 1854) was a prominent British surgeon and father of eventual Chief Justice of Queensland, Sir James Cockle."], "mentions": [{"id": 0, "start": 130, "end": 146, "ref_url": "James_Cockle_(lawyer)", "ref_ids": null, "sent_idx": 0}]} +{"id": "17892759", "title": "Polidori", "sentences": ["Polidori is an Italian surname.", "Notable people with the surname include:"], "mentions": []} +{"id": "17892763", "title": "George Oghani", "sentences": ["George William Oghani( born 2 September 1960) is an English former footballer."], "mentions": [{"id": 0, "start": 67, "end": 77, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}]} +{"id": "17892777", "title": "Wayne Jackson", "sentences": ["Wayne Jackson may refer 
to:"], "mentions": []} +{"id": "17892781", "title": "Koffler", "sentences": ["Koffler is family name of:"], "mentions": []} +{"id": "17892804", "title": "Lionair Flight 602", "sentences": ["Lionair Flight 602 was a Lionair Antonov An-24RV which fell into the sea off the north-western coast of Sri Lanka on 29 September 1998.", "The aircraft departed Jaffna Airport with 48 passengers and a crew of seven; it disappeared from radar screens ten minutes into the flight.", "Initial reports indicated that the plane had been shot down by Liberation Tigers of Tamil Eelam terrorists using MANPADS, which has since been confirmed.", "All aboard were presumed killed."], "mentions": [{"id": 0, "start": 0, "end": 7, "ref_url": "Lionair", "ref_ids": ["8089444"], "sent_idx": 0}, {"id": 1, "start": 33, "end": 46, "ref_url": "Antonov_An-24", "ref_ids": ["762103"], "sent_idx": 0}, {"id": 2, "start": 104, "end": 113, "ref_url": "Sri_Lanka", "ref_ids": ["26750"], "sent_idx": 0}, {"id": 3, "start": 22, "end": 36, "ref_url": "Jaffna_Airport", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 63, "end": 95, "ref_url": "Liberation_Tigers_of_Tamil_Eelam", "ref_ids": ["18606"], "sent_idx": 2}, {"id": 5, "start": 113, "end": 120, "ref_url": "Man-portable_air-defense_systems", "ref_ids": null, "sent_idx": 2}]} +{"id": "17892850", "title": "Darren Foreman (footballer)", "sentences": ["Darren Foreman (born 12 February 1968 in Southampton, England) is an English former footballer.", "He played for Fareham Town, Barnsley, Crewe Alexandra and Scarborough.", "Foreman is Scarborough's all-time top goalscorer in the Football League with 35 goals, 27 of which were scored in the 1992-93 season.", "He was forced to quit the professional game in 1995 because of injury.", "After the end of his professional career, he played for several non-league clubs, including Gateshead, Guiseley, and Barrow, before rejoining Scarborough to serve in a number of non-playing roles.", "He also played 22 games (12 goals) 
for IK Sirius, Uppsala in Sweden in 1995.", "He is now a paramedic, in the Scarborough area."], "mentions": [{"id": 0, "start": 41, "end": 52, "ref_url": "Southampton", "ref_ids": ["7920751"], "sent_idx": 0}, {"id": 1, "start": 84, "end": 94, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 2, "start": 14, "end": 26, "ref_url": "Fareham_Town_F.C.", "ref_ids": ["3836424"], "sent_idx": 1}, {"id": 3, "start": 28, "end": 36, "ref_url": "Barnsley_F.C.", "ref_ids": ["333188"], "sent_idx": 1}, {"id": 4, "start": 38, "end": 53, "ref_url": "Crewe_Alexandra_F.C.", "ref_ids": ["69438"], "sent_idx": 1}, {"id": 5, "start": 30, "end": 41, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 6}, {"id": 6, "start": 52, "end": 71, "ref_url": "The_Football_League", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 64, "end": 74, "ref_url": "Non-league", "ref_ids": null, "sent_idx": 4}, {"id": 8, "start": 92, "end": 101, "ref_url": "Gateshead_F.C.", "ref_ids": ["696588"], "sent_idx": 4}, {"id": 9, "start": 103, "end": 111, "ref_url": "Guiseley_F.C.", "ref_ids": null, "sent_idx": 4}, {"id": 10, "start": 117, "end": 123, "ref_url": "Barrow_A.F.C.", "ref_ids": ["712826"], "sent_idx": 4}, {"id": 11, "start": 39, "end": 48, "ref_url": "IK_Sirius_Fotboll", "ref_ids": ["4677045"], "sent_idx": 5}, {"id": 12, "start": 50, "end": 57, "ref_url": "Uppsala", "ref_ids": ["31784"], "sent_idx": 5}, {"id": 13, "start": 61, "end": 67, "ref_url": "Sweden", "ref_ids": ["5058739"], "sent_idx": 5}, {"id": 14, "start": 12, "end": 21, "ref_url": "Paramedic", "ref_ids": ["75006"], "sent_idx": 6}, {"id": 15, "start": 30, "end": 41, "ref_url": "Scarborough,_North_Yorkshire", "ref_ids": ["381565"], "sent_idx": 6}]} +{"id": "17892855", "title": "Sumner Shapiro", "sentences": ["Sumner Shapiro (January 13, 1926 - November 14, 2006) was a United States Navy rear admiral who served as Director of the Office of Naval Intelligence from 1978 to 1982."], "mentions": [{"id": 
0, "start": 60, "end": 78, "ref_url": "United_States_Navy", "ref_ids": ["20518076"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 91, "ref_url": "Rear_admiral", "ref_ids": ["69476"], "sent_idx": 0}, {"id": 2, "start": 122, "end": 150, "ref_url": "Office_of_Naval_Intelligence", "ref_ids": ["1484927"], "sent_idx": 0}]} +{"id": "17892861", "title": "Kleine Scheidegg railway station", "sentences": ["Kleine Scheidegg is a railway station that is situated on the summit of the Kleine Scheidegg Pass in the Bernese Oberland region of Switzerland.", "The pass houses a complex of hotels and railway buildings.", "Administratively, the station is in the municipality of Lauterbrunnen in the canton of Bern.", "The station is on the Wengernalpbahn( WAB), whose trains operate to Kleine Scheidegg from Lauterbrunnen via Wengen, and separately from Grindelwald.", "It is also the lower terminus of the Jungfraubahn( JB), whose trains climb within the Eiger to the Jungfraujoch.", "All passengers travelling to the Jungfraujoch, or between Lauterbrunnen and Grindelwald, must change trains at the station.", "WAB trains from Lauterbrunnen enter the station at its western end, and from Grindelwald at its eastern end, but no through trains are operated.", "This is principally because of the need, for safety reasons, to have each train's motorcar or locomotive at its downhill end.", "The WAB tracks at Kleine Scheidegg includes a, partially underground, wye track to allow trains to be reversed, but this is not used for trains in passenger service.", "The WAB and JB use different rail gauges, different electrification systems and different rack railway technology, and are not physically connected.", "The depot of the JB is located at Kleine Scheidegg, but not the line's workshops.", "These are located at Eigergletscher station, one stop up the line.", "The station is served by the following passenger trains:"], "mentions": [{"id": 0, "start": 22, "end": 37, "ref_url": "Railway_station", 
"ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 76, "end": 97, "ref_url": "Kleine_Scheidegg_Pass", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 105, "end": 121, "ref_url": "Bernese_Oberland", "ref_ids": ["1826437"], "sent_idx": 0}, {"id": 3, "start": 132, "end": 143, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 0}, {"id": 4, "start": 40, "end": 69, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 2}, {"id": 5, "start": 77, "end": 91, "ref_url": "Canton_of_Bern", "ref_ids": ["292850"], "sent_idx": 2}, {"id": 6, "start": 22, "end": 36, "ref_url": "Wengernalpbahn", "ref_ids": null, "sent_idx": 3}, {"id": 7, "start": 90, "end": 103, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 3}, {"id": 8, "start": 108, "end": 114, "ref_url": "Wengen", "ref_ids": ["665790"], "sent_idx": 3}, {"id": 9, "start": 136, "end": 147, "ref_url": "Grindelwald", "ref_ids": ["613365"], "sent_idx": 3}, {"id": 10, "start": 37, "end": 49, "ref_url": "Jungfraubahn", "ref_ids": null, "sent_idx": 4}, {"id": 11, "start": 86, "end": 91, "ref_url": "Eiger", "ref_ids": ["400251"], "sent_idx": 4}, {"id": 12, "start": 99, "end": 111, "ref_url": "Jungfraujoch", "ref_ids": ["654488"], "sent_idx": 4}, {"id": 13, "start": 70, "end": 79, "ref_url": "Wye_track", "ref_ids": null, "sent_idx": 8}, {"id": 14, "start": 29, "end": 39, "ref_url": "Rail_gauge", "ref_ids": null, "sent_idx": 9}, {"id": 15, "start": 90, "end": 102, "ref_url": "Rack_railway", "ref_ids": ["143945"], "sent_idx": 9}, {"id": 16, "start": 21, "end": 43, "ref_url": "Eigergletscher_railway_station", "ref_ids": ["19665161"], "sent_idx": 11}]} +{"id": "17892864", "title": "Octagon House (Laurens, South Carolina)", "sentences": ["The Octagon House, also known as Zelotes Holmes House, is an historic octagonal house located at 619 East Main Street in Laurens, South Carolina.", "Designed and built in 1859 to 1862 by the Rev. 
Zelotes Lee Holmes, a Presbyterian minister and teacher, it is thought to be the first concrete house erected in South Carolina.", "It was called the Zelotes Holmes House by the Historic American Buildings Survey.", "Also known as the Old Holmes House, the Old Watson House and the Holmes-Watson House, it was added to the National Register of Historic Places on March 20, 1973."], "mentions": [{"id": 0, "start": 70, "end": 77, "ref_url": "Octagon", "ref_ids": ["314575"], "sent_idx": 0}, {"id": 1, "start": 121, "end": 128, "ref_url": "Laurens,_South_Carolina", "ref_ids": ["134556"], "sent_idx": 0}, {"id": 2, "start": 160, "end": 174, "ref_url": "South_Carolina", "ref_ids": ["27956"], "sent_idx": 1}, {"id": 3, "start": 69, "end": 81, "ref_url": "Presbyterian", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 134, "end": 142, "ref_url": "Concrete", "ref_ids": ["5371"], "sent_idx": 1}, {"id": 5, "start": 46, "end": 80, "ref_url": "Historic_American_Buildings_Survey", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 106, "end": 142, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 3}]} +{"id": "17892866", "title": "El Greco Apartments", "sentences": ["El Greco Apartments is a historic twelve-unit, Spanish Revival style apartment building located in the Fairfax district of Los Angeles, California.", "The building was built in 1929 as one of the original buildings in the Westwood Village section of Los Angeles.", "Located in the heart of Westwood, the building was the home of film celebrities, including Erich von Stroheim, Michael Curtiz, and Joel McCrea.", "In the 1980s, the owner planned to demolish the building to erect a condominium building.", "In response to tenant protests, the owner agreed to pay to have the building moved to another location.", "The building was ultimately moved to the Fairfax district where it was converted to low-income housing for senior citizens."], "mentions": [{"id": 0, "start": 41, "end": 57, 
"ref_url": "Fairfax_District,_Los_Angeles,_California", "ref_ids": null, "sent_idx": 5}, {"id": 1, "start": 123, "end": 146, "ref_url": "Los_Angeles,_California", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 71, "end": 87, "ref_url": "Westwood,_Los_Angeles,_California", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 91, "end": 109, "ref_url": "Erich_von_Stroheim", "ref_ids": ["294418"], "sent_idx": 2}, {"id": 4, "start": 111, "end": 125, "ref_url": "Michael_Curtiz", "ref_ids": ["89326"], "sent_idx": 2}, {"id": 5, "start": 131, "end": 142, "ref_url": "Joel_McCrea", "ref_ids": ["537253"], "sent_idx": 2}]} +{"id": "17892923", "title": "The Best Damn Tour: Live in Toronto", "sentences": ["The Best Damn Tour: Live in Toronto is a live music DVD from Franco-Canadian singer Avril Lavigne.", "It was shot at the sold-out Air Canada Centre concert, in Toronto, Ontario, Canada on April 7, 2008 during the Best Damn Tour.", "It was released on September 9, 2008 in North America and on September 5, 2008 in Europe.", "In the USA there were clean and explicit versions of the DVD."], "mentions": [{"id": 0, "start": 57, "end": 60, "ref_url": "DVD", "ref_ids": ["11014498"], "sent_idx": 3}, {"id": 1, "start": 84, "end": 97, "ref_url": "Avril_Lavigne", "ref_ids": ["165507"], "sent_idx": 0}, {"id": 2, "start": 28, "end": 45, "ref_url": "Air_Canada_Centre", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 111, "end": 125, "ref_url": "Best_Damn_Tour", "ref_ids": null, "sent_idx": 1}]} +{"id": "17892925", "title": "Blood and Iron (speech)", "sentences": ["Blood and Iron (German: \"Blut und Eisen\") is the name given to a speech made by Otto von Bismarck given on 30 September 1862, at the time when he was Minister President of Prussia, about the unification of the German territories.", "It is also a transposed phrase that Bismarck uttered near the end of the speech that has become one of his most widely known quotations.", "In September 1862, when the Prussian House of 
Representatives were refusing to approve an increase in military spending desired by King Wilhelm I, the monarch appointed Bismarck Minister President and Foreign Minister.", "A few days later, Bismarck appeared before the House's Budget Committee and stressed the need for military preparedness to solve the German Question.", "He concluded his speech with the following statement:", "“The position of Prussia in Germany will not be determined by its liberalism but by its power [...]", "Prussia must concentrate its strength and hold it for the favorable moment, which has already come and gone several times.", "Since the treaties of Vienna, our frontiers have been ill-designed for a healthy body politic.", "Not through speeches and majority decisions will the great questions of the day be decided—that was the great mistake of 1848 and 1849—but by iron and blood (\"Eisen und Blut\").”", "This phrase, relying on a patriotic poem written by Max von Schenkendorf during the Napoleonic Wars, was popularized as the more euphonious \"Blut und Eisen\" (\"Blood and Iron\"), and became symbolic of Bismarckian \"Machtpolitik\" (\"Power politics\").", "Although Bismarck was an outstanding diplomat, the phrase \"blood and iron\" has become a popular description of his foreign policy partly because he did on occasion resort to war to further the unification of Germany and the expansion of its continental power.", "Therefore he became known as \"the iron chancellor.\""], "mentions": [{"id": 0, "start": 208, "end": 214, "ref_url": "German_language", "ref_ids": ["11884"], "sent_idx": 10}, {"id": 1, "start": 80, "end": 97, "ref_url": "Otto_von_Bismarck", "ref_ids": ["22416"], "sent_idx": 0}, {"id": 2, "start": 150, "end": 179, "ref_url": "Minister_President_of_Prussia", "ref_ids": ["676988"], "sent_idx": 0}, {"id": 3, "start": 191, "end": 228, "ref_url": "Unification_of_Germany", "ref_ids": ["1070016"], "sent_idx": 0}, {"id": 4, "start": 28, "end": 61, "ref_url": 
"Prussian_House_of_Representatives", "ref_ids": ["21055060"], "sent_idx": 2}, {"id": 5, "start": 136, "end": 145, "ref_url": "Wilhelm_I,_German_Emperor", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 201, "end": 217, "ref_url": "Foreign_Minister_of_Prussia", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 133, "end": 148, "ref_url": "German_Question", "ref_ids": ["24878360"], "sent_idx": 3}, {"id": 8, "start": 6, "end": 28, "ref_url": "Congress_of_Vienna", "ref_ids": ["44628"], "sent_idx": 7}, {"id": 9, "start": 121, "end": 134, "ref_url": "German_revolutions_of_1848–49", "ref_ids": null, "sent_idx": 8}, {"id": 10, "start": 52, "end": 72, "ref_url": "Max_von_Schenkendorf", "ref_ids": ["15618444"], "sent_idx": 9}, {"id": 11, "start": 84, "end": 99, "ref_url": "Napoleonic_Wars", "ref_ids": ["45420"], "sent_idx": 9}, {"id": 12, "start": 213, "end": 225, "ref_url": "Machtpolitik", "ref_ids": null, "sent_idx": 9}, {"id": 13, "start": 193, "end": 215, "ref_url": "Otto_von_Bismarck", "ref_ids": ["22416"], "sent_idx": 10}]} +{"id": "17892926", "title": "Deborah L. Wince-Smith", "sentences": ["Deborah L. 
Wince-Smith is the President of the United States Council on Competitiveness since 2001."], "mentions": [{"id": 0, "start": 30, "end": 39, "ref_url": "President_(corporate_title)", "ref_ids": ["40811786"], "sent_idx": 0}, {"id": 1, "start": 61, "end": 87, "ref_url": "Council_on_Competitiveness", "ref_ids": null, "sent_idx": 0}]} +{"id": "17892943", "title": "Wengen railway station", "sentences": ["Wengen is a railway station in the car free resort of Wengen in the Bernese Oberland region of Switzerland.", "The station is on the Wengernalpbahn (WAB), whose trains operate from Lauterbrunnen to Kleine Scheidegg via Wengen.", "Administratively, the station is in the municipality of Lauterbrunnen in the canton of Bern."], "mentions": [{"id": 0, "start": 12, "end": 27, "ref_url": "Railway_station", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 108, "end": 114, "ref_url": "Wengen", "ref_ids": ["665790"], "sent_idx": 1}, {"id": 2, "start": 68, "end": 84, "ref_url": "Bernese_Oberland", "ref_ids": ["1826437"], "sent_idx": 0}, {"id": 3, "start": 95, "end": 106, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 0}, {"id": 4, "start": 22, "end": 36, "ref_url": "Wengernalpbahn", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 56, "end": 69, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 2}, {"id": 6, "start": 87, "end": 103, "ref_url": "Kleine_Scheidegg", "ref_ids": ["650545"], "sent_idx": 1}, {"id": 7, "start": 40, "end": 69, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 2}, {"id": 8, "start": 77, "end": 91, "ref_url": "Canton_of_Bern", "ref_ids": ["292850"], "sent_idx": 2}]} +{"id": "17892957", "title": "Cunningham (crater)", "sentences": ["Cunningham is a young crater on the floor of Caloris Basin, on Mercury.", "It is surrounded by a bright ray system."], "mentions": [{"id": 0, "start": 22, "end": 28, "ref_url": "Impact_crater", "ref_ids": ["6416"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 58, "ref_url": 
"Caloris_Basin", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 63, "end": 70, "ref_url": "Mercury_(planet)", "ref_ids": ["19694"], "sent_idx": 0}, {"id": 3, "start": 29, "end": 39, "ref_url": "Crater_ray", "ref_ids": null, "sent_idx": 1}]} +{"id": "17892958", "title": "Wengwald railway station", "sentences": ["Wengwald is a request stop railway station in the municipality of Lauterbrunnen in the Swiss canton of Bern.", "The station is on the Wengernalpbahn (WAB), whose trains operate from Lauterbrunnen to Kleine Scheidegg via Wengen.", "The station is located on the new line from Lauterbrunnen to Wengen, which opened in 1910 to replace the more direct but steeper original routing.", "It is immediately uphill of a 180 degree helical tunnel which the line uses in order to reduce the gradient.", "The station is served by the following passenger trains:"], "mentions": [{"id": 0, "start": 14, "end": 26, "ref_url": "Request_stop", "ref_ids": ["4073596"], "sent_idx": 0}, {"id": 1, "start": 27, "end": 42, "ref_url": "Railway_station", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 44, "end": 57, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 2}, {"id": 3, "start": 87, "end": 92, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 0}, {"id": 4, "start": 93, "end": 107, "ref_url": "Canton_of_Bern", "ref_ids": ["292850"], "sent_idx": 0}, {"id": 5, "start": 22, "end": 36, "ref_url": "Wengernalpbahn", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 44, "end": 57, "ref_url": "Lauterbrunnen", "ref_ids": ["290566"], "sent_idx": 2}, {"id": 7, "start": 87, "end": 103, "ref_url": "Kleine_Scheidegg", "ref_ids": ["650545"], "sent_idx": 1}, {"id": 8, "start": 61, "end": 67, "ref_url": "Wengen", "ref_ids": ["665790"], "sent_idx": 2}]} +{"id": "17892967", "title": "Daniel Singer", "sentences": ["Daniel Singer may refer to:"], "mentions": []} +{"id": "17892971", "title": "510th Fighter Squadron", "sentences": ["The 510th Fighter Squadron is part of 
the 31st Operations Group at Aviano Air Base, Italy.", "It is a combat-ready F-16CM fighter squadron prepared to deploy and fly combat sorties as tasked by NATO and US combatant commanders.", "The squadron was first activated as the 625th Bombardment Squadron in 1943, changing to the 510th Fighter-Bomber Squadron a few months later.", "After training in the United States, it moved to England in March 1944, helping prepare for Operation Overlord by attacking targets in France.", "Following D-Day, the squadron moved to the continent, providing close air support for Allied forces.", "The squadron earned a Distinguished Unit Citation and was cited in the Order of the Day by the Belgian Army.", "After V-E Day", "The squadron returned to the United States and was inactivated at the Port of Embarkation.", "The squadron was activated again in 1952, when it replaced an Air National Guard squadron that had been mobilized for the Korean War.", "It trained for fighter bomber operations until inactivating in 1958.", "A year later, it was activated in the Philippines as the 510th Tactical Fighter Squadron In the Philippines.", "The squadron returned to the United States in 1964, but soon deployed back to the Pacific, moving to Vietnam in 1965, and engaging in combat until inactivating in 1969 as the United States began withdrawing forces from Vietnam.", "The squadron was activated with Fairchild Republic A-10 Thunderbolt IIs in 1978 as the 81st Fighter Wing doubled its tactical strength.", "It moved to Germany in 1992 and was inactivated there in 1994.", "A few weeks later, the squadron was reactivated in its current role."], "mentions": [{"id": 0, "start": 42, "end": 63, "ref_url": "31st_Operations_Group", "ref_ids": ["23276548"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 82, "ref_url": "Aviano_Air_Base", "ref_ids": ["1658437"], "sent_idx": 0}, {"id": 2, "start": 21, "end": 27, "ref_url": "General_Dynamics_F-16_Fighting_Falcon", "ref_ids": ["11642"], "sent_idx": 1}, {"id": 
3, "start": 100, "end": 104, "ref_url": "NATO", "ref_ids": ["21133"], "sent_idx": 1}, {"id": 4, "start": 109, "end": 132, "ref_url": "Unified_Combatant_Command", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 23, "end": 31, "ref_url": "Squadron_(aviation)", "ref_ids": ["27830333"], "sent_idx": 14}, {"id": 6, "start": 92, "end": 110, "ref_url": "Operation_Overlord", "ref_ids": ["6723726"], "sent_idx": 3}, {"id": 7, "start": 10, "end": 15, "ref_url": "D-Day", "ref_ids": null, "sent_idx": 4}, {"id": 8, "start": 64, "end": 81, "ref_url": "Close_air_support", "ref_ids": ["600792"], "sent_idx": 4}, {"id": 9, "start": 86, "end": 92, "ref_url": "Allies_of_World_War_II", "ref_ids": ["2198844"], "sent_idx": 4}, {"id": 10, "start": 22, "end": 49, "ref_url": "Distinguished_Unit_Citation", "ref_ids": null, "sent_idx": 5}, {"id": 11, "start": 95, "end": 107, "ref_url": "Belgian_Army", "ref_ids": null, "sent_idx": 5}, {"id": 12, "start": 6, "end": 13, "ref_url": "V-E_Day", "ref_ids": null, "sent_idx": 6}, {"id": 13, "start": 62, "end": 80, "ref_url": "Air_National_Guard", "ref_ids": ["265572"], "sent_idx": 8}, {"id": 14, "start": 104, "end": 113, "ref_url": "Mobilization", "ref_ids": ["451461"], "sent_idx": 8}, {"id": 15, "start": 122, "end": 132, "ref_url": "Korean_War", "ref_ids": ["16772"], "sent_idx": 8}, {"id": 16, "start": 15, "end": 29, "ref_url": "Fighter_bomber", "ref_ids": null, "sent_idx": 9}, {"id": 17, "start": 32, "end": 70, "ref_url": "Fairchild_Republic_A-10_Thunderbolt_II", "ref_ids": ["12502446"], "sent_idx": 12}, {"id": 18, "start": 87, "end": 104, "ref_url": "81st_Fighter_Wing", "ref_ids": null, "sent_idx": 12}]} +{"id": "17893008", "title": "Eric Owens (table tennis)", "sentences": ["Eric Owens (born 1975 or 1976) is a professional American table tennis player.", "Owens began playing and competing in tournaments nationwide at the early age of six.", "Owens won every major National Junior, and Junior Olympic title.", "Owens won several major titles, 
including the US National Championships and North American Championships.", "He was on the US National Team competing in six (6) World Championships, three (3) Pan-American Games, winning a Gold and Bronze Medal.", "Owens was inducted to the US Table Tennis Hall of Fame in 2015."], "mentions": [{"id": 0, "start": 58, "end": 70, "ref_url": "Table_tennis", "ref_ids": ["30589"], "sent_idx": 0}]} +{"id": "17893065", "title": "Aberthin Platform railway station", "sentences": ["Aberthin Platform railway station was a short lived Taff Vale Railway station which served Aberthin, a village north east of Cowbridge in the Welsh county of Glamorganshire."], "mentions": [{"id": 0, "start": 52, "end": 69, "ref_url": "Taff_Vale_Railway", "ref_ids": ["350267"], "sent_idx": 0}, {"id": 1, "start": 0, "end": 8, "ref_url": "Aberthin", "ref_ids": ["1572664"], "sent_idx": 0}, {"id": 2, "start": 125, "end": 134, "ref_url": "Cowbridge", "ref_ids": ["125430"], "sent_idx": 0}, {"id": 3, "start": 142, "end": 147, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 4, "start": 158, "end": 172, "ref_url": "Glamorganshire", "ref_ids": null, "sent_idx": 0}]} +{"id": "17893101", "title": "William Fetherstone Montgomery", "sentences": ["William Fetherstone Montgomery( 1797- 1859) was an Irish obstetrician credited for first describing the Glands of Montgomery."], "mentions": [{"id": 0, "start": 51, "end": 56, "ref_url": "Ireland", "ref_ids": ["147575"], "sent_idx": 0}, {"id": 1, "start": 57, "end": 69, "ref_url": "Obstetrician", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 104, "end": 124, "ref_url": "Glands_of_Montgomery", "ref_ids": null, "sent_idx": 0}]} +{"id": "17893155", "title": "Tai folk religion", "sentences": ["The Tai folk religion, or Satsana Phi (; , , \"religion of spirits\"), or Ban Phi (in Ahom language) is a form of animist religious beliefs traditionally and historically practiced by groups of ethnic Tai peoples.", "Tai folk animist traditions are practiced by 
the Lao, Tai Ahom, Shan people, Dai people, Tai Khamti Lao Isan and Thais of Thailand.", "These religions are animistic and polytheistic and their practice involves classes of shamans and ancestor worship.", "Among the Lao, the Lao Loum and Lao Lom are predominantly Buddhist, while the Lao Theung and Lao Sung are predominantly folk religious.", "Tai folk animist traditions have also been incorporated into Laotian Buddhism."], "mentions": [{"id": 0, "start": 84, "end": 97, "ref_url": "Ahom_language", "ref_ids": ["733264"], "sent_idx": 0}, {"id": 1, "start": 9, "end": 16, "ref_url": "Animism", "ref_ids": ["1423"], "sent_idx": 4}, {"id": 2, "start": 199, "end": 210, "ref_url": "Tai_peoples", "ref_ids": ["32411786"], "sent_idx": 0}, {"id": 3, "start": 61, "end": 64, "ref_url": "Lao_people", "ref_ids": ["197158"], "sent_idx": 4}, {"id": 4, "start": 54, "end": 62, "ref_url": "Ahom_people", "ref_ids": ["720374"], "sent_idx": 1}, {"id": 5, "start": 64, "end": 75, "ref_url": "Shan_people", "ref_ids": ["203686"], "sent_idx": 1}, {"id": 6, "start": 77, "end": 87, "ref_url": "Dai_people", "ref_ids": ["218079"], "sent_idx": 1}, {"id": 7, "start": 89, "end": 99, "ref_url": "Khamti_people", "ref_ids": ["7723878"], "sent_idx": 1}, {"id": 8, "start": 100, "end": 108, "ref_url": "Isan_people", "ref_ids": ["18271849"], "sent_idx": 1}, {"id": 9, "start": 113, "end": 117, "ref_url": "Thai_people", "ref_ids": ["2670504"], "sent_idx": 1}, {"id": 10, "start": 122, "end": 130, "ref_url": "Thailand", "ref_ids": ["30128"], "sent_idx": 1}, {"id": 11, "start": 34, "end": 46, "ref_url": "Polytheism", "ref_ids": ["19195836"], "sent_idx": 2}, {"id": 12, "start": 86, "end": 93, "ref_url": "Shamanism", "ref_ids": ["26861"], "sent_idx": 2}, {"id": 13, "start": 98, "end": 114, "ref_url": "Ancestor_worship", "ref_ids": null, "sent_idx": 2}, {"id": 14, "start": 19, "end": 27, "ref_url": "Lao_Loum", "ref_ids": ["1637525"], "sent_idx": 3}, {"id": 15, "start": 32, "end": 39, "ref_url": "Lao_Lom", 
"ref_ids": ["14076497"], "sent_idx": 3}, {"id": 16, "start": 58, "end": 66, "ref_url": "Buddhism", "ref_ids": ["3267529"], "sent_idx": 3}, {"id": 17, "start": 78, "end": 88, "ref_url": "Lao_Theung", "ref_ids": ["1603081"], "sent_idx": 3}, {"id": 18, "start": 93, "end": 101, "ref_url": "Lao_Sung", "ref_ids": ["1621217"], "sent_idx": 3}, {"id": 19, "start": 61, "end": 77, "ref_url": "Buddhism_in_Laos", "ref_ids": ["2685896"], "sent_idx": 4}]} +{"id": "17893165", "title": "Mark Quayle (footballer)", "sentences": ["Mark Leslie Quayle (born 2 October 1978) is an English former footballer.", "He played for Everton, Notts County, Grantham Town, Morecambe, Telford United, Nuneaton Borough, Chester City, Scarborough, Northwich Victoria, Hyde United and Colwyn Bay.", "Quayle scored a lot of goals in the non-league game including a crucial winning goal against Southend in a 3rd Round FA Cup Replay tie which secured Scarborough a lucrative home tie against Chelsea."], "mentions": [{"id": 0, "start": 62, "end": 72, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 1, "start": 14, "end": 21, "ref_url": "Everton_F.C.", "ref_ids": ["91155"], "sent_idx": 1}, {"id": 2, "start": 23, "end": 35, "ref_url": "Notts_County_F.C.", "ref_ids": ["448599"], "sent_idx": 1}, {"id": 3, "start": 37, "end": 50, "ref_url": "Grantham_Town_F.C.", "ref_ids": ["1577867"], "sent_idx": 1}, {"id": 4, "start": 52, "end": 61, "ref_url": "Morecambe_F.C.", "ref_ids": ["451204"], "sent_idx": 1}, {"id": 5, "start": 63, "end": 77, "ref_url": "Telford_United_F.C.", "ref_ids": ["451210"], "sent_idx": 1}, {"id": 6, "start": 79, "end": 95, "ref_url": "Nuneaton_Town_F.C.", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 97, "end": 109, "ref_url": "Chester_City_F.C.", "ref_ids": ["451195"], "sent_idx": 1}, {"id": 8, "start": 149, "end": 160, "ref_url": "Scarborough_F.C.", "ref_ids": ["946449"], "sent_idx": 2}, {"id": 9, "start": 124, "end": 142, "ref_url": "Northwich_Victoria_F.C.", 
"ref_ids": ["19430905"], "sent_idx": 1}, {"id": 10, "start": 144, "end": 155, "ref_url": "Hyde_United_F.C.", "ref_ids": ["8200305"], "sent_idx": 1}, {"id": 11, "start": 160, "end": 170, "ref_url": "Colwyn_Bay_F.C.", "ref_ids": ["3617222"], "sent_idx": 1}]} +{"id": "17893179", "title": "Liu Luyang", "sentences": ["Liu Luyang (born June 10, 1976) is a Chinese ice dancer.", "With partner Zhao Xiaolei, she is the 1986 Asian Winter Games champion.", "They placed 19th at the 1988 Winter Olympics and Liu was 11 years old at the time.", "Before teaming up with Zhao, she competed with Li Xiangdong.", "They placed 15th at the 1984 World Junior Figure Skating Championships."], "mentions": [{"id": 0, "start": 37, "end": 44, "ref_url": "China", "ref_ids": ["5405"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 55, "ref_url": "Ice_dancer", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 13, "end": 25, "ref_url": "Zhao_Xiaolei", "ref_ids": ["17893377"], "sent_idx": 1}, {"id": 3, "start": 43, "end": 61, "ref_url": "Asian_Winter_Games", "ref_ids": ["6511088"], "sent_idx": 1}, {"id": 4, "start": 24, "end": 44, "ref_url": "1988_Winter_Olympics", "ref_ids": ["187504"], "sent_idx": 2}, {"id": 5, "start": 47, "end": 59, "ref_url": "Li_Xiangdong", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 29, "end": 70, "ref_url": "World_Junior_Figure_Skating_Championships", "ref_ids": ["6802329"], "sent_idx": 4}]} +{"id": "17893210", "title": "Niklaus Franz von Bachmann", "sentences": ["Niklaus Leodegar Franz Ignaz von Bachmann (Näfels, 27 March 1740 – Näfels, 11 February 1831), was a Swiss general who fought in the Napoleonic Wars."], "mentions": [{"id": 0, "start": 43, "end": 49, "ref_url": "Näfels", "ref_ids": ["6219822"], "sent_idx": 0}, {"id": 1, "start": 132, "end": 147, "ref_url": "Napoleonic_Wars", "ref_ids": ["45420"], "sent_idx": 0}]} +{"id": "17893238", "title": "1st century in Roman Britain", "sentences": ["Events from the 1st century in Roman Britain."], "mentions": [{"id": 0, 
"start": 31, "end": 44, "ref_url": "Roman_Britain", "ref_ids": ["13525"], "sent_idx": 0}]} +{"id": "17893251", "title": "Pausinystalia johimbe", "sentences": ["Pausinystalia johimbe, (Rubiaceae), common name yohimbe, is a plant species native to western and central Africa (Nigeria, Cabinda, Cameroon, Congo-Brazzaville, Gabon, Equatorial Guinea).", "Extracts from yohimbe have been used in traditional medicine in West Africa as an aphrodisiac and have been marketed in developed countries as dietary supplements."], "mentions": [{"id": 0, "start": 24, "end": 33, "ref_url": "Rubiaceae", "ref_ids": ["54025"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 75, "ref_url": "Africa", "ref_ids": ["5334607"], "sent_idx": 1}, {"id": 2, "start": 114, "end": 121, "ref_url": "Nigeria", "ref_ids": ["21383"], "sent_idx": 0}, {"id": 3, "start": 123, "end": 130, "ref_url": "Cabinda_Province", "ref_ids": ["65213"], "sent_idx": 0}, {"id": 4, "start": 132, "end": 140, "ref_url": "Cameroon", "ref_ids": ["5447"], "sent_idx": 0}, {"id": 5, "start": 142, "end": 159, "ref_url": "Congo-Brazzaville", "ref_ids": null, "sent_idx": 0}, {"id": 6, "start": 161, "end": 166, "ref_url": "Gabon", "ref_ids": ["12027"], "sent_idx": 0}, {"id": 7, "start": 168, "end": 185, "ref_url": "Equatorial_Guinea", "ref_ids": ["9366"], "sent_idx": 0}, {"id": 8, "start": 40, "end": 60, "ref_url": "Traditional_medicine", "ref_ids": ["457857"], "sent_idx": 1}, {"id": 9, "start": 82, "end": 93, "ref_url": "Aphrodisiac", "ref_ids": ["70705"], "sent_idx": 1}, {"id": 10, "start": 120, "end": 139, "ref_url": "Developed_countries", "ref_ids": null, "sent_idx": 1}, {"id": 11, "start": 143, "end": 161, "ref_url": "Dietary_supplement", "ref_ids": ["104444"], "sent_idx": 1}]} +{"id": "17893257", "title": "Paradox Basin", "sentences": ["The Paradox Basin is an asymmetric foreland basin located mostly in southeast Utah and southwest Colorado, but extending into northeast Arizona and northwest New Mexico.", "The basin is a large 
elongate northwest to southeast oriented depression formed during the late Paleozoic Era.", "The basin is bordered on the east by the tectonically uplifted Uncompahgre Plateau, on the northwest by the San Rafael Swell and on the west by the Circle Cliffs Uplift.", "Its areal size is around 33,000 square miles (85470 km).", "The combined sedimentary strata of the Paradox Basin are more than 15,000 feet (4600 m) thick in some places.", "Unlike most Rocky Mountain basins, the Paradox Basin is an evaporite basin containing sediments from alternating cycles of deep marine and very shallow water.", "As a result of the thick salt sequences and the fact that salt is ductile at relatively low temperatures and pressures, salt tectonics play a major role in the post-Pennsylvanian structural deformation within the basin."], "mentions": [{"id": 0, "start": 35, "end": 49, "ref_url": "Foreland_basin", "ref_ids": ["5523577"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 82, "ref_url": "Utah", "ref_ids": ["31716"], "sent_idx": 0}, {"id": 2, "start": 97, "end": 105, "ref_url": "Colorado", "ref_ids": ["5399"], "sent_idx": 0}, {"id": 3, "start": 136, "end": 143, "ref_url": "Arizona", "ref_ids": ["21883824"], "sent_idx": 0}, {"id": 4, "start": 158, "end": 168, "ref_url": "New_Mexico", "ref_ids": ["21649"], "sent_idx": 0}, {"id": 5, "start": 96, "end": 109, "ref_url": "Paleozoic_Era", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 41, "end": 62, "ref_url": "Tectonic_uplift", "ref_ids": ["1415891"], "sent_idx": 2}, {"id": 7, "start": 63, "end": 82, "ref_url": "Uncompahgre_Plateau", "ref_ids": ["2250021"], "sent_idx": 2}, {"id": 8, "start": 108, "end": 124, "ref_url": "San_Rafael_Swell", "ref_ids": ["858728"], "sent_idx": 2}, {"id": 9, "start": 13, "end": 24, "ref_url": "Sedimentary_rock", "ref_ids": ["44412"], "sent_idx": 4}, {"id": 10, "start": 25, "end": 31, "ref_url": "Strata", "ref_ids": null, "sent_idx": 4}, {"id": 11, "start": 12, "end": 26, "ref_url": "Rocky_Mountain", 
"ref_ids": null, "sent_idx": 5}, {"id": 12, "start": 59, "end": 68, "ref_url": "Evaporite", "ref_ids": ["186182"], "sent_idx": 5}, {"id": 13, "start": 86, "end": 94, "ref_url": "Sediment", "ref_ids": ["60343"], "sent_idx": 5}, {"id": 14, "start": 92, "end": 103, "ref_url": "Temperature", "ref_ids": ["20647050"], "sent_idx": 6}, {"id": 15, "start": 120, "end": 134, "ref_url": "Salt_tectonics", "ref_ids": ["7884988"], "sent_idx": 6}, {"id": 16, "start": 165, "end": 178, "ref_url": "Pennsylvanian_(geology)", "ref_ids": ["23222"], "sent_idx": 6}]} +{"id": "17893263", "title": "Adam Hugh", "sentences": ["Adam Hugh is the number one table tennis player under sixteen in the United States and is most noted for his victory in the 2003 North American Cadet Championship, as well as the US National Cadet Championship.", "Other career accomplishments include finishing third in singles, doubles and team at the ITTF Junior Circuit Tournament in Canada.", "He finished 11 at the World Cadet Challenge.", "In the team event, North America lost in the semi-final to Asia 2-3, and Hugh took two points: one over World Cadet finalist and Asian Cadet Champion Jun Mizutani from Japan and the number one cadet from Hong Kong, Li, Kwun Ngai.", "Hugh finished seventh at the 2003 US Men's Team Trials in Atlanta, missing the US Team basically by one game.", "If he had beaten De Tran instead of losing 4-3, he would have been the number 4 player who was sent to the world's in Paris.", "At this event Hugh beat Eric Owens, Brian Pace, David Wang, and Ashu Jain.", "In recent years he has beaten:"], "mentions": [{"id": 0, "start": 28, "end": 40, "ref_url": "Table_tennis", "ref_ids": ["30589"], "sent_idx": 0}]} +{"id": "17893283", "title": "Fort Ripley Shoal Light", "sentences": ["The Fort Ripley Shoal Light or Middle Ground Light was a lighthouse in the Charleston, South Carolina harbor approaches."], "mentions": [{"id": 0, "start": 75, "end": 101, "ref_url": "Charleston,_South_Carolina", "ref_ids": 
["61024"], "sent_idx": 0}]} +{"id": "17893293", "title": "Mirta Roses Periago", "sentences": ["Mirta Roses Periago is an Argentine epidemiologist who served as Director of the Pan American Health Organization (PAHO) until January 31, 2013."], "mentions": [{"id": 0, "start": 26, "end": 35, "ref_url": "Argentina", "ref_ids": ["18951905"], "sent_idx": 0}, {"id": 1, "start": 36, "end": 50, "ref_url": "Epidemiology", "ref_ids": ["66997"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 113, "ref_url": "Pan_American_Health_Organization", "ref_ids": ["1246706"], "sent_idx": 0}]} +{"id": "17893298", "title": "Bungee chair", "sentences": ["A bungee chair is a chair that has bungee cords or bands incorporated in its construction.", "While the chair’s legs and other components are usually made with traditional materials such as plastic or metal, the seating and back portions of the chair are made with bungee.", "Bungee chairs are noted for their distinctive style and form.", "The open spaces between each bungee band tend to give the chair a unique sense of breathability and \"bounce\".", "The chair's bungee bands can be either rounded or flattened, and the number of bands on each make of bungee chair may vary.", "Types of bungee chairs include office chairs, lounge chairs and folding chairs.", "A relatively obscure form of chair a decade ago, the bungee chair is now common in many retail locations."], "mentions": [{"id": 0, "start": 35, "end": 47, "ref_url": "Bungee_cords", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 96, "end": 103, "ref_url": "Plastic", "ref_ids": ["26145195"], "sent_idx": 1}, {"id": 2, "start": 107, "end": 112, "ref_url": "Metal", "ref_ids": ["19042"], "sent_idx": 1}]} +{"id": "17893372", "title": "Grothendieck space", "sentences": ["In mathematics, a Grothendieck space, named after Alexander Grothendieck, is a Banach space \"X\" in which every weakly* convergent sequence in the dual space \"X\"* converges with respect to the weak topology of 
\"X\"*."], "mentions": [{"id": 0, "start": 3, "end": 14, "ref_url": "Mathematics", "ref_ids": ["18831"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 72, "ref_url": "Alexander_Grothendieck", "ref_ids": ["2042"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Banach_space", "ref_ids": ["3989"], "sent_idx": 0}, {"id": 3, "start": 146, "end": 156, "ref_url": "Dual_space", "ref_ids": ["7988"], "sent_idx": 0}, {"id": 4, "start": 192, "end": 205, "ref_url": "Weak_topology", "ref_ids": ["33662"], "sent_idx": 0}]} +{"id": "17893377", "title": "Zhao Xiaolei", "sentences": ["Zhao Xiaolei( born August 3, 1966 in Heilongjiang) is a Chinese ice dancer.", "He competed at two Winter Olympic Games with two different partners.", "With partner Liu Luyang, he is the 1986 Asian Winter Games champion.", "Liu& Zhao placed 19th at the 1988 Winter Olympics.", "With partner Xi Hongyan, he placed 19th at the 1984 Winter Olympics.", "He was 19 at the time."], "mentions": [{"id": 0, "start": 37, "end": 49, "ref_url": "Heilongjiang", "ref_ids": ["173816"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 63, "ref_url": "China", "ref_ids": ["5405"], "sent_idx": 0}, {"id": 2, "start": 64, "end": 74, "ref_url": "Ice_dancer", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 13, "end": 23, "ref_url": "Liu_Luyang", "ref_ids": ["17893179"], "sent_idx": 2}, {"id": 4, "start": 40, "end": 58, "ref_url": "Asian_Winter_Games", "ref_ids": ["6511088"], "sent_idx": 2}, {"id": 5, "start": 29, "end": 49, "ref_url": "1988_Winter_Olympics", "ref_ids": ["187504"], "sent_idx": 3}, {"id": 6, "start": 13, "end": 23, "ref_url": "Xi_Hongyan", "ref_ids": ["17893517"], "sent_idx": 4}, {"id": 7, "start": 47, "end": 67, "ref_url": "1984_Winter_Olympics", "ref_ids": ["113362"], "sent_idx": 4}]} +{"id": "17893384", "title": "Outreach (magazine)", "sentences": ["Outreach is an evangelical magazine based in Colorado Springs, Colorado.", "It focuses on activities of growing churches and is non-denominational.", 
"It is a periodical from the organization Outreach, Inc., founded in 1996 by Scott Evans and provides community outreach products.", "Each October, \"Outreach\" lists the 100 largest and the 100 fastest growing churches in America."], "mentions": [{"id": 0, "start": 15, "end": 26, "ref_url": "Evangelicalism", "ref_ids": ["10370"], "sent_idx": 0}, {"id": 1, "start": 27, "end": 35, "ref_url": "Magazine", "ref_ids": ["21001"], "sent_idx": 0}, {"id": 2, "start": 45, "end": 71, "ref_url": "Colorado_Springs,_Colorado", "ref_ids": ["6250"], "sent_idx": 0}, {"id": 3, "start": 52, "end": 70, "ref_url": "Non-denominational", "ref_ids": ["2570425"], "sent_idx": 1}]} +{"id": "17893430", "title": "International Builders' Show", "sentences": ["The International Builders' Show (IBS) is organized by the National Association of Home Builders (NAHB) and is the largest light construction building industry tradeshow in the United States.", "It is the only event of its kind, focusing specifically on the needs, concerns, and opportunities that face builders.", "In 1944, the NAHB held its first annual convention and exposition, later becoming the International Builders' Show in 1998.", "From its early start, the show has grown to attract more than 100,000 attendees, making it one of the largest conventions in the country.", "As such, the show has alternated its location since 2003 between the Orange County Convention Center in Orlando, Florida and the Las Vegas Convention Center in Las Vegas, Nevada (two of the United States' largest convention centers)."], "mentions": [{"id": 0, "start": 59, "end": 96, "ref_url": "National_Association_of_Home_Builders", "ref_ids": ["7914276"], "sent_idx": 0}, {"id": 1, "start": 160, "end": 169, "ref_url": "Trade_fair", "ref_ids": ["1129368"], "sent_idx": 0}, {"id": 2, "start": 190, "end": 203, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 4}, {"id": 3, "start": 69, "end": 100, "ref_url": "Orange_County_Convention_Center", "ref_ids": 
["6961997"], "sent_idx": 4}, {"id": 4, "start": 104, "end": 120, "ref_url": "Orlando,_Florida", "ref_ids": ["100582"], "sent_idx": 4}, {"id": 5, "start": 129, "end": 156, "ref_url": "Las_Vegas_Convention_Center", "ref_ids": ["1538321"], "sent_idx": 4}, {"id": 6, "start": 160, "end": 177, "ref_url": "Las_Vegas,_Nevada", "ref_ids": null, "sent_idx": 4}]} +{"id": "17893434", "title": "Agriculture in Laos", "sentences": ["At least 5 million hectares of Laos's total land area of 23,680,000 hectares are suitable for cultivation (about 21 percent).", "17 percent of this land area (between 850,000 and 900,000 hectares) is actually cultivated, less than 4 percent of the total area.", "Rice accounted for about 80 percent of cultivated land during the 1989-90 growing season, including 422,000 hectares of lowland wet rice and 223,0 hectares of upland rice.", "This demonstrates that although there is interplanting of upland crops and fish are found in fields, irrigated rice agriculture remains basically a monoculture system despite government efforts to encourage crop diversification.", "Cultivated land area had increased by about 6 percent from 1975-77 but in 1987 only provided citizens with less than one-fourth of a hectare each, given a population of approximately 3.72 million in 1986.", "In addition to land under cultivation, about 800,000 hectares are used for pastureland or contain ponds for raising fish.", "Pastureland is rotated, and its use is not fixed over a long period of time."], "mentions": [{"id": 0, "start": 31, "end": 35, "ref_url": "Laos", "ref_ids": ["17752"], "sent_idx": 0}, {"id": 1, "start": 0, "end": 4, "ref_url": "Rice", "ref_ids": ["36979"], "sent_idx": 2}, {"id": 2, "start": 148, "end": 159, "ref_url": "Monoculture", "ref_ids": ["52599"], "sent_idx": 3}, {"id": 3, "start": 0, "end": 7, "ref_url": "Pasture", "ref_ids": ["221932"], "sent_idx": 6}]} +{"id": "17893471", "title": "Satiation", "sentences": ["Satiation may refer to:"], "mentions": []} +{"id": 
"17893474", "title": "Boudry, Burkina Faso", "sentences": ["Boudry is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "It is the capital of Boudry Department, and has a population of 1,682."], "mentions": [{"id": 0, "start": 21, "end": 38, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 1}, {"id": 1, "start": 45, "end": 64, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893481", "title": "I've Got the Music in Me", "sentences": ["\"I've Got the Music in Me\" is a pop song by The Kiki Dee Band, released in 1974.", "It was written in 1973 by Bias Boshell, Kiki Dee Band's keyboardist.", "The song entered the UK Singles Chart on 7 September 1974, reached number 19 and stayed in the chart for eight weeks.", "On the \"Billboard\" pop chart in the United States, \"I've Got the Music in Me\" peaked at number 12 on November 30, 1974.", "It is also the title of a Kiki Dee Band album released in 1974 and re-mastered and re-issued with bonus tracks in 2008.", "The song is upbeat, describing in various ways how the singer will not be deterred or impeded in their goals, because they possess the quality of 'having the music' in them."], "mentions": [{"id": 0, "start": 44, "end": 61, "ref_url": "Kiki_Dee", "ref_ids": ["1057264"], "sent_idx": 0}, {"id": 1, "start": 7, "end": 14, "ref_url": "Songwriter", "ref_ids": ["94154"], "sent_idx": 1}, {"id": 2, "start": 26, "end": 38, "ref_url": "Bias_Boshell", "ref_ids": ["13430153"], "sent_idx": 1}, {"id": 3, "start": 21, "end": 37, "ref_url": "UK_Singles_Chart", "ref_ids": ["3349146"], "sent_idx": 2}, {"id": 4, "start": 23, "end": 28, "ref_url": "Record_chart", "ref_ids": ["1795886"], "sent_idx": 3}, {"id": 5, "start": 8, "end": 17, "ref_url": "Billboard_(magazine)", "ref_ids": ["18309966"], "sent_idx": 3}]} +{"id": "17893517", "title": "Xi Hongyan", 
"sentences": ["Xi Hongyan is a Chinese ice dancer.", "She competed at the 1984 Winter Olympics with partner Zhao Xiaolei and placed 19th.", "She was 17 at the time.", "Following her retirement from competitive skating, she became a coach.", "Her current and former students include Huang Xintong& Zheng Xun, Guo Jiameimei& Meng Fei, Yu Xiaoyang& Wang Chen, and Qi Jia& Sun Xu."], "mentions": [{"id": 0, "start": 16, "end": 23, "ref_url": "China", "ref_ids": ["5405"], "sent_idx": 0}, {"id": 1, "start": 24, "end": 34, "ref_url": "Ice_dancer", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 20, "end": 40, "ref_url": "1984_Winter_Olympics", "ref_ids": ["113362"], "sent_idx": 1}, {"id": 3, "start": 54, "end": 66, "ref_url": "Zhao_Xiaolei", "ref_ids": ["17893377"], "sent_idx": 1}, {"id": 4, "start": 40, "end": 53, "ref_url": "Huang_Xintong", "ref_ids": ["9316627"], "sent_idx": 4}, {"id": 5, "start": 55, "end": 64, "ref_url": "Zheng_Xun", "ref_ids": ["9316684"], "sent_idx": 4}, {"id": 6, "start": 66, "end": 79, "ref_url": "Guo_Jiameimei", "ref_ids": ["20333829"], "sent_idx": 4}, {"id": 7, "start": 81, "end": 89, "ref_url": "Meng_Fei_(figure_skater)", "ref_ids": ["20333792"], "sent_idx": 4}, {"id": 8, "start": 91, "end": 102, "ref_url": "Yu_Xiaoyang", "ref_ids": ["9316731"], "sent_idx": 4}, {"id": 9, "start": 104, "end": 113, "ref_url": "Wang_Chen_(figure_skater)", "ref_ids": ["9316792"], "sent_idx": 4}, {"id": 10, "start": 119, "end": 125, "ref_url": "Qi_Jia", "ref_ids": ["12931707"], "sent_idx": 4}, {"id": 11, "start": 127, "end": 133, "ref_url": "Sun_Xu", "ref_ids": ["12931712"], "sent_idx": 4}]} +{"id": "17893534", "title": "Greg Hatza", "sentences": ["Greg Hatza is an American jazz organist born in 1948 in Reading, Pennsylvania."], "mentions": [{"id": 0, "start": 26, "end": 30, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 77, "ref_url": "Reading,_Pennsylvania", "ref_ids": ["131393"], "sent_idx": 0}]} +{"id": "17893539", 
"title": "Bagzan", "sentences": ["Bagzan is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 320."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 67, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893573", "title": "Simon Dewinter", "sentences": ["Simon Dewinter (20 September 1908 – 1 August 1972) was a Belgian boxer who competed in the 1936 Summer Olympics.", "In 1936 he was eliminated in the second round of the lightweight class after losing his fight to Andrew Scrivani."], "mentions": [{"id": 0, "start": 57, "end": 64, "ref_url": "Belgium", "ref_ids": ["3343"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 70, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 91, "end": 111, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 53, "end": 70, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 97, "end": 112, "ref_url": "Andrew_Scrivani", "ref_ids": null, "sent_idx": 1}]} +{"id": "17893600", "title": "Boéna", "sentences": ["Boéna is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 8,094."], "mentions": [{"id": 0, "start": 23, "end": 40, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 63, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 87, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893601", "title": "Feusier Octagon House", "sentences": ["The Feusier Octagon House is an historic octagonal house located at 1067 Green Street, 
in the Russian Hill neighborhood of San Francisco, California.", "It was built between 1857 and 1858 by George Kenny, who sold it in 1870 to Louis Feusier.", "It is San Francisco Landmark 36 and is one of two surviving octagon houses in the city.", "Built in the 1850s, the house was later expanded with a third story, mansard roof, and cupola.", "It was added to the National Register of Historic Places on March 24, 1974.", "it is a rental house."], "mentions": [{"id": 0, "start": 60, "end": 67, "ref_url": "Octagon", "ref_ids": ["314575"], "sent_idx": 2}, {"id": 1, "start": 94, "end": 106, "ref_url": "Russian_Hill", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 6, "end": 19, "ref_url": "San_Francisco,_California", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 138, "end": 148, "ref_url": "California", "ref_ids": ["5407"], "sent_idx": 0}, {"id": 4, "start": 69, "end": 81, "ref_url": "Mansard_roof", "ref_ids": ["335114"], "sent_idx": 3}, {"id": 5, "start": 20, "end": 56, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 4}]} +{"id": "17893620", "title": "Globalization and Health", "sentences": ["Globalization and Health is a peer- reviewed open-access public health journal that covers the topic of globalization and its effects on health.", "The editors in chief are Greg Martin and Ronald Labonté."], "mentions": [{"id": 0, "start": 30, "end": 44, "ref_url": "Peer-reviewed", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 45, "end": 56, "ref_url": "Open-access", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 57, "end": 78, "ref_url": "Public_health_journal", "ref_ids": ["541992"], "sent_idx": 0}, {"id": 3, "start": 104, "end": 117, "ref_url": "Globalization", "ref_ids": ["46313"], "sent_idx": 0}, {"id": 4, "start": 137, "end": 143, "ref_url": "Health", "ref_ids": ["80381"], "sent_idx": 0}, {"id": 5, "start": 4, "end": 20, "ref_url": "Editors_in_chief", "ref_ids": null, "sent_idx": 1}]} +{"id": "17893629", "title": 
"Boudry-Peulh", "sentences": ["Boudry-Peulh is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 82."], "mentions": [{"id": 0, "start": 33, "end": 50, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 73, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 97, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893634", "title": "Title 6", "sentences": ["Title 6 or Title VI in Roman numerals, refers to the sixth part of various laws, including:"], "mentions": [{"id": 0, "start": 23, "end": 37, "ref_url": "Roman_numerals", "ref_ids": ["25657"], "sent_idx": 0}]} +{"id": "17893636", "title": "Ajman Club", "sentences": ["Ajman Club () is a United Arab Emirati football club based in Ajman, UAE.", "In 2011, Ajman was relegated to the second division, but the following year they were promoted back to the top flight."], "mentions": [{"id": 0, "start": 19, "end": 38, "ref_url": "United_Arab_Emirates", "ref_ids": ["69328"], "sent_idx": 0}, {"id": 1, "start": 39, "end": 47, "ref_url": "Football_(soccer)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 9, "end": 14, "ref_url": "Ajman", "ref_ids": ["2874574"], "sent_idx": 1}, {"id": 3, "start": 69, "end": 72, "ref_url": "UAE", "ref_ids": null, "sent_idx": 0}]} +{"id": "17893641", "title": "Merle Hoffman", "sentences": ["Merle Hoffman (born March 6, 1946) is an American journalist, activist, and healthcare pioneer.", "Shortly after New York State legalized abortion in 1970, three years before the Supreme Court's \"Roe v. 
Wade\" decision legalized abortion nationally, Hoffman helped establish one of the country's first ambulatory abortion centers, Flushing Women's Medical Center in 1971.", "It was the forerunner of Choices Women's Medical Center which Hoffman founded and serves as president and CEO.", "Choices is a full-service healthcare provider, offering gynecological services, pre-natal care, family care, transgender health care, telemedicine, mental health and other services.", "Hoffman co-founded the National Abortion Federation in 1976, the first professional organization of abortion providers in the U.S., and was its first president.", "She also founded the New York Pro-Choice Coalition in 1985.", "Hoffman is the publisher of \"On the Issues\" magazine, which began as a print publication in 1983 and then became an online publication in 2008.", "She was awarded the Front Page Award for Political Commentary in 2010 from the Newswoman's Club of New York."], "mentions": [{"id": 0, "start": 14, "end": 28, "ref_url": "New_York_State", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 100, "end": 108, "ref_url": "Abortion", "ref_ids": ["765"], "sent_idx": 4}, {"id": 2, "start": 80, "end": 95, "ref_url": "Supreme_Court_of_the_United_States", "ref_ids": ["31737"], "sent_idx": 1}, {"id": 3, "start": 97, "end": 108, "ref_url": "Roe_v._Wade", "ref_ids": ["68493"], "sent_idx": 1}, {"id": 4, "start": 134, "end": 146, "ref_url": "Telemedicine", "ref_ids": null, "sent_idx": 3}, {"id": 5, "start": 23, "end": 51, "ref_url": "National_Abortion_Federation", "ref_ids": ["2983989"], "sent_idx": 4}, {"id": 6, "start": 28, "end": 52, "ref_url": "On_the_Issues_(magazine)", "ref_ids": ["33830156"], "sent_idx": 6}, {"id": 7, "start": 79, "end": 107, "ref_url": "Newswomen's_Club_of_New_York", "ref_ids": ["47431013"], "sent_idx": 7}]} +{"id": "17893642", "title": "Wheatcroft, Derbyshire", "sentences": ["Wheatcroft is a small hamlet in the hills of Amber Valley, near Crich, Derbyshire, England.", 
"Wheatcroft has no shop or church and while it once had a chapel it has been renovated into a small dwelling.", "Wheatcroft is split into two areas; Wheatcroft Mount and Lower Wheatcoft.", "Over twenty people live in permanent residence.", "Wheatcroft has been in continued existence since 1066 as it is mentioned in the Domesday Book as having \"4 houses\" 1210 as it was recorded in a same of land under the name of \"watedroft\" Further evidence to support the antiquity of Wheatcroft comes from the early 15th century as one of the larger houses is said to have dated from this period.", "Until the mid 20th century a fayre came to Wheatcroft but that has since stopped.", "One of the houses was used in the ITV drama Peak Practice.", "Of the houses three were built on land granted by the Lord of Wingfield Manor in the 17th Century and were built from stone quarried from the village quarry, as most likely were most of the others.", "In addition to the chapel, as above now renovated into a second home, the village used to possess a Village Institute in which weekly social gatherings were held until the 1950s and over the years various shops including a general store and until the 1960s a lubricant and engineering shop.", "Two of the houses have medieval cruck barns as part of their outhouses and one of the houses used to be owned by the Hopkinson Nightingale family, a branch of the Nightingale family from Lea Hurst, the childhood home of Florence Nightingale."], "mentions": [{"id": 0, "start": 45, "end": 57, "ref_url": "Amber_Valley", "ref_ids": ["227523"], "sent_idx": 0}, {"id": 1, "start": 64, "end": 69, "ref_url": "Crich", "ref_ids": ["1230354"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 81, "ref_url": "Derbyshire", "ref_ids": ["71070"], "sent_idx": 0}, {"id": 3, "start": 80, "end": 93, "ref_url": "Domesday_Book", "ref_ids": ["47752"], "sent_idx": 4}, {"id": 4, "start": 187, "end": 196, "ref_url": "Lea_Hurst", "ref_ids": null, "sent_idx": 9}, {"id": 5, "start": 
220, "end": 240, "ref_url": "Florence_Nightingale", "ref_ids": ["100127"], "sent_idx": 9}]} +{"id": "17893643", "title": "Bourma, Ganzourgou", "sentences": ["Bourma is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 4,079."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 64, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893647", "title": "Christian Kit Goguen", "sentences": ["Christian Kit Goguen (born September 9, 1978) is an Acadian singer-songwriter/actor from Saint-Charles, New Brunswick.", "His work is mostly in French, but he also writes and sings in English.", "Winner of the 2003 Gala de la chanson de Caraquet for singer-songwriter and best song, the first time that the competition awarded the two top prizes to one artist.", "He has since performed as a solo artist and as part of the musical revue \"Ode à l'Acadie\".", "He was also awarded the Prix Rideau-Acadie in 2006 and the winner of the competition \"Le choix du future\" organised by Moncton's CHOY-FM.", "Christian has performed his own songs and versions of other well-known Acadian artists.", "He has performed in the United States, in Canada, Switzerland, Belgium, France, Spain, Burkina Faso.", "He regularly presents a Mi'kmaq version of \"The Gathering Song\" during his shows.", "His version of this song is part of his trademark, and shows the collaboration and friendship that has developed between the Mi'kmaq and the Acadians.", "He released his first self-titled album in 2005.", "He studied drama at the Université de Moncton, did some acting with the Théâtre l'Escaouette in Moncton and on the television show \"Samuel\", broadcast on Radio-Canada, the French CBC.", "He has been a special guest at the 
East Coast Music Award Show, and has also been invited by Benoît Pelletier, to perform for the Premier of Quebec Jean Charest at the Salon Rouge in the National Assembly in Quebec City.", "He is a singer on the Cirque du Soleil production Corteo."], "mentions": [{"id": 0, "start": 141, "end": 148, "ref_url": "Acadian", "ref_ids": null, "sent_idx": 8}, {"id": 1, "start": 89, "end": 117, "ref_url": "Saint-Charles,_New_Brunswick", "ref_ids": ["25195444"], "sent_idx": 0}, {"id": 2, "start": 41, "end": 49, "ref_url": "Caraquet", "ref_ids": ["30865071"], "sent_idx": 2}, {"id": 3, "start": 38, "end": 45, "ref_url": "Moncton", "ref_ids": ["19857"], "sent_idx": 10}, {"id": 4, "start": 129, "end": 136, "ref_url": "CHOY-FM", "ref_ids": ["7137593"], "sent_idx": 4}, {"id": 5, "start": 141, "end": 148, "ref_url": "Acadian", "ref_ids": null, "sent_idx": 8}, {"id": 6, "start": 24, "end": 37, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 6}, {"id": 7, "start": 160, "end": 166, "ref_url": "Canada", "ref_ids": ["5042916"], "sent_idx": 10}, {"id": 8, "start": 50, "end": 61, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 6}, {"id": 9, "start": 63, "end": 70, "ref_url": "Belgium", "ref_ids": ["3343"], "sent_idx": 6}, {"id": 10, "start": 72, "end": 78, "ref_url": "France", "ref_ids": ["5843419"], "sent_idx": 6}, {"id": 11, "start": 80, "end": 85, "ref_url": "Spain", "ref_ids": ["26667"], "sent_idx": 6}, {"id": 12, "start": 87, "end": 99, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 6}, {"id": 13, "start": 125, "end": 132, "ref_url": "Mi'kmaq_language", "ref_ids": null, "sent_idx": 8}, {"id": 14, "start": 125, "end": 132, "ref_url": "Mi'kmaq_people", "ref_ids": null, "sent_idx": 8}, {"id": 15, "start": 141, "end": 149, "ref_url": "Acadians", "ref_ids": ["44515"], "sent_idx": 8}, {"id": 16, "start": 24, "end": 45, "ref_url": "Université_de_Moncton", "ref_ids": ["572302"], "sent_idx": 10}, {"id": 17, "start": 154, "end": 166, "ref_url": 
"Radio-Canada", "ref_ids": null, "sent_idx": 10}, {"id": 18, "start": 35, "end": 57, "ref_url": "East_Coast_Music_Award", "ref_ids": null, "sent_idx": 11}, {"id": 19, "start": 93, "end": 109, "ref_url": "Benoît_Pelletier", "ref_ids": ["7540169"], "sent_idx": 11}, {"id": 20, "start": 130, "end": 147, "ref_url": "Premier_of_Quebec", "ref_ids": ["370751"], "sent_idx": 11}, {"id": 21, "start": 148, "end": 160, "ref_url": "Jean_Charest", "ref_ids": ["211253"], "sent_idx": 11}, {"id": 22, "start": 208, "end": 219, "ref_url": "Quebec_City", "ref_ids": ["100727"], "sent_idx": 11}, {"id": 23, "start": 22, "end": 38, "ref_url": "Cirque_du_Soleil", "ref_ids": ["284379"], "sent_idx": 12}, {"id": 24, "start": 50, "end": 56, "ref_url": "Corteo", "ref_ids": ["1817916"], "sent_idx": 12}]} +{"id": "17893654", "title": "Secret Lives of Women", "sentences": ["Secret Lives of Women is a reality television series airing Tuesday nights at 10 PM Eastern Time on .", "The show probes little-known subcultures of women in the United States.", "A typical format is a series of interviews conducted with several women (and occasionally transgender women)."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Reality_television", "ref_ids": ["38539"], "sent_idx": 0}, {"id": 1, "start": 84, "end": 96, "ref_url": "Eastern_Time", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 29, "end": 39, "ref_url": "Subculture", "ref_ids": ["150349"], "sent_idx": 1}, {"id": 3, "start": 57, "end": 70, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 1}, {"id": 4, "start": 90, "end": 101, "ref_url": "Transgender", "ref_ids": ["19904525"], "sent_idx": 2}]} +{"id": "17893688", "title": "National Education Institute", "sentences": ["The National Education Institute Slovenia - NEI (, ZRSŠ) is the main public organisation in Slovenia that encourages development in education in Slovenia up to pre-university — covering all kindergartens, elementary schools, secondary schools, music schools, and 
boarding schools."], "mentions": [{"id": 0, "start": 33, "end": 41, "ref_url": "Slovenia", "ref_ids": ["27338"], "sent_idx": 0}]} +{"id": "17893696", "title": "Wrestling at the 1992 Summer Olympics – Men's Greco-Roman 62 kg", "sentences": ["The Men's Greco-Roman 57 kg at the 1992 Summer Olympics as part of the wrestling program were held at the Institut Nacional d'Educació Física de Catalunya from July 28 to July 30.", "The wrestlers are divided into 2 groups.", "The winner of each group decided by a double-elimination system."], "mentions": [{"id": 0, "start": 35, "end": 55, "ref_url": "1992_Summer_Olympics", "ref_ids": ["82755"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 88, "ref_url": "Wrestling_at_the_1992_Summer_Olympics", "ref_ids": ["6528104"], "sent_idx": 0}, {"id": 2, "start": 106, "end": 154, "ref_url": "Institut_Nacional_d'Educació_Física_de_Catalunya", "ref_ids": ["28809435"], "sent_idx": 0}]} +{"id": "17893702", "title": "Dikomtinga", "sentences": ["Dikomtinga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 342."], "mentions": [{"id": 0, "start": 31, "end": 48, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 71, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 83, "end": 95, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893703", "title": "Sultan chicken", "sentences": ["The Sultan is a breed of chicken originating in Turkey, belonging to the group of crested chicken.", "Its English moniker is directly culled from the original Turkish language name of \"Serai-Tavuk\", which translates as \"fowls of the Sultan\".", "They have always been primarily ornamental, having been kept in the gardens of Ottoman sultanate.", "In the West they are bred for competitive showing as part of poultry fancy, and are generally a rare sight.", "The breed was first exported 
from its native country in 1854, when a Ms. Elizabeth Watts of Hampstead, London brought a small flock to Britain.", "It was seen in North America by 1867, and was recognized officially by acceptance into the American Poultry Association's Standard of Perfection in 1874.", "Sultans have a great deal of decorative plumage, including large, puffy crests, beards, long tails, and profuse foot feathering.", "Their small, V-shaped combs are almost entirely hidden under feathering.", "Sultans are also one of a minority of breeds to have five toes on each foot.", "With males weighing approximately 2.7 kilos (6 pounds) and hens 2 kilos (4 pounds), they are the smallest of the large breeds of chickens.", "They also have a bantam version.", "Sultans appear in three varieties: Black, Blue, and White, with White being the most well known.", "Hens lay small white eggs at a slow rate, and do not generally go broody.", "In temperament, Sultans are quite docile, friendly chickens, and are content at being kept in confinement.", "Most Sultan breeders take care to raise them in dry bedding in order to protect their elaborate feathering, especially on the feet.", "They may also be bullied somewhat if kept in flocks with more active breeds."], "mentions": [{"id": 0, "start": 69, "end": 74, "ref_url": "Breed", "ref_ids": ["267933"], "sent_idx": 15}, {"id": 1, "start": 51, "end": 58, "ref_url": "Chicken", "ref_ids": ["37402"], "sent_idx": 13}, {"id": 2, "start": 48, "end": 54, "ref_url": "Turkey", "ref_ids": ["11125639"], "sent_idx": 0}, {"id": 3, "start": 82, "end": 97, "ref_url": "Crested_chicken", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 57, "end": 73, "ref_url": "Turkish_language", "ref_ids": ["29992"], "sent_idx": 1}, {"id": 5, "start": 5, "end": 11, "ref_url": "Sultan", "ref_ids": ["64647"], "sent_idx": 14}, {"id": 6, "start": 79, "end": 86, "ref_url": "Ottoman_Empire", "ref_ids": ["22278"], "sent_idx": 2}, {"id": 7, "start": 30, "end": 49, "ref_url": "Livestock_show", 
"ref_ids": ["1676343"], "sent_idx": 3}, {"id": 8, "start": 61, "end": 74, "ref_url": "Animal_fancy", "ref_ids": ["30873441"], "sent_idx": 3}, {"id": 9, "start": 92, "end": 101, "ref_url": "Hampstead", "ref_ids": ["94033"], "sent_idx": 4}, {"id": 10, "start": 103, "end": 109, "ref_url": "London", "ref_ids": ["17867"], "sent_idx": 4}, {"id": 11, "start": 135, "end": 142, "ref_url": "Great_Britain", "ref_ids": ["13530298"], "sent_idx": 4}, {"id": 12, "start": 91, "end": 119, "ref_url": "American_Poultry_Association", "ref_ids": ["17278528"], "sent_idx": 5}, {"id": 13, "start": 122, "end": 144, "ref_url": "Standard_of_Perfection", "ref_ids": null, "sent_idx": 5}, {"id": 14, "start": 22, "end": 27, "ref_url": "Comb_(anatomy)", "ref_ids": ["1681164"], "sent_idx": 7}, {"id": 15, "start": 17, "end": 23, "ref_url": "Bantam_(chicken)", "ref_ids": null, "sent_idx": 10}]} +{"id": "17893716", "title": "Tianbao", "sentences": ["Tianbao may refer to:"], "mentions": []} +{"id": "17893730", "title": "Wright Eclipse Fusion", "sentences": ["The Wright Eclipse Fusion is a low floor articulated single-decker bus body built on the Volvo B7LA chassis by Wrightbus.", "It was the articulated version of the Wright Eclipse, succeeding the Wright Fusion.", "Of the 88 produced, FirstGroup purchased 67 with the other 21 going to Dublin Bus.", "In October 2001, two were sent from First Hampshire & Dorset to First London's Greenford garage to operate a six month trial on route 207.", "Later FirstGroup partnered with Wrightbus in developing the Wright StreetCar, which is built on a modified version of the chassis used for the Eclipse Fusion.", "Most of the Eclipse Fusions are similar in appearance to its Wrightbus single deckers with the arched roof."], "mentions": [{"id": 0, "start": 31, "end": 40, "ref_url": "Low-floor_bus", "ref_ids": ["1206834"], "sent_idx": 0}, {"id": 1, "start": 11, "end": 22, "ref_url": "Articulated_bus", "ref_ids": ["937446"], "sent_idx": 1}, {"id": 2, "start": 53, "end": 
70, "ref_url": "Single-decker_bus", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 89, "end": 99, "ref_url": "Volvo_B7L", "ref_ids": ["1793973"], "sent_idx": 0}, {"id": 4, "start": 61, "end": 70, "ref_url": "Wrightbus", "ref_ids": ["1086084"], "sent_idx": 5}, {"id": 5, "start": 38, "end": 52, "ref_url": "Wright_Eclipse", "ref_ids": ["12517332"], "sent_idx": 1}, {"id": 6, "start": 69, "end": 82, "ref_url": "Wright_Fusion", "ref_ids": ["12009338"], "sent_idx": 1}, {"id": 7, "start": 6, "end": 16, "ref_url": "FirstGroup", "ref_ids": ["854614"], "sent_idx": 4}, {"id": 8, "start": 71, "end": 81, "ref_url": "Dublin_Bus", "ref_ids": ["497845"], "sent_idx": 2}, {"id": 9, "start": 36, "end": 60, "ref_url": "First_Hampshire_&_Dorset", "ref_ids": ["3923184"], "sent_idx": 3}, {"id": 10, "start": 64, "end": 76, "ref_url": "First_London", "ref_ids": ["2193334"], "sent_idx": 3}, {"id": 11, "start": 79, "end": 95, "ref_url": "Metroline", "ref_ids": ["2193318"], "sent_idx": 3}, {"id": 12, "start": 128, "end": 137, "ref_url": "London_Buses_route_207", "ref_ids": ["9808553"], "sent_idx": 3}, {"id": 13, "start": 60, "end": 76, "ref_url": "Wright_StreetCar", "ref_ids": ["5623903"], "sent_idx": 4}]} +{"id": "17893737", "title": "Elena Garanina", "sentences": ["Elena Anatolyevna Garanina (; born 19 October 1956) is a former ice dancer who represented the Soviet Union.", "With Igor Zavozin, she is the 1978 Nebelhorn Trophy and 1981 Winter Universiade champion.", "They never made it to the World Figure Skating Championships due to the depth of the Soviet dance field.", "After turning pro, the duo performed in Jayne Torvill and Christopher Dean's ice shows.", "Garanina currently works as a coach.", "Her students have included:\nGaranina was formerly married to Igor Zavozin.", "Their son, ice dancer Maxim Zavozin, was born on 2 March 1985 in Moscow.", "Garanina's second husband, Valery Spiridonov, also competed in ice dancing.", "She gave birth to their son, Anton Spiridonov, on 5 
August 1998 in the United States."], "mentions": [{"id": 0, "start": 11, "end": 21, "ref_url": "Ice_dancer", "ref_ids": null, "sent_idx": 6}, {"id": 1, "start": 95, "end": 107, "ref_url": "Soviet_Union", "ref_ids": ["26779"], "sent_idx": 0}, {"id": 2, "start": 61, "end": 73, "ref_url": "Igor_Zavozin", "ref_ids": ["12594900"], "sent_idx": 5}, {"id": 3, "start": 35, "end": 51, "ref_url": "Nebelhorn_Trophy", "ref_ids": ["7211761"], "sent_idx": 1}, {"id": 4, "start": 61, "end": 79, "ref_url": "Winter_Universiade", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 26, "end": 60, "ref_url": "World_Figure_Skating_Championships", "ref_ids": ["229356"], "sent_idx": 2}, {"id": 6, "start": 40, "end": 53, "ref_url": "Jayne_Torvill", "ref_ids": ["1503105"], "sent_idx": 3}, {"id": 7, "start": 58, "end": 74, "ref_url": "Christopher_Dean", "ref_ids": ["1503115"], "sent_idx": 3}, {"id": 8, "start": 22, "end": 35, "ref_url": "Maxim_Zavozin", "ref_ids": ["3905362"], "sent_idx": 6}, {"id": 9, "start": 65, "end": 71, "ref_url": "Moscow", "ref_ids": ["19004"], "sent_idx": 6}, {"id": 10, "start": 27, "end": 44, "ref_url": "Valery_Spiridonov", "ref_ids": ["46398441"], "sent_idx": 7}]} +{"id": "17893741", "title": "Douré, Boudry", "sentences": ["Douré is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 596."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 66, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893773", "title": "Foulgo, Boudry", "sentences": ["Foulgo is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 510."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Boudry_Department", "ref_ids": 
["16975341"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 67, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893783", "title": "Steven Reid Williams", "sentences": ["Steven Reid Williams (born March 1976) is an English pianist and singer-songwriter born in Bristol, England.", "Steve has played keyboards for several UK artists including Jamelia, Lulu, The Overtones, Heather Small, Tulisa, Nadine Coyle, Nell Bryden, Sarah Harding, Jay James, West End star Lee Mead, Kirsty Bertarelli and Ed Drewett, vocalist on Professor Green's smash hit, 'Need you tonight'.", "Alongside Ed, Steve supported Sir Elton John on some of the UK dates of his 2011 world tour.", "Steve is also one of the four 'bandits' from Jess and the Bandits; a US meets UK country music collaboration with Texan singer, Jess Clemmons.", "Steve's song \"Need Your Love\", written for the John Fenlon album \"Rip it up\", was a runner-up in the blues category of the UK Songwriting Competition.", "Steve's album \"Corners\" was released in December 2007, the title track from which was a finalist in Radio 2's \"Sold on Song\" competition.", "The album also features a collaboration with UK blues singer Beth Rowley on \"Too Much Too Late\", and also features work with Ben Castle, Gary Alesbrook on trumpet (Scissor Sisters, Kasabian), Andy Kinsman on sax (Kasabian, Noel Gallagher) and Cliff Moore, brother of guitar legend Gary Moore.", "In 2008 Steve won the Adult Contemporary Category in the UK Song Writing Contest with 'The Better Part of Me' from his 2007 release 'Corners'.", "Following the death of best friend and musician Charlie Derrick in 2003, Steve has a played a key role in the Charlie Derrick Bursary charity as bursar and from 2009, trustee.", "Steve is also closely affiliated with 'Footprint', the section of the charity dedicated to fundraising through musical 
events."], "mentions": [{"id": 0, "start": 91, "end": 98, "ref_url": "Bristol", "ref_ids": ["36741"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 65, "ref_url": "Jess_and_the_Bandits", "ref_ids": ["49807758"], "sent_idx": 3}, {"id": 2, "start": 61, "end": 72, "ref_url": "Beth_Rowley", "ref_ids": ["3908555"], "sent_idx": 6}, {"id": 3, "start": 125, "end": 135, "ref_url": "Ben_Castle", "ref_ids": ["14569858"], "sent_idx": 6}, {"id": 4, "start": 137, "end": 151, "ref_url": "Gary_Alesbrook", "ref_ids": ["46314296"], "sent_idx": 6}, {"id": 5, "start": 164, "end": 179, "ref_url": "Scissor_Sisters", "ref_ids": ["730322"], "sent_idx": 6}, {"id": 6, "start": 181, "end": 189, "ref_url": "Kasabian", "ref_ids": ["1176941"], "sent_idx": 6}, {"id": 7, "start": 192, "end": 204, "ref_url": "Andy_Kinsman", "ref_ids": null, "sent_idx": 6}, {"id": 8, "start": 181, "end": 189, "ref_url": "Kasabian", "ref_ids": ["1176941"], "sent_idx": 6}, {"id": 9, "start": 223, "end": 237, "ref_url": "Noel_Gallagher", "ref_ids": ["194961"], "sent_idx": 6}, {"id": 10, "start": 243, "end": 254, "ref_url": "Cliff_Moore", "ref_ids": null, "sent_idx": 6}, {"id": 11, "start": 281, "end": 291, "ref_url": "Gary_Moore", "ref_ids": ["280636"], "sent_idx": 6}, {"id": 12, "start": 48, "end": 63, "ref_url": "Charlie_Derrick", "ref_ids": null, "sent_idx": 8}]} +{"id": "17893813", "title": "Russ Young", "sentences": ["Russell Charles Young (September 15, 1902 – May 13, 1984) was a professional baseball catcher.", "He played part of one season in Major League Baseball for the St. 
Louis Browns in 1931.", "He was a switch hitter and threw right-handed.", "He was 6'0\" and weighed 175 lbs.", "Young also played 4 games at fullback for the Dayton Triangles of the National Football League in 1925.", "Young had an extensive career in minor league baseball, spanning eighteen seasons from 1923-40.", "He played most of his career with the minor league Milwaukee Brewers, for whom he played in all but two seasons from 1923-34."], "mentions": [{"id": 0, "start": 86, "end": 93, "ref_url": "Catcher", "ref_ids": ["436699"], "sent_idx": 0}, {"id": 1, "start": 32, "end": 53, "ref_url": "Major_League_Baseball", "ref_ids": ["38776"], "sent_idx": 1}, {"id": 2, "start": 62, "end": 78, "ref_url": "St._Louis_Browns", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 9, "end": 22, "ref_url": "Switch_hitter", "ref_ids": ["423515"], "sent_idx": 2}, {"id": 4, "start": 46, "end": 62, "ref_url": "Dayton_Triangles", "ref_ids": ["239744"], "sent_idx": 4}, {"id": 5, "start": 70, "end": 94, "ref_url": "National_Football_League", "ref_ids": ["21211"], "sent_idx": 4}, {"id": 6, "start": 33, "end": 54, "ref_url": "Minor_league_baseball", "ref_ids": null, "sent_idx": 5}, {"id": 7, "start": 51, "end": 68, "ref_url": "Milwaukee_Brewers_(minor_league_baseball_team)", "ref_ids": null, "sent_idx": 6}]} +{"id": "17893821", "title": "Gondré", "sentences": ["Gondré is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 2,146."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 64, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893831", "title": "Albert Irvin", "sentences": ["Albert Henry Thomas Irvin (21 August 1922 – 26 March 2015) was an English expressionist abstract artist.", "Born in 
London he was evacuated from there during World War II, to study at the Northampton School of Art between 1940 and 1941, before being conscripted into the Royal Air Force as a navigator.", "When the war was over, he resumed his course at Goldsmiths College from 1946 to 1950, where he would later go on to teach between 1962 and 1983 where he met and became good friends with Basil Beattie, Harry Thubron amongst others.", "He was elected to The London Group in 1955.", "He worked in studios in the East End of London from 1970 onwards.", "In the early 1950s Bert met and was hugely influenced by many of the \"St Ives\" artists including Peter Lanyon, Roger Hilton, Terry Frost and Sandra Blow.", "Irvin won a major Arts Council Award in 1975 and a Gulbenkian Award for printmaking in 1983.", "His work is widely exhibited both in the UK and abroad, in such places as Arts Council of Great Britain, Birmingham City Art Gallery, the Chase Manhattan Bank, the Contemporary Art Society, Manchester City Art Gallery, Whitworth Gallery Manchester, Leeds City Gallery Tate Britain, the Victoria and Albert Museum Oxford University, Cambridge University and Warwick University Arts Centre.", "His influences included Walter Sickert, Henri Matisse, JMW Turner, Jack Smith and Edward Middleditch.", "Irvin was appointed Officer of the Order of the British Empire (OBE) in the 2013 Birthday Honours for services to the visual arts.", "Irvin married Beatrice Olive Nicolson in August 1947."], "mentions": [{"id": 0, "start": 74, "end": 96, "ref_url": "Abstract_expressionism", "ref_ids": ["147847"], "sent_idx": 0}, {"id": 1, "start": 80, "end": 105, "ref_url": "Northampton_School_of_Art", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 163, "end": 178, "ref_url": "Royal_Air_Force", "ref_ids": ["25679"], "sent_idx": 1}, {"id": 3, "start": 48, "end": 66, "ref_url": "Goldsmiths_College", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 74, "end": 103, "ref_url": "Arts_Council_of_Great_Britain", 
"ref_ids": ["428408"], "sent_idx": 7}, {"id": 5, "start": 138, "end": 158, "ref_url": "Chase_Manhattan_Bank", "ref_ids": null, "sent_idx": 7}, {"id": 6, "start": 190, "end": 217, "ref_url": "Manchester_City_Art_Gallery", "ref_ids": null, "sent_idx": 7}, {"id": 7, "start": 268, "end": 280, "ref_url": "Tate_Britain", "ref_ids": ["18449452"], "sent_idx": 7}, {"id": 8, "start": 286, "end": 312, "ref_url": "Victoria_and_Albert_Museum", "ref_ids": ["97275"], "sent_idx": 7}, {"id": 9, "start": 24, "end": 38, "ref_url": "Walter_Sickert", "ref_ids": ["160433"], "sent_idx": 8}, {"id": 10, "start": 40, "end": 53, "ref_url": "Henri_Matisse", "ref_ids": ["60203"], "sent_idx": 8}, {"id": 11, "start": 55, "end": 65, "ref_url": "JMW_Turner", "ref_ids": null, "sent_idx": 8}, {"id": 12, "start": 67, "end": 77, "ref_url": "Jack_Smith_(artist)", "ref_ids": ["18545116"], "sent_idx": 8}, {"id": 13, "start": 20, "end": 62, "ref_url": "Officer_of_the_Order_of_the_British_Empire", "ref_ids": null, "sent_idx": 9}]} +{"id": "17893837", "title": "Hilary of Galeata", "sentences": ["Saint Hilary of Galeata (Italian: \"Sant'Ilaro\" or \"Sant'Ellero\"; 476 - May 15, 558 AD) is venerated as a saint in the Roman Catholic and Eastern Orthodox churches.", "His feast day is May 15.", "According to tradition, he was born in Tuscia in 476, and he decided to dedicate himself to the life of a hermit at the age of twelve.", "He left his home, and traveled across the Apennines towards Emilia and chose a spot, according to tradition, pointed out to him by an angel, on a mountain in the valley of the Bidente near the Ronco River.", "According to tradition, at the age of twenty, he freed a local nobleman, Olibrius, from a demon.", "In gratitude, Olibrius had his entire family christened by Hilary, and donated to the saint lands and money.", "In addition, two of Olibrius’ sons joined Hilary in the religious life.", "Around 496, then, this became the nucleus of the monastery of Galeata, later called Sant'Ellero 
di Galeata.", "The foundation attracted new recruits, and the monastery followed a version of the rule of Saint Pachomius.", "Numerous miracles are attributed to Hilary.", "Hilary transformed a grape into a serpent in order to teach a lazy monk named Glicerio a lesson.", "Hilary also managed to impress Theodoric, who had originally been harassing the monks and who had been building a palace near Galeata, into donating land and goods."], "mentions": [{"id": 0, "start": 86, "end": 91, "ref_url": "Saint", "ref_ids": ["28436"], "sent_idx": 5}, {"id": 1, "start": 118, "end": 132, "ref_url": "Roman_Catholic", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 137, "end": 153, "ref_url": "Eastern_Orthodox_Church", "ref_ids": ["10186"], "sent_idx": 0}, {"id": 3, "start": 39, "end": 45, "ref_url": "Tuscia", "ref_ids": ["9434510"], "sent_idx": 2}, {"id": 4, "start": 106, "end": 112, "ref_url": "Hermit", "ref_ids": ["271054"], "sent_idx": 2}, {"id": 5, "start": 42, "end": 51, "ref_url": "Apennines", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 60, "end": 66, "ref_url": "Emilia_(region_of_Italy)", "ref_ids": ["12647787"], "sent_idx": 3}, {"id": 7, "start": 134, "end": 139, "ref_url": "Angel", "ref_ids": ["19404136"], "sent_idx": 3}, {"id": 8, "start": 176, "end": 183, "ref_url": "Bidente", "ref_ids": null, "sent_idx": 3}, {"id": 9, "start": 193, "end": 204, "ref_url": "Ronco_River", "ref_ids": null, "sent_idx": 3}, {"id": 10, "start": 126, "end": 133, "ref_url": "Galeata", "ref_ids": ["6722621"], "sent_idx": 11}, {"id": 11, "start": 91, "end": 106, "ref_url": "Saint_Pachomius", "ref_ids": null, "sent_idx": 8}, {"id": 12, "start": 31, "end": 40, "ref_url": "Theodoric_the_Great", "ref_ids": ["31222"], "sent_idx": 11}]} +{"id": "17893850", "title": "Gouingo", "sentences": ["Gouingo is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 2,052."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": 
"Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 65, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893851", "title": "2001–02 United States network television schedule (daytime)", "sentences": ["All the 5 Commercial Networks airs the Daytime Monday–Friday Schedules for each calendar season beginning in September 2001.", "All Times are in Eastern; affiliate schedules may differ.", "Talk shows are highlighted in yellow, local programming is white, reruns of prime-time programming are orange, game shows are pink, soap operas are chartreuse, news programs are gold and all others are light blue.", "New series are highlighted in bold.", "Syndicated programs are listed in italics.", "The September 11 attacks hindered the ability to start airing daytime programming in a timely manner."], "mentions": [{"id": 0, "start": 4, "end": 24, "ref_url": "September_11_attacks", "ref_ids": ["5058690"], "sent_idx": 5}]} +{"id": "17893852", "title": "Humor research", "sentences": ["Humor research (also humor studies) is a multifaceted field which enters the domains of linguistics, history, and literature.", "Research in humor has been done to understand the psychological and physiological effects, both positive and negative, on a person or groups of people.", "Research in humor has revealed many different theories of humor and many different kinds of humor including their functions and effects personally, in relationships, and in society."], "mentions": [{"id": 0, "start": 88, "end": 99, "ref_url": "Linguistics", "ref_ids": ["22760983"], "sent_idx": 0}, {"id": 1, "start": 101, "end": 108, "ref_url": "History", "ref_ids": ["10772350"], "sent_idx": 0}, {"id": 2, "start": 114, "end": 124, "ref_url": "Literature", "ref_ids": ["18963870"], "sent_idx": 0}, {"id": 3, "start": 46, "end": 63, "ref_url": 
"Theories_of_humor", "ref_ids": ["17909855"], "sent_idx": 2}]} +{"id": "17893856", "title": "Frederick Simpson (boxer)", "sentences": ["Frederick John \"Freddie\" Simpson (June 18, 1916 – 1975) was a British boxer who competed in the 1936 Summer Olympics.", "In 1936 he was eliminated in the first round of the lightweight class after losing his fight to Andy Scrivani."], "mentions": [{"id": 0, "start": 62, "end": 69, "ref_url": "Great_Britain", "ref_ids": ["13530298"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 75, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 96, "end": 116, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 52, "end": 69, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_lightweight", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 96, "end": 109, "ref_url": "Andy_Scrivani", "ref_ids": ["18413523"], "sent_idx": 1}]} +{"id": "17893862", "title": "Lucius Albinius", "sentences": ["Lucius Albinius is the name of at least two people of ancient Rome:"], "mentions": [{"id": 0, "start": 54, "end": 66, "ref_url": "Ancient_Rome", "ref_ids": ["521555"], "sent_idx": 0}]} +{"id": "17893864", "title": "Drug policy in Laos", "sentences": ["In 1994, Laos was the world's third largest producer of opium, primarily in the northern provinces.", "Narcotics trafficking in Laos is difficult to control because of the remoteness of many border areas, their attendant lack of communications, and the scarcity of resources, all of which make stationing officials at many of the border crossings difficult.", "Several counternarcotics policy initiatives have been undertaken."], "mentions": [{"id": 0, "start": 25, "end": 29, "ref_url": "Laos", "ref_ids": ["17752"], "sent_idx": 1}, {"id": 1, "start": 56, "end": 61, "ref_url": "Opium", "ref_ids": ["22713"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 21, "ref_url": "Narcotics_trafficking", "ref_ids": null, "sent_idx": 1}]} +{"id": "17893882", 
"title": "Gounghin, Ganzourgou", "sentences": ["Gounghin is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 917."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 69, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893904", "title": "Ibogo, Ganzourgou", "sentences": ["Ibogo is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 386."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 66, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893924", "title": "Koankin", "sentences": ["Koankin is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 1,656."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 65, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17893942", "title": "São João da Lagoa", "sentences": ["São João da Lagoa is a municipality in the north of the Brazilian state of Minas Gerais.", "As of 2007 the population was 4,729 in a total area of 990 km².", "It became a municipality in 1995."], "mentions": [{"id": 0, "start": 75, "end": 87, "ref_url": "Minas_Gerais", "ref_ids": ["222651"], "sent_idx": 0}]} +{"id": "17893965", "title": "List of European number-one hits of 
1987", "sentences": ["This is a list of the European \"Music & Media\" magazine's European Hot 100 Singles and European Top 100 Albums number-ones of 1987."], "mentions": [{"id": 0, "start": 58, "end": 82, "ref_url": "Eurochart_Hot_100_Singles", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 87, "end": 110, "ref_url": "European_Top_100_Albums", "ref_ids": ["12965196"], "sent_idx": 0}]} +{"id": "17893971", "title": "Surgical sieve", "sentences": ["The surgical sieve is a thought process in medicine.", "It is a typical example of how to organise a structured examination answer for medical students and physicians when they are challenged with a question.", "It is also a way of constructing answers to questions from patients and their relatives in a logical manner, and structuring articles and reference texts in medicine.", "Some textbooks put emphasis on using the surgical sieve as a basic structure of diagnosis and management of illnesses."], "mentions": [{"id": 0, "start": 157, "end": 165, "ref_url": "Medicine", "ref_ids": ["18957"], "sent_idx": 2}, {"id": 1, "start": 79, "end": 95, "ref_url": "Medical_students", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 100, "end": 109, "ref_url": "Physician", "ref_ids": ["23315"], "sent_idx": 1}, {"id": 3, "start": 59, "end": 67, "ref_url": "Patients", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 80, "end": 89, "ref_url": "Diagnosis", "ref_ids": ["18507525"], "sent_idx": 3}]} +{"id": "17893977", "title": "Herbert Zipper", "sentences": ["Herbert Zipper (April 24, 1904 in Vienna, Austria – April 21, 1997 in Santa Monica, California) was an internationally renowned composer, conductor, and arts activist.", "As an inmate at Dachau concentration camp in the late 1930s, he arranged to have crude musical instruments constructed out of stolen material, and formed a small secret orchestra which performed on Sunday afternoons for the other inmates.", "Together with a friend, he composed the \"Dachau Lied\" (\"Dachau 
Song\"), which was learned by the other prisoners.", "Released in 1939, he accepted an invitation to conduct the Manila Symphony Orchestra.", "Jailed for four months by the Japanese during their occupation of the Philippines, after his release, he worked secretly for the Allies, transmitting shipping information by radio.", "After the war, he emigrated to the United States in 1946, where he conducted the Brooklyn Symphony Orchestra and promoted music education."], "mentions": [{"id": 0, "start": 34, "end": 49, "ref_url": "Vienna,_Austria", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 70, "end": 94, "ref_url": "Santa_Monica,_California", "ref_ids": ["28208"], "sent_idx": 0}, {"id": 2, "start": 16, "end": 41, "ref_url": "Dachau_concentration_camp", "ref_ids": ["355852"], "sent_idx": 1}, {"id": 3, "start": 59, "end": 84, "ref_url": "Manila_Symphony_Orchestra", "ref_ids": ["13736396"], "sent_idx": 3}, {"id": 4, "start": 52, "end": 81, "ref_url": "Japanese_occupation_of_the_Philippines", "ref_ids": ["11928419"], "sent_idx": 4}, {"id": 5, "start": 81, "end": 108, "ref_url": "Brooklyn_Symphony_Orchestra", "ref_ids": ["17364839"], "sent_idx": 5}]} +{"id": "17894004", "title": "1999 NCAA Division I-A football rankings", "sentences": ["Two human polls and one formulaic ranking make up the 1999 NCAA Division I-A football rankings.", "Unlike most sports, college football's governing body, the National Collegiate Athletic Association (NCAA), does not bestow a National Championship title for Division I-A football.", "That title is primarily bestowed by different polling agencies.", "There are several polls that currently exist.", "The main weekly polls are the AP Poll and Coaches Poll.", "About halfway through the season the Bowl Championship Series (BCS) Standings are released."], "mentions": [{"id": 0, "start": 24, "end": 31, "ref_url": "Formula", "ref_ids": ["164040"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 58, "ref_url": 
"1999_NCAA_Division_I-A_football_season", "ref_ids": ["4222646"], "sent_idx": 0}, {"id": 2, "start": 59, "end": 76, "ref_url": "NCAA_Division_I-A", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 12, "end": 17, "ref_url": "Sport", "ref_ids": ["25778403"], "sent_idx": 1}, {"id": 4, "start": 20, "end": 36, "ref_url": "College_football", "ref_ids": ["6771"], "sent_idx": 1}, {"id": 5, "start": 59, "end": 99, "ref_url": "National_Collegiate_Athletic_Association", "ref_ids": ["60706"], "sent_idx": 1}, {"id": 6, "start": 126, "end": 147, "ref_url": "NCAA_Division_I-A_national_football_championship", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 158, "end": 168, "ref_url": "Division_I_(NCAA)", "ref_ids": null, "sent_idx": 1}, {"id": 8, "start": 30, "end": 37, "ref_url": "AP_Poll", "ref_ids": ["2976437"], "sent_idx": 4}, {"id": 9, "start": 42, "end": 54, "ref_url": "Coaches_Poll", "ref_ids": ["6141295"], "sent_idx": 4}, {"id": 10, "start": 37, "end": 61, "ref_url": "Bowl_Championship_Series", "ref_ids": ["391478"], "sent_idx": 5}]} +{"id": "17894022", "title": "Douré", "sentences": ["Douré may refer to several places in Burkina Faso:"], "mentions": [{"id": 0, "start": 37, "end": 49, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894029", "title": "Archéophone", "sentences": ["The Archéophone is a modern, electric version of the phonographs and ediphones from the 19th and early 20th century.", "It is specifically designed to transfer phonograph cylinders and other cylinder formats to modern recording media.", "Designed in France by Henri Chamoux, the machine is used to transfer and preserve recordings at The Library of Congress, the Bibliothèque Nationale de France, Edison National Historic Site, UC Santa Barbara, University of North Carolina, University College Dublin, the Canadian Museum of Civilization and many other libraries and archives.", "Weighing almost 25 kg and costing over US $10,000, the Archéophone is a specialist's tool 
and not available to the general public.", "However, CDs with transferred cylinder recordings have been made available by various record labels and organizations."], "mentions": [{"id": 0, "start": 40, "end": 50, "ref_url": "Phonograph", "ref_ids": ["24471"], "sent_idx": 1}, {"id": 1, "start": 69, "end": 77, "ref_url": "Ediphone", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 40, "end": 60, "ref_url": "Phonograph_cylinders", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 96, "end": 119, "ref_url": "The_Library_of_Congress", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 125, "end": 157, "ref_url": "Bibliothèque_Nationale_de_France", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 159, "end": 188, "ref_url": "Edison_National_Historic_Site", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 190, "end": 206, "ref_url": "University_of_California,_Santa_Barbara", "ref_ids": ["211917"], "sent_idx": 2}, {"id": 7, "start": 208, "end": 236, "ref_url": "University_of_North_Carolina", "ref_ids": ["77940"], "sent_idx": 2}, {"id": 8, "start": 238, "end": 263, "ref_url": "University_College_Dublin", "ref_ids": ["41537643"], "sent_idx": 2}, {"id": 9, "start": 269, "end": 300, "ref_url": "Canadian_Museum_of_Civilization", "ref_ids": null, "sent_idx": 2}]} +{"id": "17894032", "title": "Let the Games Begin", "sentences": ["Let the Games Begin may refer to:"], "mentions": []} +{"id": "17894045", "title": "Mangbutu language", "sentences": ["Mangbutu is a Central Sudanic language of northeastern Congo.", "It, or its speakers, are also known as \"Mangu-Ngutu, Mombuttu, Wambutu.\"", "The 1,200 Andinai are separated from other Mangbutu speakers by Lese; they speak a distinct dialect, as do the Andali tribe (Angwe dialect)."], "mentions": [{"id": 0, "start": 14, "end": 38, "ref_url": "Central_Sudanic_languages", "ref_ids": ["2078043"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 60, "ref_url": "Democratic_Republic_of_the_Congo", "ref_ids": ["76762"], "sent_idx": 0}, 
{"id": 2, "start": 64, "end": 68, "ref_url": "Lese_language", "ref_ids": ["17894083"], "sent_idx": 2}]} +{"id": "17894051", "title": "Mangbetu language", "sentences": ["Mangbetu, or \"Nemangbetu,\" is one of the most populous of the Central Sudanic languages.", "It is spoken by the Mangbetu people of northeastern Congo.", "It, or its speakers, are also known as \"Amangbetu, Kingbetu, Mambetto.\"", "The most populous dialect, and the one most widely understood, is called Medje.", "Others are Aberu (Nabulu), Makere, Malele, Popoi (Mapopoi).", "The most divergent is Lombi; \"Ethnologue\" treats it as a distinct language.", "About half of the population speaks Bangala, a trade language similar to Lingala, and in southern areas some speak Swahili.", "The Mangbetu live in association with the Asua Pygmies, and their languages are closely related."], "mentions": [{"id": 0, "start": 62, "end": 87, "ref_url": "Central_Sudanic_languages", "ref_ids": ["2078043"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 35, "ref_url": "Mangbetu_people", "ref_ids": ["1819851"], "sent_idx": 1}, {"id": 2, "start": 36, "end": 43, "ref_url": "Bangala_language", "ref_ids": ["8669349"], "sent_idx": 6}, {"id": 3, "start": 47, "end": 61, "ref_url": "Trade_language", "ref_ids": null, "sent_idx": 6}, {"id": 4, "start": 73, "end": 80, "ref_url": "Lingala_language", "ref_ids": null, "sent_idx": 6}, {"id": 5, "start": 115, "end": 122, "ref_url": "Swahili_language", "ref_ids": ["28450"], "sent_idx": 6}, {"id": 6, "start": 42, "end": 46, "ref_url": "Asua_people", "ref_ids": null, "sent_idx": 7}, {"id": 7, "start": 60, "end": 75, "ref_url": "Asoa_language", "ref_ids": ["27605406"], "sent_idx": 7}]} +{"id": "17894058", "title": "2007 Czech Open", "sentences": ["The 2007 Czech Open was the fifteenth edition of the International Floorball tournament.", "It was held in 2007 in Prague, Czech Republic.", "It was won by SalibandySeura Viikingit (SSV Helsinki), ending a 6-year 'streak' where a Swedish team has 
won the tournament.", "No bronze medal match or placement matches were played."], "mentions": [{"id": 0, "start": 67, "end": 76, "ref_url": "Floorball", "ref_ids": ["11247"], "sent_idx": 0}, {"id": 1, "start": 14, "end": 38, "ref_url": "SalibandySeura_Viikingit", "ref_ids": null, "sent_idx": 2}]} +{"id": "17894083", "title": "Lese language", "sentences": ["Lese is a Central Sudanic language of northeastern Congo-Kinshasa, as well as a name for the people who speak this language.", "The Lese people, live in association with the Efé Pygmies and share their language, which is occasionally known as Lissi or Efe.", "Although Efe is given a separate ISO code, Bahuchet (2006) notes that it is not even a distinct dialect, though there is dialectical variation in the language of the Lese (Dese, Karo)."], "mentions": [{"id": 0, "start": 10, "end": 34, "ref_url": "Central_Sudanic_languages", "ref_ids": ["2078043"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 65, "ref_url": "Democratic_Republic_of_Congo", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 17, "end": 41, "ref_url": "Efé_people", "ref_ids": ["1872289"], "sent_idx": 1}, {"id": 3, "start": 46, "end": 57, "ref_url": "Efe_people", "ref_ids": null, "sent_idx": 1}]} +{"id": "17894116", "title": "Quiçama National Park", "sentences": ["Quiçama National Park, also known as Kissama National Park (Portuguese: Parque Nacional do Quiçama or Parque Nacional da Quissama), is a national park in northwestern Angola.", "It is the only functioning national park in all of Angola, with the others being in disrepair due to the Angolan Civil War.", "The park is approximately 70 km from Luanda, the Angolan capital.", "The park covers 3 million acres (12,000 km²), more than twice the size of the U.S. 
state of Rhode Island.", "The Portuguese name \"Quiçama\" is spelled in English and other languages as \"Kissama, Kisama\" or \"Quicama\".", "The spelling \"Kissama\" in English is the closest to the Portuguese phonetic."], "mentions": [{"id": 0, "start": 56, "end": 66, "ref_url": "Portuguese_language", "ref_ids": ["23915"], "sent_idx": 5}, {"id": 1, "start": 27, "end": 40, "ref_url": "National_park", "ref_ids": ["21818"], "sent_idx": 1}, {"id": 2, "start": 49, "end": 55, "ref_url": "Angola", "ref_ids": ["701"], "sent_idx": 2}, {"id": 3, "start": 105, "end": 122, "ref_url": "Angolan_Civil_War", "ref_ids": ["2057171"], "sent_idx": 1}, {"id": 4, "start": 37, "end": 43, "ref_url": "Luanda", "ref_ids": ["18402"], "sent_idx": 2}, {"id": 5, "start": 78, "end": 82, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 3}, {"id": 6, "start": 92, "end": 104, "ref_url": "Rhode_Island", "ref_ids": ["25410"], "sent_idx": 3}, {"id": 7, "start": 67, "end": 75, "ref_url": "Phonetics", "ref_ids": ["23194"], "sent_idx": 5}]} +{"id": "17894123", "title": "Extinction cross", "sentences": ["The extinction cross is an optical phenomenon that is seen when trying to extinguish a laser beam or non-planar white light using crossed polarizers.", "Ideally, crossed (90° rotated) polarizers block all light, since light which is polarized along the polarization axis of the first polarizer is perpendicular to the polarization axis of the second.", "When the beam is not perfectly collimated, however, a characteristic fringing pattern is produced."], "mentions": [{"id": 0, "start": 27, "end": 45, "ref_url": "Optical_phenomenon", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 87, "end": 97, "ref_url": "Laser_beam", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 31, "end": 40, "ref_url": "Polarizer", "ref_ids": ["2722105"], "sent_idx": 1}, {"id": 3, "start": 31, "end": 41, "ref_url": "Collimated_light", "ref_ids": null, "sent_idx": 2}]} +{"id": "17894125", "title": "Paul Buhle", 
"sentences": ["Paul Merlyn Buhle (born September 27, 1944) is a (retired) Senior Lecturer at Brown University, author or editor of 35 volumes including histories of radicalism in the United States and the Caribbean, studies of popular culture, and a series of nonfiction comic art volumes.", "He is the authorized biographer of C. L. R. James."], "mentions": [{"id": 0, "start": 78, "end": 94, "ref_url": "Brown_University", "ref_ids": ["4157"], "sent_idx": 0}, {"id": 1, "start": 150, "end": 160, "ref_url": "Political_radicalism", "ref_ids": ["15611519"], "sent_idx": 0}, {"id": 2, "start": 190, "end": 199, "ref_url": "Caribbean", "ref_ids": ["18956035"], "sent_idx": 0}, {"id": 3, "start": 35, "end": 49, "ref_url": "C._L._R._James", "ref_ids": ["42034669"], "sent_idx": 1}]} +{"id": "17894132", "title": "Eyeshade", "sentences": ["Eyeshade or eye shade may refer to:"], "mentions": []} +{"id": "17894135", "title": "Chase Promenade", "sentences": ["Chase Promenade (formerly Bank One Promenade) is an open-air, tree-lined, pedestrian walkway that opened July 16, 2004.", "It is part of Millennium Park, which is located in the Loop community area of Chicago, Illinois in the United States.", "The promenade was made possible by a gift from the Bank One Foundation.", "It is and used for exhibitions, festivals and other family events as well as private rentals.", "The Chase Promenade has hosted the 2005 \"Revealing Chicago: An Aerial Portrait\" photo exhibition, the 2008 \"Paintings Below Zero\" exhibition and the 2009 Burnham Pavilions.", "The Burnham Pavilions were the cornerstone of the citywide Burnham Plan centennial celebration."], "mentions": [{"id": 0, "start": 14, "end": 29, "ref_url": "Millennium_Park", "ref_ids": ["819738"], "sent_idx": 1}, {"id": 1, "start": 55, "end": 59, "ref_url": "Chicago_Loop", "ref_ids": ["77773"], "sent_idx": 1}, {"id": 2, "start": 60, "end": 74, "ref_url": "Community_areas_of_Chicago", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 51, 
"end": 58, "ref_url": "Chicago", "ref_ids": ["6886"], "sent_idx": 4}, {"id": 4, "start": 51, "end": 59, "ref_url": "Bank_One_Corporation", "ref_ids": ["418241"], "sent_idx": 2}, {"id": 5, "start": 4, "end": 21, "ref_url": "Burnham_Pavilions", "ref_ids": ["23381834"], "sent_idx": 5}, {"id": 6, "start": 59, "end": 71, "ref_url": "Burnham_Plan", "ref_ids": null, "sent_idx": 5}]} +{"id": "17894147", "title": "Dezső Frigyes", "sentences": ["Dezső Frigyes (November 27, 1913 – July 18, 1984) was a Hungarian boxer who competed in the 1936 Summer Olympics.", "He was born in Budapest and died in Cleveland.", "In 1936 he finished fourth in the featherweight class after losing the bronze medal bout to Josef Miner.", "He won the silver medal in the 1934 European Amateur Boxing Championships in Budapest, and the gold medal in the 1942 European Amateur Boxing Championships in Breslau."], "mentions": [{"id": 0, "start": 56, "end": 65, "ref_url": "Hungary", "ref_ids": ["13275"], "sent_idx": 0}, {"id": 1, "start": 66, "end": 71, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 92, "end": 112, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 77, "end": 85, "ref_url": "Budapest", "ref_ids": ["36787"], "sent_idx": 3}, {"id": 4, "start": 36, "end": 45, "ref_url": "Cleveland", "ref_ids": ["5951"], "sent_idx": 1}, {"id": 5, "start": 34, "end": 53, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_featherweight", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 92, "end": 103, "ref_url": "Josef_Miner", "ref_ids": ["13843310"], "sent_idx": 2}, {"id": 7, "start": 31, "end": 73, "ref_url": "1934_European_Amateur_Boxing_Championships", "ref_ids": ["29783918"], "sent_idx": 3}, {"id": 8, "start": 113, "end": 155, "ref_url": "1942_European_Amateur_Boxing_Championships", "ref_ids": ["29766758"], "sent_idx": 3}]} +{"id": "17894197", "title": "RRMC", "sentences": ["RRMC may refer to:"], "mentions": []} +{"id": "17894203", 
"title": "Brayden Schenn", "sentences": ["Brayden Michael Schenn (; born August 22, 1991) is a Canadian professional ice hockey centre currently playing for the St. Louis Blues of the National Hockey League (NHL).", "He was selected by the Los Angeles Kings fifth overall in the 2009 NHL Entry Draft.", "He made his NHL debut for the Kings in October 2009, after being called up on an emergency basis.", "Schenn has represented Canada internationally at several tournaments, and won two silver medals at the 2010 and 2011 World Junior Ice Hockey Championships.", "At the 2011 tournament, Schenn tied Canada's record for points in a single tournament, and was selected to the Tournament's All-Star Team as well as being named Top Forward, and Most Valuable Player.", "Schenn won the Stanley Cup in 2019 with the Blues."], "mentions": [{"id": 0, "start": 75, "end": 85, "ref_url": "Ice_hockey", "ref_ids": ["14790"], "sent_idx": 0}, {"id": 1, "start": 86, "end": 92, "ref_url": "Centre_(ice_hockey)", "ref_ids": ["567643"], "sent_idx": 0}, {"id": 2, "start": 119, "end": 134, "ref_url": "St._Louis_Blues", "ref_ids": ["30519527"], "sent_idx": 0}, {"id": 3, "start": 142, "end": 164, "ref_url": "National_Hockey_League", "ref_ids": ["21809"], "sent_idx": 0}, {"id": 4, "start": 23, "end": 40, "ref_url": "Los_Angeles_Kings", "ref_ids": ["73136"], "sent_idx": 1}, {"id": 5, "start": 62, "end": 82, "ref_url": "2009_NHL_Entry_Draft", "ref_ids": ["10897046"], "sent_idx": 1}, {"id": 6, "start": 36, "end": 42, "ref_url": "Canada_men's_national_junior_ice_hockey_team", "ref_ids": ["15068595"], "sent_idx": 4}, {"id": 7, "start": 103, "end": 107, "ref_url": "2010_World_Junior_Ice_Hockey_Championships", "ref_ids": ["15554126"], "sent_idx": 3}, {"id": 8, "start": 112, "end": 154, "ref_url": "2011_World_Junior_Ice_Hockey_Championships", "ref_ids": ["18347432"], "sent_idx": 3}, {"id": 9, "start": 15, "end": 26, "ref_url": "Stanley_Cup", "ref_ids": ["66968"], "sent_idx": 5}, {"id": 10, "start": 30, 
"end": 34, "ref_url": "2019_Stanley_Cup_Finals", "ref_ids": ["57387798"], "sent_idx": 5}]} +{"id": "17894223", "title": "Kyrgyzstan women's national rugby union team", "sentences": ["The Kyrgyzstan women's national rugby union team are a national sporting side of Kyrgyzstan, representing them at rugby union.", "The side first played in 2008."], "mentions": [{"id": 0, "start": 4, "end": 14, "ref_url": "Kyrgyzstan", "ref_ids": ["170131"], "sent_idx": 0}, {"id": 1, "start": 32, "end": 43, "ref_url": "Rugby_union", "ref_ids": ["25405"], "sent_idx": 0}]} +{"id": "17894240", "title": "Nacissela Maurício", "sentences": ["Nacissela Cristina de Oliveira Maurício (born 2 June 1980) is a former Angolan female professional basketball player.", "A 6'3/1.88m power forward, Maurício plays at club level for Angolan side Primeiro de Agosto.", "Mauricio was also a member of the Angola women's national basketball team at the FIBA Africa Championship for Women 2007 and FIBA World Olympic Qualifying Tournament for Women 2008.", "She was part of the Angolan team that participated in the 2012 Summer Olympics.", "Mauricio has played professionally in Spain and Portugal."], "mentions": [{"id": 0, "start": 58, "end": 68, "ref_url": "Basketball", "ref_ids": ["3921"], "sent_idx": 2}, {"id": 1, "start": 73, "end": 91, "ref_url": "Primeiro_de_Agosto_Basketball", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 34, "end": 73, "ref_url": "Angola_women's_national_basketball_team", "ref_ids": ["13253531"], "sent_idx": 2}, {"id": 3, "start": 81, "end": 120, "ref_url": "FIBA_Africa_Championship_for_Women_2007", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 125, "end": 180, "ref_url": "FIBA_World_Olympic_Qualifying_Tournament_for_Women_2008", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 58, "end": 78, "ref_url": "2012_Summer_Olympics", "ref_ids": ["2176142"], "sent_idx": 3}, {"id": 6, "start": 38, "end": 43, "ref_url": "Spain", "ref_ids": ["26667"], "sent_idx": 4}, {"id": 7, "start": 
48, "end": 56, "ref_url": "Portugal", "ref_ids": ["23033"], "sent_idx": 4}]} +{"id": "17894246", "title": "Ronco (disambiguation)", "sentences": ["Ronco is an American small appliances company.", "Ronco may also refer to:"], "mentions": [{"id": 0, "start": 0, "end": 5, "ref_url": "Ronco", "ref_ids": ["2052252"], "sent_idx": 1}]} +{"id": "17894249", "title": "Iraqna", "sentences": ["Iraqna was an Iraqi Mobile telecommunication company offering services in the middle of Iraq.", "It was established after the fall of Saddam Hussein in 2003.", "It was a subsidiary of Orascom Telecom.", "In 2007, it was sold to Zain to form with its subsidiary in Iraq MTC Atheer a bigger company.", "The new company is renamed Zain Iraq."], "mentions": [{"id": 0, "start": 32, "end": 36, "ref_url": "Iraq", "ref_ids": ["7515928"], "sent_idx": 4}, {"id": 1, "start": 20, "end": 44, "ref_url": "Mobile_telecommunication", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 32, "end": 36, "ref_url": "Iraq", "ref_ids": ["7515928"], "sent_idx": 4}, {"id": 3, "start": 37, "end": 51, "ref_url": "Saddam_Hussein", "ref_ids": ["29490"], "sent_idx": 1}, {"id": 4, "start": 23, "end": 38, "ref_url": "Orascom_Telecom_Holding", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 27, "end": 31, "ref_url": "Zain_Group", "ref_ids": ["13249890"], "sent_idx": 4}, {"id": 6, "start": 65, "end": 75, "ref_url": "MTC_Atheer", "ref_ids": null, "sent_idx": 3}, {"id": 7, "start": 27, "end": 36, "ref_url": "Zain_Iraq", "ref_ids": ["16683071"], "sent_idx": 4}]} +{"id": "17894260", "title": "McElroy Octagon House", "sentences": ["The McElroy Octagon House, also known as the Colonial Dames Octagon House, is a historic octagonal house now located at 2645 Gough Street at Union Street in the Cow Hollow neighborhood of San Francisco, California.", "William C. 
McElroy built it in 1861 across the street from its present location.", "It was vacant and neglected in 1951 when the National Society of the Colonial Dames of America in California bought it, moved it across the street and began its restoration.", "In 1971 it became San Francisco Landmark 17.", "It, the Feusier Octagon House and the Marine Exchange Lookout Station at Land's End are the only three remaining octagon houses in the city.", "On February 23, 1972, it was added to the National Register of Historic Places.", "The house is open to the public for tours."], "mentions": [{"id": 0, "start": 113, "end": 120, "ref_url": "Octagon", "ref_ids": ["314575"], "sent_idx": 4}, {"id": 1, "start": 161, "end": 171, "ref_url": "Cow_Hollow,_San_Francisco", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 18, "end": 31, "ref_url": "San_Francisco,_California", "ref_ids": null, "sent_idx": 3}, {"id": 3, "start": 99, "end": 109, "ref_url": "California", "ref_ids": ["5407"], "sent_idx": 2}, {"id": 4, "start": 8, "end": 29, "ref_url": "Feusier_Octagon_House", "ref_ids": ["17893601"], "sent_idx": 4}, {"id": 5, "start": 73, "end": 83, "ref_url": "Lands_End_(San_Francisco)", "ref_ids": ["21307565"], "sent_idx": 4}, {"id": 6, "start": 113, "end": 126, "ref_url": "Octagon_house", "ref_ids": ["515896"], "sent_idx": 4}, {"id": 7, "start": 42, "end": 78, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 5}]} +{"id": "17894269", "title": "Bidente-Ronco", "sentences": ["The Bidente-Ronco is a river in the Emilia-Romagna region of Italy.", "The first portion of the river is called the Bidente.", "Once the river passes under the Ponte dei Veneziani (Bridge of the Venetians) in Meldola, the river is called the Ronco.", "The source of the river is near the border between the province of Forlì-Cesena, the province of Arezzo, and the province of Florence in the Foreste Casentinesi, Monte Falterona, Campigna National Park.", "The river flows northeast through 
the mountains in the province of Forlì-Cesena and flows near Santa Sofia, Galeata, Civitella di Romagna and Meldola.", "Beyond Meldola, the river flows north near Bertinoro, Forlimpopoli, and Forlì before crossing the border into the province of Ravenna.", "The river flows northeast until it joins the Montone south of Ravenna, and the resulting river is known as the Uniti.", "The Battle of Ronco took place here."], "mentions": [{"id": 0, "start": 36, "end": 50, "ref_url": "Emilia-Romagna", "ref_ids": ["162715"], "sent_idx": 0}, {"id": 1, "start": 61, "end": 66, "ref_url": "Italy", "ref_ids": ["14532"], "sent_idx": 0}, {"id": 2, "start": 7, "end": 14, "ref_url": "Meldola", "ref_ids": ["4025953"], "sent_idx": 5}, {"id": 3, "start": 55, "end": 79, "ref_url": "Province_of_Forlì-Cesena", "ref_ids": ["987097"], "sent_idx": 4}, {"id": 4, "start": 85, "end": 103, "ref_url": "Province_of_Arezzo", "ref_ids": ["1459652"], "sent_idx": 3}, {"id": 5, "start": 113, "end": 133, "ref_url": "Province_of_Florence", "ref_ids": ["1342842"], "sent_idx": 3}, {"id": 6, "start": 141, "end": 201, "ref_url": "Foreste_Casentinesi,_Monte_Falterona,_Campigna_National_Park", "ref_ids": ["10723514"], "sent_idx": 3}, {"id": 7, "start": 55, "end": 79, "ref_url": "Province_of_Forlì-Cesena", "ref_ids": ["987097"], "sent_idx": 4}, {"id": 8, "start": 95, "end": 106, "ref_url": "Santa_Sofia,_Emilia–Romagna", "ref_ids": ["6722749"], "sent_idx": 4}, {"id": 9, "start": 108, "end": 115, "ref_url": "Galeata", "ref_ids": ["6722621"], "sent_idx": 4}, {"id": 10, "start": 117, "end": 137, "ref_url": "Civitella_di_Romagna", "ref_ids": ["6722595"], "sent_idx": 4}, {"id": 11, "start": 43, "end": 52, "ref_url": "Bertinoro", "ref_ids": ["2513659"], "sent_idx": 5}, {"id": 12, "start": 54, "end": 66, "ref_url": "Forlimpopoli", "ref_ids": ["1034608"], "sent_idx": 5}, {"id": 13, "start": 72, "end": 77, "ref_url": "Forlì", "ref_ids": ["2072467"], "sent_idx": 5}, {"id": 14, "start": 114, "end": 133, "ref_url": 
"Province_of_Ravenna", "ref_ids": ["987144"], "sent_idx": 5}, {"id": 15, "start": 45, "end": 52, "ref_url": "Montone_(river)", "ref_ids": ["10728865"], "sent_idx": 6}, {"id": 16, "start": 62, "end": 69, "ref_url": "Ravenna", "ref_ids": ["37409"], "sent_idx": 6}, {"id": 17, "start": 111, "end": 116, "ref_url": "Uniti", "ref_ids": ["46513852"], "sent_idx": 6}, {"id": 18, "start": 4, "end": 19, "ref_url": "Battle_of_Ronco", "ref_ids": ["13252928"], "sent_idx": 7}]} +{"id": "17894283", "title": "School of Aeronautics and Astronautics, Zhejiang University", "sentences": ["School of Aeronautics and Astronautics (SAA) of Zhejiang University (Traditional Chinese: 浙江大學航空航天學院, Simplified Chinese: 浙江大学航空航天学院), is one of the university schools/colleges primarily focuses on space technology in the People's Republic of China."], "mentions": [{"id": 0, "start": 48, "end": 67, "ref_url": "Zhejiang_University", "ref_ids": ["340391"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 88, "ref_url": "Traditional_Chinese", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 102, "end": 120, "ref_url": "Simplified_Chinese", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 149, "end": 159, "ref_url": "University", "ref_ids": ["19725260"], "sent_idx": 0}, {"id": 4, "start": 160, "end": 166, "ref_url": "School", "ref_ids": ["28022"], "sent_idx": 0}, {"id": 5, "start": 168, "end": 175, "ref_url": "College", "ref_ids": ["5689"], "sent_idx": 0}, {"id": 6, "start": 198, "end": 214, "ref_url": "Space_technology", "ref_ids": null, "sent_idx": 0}, {"id": 7, "start": 222, "end": 248, "ref_url": "People's_Republic_of_China", "ref_ids": null, "sent_idx": 0}]} +{"id": "17894294", "title": "Augusta Symphony Orchestra", "sentences": ["The Augusta Symphony Orchestra, established shortly after World War I, is a nonprofit symphony orchestra in Augusta, Maine.", "It consists of fifty volunteer amateur and semi-professional musicians and is conducted by Paul Ross.", "The orchestra's season includes a 
concert in November, the Messiah Sing in December, a youth concert in March, and a concert in May."], "mentions": [{"id": 0, "start": 86, "end": 94, "ref_url": "Symphony", "ref_ids": ["44114"], "sent_idx": 0}, {"id": 1, "start": 108, "end": 122, "ref_url": "Augusta,_Maine", "ref_ids": ["45920"], "sent_idx": 0}]} +{"id": "17894303", "title": "William Marquart", "sentences": ["William \"Billy\" Marquart (February 24, 1915 – June 13, 1960) was a Canadian boxer who competed in the 1936 Summer Olympics.", "He was born in Winnipeg, Manitoba and died in Chicago, Illinois, United States.", "In 1936 he was eliminated in the quarterfinals of the featherweight class after losing his fight to Dezső Frigyes.", "Embarking on a professional career after the Olympics, he eventually moved to the United States permanently and joined the country's Navy in June 1942, the same month in which he TKOed Cleo McNeal in his final professional fight."], "mentions": [{"id": 0, "start": 67, "end": 75, "ref_url": "Canadians", "ref_ids": ["19851291"], "sent_idx": 0}, {"id": 1, "start": 76, "end": 81, "ref_url": "Boxing", "ref_ids": ["4243"], "sent_idx": 0}, {"id": 2, "start": 102, "end": 122, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 0}, {"id": 3, "start": 15, "end": 23, "ref_url": "Winnipeg", "ref_ids": ["100730"], "sent_idx": 1}, {"id": 4, "start": 46, "end": 53, "ref_url": "Chicago", "ref_ids": ["6886"], "sent_idx": 1}, {"id": 5, "start": 54, "end": 73, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_featherweight", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 100, "end": 113, "ref_url": "Dezső_Frigyes", "ref_ids": ["17894147"], "sent_idx": 2}, {"id": 7, "start": 185, "end": 196, "ref_url": "Cleo_McNeal", "ref_ids": null, "sent_idx": 3}]} +{"id": "17894307", "title": "Ahmad Al-Khamisi", "sentences": ["Ahmad Abdel Rahman Al-Khamisi (born 28 January 1948 in Cairo) is an Egyptian writer and journalist."], "mentions": [{"id": 0, "start": 55, "end": 60, 
"ref_url": "Cairo", "ref_ids": ["6293"], "sent_idx": 0}]} +{"id": "17894316", "title": "Steve Olson", "sentences": ["Steve Olson is an American writer who specializes in science, mathematics, and public policy.", "He is the author of several nonfiction trade books: \"Mapping Human History: Genes, Race, and Our Common Origins\", which was nominated for the National Book Award in 2002; \"Count Down: Six Kids Vie for Glory at the World’s Toughest Math Competition\" in 2004; \"Anarchy Evolution: Faith, Science, and Bad Religion in a World Without God\" in 2010; \"Eruption: The Untold Story of Mt. St. Helens\" in 2016.", "He also has written for many magazines, including the \"Atlantic Monthly\", the \"Smithsonian\", \"Science\", \"Scientific American\", \"Wired\", the \"Yale Alumni Magazine\", the \"Washingtonian\", \"Slate\", and \"Paste\".", "His articles have been reprinted in Best American Science and Nature Writing 2003 and 2007."], "mentions": [{"id": 0, "start": 142, "end": 161, "ref_url": "National_Book_Award", "ref_ids": ["63097"], "sent_idx": 1}, {"id": 1, "start": 55, "end": 71, "ref_url": "Atlantic_Monthly", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 79, "end": 90, "ref_url": "Smithsonian", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 93, "end": 102, "ref_url": "Science_(magazine)", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 105, "end": 124, "ref_url": "Scientific_American", "ref_ids": ["29507"], "sent_idx": 2}, {"id": 5, "start": 127, "end": 134, "ref_url": "Wired_(magazine)", "ref_ids": ["65411"], "sent_idx": 2}, {"id": 6, "start": 141, "end": 161, "ref_url": "Yale_Alumni_Magazine", "ref_ids": ["5393624"], "sent_idx": 2}, {"id": 7, "start": 168, "end": 183, "ref_url": "Washingtonian_(magazine)", "ref_ids": ["4908383"], "sent_idx": 2}, {"id": 8, "start": 185, "end": 192, "ref_url": "Slate_(magazine)", "ref_ids": ["423731"], "sent_idx": 2}, {"id": 9, "start": 198, "end": 205, "ref_url": "Paste_(magazine)", "ref_ids": ["2842318"], 
"sent_idx": 2}]} +{"id": "17894344", "title": "Charcot's cholangitis triad", "sentences": ["Charcot's cholangitis triad is the combination of jaundice; fever, usually with rigors; and right upper quadrant abdominal pain.", "It occurs as a result of ascending cholangitis (an infection of the bile duct in the liver).", "When the presentation also includes low blood pressure and mental status changes, it is known as Reynolds' pentad.", "It is named for Jean-Martin Charcot."], "mentions": [{"id": 0, "start": 50, "end": 58, "ref_url": "Jaundice", "ref_ids": ["65980"], "sent_idx": 0}, {"id": 1, "start": 60, "end": 65, "ref_url": "Fever", "ref_ids": ["46253"], "sent_idx": 0}, {"id": 2, "start": 80, "end": 86, "ref_url": "Rigors", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 92, "end": 112, "ref_url": "Right_upper_quadrant", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 113, "end": 127, "ref_url": "Abdominal_pain", "ref_ids": ["593703"], "sent_idx": 0}, {"id": 5, "start": 25, "end": 46, "ref_url": "Ascending_cholangitis", "ref_ids": ["5544827"], "sent_idx": 1}, {"id": 6, "start": 68, "end": 77, "ref_url": "Bile_duct", "ref_ids": ["197021"], "sent_idx": 1}, {"id": 7, "start": 36, "end": 54, "ref_url": "Hypotension", "ref_ids": ["500475"], "sent_idx": 2}, {"id": 8, "start": 59, "end": 80, "ref_url": "Mental_status_changes", "ref_ids": null, "sent_idx": 2}, {"id": 9, "start": 97, "end": 113, "ref_url": "Reynolds'_pentad", "ref_ids": ["6673375"], "sent_idx": 2}, {"id": 10, "start": 16, "end": 35, "ref_url": "Jean-Martin_Charcot", "ref_ids": ["932831"], "sent_idx": 3}]} +{"id": "17894351", "title": "BrightSource Energy", "sentences": ["BrightSource Energy, Inc. 
is an Oakland, California based, corporation that designs, builds, finances, and operates utility-scale solar power plants.", "Greentech Media ranked BrightSource as one of the top 10 greentech startups in the world in 2008."], "mentions": [{"id": 0, "start": 32, "end": 51, "ref_url": "Oakland,_California", "ref_ids": ["50548"], "sent_idx": 0}, {"id": 1, "start": 130, "end": 147, "ref_url": "Solar_power_plant", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 0, "end": 15, "ref_url": "Greentech_Media", "ref_ids": ["19284371"], "sent_idx": 1}, {"id": 3, "start": 57, "end": 66, "ref_url": "Environmental_technology", "ref_ids": ["1443002"], "sent_idx": 1}]} +{"id": "17894356", "title": "São João do Pacuí", "sentences": ["São João do Pacuí is a municipality in the north of the Brazilian state of Minas Gerais.", "As of 2007 the population was 4,003 in a total area of 420 km².", "It became a municipality in 1997."], "mentions": [{"id": 0, "start": 75, "end": 87, "ref_url": "Minas_Gerais", "ref_ids": ["222651"], "sent_idx": 0}]} +{"id": "17894369", "title": "1934 National Challenge Cup", "sentences": ["The 1934 National Challenge Cup was the annual open cup held by the United States Football Association now known as the Lamar Hunt U.S. 
Open Cup."], "mentions": [{"id": 0, "start": 68, "end": 102, "ref_url": "United_States_Football_Association", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 120, "end": 144, "ref_url": "Lamar_Hunt_U.S._Open_Cup", "ref_ids": null, "sent_idx": 0}]} +{"id": "17894373", "title": "Uzbekistan women's national rugby union team", "sentences": ["The Uzbekistan women's national rugby union team first played in 2008."], "mentions": []} +{"id": "17894384", "title": "Clarrie Gordon", "sentences": ["Clarence \"Clarrie\" Edward Gordon (9 March 1917 – 19 November 1983) was a New Zealand boxer.", "He competed as a featherweight at the 1936 Summer Olympics, where he was eliminated in his first bout.", "At 19 years, he was the youngest competitor from New Zealand at those Games.", "In 1938 Gordon turned professional, and retired in 1949 as a reigning national welterweight champion, with a record of 22 wins (11 by knockout), 11 losses, and 2 draws.", "In 1944, he was suspended for three years by the New Zealand Professional Boxing Association after a first-round knockout.", "During those years he fought in Australia.", "Clarrie had nine siblings; among them six of the seven brothers became career boxers, including Clarrie's twin brother Viv."], "mentions": [{"id": 0, "start": 49, "end": 60, "ref_url": "New_Zealand", "ref_ids": ["4913064"], "sent_idx": 4}, {"id": 1, "start": 38, "end": 58, "ref_url": "1936_Summer_Olympics", "ref_ids": ["39721"], "sent_idx": 1}, {"id": 2, "start": 91, "end": 101, "ref_url": "Boxing_at_the_1936_Summer_Olympics_-_Men's_featherweight", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 49, "end": 92, "ref_url": "New_Zealand_Professional_Boxing_Association", "ref_ids": ["52936061"], "sent_idx": 4}]} +{"id": "17894386", "title": "O. A. Bushnell", "sentences": ["O. A. 
(Oswald Andrew) \"Ozzy\" Bushnell (11 May 1913 – 21 August 2002) was a microbiologist, historian, novelist, and professor at the University of Hawaii."], "mentions": [{"id": 0, "start": 75, "end": 89, "ref_url": "Microbiologist", "ref_ids": ["539355"], "sent_idx": 0}, {"id": 1, "start": 91, "end": 100, "ref_url": "Historian", "ref_ids": ["13575"], "sent_idx": 0}, {"id": 2, "start": 102, "end": 110, "ref_url": "Novelist", "ref_ids": ["37397201"], "sent_idx": 0}, {"id": 3, "start": 116, "end": 125, "ref_url": "Professor", "ref_ids": ["20646803"], "sent_idx": 0}, {"id": 4, "start": 133, "end": 153, "ref_url": "University_of_Hawaii", "ref_ids": ["194710"], "sent_idx": 0}]} +{"id": "17894439", "title": "Foulgo", "sentences": ["Foulgo may refer to:"], "mentions": []} +{"id": "17894446", "title": "Zoltán Kiss (footballer, born 1986)", "sentences": ["Zoltán Kiss( born 12 July 1986) is a Hungarian footballer who currently plays as a defender for Békéscsaba 1912 Előre SE."], "mentions": [{"id": 0, "start": 37, "end": 46, "ref_url": "Hungarian_people", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 47, "end": 57, "ref_url": "Football_(soccer)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 83, "end": 91, "ref_url": "Defender_(football)", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 96, "end": 120, "ref_url": "Békéscsaba_1912_Előre_SE", "ref_ids": null, "sent_idx": 0}]} +{"id": "17894463", "title": "Joe Dakuitoga", "sentences": ["Joe Dakuitoga is a Fijian former professional rugby league footballer, and coach.", "He played for Fiji in the 1995 World Cup, and coached them in 2008."], "mentions": [{"id": 0, "start": 46, "end": 58, "ref_url": "Rugby_league", "ref_ids": ["25735"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 51, "ref_url": "Coach_(sport)", "ref_ids": ["626967"], "sent_idx": 1}, {"id": 2, "start": 14, "end": 18, "ref_url": "Fiji_national_rugby_league_team", "ref_ids": ["2262600"], "sent_idx": 1}, {"id": 3, "start": 26, "end": 40, "ref_url": 
"1995_Rugby_League_World_Cup", "ref_ids": ["377480"], "sent_idx": 1}, {"id": 4, "start": 62, "end": 66, "ref_url": "2008_Rugby_League_World_Cup", "ref_ids": ["444211"], "sent_idx": 1}]} +{"id": "17894479", "title": "Kostenga, Boudry", "sentences": ["Kostenga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 528."], "mentions": [{"id": 0, "start": 29, "end": 46, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 69, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894482", "title": "Mugaiyur", "sentences": ["Mugaiyur is one of the block in Viluppuram District, Tamil Nadu, India."], "mentions": [{"id": 0, "start": 32, "end": 51, "ref_url": "Viluppuram_District", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 53, "end": 63, "ref_url": "Tamil_Nadu", "ref_ids": ["29918"], "sent_idx": 0}]} +{"id": "17894491", "title": "Liguidmalguéma", "sentences": ["Liguidmalguéma is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 575."], "mentions": [{"id": 0, "start": 35, "end": 52, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 75, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 87, "end": 99, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894501", "title": "Kozhipattu", "sentences": ["Kozhipattu is a small village located in Kanai block in Viluppuram District in the Indian state of Tamil Nadu."], "mentions": [{"id": 0, "start": 41, "end": 46, "ref_url": "Kanai,_Viluppuram", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 56, "end": 75, "ref_url": "Viluppuram_District", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 83, 
"end": 88, "ref_url": "India", "ref_ids": ["14533"], "sent_idx": 0}, {"id": 3, "start": 90, "end": 95, "ref_url": "States_and_territories_of_India", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 99, "end": 109, "ref_url": "Tamil_Nadu", "ref_ids": ["29918"], "sent_idx": 0}]} +{"id": "17894507", "title": "Lelkom", "sentences": ["Lelkom is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 1,223."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 64, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894522", "title": "Limsèga", "sentences": ["Limsèga is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 1,579."], "mentions": [{"id": 0, "start": 25, "end": 42, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 65, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 89, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894526", "title": "Fortified Area of Silesia", "sentences": ["The Fortified Area of Silesia () was a set of Polish fortifications, constructed along the interbellum border of Poland and Germany in the area of then-divided Upper Silesia.", "It spreads from the village of Przeczyce in the north to the town of Wyry in the south, along the line of sixty kilometers.", "Headquarters of the area was placed in Chorzów and its commandant was General Jan Jagmin-Sadowski."], "mentions": [{"id": 0, "start": 46, "end": 52, "ref_url": "Second_Polish_Republic", "ref_ids": ["14245"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 66, "ref_url": "Fortification", "ref_ids": ["204118"], 
"sent_idx": 0}, {"id": 2, "start": 91, "end": 102, "ref_url": "Interbellum", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 124, "end": 131, "ref_url": "Weimar_Republic", "ref_ids": ["33685"], "sent_idx": 0}, {"id": 4, "start": 160, "end": 173, "ref_url": "Upper_Silesia", "ref_ids": ["196188"], "sent_idx": 0}, {"id": 5, "start": 31, "end": 40, "ref_url": "Przeczyce", "ref_ids": ["21206257"], "sent_idx": 1}, {"id": 6, "start": 69, "end": 73, "ref_url": "Wyry", "ref_ids": ["14707236"], "sent_idx": 1}, {"id": 7, "start": 0, "end": 12, "ref_url": "Headquarters", "ref_ids": ["745008"], "sent_idx": 2}, {"id": 8, "start": 39, "end": 46, "ref_url": "Chorzów", "ref_ids": ["74612"], "sent_idx": 2}, {"id": 9, "start": 78, "end": 97, "ref_url": "Jan_Jagmin-Sadowski", "ref_ids": ["16089358"], "sent_idx": 2}]} +{"id": "17894538", "title": "The Essential Shinran", "sentences": ["The Essential Shinran: A Buddhist Path of True Entrusting is a compilation of passages from the writings and life story of Shinran Shonin.", "Shinran, who wrote during the Kamakura Period, was a Japanese monk who founded Jodo Shinshu Buddhism, which eventually became the largest Buddhist sect in Japan.", "The book (compiled by Alfred Bloom, with a foreword by Ruben L.F. 
Habito) is divided into three major sections:", "The book is an attempt to bring Pure Land Buddhism to the attention of a Western audience.", "The book received the Silver (2nd) \"Book of the Year Award\" in the Religion category (2007) from ForeWord Magazine."], "mentions": [{"id": 0, "start": 123, "end": 137, "ref_url": "Shinran_Shonin", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 30, "end": 45, "ref_url": "Kamakura_Period", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 53, "end": 58, "ref_url": "Japan", "ref_ids": ["15573"], "sent_idx": 1}, {"id": 3, "start": 62, "end": 66, "ref_url": "Monk", "ref_ids": ["419369"], "sent_idx": 1}, {"id": 4, "start": 79, "end": 91, "ref_url": "Jodo_Shinshu", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 42, "end": 50, "ref_url": "Buddhism", "ref_ids": ["3267529"], "sent_idx": 3}, {"id": 6, "start": 22, "end": 34, "ref_url": "Alfred_Bloom_(Buddhist)", "ref_ids": ["9599819"], "sent_idx": 2}, {"id": 7, "start": 55, "end": 72, "ref_url": "Ruben_L.F._Habito", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 97, "end": 114, "ref_url": "ForeWord_(magazine)", "ref_ids": null, "sent_idx": 4}]} +{"id": "17894543", "title": "Manéssé", "sentences": ["Manéssé is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 836."], "mentions": [{"id": 0, "start": 28, "end": 45, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 68, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 80, "end": 92, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894548", "title": "Richard Goodman (writer)", "sentences": ["Richard Goodman (born July 11, 1945) is an American writer of nonfiction.", "He lives in New Orleans, Louisiana.", "He is an Associate Professor of English at the University of New Orleans..", "He is the author of four books of nonfiction.", "His 
articles and essays have appeared in the \"Harvard Review\", \"Ascent\", \"Vanity Fair\", \"The New York Times\", \"Creative Nonfiction\", \"French Review\", and \"The Michigan Quarterly Review\", among others.", "He was educated at the University of Michigan, where he won a Hopwood Award and received a B.A., at Wayne State University, where he received his M.A., and at Spalding University, where he received his M.F.A."], "mentions": [{"id": 0, "start": 61, "end": 72, "ref_url": "New_Orleans", "ref_ids": ["53842"], "sent_idx": 2}, {"id": 1, "start": 25, "end": 34, "ref_url": "Louisiana", "ref_ids": ["18130"], "sent_idx": 1}, {"id": 2, "start": 47, "end": 72, "ref_url": "University_of_New_Orleans", "ref_ids": ["719484"], "sent_idx": 2}, {"id": 3, "start": 46, "end": 60, "ref_url": "Harvard_Review", "ref_ids": ["13177362"], "sent_idx": 4}, {"id": 4, "start": 89, "end": 107, "ref_url": "The_New_York_Times", "ref_ids": ["30680"], "sent_idx": 4}, {"id": 5, "start": 111, "end": 130, "ref_url": "Creative_Nonfiction_(magazine)", "ref_ids": ["16966964"], "sent_idx": 4}, {"id": 6, "start": 155, "end": 184, "ref_url": "The_Michigan_Quarterly_Review", "ref_ids": null, "sent_idx": 4}, {"id": 7, "start": 23, "end": 45, "ref_url": "University_of_Michigan", "ref_ids": ["31740"], "sent_idx": 5}, {"id": 8, "start": 62, "end": 75, "ref_url": "Hopwood_Award", "ref_ids": ["13978"], "sent_idx": 5}]} +{"id": "17894566", "title": "List of European number-one hits of 1988", "sentences": ["This is a list of the European \"Music & Media\" magazine's European Hot 100 Singles and European Top 100 Albums number-ones of 1988."], "mentions": [{"id": 0, "start": 58, "end": 82, "ref_url": "Eurochart_Hot_100_Singles", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 87, "end": 110, "ref_url": "European_Top_100_Albums", "ref_ids": ["12965196"], "sent_idx": 0}]} +{"id": "17894568", "title": "Mankarga", "sentences": ["Mankarga is a town in the Boudry Department of Ganzourgou Province in central 
Burkina Faso.", "The town has a population of 1204."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 66, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894570", "title": "Sakésho", "sentences": ["Sakésho is a jazz quartet based band in France.", "Sakésho is based in the beguine, the polyrhythmic music of the French Caribbean.", "The band members are Mario Canonge (piano), Michel Alibo (bass) and Jean-Philippe Fanfant (drums); all born in the French Caribbean, plus North American Andy Narell (steelpan).", "The group is based in Paris.", "They perform under the direction of Heads Up International", "They recorded just two albums in the past five years, and have worked with the likes of several famous jazz musicians on their album projects."], "mentions": [{"id": 0, "start": 103, "end": 107, "ref_url": "Jazz", "ref_ids": ["15613"], "sent_idx": 5}, {"id": 1, "start": 24, "end": 31, "ref_url": "Beguine_(dance)", "ref_ids": ["1281190"], "sent_idx": 1}, {"id": 2, "start": 115, "end": 131, "ref_url": "French_West_Indies", "ref_ids": ["294564"], "sent_idx": 2}, {"id": 3, "start": 153, "end": 164, "ref_url": "Andy_Narell", "ref_ids": ["2917388"], "sent_idx": 2}, {"id": 4, "start": 166, "end": 174, "ref_url": "Steelpan", "ref_ids": ["305147"], "sent_idx": 2}, {"id": 5, "start": 36, "end": 58, "ref_url": "Heads_Up_International", "ref_ids": ["18114400"], "sent_idx": 4}]} +{"id": "17894579", "title": "Heritage Place (Ottawa)", "sentences": ["Built in 1985, Heritage Place is located in Ottawa, Ontario, Canada and home to many Government of Canada offices.", "The government departments in the building include Environment Canada and Industry Canada.", "The building has 14 storeys and offers views of the Sparks Street Mall and Parliament Hill.", 
"The building offers underground parking as well as a small café called Biscotti's Coffee House which offers lunch specials and snacks.", "It have a direct connection to Parliament Subway Station since 2019."], "mentions": [{"id": 0, "start": 44, "end": 59, "ref_url": "Ottawa", "ref_ids": ["22219"], "sent_idx": 0}, {"id": 1, "start": 85, "end": 105, "ref_url": "Government_of_Canada", "ref_ids": ["616255"], "sent_idx": 0}, {"id": 2, "start": 51, "end": 69, "ref_url": "Environment_Canada", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 74, "end": 89, "ref_url": "Industry_Canada", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 52, "end": 70, "ref_url": "Sparks_Street_Mall", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 75, "end": 90, "ref_url": "Parliament_Hill", "ref_ids": ["87696"], "sent_idx": 2}, {"id": 6, "start": 59, "end": 63, "ref_url": "Café", "ref_ids": null, "sent_idx": 3}, {"id": 7, "start": 31, "end": 56, "ref_url": "Parliament_Station", "ref_ids": null, "sent_idx": 4}]} +{"id": "17894594", "title": "Campbell West-Watson", "sentences": ["Campbell West-Watson (23 April 1877 – 19 May 1953) was successively an Anglican suffragan bishop, diocesan bishop and archbishop over a 40-year period during the first half of the 20th century.", "Born on 23 April 1877 he was educated at Birkenhead School and Emmanuel College, Cambridge before being ordained Priest in 1903.", "After six years as Chaplain, Fellow and Lecturer at his old college he was appointed Bishop of Barrow-in-Furness in 1909.", "After 16 years he was translated to Christchurch, New Zealand.", "In 1940 he was additionally appointed to be the Archbishop and Primate of the whole country, serving until 1951.", "Described in his \"Times\" obituary as \"a man of great approachability and unaffected goodness\" he died on 19 May 1953.", "In 1935, West-Watson was awarded the King George V Silver Jubilee Medal.", "He was appointed a Companion of the Order of St Michael and St George in the 1952 
Queen's Birthday Honours."], "mentions": [{"id": 0, "start": 71, "end": 79, "ref_url": "Anglicanism", "ref_ids": ["1214"], "sent_idx": 0}, {"id": 1, "start": 80, "end": 96, "ref_url": "Suffragan_bishop", "ref_ids": ["296341"], "sent_idx": 0}, {"id": 2, "start": 98, "end": 113, "ref_url": "Bishop", "ref_ids": ["4092"], "sent_idx": 0}, {"id": 3, "start": 118, "end": 128, "ref_url": "Archbishop", "ref_ids": ["48557"], "sent_idx": 0}, {"id": 4, "start": 41, "end": 58, "ref_url": "Birkenhead_School", "ref_ids": ["2059569"], "sent_idx": 1}, {"id": 5, "start": 63, "end": 90, "ref_url": "Emmanuel_College,_Cambridge", "ref_ids": ["243360"], "sent_idx": 1}, {"id": 6, "start": 113, "end": 119, "ref_url": "Priest", "ref_ids": ["23707"], "sent_idx": 1}, {"id": 7, "start": 19, "end": 27, "ref_url": "Chaplain", "ref_ids": ["183232"], "sent_idx": 2}, {"id": 8, "start": 29, "end": 35, "ref_url": "Fellow", "ref_ids": ["381889"], "sent_idx": 2}, {"id": 9, "start": 40, "end": 48, "ref_url": "Lecturer", "ref_ids": ["366338"], "sent_idx": 2}, {"id": 10, "start": 56, "end": 67, "ref_url": "Emmanuel_College,_Cambridge", "ref_ids": ["243360"], "sent_idx": 2}, {"id": 11, "start": 85, "end": 112, "ref_url": "Bishop_of_Barrow-in-Furness", "ref_ids": ["17769352"], "sent_idx": 2}, {"id": 12, "start": 22, "end": 32, "ref_url": "Translation_(ecclesiastical)", "ref_ids": ["15673547"], "sent_idx": 3}, {"id": 13, "start": 36, "end": 61, "ref_url": "Anglican_Diocese_of_Christchurch", "ref_ids": ["3894337"], "sent_idx": 3}, {"id": 14, "start": 48, "end": 91, "ref_url": "Archbishop_of_New_Zealand", "ref_ids": null, "sent_idx": 4}, {"id": 15, "start": 18, "end": 23, "ref_url": "The_Times", "ref_ids": ["39127"], "sent_idx": 5}, {"id": 16, "start": 37, "end": 71, "ref_url": "King_George_V_Silver_Jubilee_Medal", "ref_ids": ["11153712"], "sent_idx": 6}, {"id": 17, "start": 19, "end": 69, "ref_url": "Order_of_St_Michael_and_St_George", "ref_ids": ["341078"], "sent_idx": 7}, {"id": 18, "start": 77, "end": 
106, "ref_url": "1952_Birthday_Honours_(New_Zealand)", "ref_ids": ["59777828"], "sent_idx": 7}]} +{"id": "17894595", "title": "Nabasnonghin", "sentences": ["Nabasnonghin (or Nabasnoguen) is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 877."], "mentions": [{"id": 0, "start": 50, "end": 67, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 90, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 102, "end": 114, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894597", "title": "2006 FIFA World Cup qualification – UEFA Group 1", "sentences": ["The 2006 FIFA World Cup qualification UEFA", "Group 1 was a UEFA qualifying group for the 2006 FIFA World Cup.", "The group comprised Andorra, Armenia, Czech Republic, Finland, Macedonia and Netherlands and Romania.", "The group was won by Netherlands, who qualified for the 2006 FIFA World Cup.", "The runners-up Czech Republic entered the UEFA qualification play-offs."], "mentions": [{"id": 0, "start": 42, "end": 46, "ref_url": "UEFA", "ref_ids": ["32332"], "sent_idx": 4}, {"id": 1, "start": 56, "end": 75, "ref_url": "2006_FIFA_World_Cup", "ref_ids": ["157233"], "sent_idx": 3}, {"id": 2, "start": 20, "end": 27, "ref_url": "Andorra_national_football_team", "ref_ids": ["654958"], "sent_idx": 2}, {"id": 3, "start": 29, "end": 36, "ref_url": "Armenia_national_football_team", "ref_ids": ["695292"], "sent_idx": 2}, {"id": 4, "start": 15, "end": 29, "ref_url": "Czech_Republic_national_football_team", "ref_ids": ["729739"], "sent_idx": 4}, {"id": 5, "start": 54, "end": 61, "ref_url": "Finland_national_football_team", "ref_ids": ["723233"], "sent_idx": 2}, {"id": 6, "start": 63, "end": 72, "ref_url": "Macedonia_national_football_team", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 21, "end": 32, "ref_url": 
"Netherlands_national_football_team", "ref_ids": ["9647657"], "sent_idx": 3}, {"id": 8, "start": 93, "end": 100, "ref_url": "Romania_national_football_team", "ref_ids": ["679755"], "sent_idx": 2}, {"id": 9, "start": 56, "end": 75, "ref_url": "2006_FIFA_World_Cup", "ref_ids": ["157233"], "sent_idx": 3}, {"id": 10, "start": 42, "end": 70, "ref_url": "2006_FIFA_World_Cup_qualification_(UEFA_play-off)", "ref_ids": null, "sent_idx": 4}]} +{"id": "17894620", "title": "Tipi: Home of the Nomadic Buffalo Hunters", "sentences": ["Tipi: Home of the Nomadic Buffalo Hunters is an illustrated, non-fiction, young adult book by Caldecott-winning author and illustrator Paul Goble.", "It was published by World Wisdom Books in 2007."], "mentions": [{"id": 0, "start": 48, "end": 59, "ref_url": "Illustrated", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 61, "end": 72, "ref_url": "Non-fiction", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 74, "end": 85, "ref_url": "Young_adult_fiction", "ref_ids": ["893310"], "sent_idx": 0}, {"id": 3, "start": 94, "end": 103, "ref_url": "Caldecott", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 135, "end": 145, "ref_url": "Paul_Goble", "ref_ids": ["5875541"], "sent_idx": 0}, {"id": 5, "start": 20, "end": 38, "ref_url": "World_Wisdom", "ref_ids": ["9937231"], "sent_idx": 1}]} +{"id": "17894623", "title": "Nabmalguéma", "sentences": ["Nabmalguéma is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 712."], "mentions": [{"id": 0, "start": 32, "end": 49, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 72, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 96, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894636", "title": "Nabinkinsma", "sentences": ["Nabinkinsma is a village in the Boudry Department of Ganzourgou Province in central 
Burkina Faso.", "The village has a population of 534."], "mentions": [{"id": 0, "start": 32, "end": 49, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 72, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 96, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894662", "title": "Nabiraogtenga", "sentences": ["Nabiraogtenga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 714."], "mentions": [{"id": 0, "start": 34, "end": 51, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 74, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 86, "end": 98, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894676", "title": "Tó Neinilii", "sentences": [], "mentions": []} +{"id": "17894677", "title": "Nanom", "sentences": ["Nanom is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 533."], "mentions": [{"id": 0, "start": 26, "end": 43, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 66, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 90, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894683", "title": "Fisher (yachts)", "sentences": ["The Fisher line of motorsailing yachts is a line of fiberglass yachts in sizes from 25 feet to 46 feet.", "Designed in the UK and built by Fisher Yachts International in association with Neil Marine in Sri Lanka, one of Asia's largest ship builders."], "mentions": []} +{"id": "17894692", "title": "Nadioutenga", "sentences": ["Nadioutenga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", 
"The village has a population of 448."], "mentions": [{"id": 0, "start": 32, "end": 49, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 72, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 96, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894697", "title": "Certified IRB Professional", "sentences": ["The Certified IRB Professional (CIP) program is a certification initiative in the United States for individuals administering and overseeing the daily activities of institutional review boards (IRBs).", "IRBs are committees that are charged with determining if a research project conforms to ethical principles and federal regulations that protect the rights and welfare of human research subjects.", "The CIP program was developed by Public Responsibility in Medicine and Research (PRIM&R) to promote standards for professional knowledge and to support adherence to regulatory requirements, best practices, and ethical standards in the conduct of research.", "At present, there are more than 2,500 individuals who have attained their certification and have been authorized to use the CIP designation."], "mentions": [{"id": 0, "start": 74, "end": 87, "ref_url": "Certification", "ref_ids": ["3692331"], "sent_idx": 3}, {"id": 1, "start": 82, "end": 95, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 2, "start": 165, "end": 192, "ref_url": "Institutional_review_boards", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 33, "end": 79, "ref_url": "Public_Responsibility_in_Medicine_and_Research", "ref_ids": ["16019476"], "sent_idx": 2}, {"id": 4, "start": 165, "end": 175, "ref_url": "Regulatory", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 190, "end": 204, "ref_url": "Best_practices", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 210, "end": 227, "ref_url": "Ethical_standards", "ref_ids": null, "sent_idx": 
2}, {"id": 7, "start": 246, "end": 254, "ref_url": "Research", "ref_ids": ["25524"], "sent_idx": 2}]} +{"id": "17894703", "title": "List of Japanese submissions for the Academy Award for Best International Feature Film", "sentences": ["Japan has submitted films for the Academy Award for Best International Feature Film since the inception of the award.", "The award is handed out annually by the United States Academy of Motion Picture Arts and Sciences to a feature-length motion picture produced outside the United States that contains primarily non-English dialogue.", "The Academy Award for Best Foreign Language Film was not created until 1956; however, between 1947 and 1955, the Academy presented Honorary Awards to the best foreign language films released in the United States.", "These awards were not competitive, as there were no nominees but simply a winner every year that was voted on by the Board of Governors of the Academy.", "Three Japanese films were recipients of Honorary Awards during this period.", "For the 1956 Academy Awards, a competitive Academy Award of Merit, known as the Best Foreign Language Film Award, was created for non-English speaking films, and has been given annually since.", ", twelve Japanese films have been nominees for Academy Award for Best Foreign Language Film, and one film, \"Departures\", has won the award.", "The only Japanese directors to have multiple films be nominated for the award are Akira Kurosawa and Noboru Nakamura.", "Kurosawa received an Honorary Award prior to the inception of the formal award for his work on \"Rashomon\" and the actual Academy Award for \"Dersu Uzala\" (submitted for the former Soviet Union), and had four other films submitted, with two of them accepted as nominees.", "Notably, Kurosawa's 1985 film \"Ran\" was deliberately not nominated by the Japanese film industry for the Academy Award for Best Foreign Film due to the poor perception he had among Japanese filmmakers at the time.", "Nakamura had two 
films, \"Twin Sisters of Kyoto\" and \"Portrait of Chieko\", submitted as nominees for the award.", "Among all the countries that have submitted films for the award, Japan ranks fifth in terms of total nominees, behind Sweden (fourteen nominees) and ahead of the former Soviet Union (nine nominees)."], "mentions": [{"id": 0, "start": 34, "end": 83, "ref_url": "Academy_Award_for_Best_International_Feature_Film", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 54, "end": 97, "ref_url": "Academy_of_Motion_Picture_Arts_and_Sciences", "ref_ids": ["39842"], "sent_idx": 1}, {"id": 2, "start": 103, "end": 117, "ref_url": "Feature_length", "ref_ids": ["3898054"], "sent_idx": 1}, {"id": 3, "start": 40, "end": 55, "ref_url": "Academy_Honorary_Award", "ref_ids": ["181774"], "sent_idx": 4}, {"id": 4, "start": 8, "end": 27, "ref_url": "29th_Academy_Awards", "ref_ids": ["6235670"], "sent_idx": 5}, {"id": 5, "start": 108, "end": 118, "ref_url": "Departures_(2008_film)", "ref_ids": ["21088389"], "sent_idx": 6}, {"id": 6, "start": 82, "end": 96, "ref_url": "Akira_Kurosawa", "ref_ids": ["872"], "sent_idx": 7}, {"id": 7, "start": 101, "end": 116, "ref_url": "Noboru_Nakamura", "ref_ids": ["10595214"], "sent_idx": 7}, {"id": 8, "start": 96, "end": 104, "ref_url": "Rashomon_(film)", "ref_ids": null, "sent_idx": 8}, {"id": 9, "start": 140, "end": 151, "ref_url": "Dersu_Uzala_(1975_film)", "ref_ids": ["1624660"], "sent_idx": 8}, {"id": 10, "start": 169, "end": 181, "ref_url": "Soviet_Union", "ref_ids": ["26779"], "sent_idx": 11}, {"id": 11, "start": 30, "end": 35, "ref_url": "Ran_(film)", "ref_ids": ["75984"], "sent_idx": 9}, {"id": 12, "start": 25, "end": 46, "ref_url": "Twin_Sisters_of_Kyoto", "ref_ids": ["14375566"], "sent_idx": 10}, {"id": 13, "start": 53, "end": 71, "ref_url": "Portrait_of_Chieko", "ref_ids": ["14373786"], "sent_idx": 10}]} +{"id": "17894774", "title": "Nédogo", "sentences": ["Nédogo is a town in the Boudry Department of Ganzourgou Province in central Burkina 
Faso.", "The town has a population of 2,018."], "mentions": [{"id": 0, "start": 24, "end": 41, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 45, "end": 64, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 88, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894793", "title": "Nédogo-Peulh", "sentences": ["Nédogo-Peulh is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 155."], "mentions": [{"id": 0, "start": 33, "end": 50, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 73, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 97, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894821", "title": "Ouaongtenga", "sentences": ["Ouaongtenga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 467."], "mentions": [{"id": 0, "start": 32, "end": 49, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 72, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 96, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894831", "title": "Ginny Tyler", "sentences": ["Merrie Virginia Eggers (née Erlandson; August 8, 1925 – July 13, 2012), known professionally as Ginny Tyler, was an American voice actress who performed on dozens of cartoons and animated films from 1957 to 1993.", "In 2006, she was named a Disney Legend."], "mentions": [{"id": 0, "start": 166, "end": 174, "ref_url": "Cartoons", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 179, "end": 193, "ref_url": "Animated_films", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 25, "end": 38, "ref_url": 
"Disney_Legend", "ref_ids": null, "sent_idx": 1}]} +{"id": "17894854", "title": "Ouayalgui", "sentences": ["Ouayalgui is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 2,049."], "mentions": [{"id": 0, "start": 27, "end": 44, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 67, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 91, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894866", "title": "Protectosil", "sentences": ["Protectosil is a silane manufactured by Evonik and marketed a protective coating for building surfaces.", "It is used as a water repellent, and for corrosion and graffiti control."], "mentions": [{"id": 0, "start": 17, "end": 23, "ref_url": "Silane", "ref_ids": ["188972"], "sent_idx": 0}, {"id": 1, "start": 40, "end": 46, "ref_url": "Evonik", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 16, "end": 31, "ref_url": "Hydrophobe", "ref_ids": ["14136"], "sent_idx": 1}, {"id": 3, "start": 41, "end": 50, "ref_url": "Corrosion", "ref_ids": ["155443"], "sent_idx": 1}, {"id": 4, "start": 55, "end": 63, "ref_url": "Graffiti", "ref_ids": ["11985"], "sent_idx": 1}]} +{"id": "17894882", "title": "A. meleagris", "sentences": ["A. 
meleagris may refer to:"], "mentions": []} +{"id": "17894886", "title": "Payamtenga", "sentences": ["Payamtenga is a village in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The village has a population of 227."], "mentions": [{"id": 0, "start": 31, "end": 48, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 71, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 83, "end": 95, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894893", "title": "Abertridwr railway station", "sentences": ["Abertridwr railway station was a station which served Abertridwr, in the Welsh county of Glamorgan.", "It was served by trains on the line from Caerphilly to Senghenydd.", "The nearest station to Abertridwr is now Aber."], "mentions": [{"id": 0, "start": 23, "end": 33, "ref_url": "Abertridwr,_Caerphilly", "ref_ids": ["1724509"], "sent_idx": 2}, {"id": 1, "start": 73, "end": 78, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 2, "start": 89, "end": 98, "ref_url": "Glamorgan", "ref_ids": ["52520"], "sent_idx": 0}, {"id": 3, "start": 41, "end": 51, "ref_url": "Caerphilly_railway_station", "ref_ids": ["3569436"], "sent_idx": 1}, {"id": 4, "start": 55, "end": 65, "ref_url": "Senghenydd_railway_station", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 23, "end": 27, "ref_url": "Aber_railway_station", "ref_ids": ["3569431"], "sent_idx": 2}]} +{"id": "17894901", "title": "2008–09 Danish 2nd Divisions", "sentences": ["The 2008-09 season in Danish 2nd Division will be divided in two groups.", "The two winners will be promoted to the 2009–10 Danish 1st Division, together with the winner of a promotion game between the two runners-up.", "Second squad teams are ineligible for promotion."], "mentions": [{"id": 0, "start": 40, "end": 67, "ref_url": "2009–10_Danish_1st_Division", "ref_ids": ["23081493"], "sent_idx": 1}]} +{"id": 
"17894907", "title": "Jazz Review", "sentences": ["Jazz Review was a Scottish jazz magazine, founded in 1998.", "The founders were jazz writer (and former editor of \"The Wire\") Richard Cook and Roger Spence of the talent management agency Direct Music Limited of Edinburgh, Scotland.", "\"Jazz Review\" covered the entire range of jazz history from early jazz through swing to bebop, modern jazz and the avant-garde, and was known for its scholarly approach and independent stance.", "Major artists — including Keith Jarrett, Lee Konitz, Ornette Coleman, Dave Brubeck, and Wynton Marsalis — gave interviews to the magazine; historical surveys have included the Modern Jazz Quartet, Fletcher Henderson, Oscar Peterson, and Andrew Hill.", "The magazine was also renowned for its coverage of British jazz.", "Contributors included Simon Adams, Ronald Atkins, Emma Baker, Garry Booth, Jack Cooke, Tim Dorset, Rick Finlay, Mike Fish, Derek Gorman, Fred Grand, Hugh Gregory, Andy Hamilton, Martin Longley, Alan Luff, Chris Parker, Catherine Parsonage, Mike Rogers, Bill Shoemaker, Roger Thomas, Anthony Troon, Jim Weir and Barry Witherden.", "Alongside interviews and articles, regular features included \"Posted Notes\" (reader's letters), \"Now's The Time\" (a musician diary piece), \"ANEC-Dotage\" (Alan Luff remembers...), \"The Test\" (a musician is given records to comment on without knowing what they are), CD reviews, \"Fast Taste\" (shorter reviews) and \"Yesterdays\" (a prominent musician writes about a major turning point in his or her career)."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Scotland", "ref_ids": ["26994"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 61, "ref_url": "The_Wire_(magazine)", "ref_ids": ["1102461"], "sent_idx": 1}, {"id": 2, "start": 64, "end": 76, "ref_url": "Richard_Cook_(journalist)", "ref_ids": ["4163952"], "sent_idx": 1}, {"id": 3, "start": 150, "end": 169, "ref_url": "Edinburgh,_Scotland", "ref_ids": null, "sent_idx": 1}, {"id": 
4, "start": 79, "end": 84, "ref_url": "Swing_(jazz_performance_style)", "ref_ids": ["16728403"], "sent_idx": 2}, {"id": 5, "start": 88, "end": 93, "ref_url": "Bebop", "ref_ids": ["48145"], "sent_idx": 2}, {"id": 6, "start": 26, "end": 39, "ref_url": "Keith_Jarrett", "ref_ids": ["207748"], "sent_idx": 3}, {"id": 7, "start": 41, "end": 51, "ref_url": "Lee_Konitz", "ref_ids": ["756707"], "sent_idx": 3}, {"id": 8, "start": 53, "end": 68, "ref_url": "Ornette_Coleman", "ref_ids": ["153079"], "sent_idx": 3}, {"id": 9, "start": 70, "end": 82, "ref_url": "Dave_Brubeck", "ref_ids": ["8176"], "sent_idx": 3}, {"id": 10, "start": 88, "end": 103, "ref_url": "Wynton_Marsalis", "ref_ids": ["210483"], "sent_idx": 3}, {"id": 11, "start": 176, "end": 195, "ref_url": "Modern_Jazz_Quartet", "ref_ids": ["310911"], "sent_idx": 3}, {"id": 12, "start": 197, "end": 215, "ref_url": "Fletcher_Henderson", "ref_ids": ["171241"], "sent_idx": 3}, {"id": 13, "start": 217, "end": 231, "ref_url": "Oscar_Peterson", "ref_ids": ["22519"], "sent_idx": 3}, {"id": 14, "start": 237, "end": 248, "ref_url": "Andrew_Hill_(jazz_musician)", "ref_ids": ["352705"], "sent_idx": 3}]} +{"id": "17894912", "title": "Pittyn", "sentences": ["Pittyn (or Piti) is a town in the Boudry Department of Ganzourgou Province in central Burkina Faso.", "The town has a population of 1,660."], "mentions": [{"id": 0, "start": 34, "end": 51, "ref_url": "Boudry_Department", "ref_ids": ["16975341"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 74, "ref_url": "Ganzourgou_Province", "ref_ids": ["1055538"], "sent_idx": 0}, {"id": 2, "start": 86, "end": 98, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "17894913", "title": "Standee", "sentences": ["A standee is an American term for a large self-standing display promoting a movie, product or event, or point-of-sale advertising, often in the form of a life-size cut-out figure.", "They are typically made of foam-board, and may range from large self-standing posters 
to elaborate three-dimensional display devices with moving parts and lights.", "Standees are typically displayed in theater lobbies or music stores in advance of film or music releases.", "In the movie business, the more bookings a theater makes in advance for a given film, the more likely it is to place standees in its lobby because of self-interest to spur consumer interest in its future screen offerings.", "Standees are also called lobby stands in the film industry.", "In recent years, theaters increasingly look to on-site advertising from non-movie companies as a revenue source, which creates occasional friction with film distributors; when standees for Paramount's \"\" incorporated a promotion for the 2003 movie's tie-in promotion with Jeep automobiles, large theater circuit Regal Cinemas sought payments from Jeep for the exposure in its theaters.", "Paramount reportedly shifted bookings from 47 Regal theaters to other cinemas that erected the \"Tomb Raider\"/Jeep standees without payments from Jeep.", "While standees have previously been available only in large quantities, recent advances in digital photography and print-on-demand technology have made them widely available to the public.", "Several companies now offer these items as party decorations, gag gifts and memorial items for the deceased.", "Standees can now be purchased as one-off custom products, bringing them to the average consumer as well as large corporations and venues."], "mentions": [{"id": 0, "start": 16, "end": 29, "ref_url": "Wiktionary:standee", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 76, "end": 81, "ref_url": "Movie", "ref_ids": null, "sent_idx": 5}, {"id": 2, "start": 104, "end": 117, "ref_url": "Point_of_sale_display", "ref_ids": ["6636687"], "sent_idx": 0}, {"id": 3, "start": 27, "end": 37, "ref_url": "Paperboard", "ref_ids": ["423640"], "sent_idx": 1}, {"id": 4, "start": 78, "end": 84, "ref_url": "Poster", "ref_ids": ["20557129"], "sent_idx": 1}, {"id": 5, "start": 55, 
"end": 60, "ref_url": "Music", "ref_ids": ["18839"], "sent_idx": 2}, {"id": 6, "start": 55, "end": 60, "ref_url": "Music", "ref_ids": ["18839"], "sent_idx": 2}, {"id": 7, "start": 0, "end": 9, "ref_url": "Paramount_Pictures", "ref_ids": ["22918"], "sent_idx": 6}, {"id": 8, "start": 109, "end": 113, "ref_url": "Jeep", "ref_ids": ["15658"], "sent_idx": 6}, {"id": 9, "start": 312, "end": 325, "ref_url": "Regal_Cinemas", "ref_ids": ["1419193"], "sent_idx": 5}]} +{"id": "18270286", "title": "Parpi", "sentences": ["Parpi () is a village in the Aragatsotn Province of Armenia.", "It is home to the 5th-century Tsiranavor Church, with 7th- and 10th-century modifications.", "There is also S. Grigor or S. Grigor Lusavorich (\"Gregory the Illuminator\") Church and the 7th-century (rebuilt 10th-11th century) Targmanchats (\"Holy Translator\")", "Church located in a medieval-modern cemetery on a hill to the east.", "Nearby is a cave with a working door, used as a place of refuge between the 16th and 18th centuries."], "mentions": [{"id": 0, "start": 29, "end": 48, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270289", "title": "Paravakar", "sentences": ["Paravakar (), is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 34, "end": 49, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 60, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270293", "title": "Nor Astghaberd", "sentences": ["Nor Astghaberd (; formerly, Payahan (), also Romanized as P’ayahan and Payagan, and Bakavank) is a community and village in the Kajaran Municipality of Syunik Province, Armenia.", "The population of the village was 93 as of 2010, down from 179 at the 2001 census.", "The village of Nor Astghaberd's population was 57 at the 2011 census, up from 53 at the 2001 
census."], "mentions": [{"id": 0, "start": 45, "end": 53, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 128, "end": 148, "ref_url": "Kajaran_Municipality", "ref_ids": ["56586214"], "sent_idx": 0}, {"id": 2, "start": 152, "end": 167, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 3, "start": 169, "end": 176, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270296", "title": "Petrovka, Armenia", "sentences": ["Petrovka () is a village in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 32, "end": 45, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 56, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270297", "title": "Pirdaudan", "sentences": ["Pirdaudan (also, Pirudan and Pirdoudan) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 57, "end": 72, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 76, "end": 83, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270299", "title": "Verin Geghavank", "sentences": ["Verin Geghavank (), formerly known as \"Pirlu\" , is an abandoned village in the Kajaran Municipality of Syunik Province, Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported it was uninhabited at the 2001 and 2011 censuses."], "mentions": [{"id": 0, "start": 79, "end": 99, "ref_url": "Kajaran_Municipality", "ref_ids": ["56586214"], "sent_idx": 0}, {"id": 1, "start": 103, "end": 118, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270301", "title": "Pkhrut", "sentences": ["Pkhrut or Pukhrut, is a village 
(rural settlement) in the Kajaran Municipality of Syunik Province, Armenia.", "It was part of to the community of Lernadzor until the administrative reforms in June 2017.", "Pkhrut was not listed in the 2011 Armenian census.", "However, following the 2017 administrative and territorial reforms, Pkhrut appeared in the records with a total population of 16."], "mentions": [{"id": 0, "start": 58, "end": 78, "ref_url": "Kajaran_Municipality", "ref_ids": ["56586214"], "sent_idx": 0}, {"id": 1, "start": 82, "end": 97, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 34, "end": 41, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 35, "end": 44, "ref_url": "Lernadzor", "ref_ids": ["18260625"], "sent_idx": 1}]} +{"id": "18270304", "title": "Ptghavan", "sentences": ["Ptghavan (; also, Pkhtavan) is a town in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 45, "end": 60, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 64, "end": 71, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270307", "title": "RF switch matrix", "sentences": ["An RF switch matrix is a system of discrete electronic components that are integrated to route radio frequency (RF) signals between multiple inputs and multiple outputs.", "Popular applications requiring RF matrices are ground systems, test equipment, and communication systems.", "An RF matrix is used in test systems, in both design verification and manufacturing test, to route high frequency signals between the device under test (DUT) and the test and measurement equipment.", "In addition to signal routing, the RF/Microwave Switch Matrix may also contain signal conditioning components including passive signal conditioning devices, such as attenuators, filters, and directional couplers, as well as active signal conditioning, such as amplification and frequency converters.", "Since the signal routing and 
signal conditioning needs of a test system differ from design to design, RF/Microwave Switch Matrices typically are custom designed by the test system engineer or by a hired contractor for each new test system.", "The Switch Matrix is made up of RF switches and signal conditioners that are mounted together in a mechanical infrastructure or housing.", "Cables then interconnect the switches and signal conditioners.", "The switch matrix then employs a driver circuit and power supply to power and drive the switches and signal conditioners.", "The switch matrix uses connectors or fixtures to route the signal paths from the sourcing and measurement equipment to the DUT.", "The switch matrix is typically located close to the DUT to shorten the signal paths, thus reducing insertion loss and signal degradation."], "mentions": [{"id": 0, "start": 95, "end": 110, "ref_url": "Radio_frequency", "ref_ids": ["42852"], "sent_idx": 0}, {"id": 1, "start": 134, "end": 151, "ref_url": "Device_under_test", "ref_ids": ["5102157"], "sent_idx": 2}, {"id": 2, "start": 178, "end": 185, "ref_url": "Electronic_filter", "ref_ids": ["1866533"], "sent_idx": 3}, {"id": 3, "start": 32, "end": 41, "ref_url": "RF_switch", "ref_ids": ["1956288"], "sent_idx": 5}, {"id": 4, "start": 101, "end": 119, "ref_url": "Signal_conditioner", "ref_ids": null, "sent_idx": 7}, {"id": 5, "start": 52, "end": 64, "ref_url": "Power_supply", "ref_ids": ["219042"], "sent_idx": 7}]} +{"id": "18270311", "title": "Pokr Ayrum", "sentences": ["Pokr Ayrum (), is a village in the Lori Province of Armenia.", "The toponym reflects the former presence of Ayrums in the neighborhood."], "mentions": [{"id": 0, "start": 35, "end": 48, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 44, "end": 50, "ref_url": "Ayrums", "ref_ids": ["6886647"], "sent_idx": 1}]} +{"id": "18270317", "title": "Pokr Mantash", 
"sentences": ["Pokr Mantash (; Russified as Malyy Mantash; formerly, Pokr Arkhvali) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 89, "end": 104, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 108, "end": 115, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270319", "title": "Hiroko Tsukumo", "sentences": ["Hiroko Tsukumo (born September 11, 1970 in Hiroshima) is a retired volleyball player from Japan, who competed for the Japan women's national team in the 1990s.", "She was named \"Best Digger\" and \"Best Receiver\" at the 1998 FIVB Women's World Championship.", "Tsukumo played as a libero."], "mentions": [{"id": 0, "start": 43, "end": 52, "ref_url": "Hiroshima", "ref_ids": ["59062"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 77, "ref_url": "Volleyball", "ref_ids": ["32558"], "sent_idx": 0}, {"id": 2, "start": 90, "end": 95, "ref_url": "Japan", "ref_ids": ["15573"], "sent_idx": 0}, {"id": 3, "start": 118, "end": 145, "ref_url": "Japan_women's_national_volleyball_team", "ref_ids": ["9694642"], "sent_idx": 0}, {"id": 4, "start": 55, "end": 91, "ref_url": "1998_FIVB_Women's_World_Championship", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 20, "end": 26, "ref_url": "Libero_(volleyball)", "ref_ids": null, "sent_idx": 2}]} +{"id": "18270323", "title": "Pokr Masrik", "sentences": ["Pokr Masrik ( – meaning \"little Masrik\", also Romanized as P’ok’r Masrik and Poqr Masrik; formerly, Malaya Mazra and Pokr-Mazra) is a town in the Gegharkunik Province of Armenia.", "The town has a 12th-century church.", "Mets Masrik (big Masrik) is nearby."], "mentions": [{"id": 0, "start": 46, "end": 54, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 146, "end": 166, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 170, "end": 177, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, 
"start": 0, "end": 11, "ref_url": "Mets_Masrik", "ref_ids": ["16051657"], "sent_idx": 2}]} +{"id": "18270325", "title": "Pokr Sariar", "sentences": ["Pokr Sariar (, also Romanized as P’ok’r Sariar; Russified as Sariar Malyy) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 110, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 114, "end": 121, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270326", "title": "Pokr Sepasar", "sentences": ["Pokr Sepasar (, also Romanized as P’ok’r Sepasar; formerly, Malaya Shishtapa, and Pokr Shishtapa) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 21, "end": 29, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 118, "end": 133, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 137, "end": 144, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270329", "title": "Pokr Shagriar", "sentences": ["Pokr Shagriar (also, Malyy Shagriar) is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 54, "end": 70, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 74, "end": 81, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270331", "title": "Pokrashen", "sentences": ["Pokrashen (; formerly, Lernantsk, Keti Verkhniye, and Pokr Keti) is a village in Shirak Province, Armenia."], "mentions": [{"id": 0, "start": 81, "end": 96, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 98, "end": 105, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270332", "title": "Santos-o-Velho (Lisbon)", "sentences": ["Santos-o-Velho () is a former \"freguesia\" (civil parish) of Lisbon, Portugal, with an 
area of 0.51 km and 4,013 inhabitants (2001).", "It has a population density of 7899.6 inhabitants/km.", "At the administrative reorganization of Lisbon on 8 December 2012 it became part of the parish Estrela.", "It is one of the best preserved historical parts of Lisbon, including Madragoa (former village in the outskirts of Central Lisbon), the Museum of Ancient Art, and many former convents and palaces (in which the current Embassy of France in included).", "Well-known streets of this parish are Rua das Trinas, Rua do Guarda-Mor, Rua do Quelhas, Rua das Praças, and Avenida da Brasilia.", "It is also known for its lively nightlife."], "mentions": [{"id": 0, "start": 31, "end": 40, "ref_url": "Freguesia", "ref_ids": ["507764"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 58, "ref_url": "Lisbon", "ref_ids": ["18091"], "sent_idx": 3}, {"id": 2, "start": 95, "end": 102, "ref_url": "Estrela_(Lisbon)", "ref_ids": ["40908718"], "sent_idx": 2}, {"id": 3, "start": 54, "end": 71, "ref_url": "Rua_do_Guarda-Mor", "ref_ids": ["61690601"], "sent_idx": 4}]} +{"id": "18270335", "title": "Por, Armenia", "sentences": ["Por (; also Romanized as P’orr and Porr) is a small disused hamlet in the Vayots Dzor Province of Armenia.", "It has a 19th-century church and medieval cemetery."], "mentions": [{"id": 0, "start": 12, "end": 20, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 60, "end": 66, "ref_url": "Hamlet_(place)", "ref_ids": ["396466"], "sent_idx": 0}, {"id": 2, "start": 74, "end": 94, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 3, "start": 98, "end": 105, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270337", "title": "Limbo Race", "sentences": ["Limbo Race was an American post-punk band formed in Boston, Massachusetts in 1979 by Randy Black on guitar, John Neidhart on bass, and Peter Keaveney on drums.", "In 1981, Keaveney was replaced by drummer Mark Poulin and the band added 
saxophonist Mark Chenevert.", "Limbo Race featured an angular, sometimes harsh sound that some critics compared to the Gang of Four and The Cure.", "Black's lyrics described an unsettled world where communication was difficult, and drew upon dark anthropological references, images from childhood, and intimate details of his relationships with friends and lovers.", "Limbo Race developed a passionate cult following and, after winning the WBCN Rock & Roll Rumble in 1982, the band added keyboardist Catherine Coleman and toured extensively throughout the Northeastern United States until 1984 when the band finally broke up."], "mentions": [{"id": 0, "start": 27, "end": 36, "ref_url": "Post-punk", "ref_ids": ["25382326"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 73, "ref_url": "Boston,_Massachusetts", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 88, "end": 100, "ref_url": "Gang_of_Four", "ref_ids": ["50441"], "sent_idx": 2}, {"id": 3, "start": 105, "end": 113, "ref_url": "The_Cure", "ref_ids": ["57568"], "sent_idx": 2}, {"id": 4, "start": 72, "end": 95, "ref_url": "WBCN_Rock_&_Roll_Rumble", "ref_ids": null, "sent_idx": 4}]} +{"id": "18270343", "title": "SBB-CFF-FFS Ae 4/7", "sentences": ["The Ae 4/7 was a universal locomotive of the Swiss Federal Railways, employing the so-called Buchli drive.", "Thanks to this drive construction, invented by Jakob Buchli, it was one of the longest-lasting locomotives.", "It was in regular use for 7 decades, from the 1920s into the 1990s, hauling freight and passenger trains all over Switzerland."], "mentions": [{"id": 0, "start": 45, "end": 67, "ref_url": "SBB-CFF-FFS", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 93, "end": 105, "ref_url": "Buchli_drive", "ref_ids": ["10119206"], "sent_idx": 0}, {"id": 2, "start": 47, "end": 59, "ref_url": "Jakob_Buchli", "ref_ids": ["21821516"], "sent_idx": 1}]} +{"id": "18270345", "title": "Poselok Imeni Kalinina", "sentences": ["Poselok Imeni Kalinina (also, Posëlok Imeni Kalinina) 
is an abandoned village in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 85, "end": 100, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 104, "end": 111, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270353", "title": "Privolnoye, Armenia", "sentences": ["Privolnoye (), is a village in the Tashir Province of Armenia.", "Village is situated by the border with Georgia, 17 km on south-east from the center of region – Tashir.", "Privolnoye was founded in 1850.", "First residents were Russians who moved from Czarist Russia and inhabited different parts of the village.", "It is located 1585 m above the sea level.", "Winters are severely cold and summers cool.", "In dry years drought are very frequent.", "4621.8 hectares of an overall land are agricultural lands, of which 1071.6 ha of arable land, 1480.11 hectares are hayfields and 1945.7 hectares are pastures.", "The residents are mostly engaged in animal husbandry and growing grains, potato, melons, pumpkin, cabbage and other vegetables.", "The village has a secondary school, a cultural center, a medical center and a post office.", "Privolnoye hosts a church from 1895 built by Russian inhabitants."], "mentions": [{"id": 0, "start": 35, "end": 50, "ref_url": "Tashir_Province", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 54, "end": 61, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270357", "title": "Proshyan", "sentences": ["Proshyan (), is a major village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 39, "end": 54, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 65, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270359", "title": "Ptghni", "sentences": ["Ptghni (), is a village located in the Kotayk Province of Armenia along the left bank of the Hrazdan River.", "It was founded in 1831 in the 
vicinity of an abandoned dwelling.", "The village has a school, kindergarten, house of culture, and a library.", "The local economy is dependent on agriculture and local inhabitants primarily grow grapes, melons, gourds, and breed cattle.", "Within the village are the remains of fortress walls and Ptghavank of the 6th to 7th-century."], "mentions": [{"id": 0, "start": 39, "end": 54, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 65, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 93, "end": 106, "ref_url": "Hrazdan_River", "ref_ids": ["2090936"], "sent_idx": 0}, {"id": 3, "start": 57, "end": 66, "ref_url": "Ptghavank", "ref_ids": null, "sent_idx": 4}]} +{"id": "18270360", "title": "Athenaeum at Caltech", "sentences": ["The Athenaeum is a faculty club and private social club on the California Institute of Technology campus in Pasadena, California."], "mentions": [{"id": 0, "start": 36, "end": 55, "ref_url": "Gentlemen's_club", "ref_ids": ["1140859"], "sent_idx": 0}, {"id": 1, "start": 63, "end": 97, "ref_url": "California_Institute_of_Technology", "ref_ids": ["5786"], "sent_idx": 0}, {"id": 2, "start": 108, "end": 116, "ref_url": "Pasadena,_California", "ref_ids": ["92408"], "sent_idx": 0}]} +{"id": "18270362", "title": "Pushkino, Armenia", "sentences": ["Pushkino (; formerly, Gerger Russkiy), is a village in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 59, "end": 72, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 76, "end": 83, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270366", "title": "Ragimabad", "sentences": ["Ragimabad is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 27, "end": 42, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 46, "end": 53, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": 
"18270367", "title": "Ranchpar", "sentences": ["Ranchpar (, also Romanized as Rranch’par and Rranchpar) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 73, "end": 88, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 92, "end": 99, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270369", "title": "Rind, Armenia", "sentences": ["Rind (, also Romanized as Rrind) is a town in the Vayots Dzor Province of Armenia."], "mentions": [{"id": 0, "start": 13, "end": 21, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 50, "end": 70, "ref_url": "Vayots_Dzor", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 74, "end": 81, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270372", "title": "Rya Taza", "sentences": ["Rya Taza () or \"Ria Taza\" (meaning \"Fresh way\" in Kurdish), is a village in the Aragatsotn Province of Armenia.", "It was formerly known as \"Kondakhsaz\".", "Most residents of Rya Taza are Yazidis.", "The village is home to a ruined Armenian church built between the 10th and 13th centuries.", "It also contains an old cemetery with animal-shaped tombstones."], "mentions": [{"id": 0, "start": 50, "end": 57, "ref_url": "Kurdish_language", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 80, "end": 99, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 39, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 3}, {"id": 3, "start": 31, "end": 38, "ref_url": "Yazidis_in_Armenia", "ref_ids": ["6878423"], "sent_idx": 2}]} +{"id": "18270374", "title": "Sabunchi", "sentences": ["Sabunchi is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 26, "end": 41, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 45, "end": 52, 
"ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270377", "title": "Saghmosavan", "sentences": ["Saghmosavan (, also Romanized as Sagmosavan) is a town in the Aragatsotn Province of Armenia.", "The town is the site of the Saghmosavank Monastery (the \"Monastery of Psalms\") with Saint Sion church, built in 1215."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 62, "end": 81, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 85, "end": 92, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 28, "end": 50, "ref_url": "Saghmosavank_Monastery", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270379", "title": "Salli, Armenia", "sentences": ["Salli () is a town in the Vayots Dzor Province of Armenia."], "mentions": [{"id": 0, "start": 26, "end": 46, "ref_url": "Vayots_Dzor", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 50, "end": 57, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270382", "title": "Salut", "sentences": ["Salut () is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 29, "end": 44, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 55, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270386", "title": "Salvard", "sentences": ["Salvard (; formerly, Alilu) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 396 in 2010, down from 418 at the 2001 census."], "mentions": [{"id": 0, "start": 45, "end": 60, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 83, "end": 98, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": 
["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270389", "title": "Idiomag", "sentences": ["idiomag is an early product created by idio, a UK-based technology company founded in 2006 by two Warwick Business School graduates, Andrew Davies and Ed Barrow.", "The idio platform allows publishers and brands to personalise their content and distribute it across multiple platforms, which is intended to improve audience engagement and revenue.", "Using this platform, a consumer site is run at idiomag.com, which delivers digital music content to its users, in the form of a daily personalised music magazine.", "This magazine is viewed through the user’s browser, using the Adobe Flash Player, and can contain the full spectrum of text, images, and video chosen for each reader based on their tastes.", "The user can then find concert dates, download music, and buy tickets.", "The site also provides a widget, allowing the user’s magazine to be delivered on popular social networks such as Facebook, MySpace, and Bebo, or on individual blogs.", "The site aggregates content from a range of sources, both from the mainstream music press and the blogosphere, like a “glossy RSS feed”.", "Content is selected based on the user’s listening history, either manually entered or drawn from music sites such as Last.fm, Pandora, and iLike.", "In 2009, the site released their API, making idiomag's content-delivery model available to web application programmers."], "mentions": [{"id": 0, "start": 45, "end": 49, "ref_url": "Idio", "ref_ids": ["31099450"], "sent_idx": 8}, {"id": 1, "start": 98, "end": 121, "ref_url": "Warwick_Business_School", "ref_ids": ["1747773"], "sent_idx": 0}, {"id": 2, "start": 11, "end": 19, "ref_url": "Computing_platform", "ref_ids": ["81196"], "sent_idx": 2}, {"id": 3, "start": 75, "end": 88, "ref_url": "Digital_music", "ref_ids": null, 
"sent_idx": 2}, {"id": 4, "start": 62, "end": 80, "ref_url": "Adobe_Flash_Player", "ref_ids": ["1713552"], "sent_idx": 3}, {"id": 5, "start": 25, "end": 31, "ref_url": "Web_widget", "ref_ids": ["6725861"], "sent_idx": 5}, {"id": 6, "start": 113, "end": 121, "ref_url": "Facebook", "ref_ids": ["7529378"], "sent_idx": 5}, {"id": 7, "start": 123, "end": 130, "ref_url": "MySpace", "ref_ids": null, "sent_idx": 5}, {"id": 8, "start": 136, "end": 140, "ref_url": "Bebo", "ref_ids": ["18354521"], "sent_idx": 5}, {"id": 9, "start": 98, "end": 102, "ref_url": "Blog", "ref_ids": ["33645"], "sent_idx": 6}, {"id": 10, "start": 98, "end": 109, "ref_url": "Blogosphere", "ref_ids": ["346213"], "sent_idx": 6}, {"id": 11, "start": 117, "end": 124, "ref_url": "Last.fm", "ref_ids": ["612330"], "sent_idx": 7}, {"id": 12, "start": 126, "end": 133, "ref_url": "Pandora_(music_service)", "ref_ids": null, "sent_idx": 7}, {"id": 13, "start": 139, "end": 144, "ref_url": "ILike", "ref_ids": ["11521740"], "sent_idx": 7}, {"id": 14, "start": 33, "end": 36, "ref_url": "API", "ref_ids": null, "sent_idx": 8}, {"id": 15, "start": 91, "end": 106, "ref_url": "Web_application", "ref_ids": ["288311"], "sent_idx": 8}]} +{"id": "18270391", "title": "Sanain", "sentences": ["Sanain is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 24, "end": 37, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 41, "end": 48, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270392", "title": "Kaniashir", "sentences": ["Kaniashir (), formerly known as \"Kuchuk Jangi\" and \"Sangyar\", is a village in the Aragatsotn Province of Armenia.", "The town is mostly populated by Yezidis."], "mentions": [{"id": 0, "start": 82, "end": 101, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 105, "end": 112, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 38, 
"ref_url": "Yezidi", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270393", "title": "Saragyugh", "sentences": ["Saragyugh (, also Romanized as Saragyukh; formerly, Darakoy, Darakey, and Daragyukh) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 105, "end": 120, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 124, "end": 131, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270394", "title": "Sarahart", "sentences": ["Sarahart (, also romanized as Sarahart’ and Saraart; formerly, Gyullidzha) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 26, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 92, "end": 105, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 109, "end": 116, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270397", "title": "Sarakap", "sentences": ["Sarakap (; formerly, Bozdogan) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 51, "end": 66, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 77, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270404", "title": "Saralanj, Shirak", "sentences": ["Saralanj (, also Romanized as Saralandzh and Saralandj; formerly, Bashgyukh) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 97, "end": 112, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 116, "end": 123, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270407", "title": "A. americanus", "sentences": ["A. 
americanus may refer to:", "An abbreviation of a species name.", "In binomial nomenclature the name of a species is always the name of the genus to which the species belongs, followed by the species name (also called the species epithet).", "In \"A. americanus\" the genus name has been abbreviated to \"A.\" and the species has been spelled out in full.", "In a document that uses this abbreviation it should always be clear from the context which genus name has been abbreviated.", "Some of the most common uses of \"A. americanus\" are:"], "mentions": [{"id": 0, "start": 71, "end": 78, "ref_url": "Species", "ref_ids": ["21780446"], "sent_idx": 3}, {"id": 1, "start": 3, "end": 24, "ref_url": "Binomial_nomenclature", "ref_ids": ["39736"], "sent_idx": 2}, {"id": 2, "start": 91, "end": 96, "ref_url": "Genus", "ref_ids": ["38493"], "sent_idx": 4}, {"id": 3, "start": 71, "end": 78, "ref_url": "Species", "ref_ids": ["21780446"], "sent_idx": 3}]} +{"id": "18270408", "title": "Saralanj, Aragatsotn", "sentences": ["Saralanj (, also Romanized as Saralandzh and Saralandj; formerly, Gadzhi Bagr and Gadzhibagir) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 112, "end": 131, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 135, "end": 142, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270410", "title": "Saralanj, Kotayk", "sentences": ["Saralanj (, also Romanized as Saralandzh and Saralandj; formerly, Tulnabi) is a village in the Kotayk Province of Armenia.", "The mayor of Saralanj is Gegham Zilifyan."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 110, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 2, "start": 114, "end": 121, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 
0}]} +{"id": "18270413", "title": "Saralanj, Lori", "sentences": ["Saralanj (, also Romanized as Saralandzh and Saralandj; formerly, Gey Yekhush) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 96, "end": 109, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 113, "end": 120, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270415", "title": "Saramej", "sentences": ["Saramej (, also romanized as Saramech; formerly, Chotur) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 25, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 74, "end": 87, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 91, "end": 98, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270420", "title": "Saranist", "sentences": ["Saranist (also, Saramist and Tutiya) is a former village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 64, "end": 79, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 83, "end": 90, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270423", "title": "Sarapat", "sentences": ["Sarapat (; formerly, Samurli and Samrly) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 61, "end": 76, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 80, "end": 87, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270426", "title": "1922 Grand Prix season", "sentences": ["The 1922 Grand Prix season saw the French Grand Prix being held in Strasbourg.", "The Italian Grand Prix moved to its spiritual home at Monza."], "mentions": [{"id": 0, "start": 35, "end": 52, "ref_url": "French_Grand_Prix", "ref_ids": 
["347436"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 77, "ref_url": "Strasbourg", "ref_ids": ["37407"], "sent_idx": 0}, {"id": 2, "start": 4, "end": 22, "ref_url": "Italian_Grand_Prix", "ref_ids": ["511451"], "sent_idx": 1}, {"id": 3, "start": 54, "end": 59, "ref_url": "Autodromo_Nazionale_Monza", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270430", "title": "Saratak", "sentences": ["Saratak (; formerly, Imrkhan) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 50, "end": 65, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270432", "title": "Saratovka, Armenia", "sentences": ["Saratovka () is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 30, "end": 43, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 54, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270433", "title": "Saravan, Armenia", "sentences": ["Saravan (; formerly Terp, Darb is a small village in the Vayots Dzor Province of Armenia.", "The Saravan community includes the nearby town of Ughedzor.", "In the village is a 17th-century church and some medieval gravestones."], "mentions": [{"id": 0, "start": 57, "end": 77, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 1, "start": 81, "end": 88, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 58, "ref_url": "Ughedzor", "ref_ids": ["18271173"], "sent_idx": 1}]} +{"id": "18270437", "title": "Sarchapet", "sentences": ["Sarchapet (), is a village in the Lori Province of Armenia, near the border with Georgia.", "Sarchapet Population 2100.", "Territory- 23.9 km", "The community was established in the 1820s and 1830s.", "Inhabitants mainly are migrants from Mush and Alashkert, Western Armenia, who migrated during 
Russian−Persian war in 1826-28 and moved to Eastern Armenia.", "Such a group came near the mountain Lokh, as it is told, to pass the woods.", "It is told that the wheel of the coach was broken and the family settled at the foot of the second queen Lokh (after Lalvar), and in the northern part it is bordered by Georgia.", "It is worth of importance the name of the village.", "There is no explanation for it.", "Near the village benefition is being built a church.", "There is church which is closed (1913).", "The relief is mountainous, erosion is seen here; summers are mild, winters- cold.", "Winter lasts for 202 days, summers- 163.", "Average temperature is 3.6oC, and the highest-+28 o C -30 o C. Annual precipitations are from 700 to 720 mm.", "The lands are on 30-40ï‚° inclination.", "Fruitfulness is low because of bad geographical position and unfavorable weather.", "Height from sea level is 1705m.", "Distance from the regional centre is 70 km, from the previous district centre- 15 km.", "The community is mainly specialized in growing of potato, wheat and barely.", "This production depends on the caprices of the nature.", "It is still possible to gather some harvest in rainy years, but in drought years villagers lose 75% of the harvest.", "Those who are occupied with cattle-breeding, produce milk and meat.", "The village has a secondary school.", "The community had a church built in 1913, which is semi-destroyed now.", "The new church of the community is semi-built, the beneficiary of which is a businessman from Sarchapet, who lives in Russian Federation.", "There is a chapel on the northern part of the village on the mountain Lokh, where is celebrated the holiday of Hambardzum."], "mentions": [{"id": 0, "start": 34, "end": 47, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 72, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 4}, {"id": 2, "start": 169, "end": 176, "ref_url": "Georgia_(country)", 
"ref_ids": ["48768"], "sent_idx": 6}]} +{"id": "18270439", "title": "Sarigyugh", "sentences": ["Sarigyugh (, also Romanized as Sarigyukh and Sarigjugh; formerly, Srygekh and Srigekh) is a town in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 104, "end": 119, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 123, "end": 130, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270444", "title": "Saritagh", "sentences": ["Saritagh (), is a neighbourhood in the Erebuni District of Yerevan, Armenia."], "mentions": [{"id": 0, "start": 39, "end": 55, "ref_url": "Erebuni_District", "ref_ids": ["18256482"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 2, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270453", "title": "Sarnakhpyur", "sentences": ["Sarnakhpyur (also, Sarnakhbyur) is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 49, "end": 69, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 80, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270457", "title": "Sarnaghbyur", "sentences": ["Sarnaghbyur (, formerly, Sogyutlu) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 55, "end": 70, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 74, "end": 81, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270459", "title": "Sarnakunk", "sentences": ["Sarnakunk (, also Romanized as Sarrnakunk’, Sarnakunq, Sarrnakunk; formerly, Saybalu) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) 
reported its population was 519 in 2010, up from 514 at the 2001 census."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 103, "end": 118, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 141, "end": 156, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270470", "title": "Sayat-Nova, Armenia", "sentences": ["Sayat-Nova (, also Romanized as Sayat’-Nova) is a town in the Ararat Province of Armenia.", "The town is named after the poet Harutyun Sayatyan, whose nickname was Sayat-Nova."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 62, "end": 77, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 81, "end": 88, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 71, "end": 81, "ref_url": "Sayat-Nova", "ref_ids": ["658701"], "sent_idx": 1}]} +{"id": "18270475", "title": "Semyonovka, Armenia", "sentences": ["Semyonovka (), is a village in the Gegharkunik Province of Armenia.", "It was founded in 1845 by Spiritual Christian settlers, (sectarian \"Pryguny\" and \"Subbotniki\"), who escaped oppression in Russia."], "mentions": [{"id": 0, "start": 35, "end": 55, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 26, "end": 45, "ref_url": "Spiritual_Christian", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 122, "end": 128, "ref_url": "Russia", "ref_ids": ["25391"], "sent_idx": 1}]} +{"id": "18270476", "title": "Sers, Armenia", 
"sentences": ["Sers (; also Ses), is a village in the Vayots Dzor Province of Armenia."], "mentions": [{"id": 0, "start": 39, "end": 59, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 1, "start": 63, "end": 70, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270481", "title": "Sevaqar", "sentences": ["Sevaqar (, also Romanized as Sevakar and Sevak’ar) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 102 in 2010, down from 122 at the 2001 census."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 68, "end": 83, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 106, "end": 121, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270482", "title": "Sevaberd", "sentences": ["Sevaberd (; formerly, Karakala) is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 52, "end": 67, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 78, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270486", "title": "Sevkar", "sentences": ["Sevkar (, also Romanized as Sevk’ar) is a village in the Tavush Province of Armenia and the birthplace of the Armenian revolutionary leader Sevkaretsi Sako.", "The meaning of the name in Armenian is \"Black Stone.\""], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 57, "end": 72, "ref_url": "Tavush_Province", "ref_ids": ["470516"], 
"sent_idx": 0}, {"id": 2, "start": 27, "end": 34, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 140, "end": 155, "ref_url": "Sevkaretsi_Sako", "ref_ids": ["38390847"], "sent_idx": 0}]} +{"id": "18270488", "title": "Shaghap", "sentences": ["Shaghap (, also Romanized as Shagap’, Shagap, and Shagab; formerly, Shagaplu) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 110, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 114, "end": 121, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270490", "title": "Shaghat", "sentences": ["Shaghat (, also Romanized as Shagat) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 1,219 in 2010, up from 1,049 at the 2001 census."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 54, "end": 69, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 92, "end": 107, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270493", "title": "Shaghik", "sentences": ["Shaghik (; formerly, Karbulag, Karabulag, and Gharabulagh, ) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 81, "end": 96, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 100, "end": 107, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270503", "title": "Tillinghast Licht", 
"sentences": ["Tillinghast Licht LLP was a Providence, Rhode Island based law firm, from 1818 to 2008.", "Established in 1818 by Charles Foster Tillinghast, Sr., a scion of one of the oldest families in Rhode Island, it was one of the oldest law firms in Rhode Island.", "In 1816, Tillinghast was admitted to the Rhode Island bar and opened his first office, in the village of Chepachet.", "The following year, he returned to Providence and partnered with Samuel W. Bridgham, under whom he had studied at Brown University.", "Six years later, Tillinghast opened his own office.", "In 1842, Tillinghast partnered with Charles S. Bradley, who would later become Rhode Island’s chief justice.", "In 1843, Tillinghast represented Providence for a single term in the Rhode Island General Assembly.", "Tillinghast & Bradley was well known in Rhode Island until the firm dissolved in 1858.", "At that time, Tillinghast and his son James, who had joined the firm in 1851, continued as Tillinghast & Tillinghast.", "That name would continue after Charles died in 1864 and James continued to practice with his sons, William Richmond and Theodore Foster Tillinghast.", "In the early 20th century, with William Tillinghast still a member of the firm, Tillinghast & Tillinghast merged with another firm to form Tillinghast & Collins.", "It was the first of several mergers in the last century, all that saw the Tillinghast name remain preeminent.", "Between 1913 and 1916, later famous civil liberties advocate Zechariah Chafee practiced for Tillinghast & Collins.", "In the 1970s, Tillinghast, Collins & Tanner joined with Graham, Reid, Ewing & Stapleton.", "In the 1990s, Tillinghast Collins & Graham merged with Licht & Semonoff to form Tillinghast Licht & Semonoff.", "The name was later shortened to Tillinghast Licht.", "In 2000, it merged with the Boston firm of Perkins Smith & Cohen and called its Rhode Island office Tillinghast Licht Perkins Smith & Cohen, but the firms split only a few years later, 
and Tillinghast Licht returned to its former name.", "In May 2008, Tillinghast Licht announced that it will wind down business in the next few months, with six key lawyers joining Adler Pollock & Sheehan."], "mentions": [{"id": 0, "start": 18, "end": 21, "ref_url": "Limited_liability_partnership", "ref_ids": ["662415"], "sent_idx": 0}, {"id": 1, "start": 28, "end": 52, "ref_url": "Providence,_Rhode_Island", "ref_ids": ["19356538"], "sent_idx": 0}, {"id": 2, "start": 136, "end": 144, "ref_url": "Law_firm", "ref_ids": ["894300"], "sent_idx": 1}, {"id": 3, "start": 23, "end": 54, "ref_url": "Charles_Foster_Tillinghast,_Sr.", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 80, "end": 92, "ref_url": "Rhode_Island", "ref_ids": ["25410"], "sent_idx": 16}, {"id": 5, "start": 41, "end": 57, "ref_url": "Rhode_Island_bar", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 105, "end": 114, "ref_url": "Chepachet,_Rhode_Island", "ref_ids": ["30874358"], "sent_idx": 2}, {"id": 7, "start": 65, "end": 83, "ref_url": "Samuel_W._Bridgham", "ref_ids": ["35610199"], "sent_idx": 3}, {"id": 8, "start": 114, "end": 130, "ref_url": "Brown_University", "ref_ids": ["4157"], "sent_idx": 3}, {"id": 9, "start": 36, "end": 54, "ref_url": "Charles_S._Bradley", "ref_ids": ["18273373"], "sent_idx": 5}, {"id": 10, "start": 69, "end": 98, "ref_url": "Rhode_Island_General_Assembly", "ref_ids": ["426266"], "sent_idx": 6}, {"id": 11, "start": 36, "end": 51, "ref_url": "Civil_liberties", "ref_ids": ["37476"], "sent_idx": 12}, {"id": 12, "start": 61, "end": 77, "ref_url": "Zechariah_Chafee", "ref_ids": ["7610225"], "sent_idx": 12}]} +{"id": "18270504", "title": "Shahumyan, Lori", "sentences": ["Shahumyan (, also Romanized as Shaumyan and Shahumian; formerly, Imeni Shaumyana, Shahumyani Anvan Avazan) is a town in the Lori Province of Armenia.", "The town was named after Stepan Shahumyan, a Bolshevik commissar."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, 
"sent_idx": 0}, {"id": 1, "start": 124, "end": 137, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 141, "end": 148, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 25, "end": 41, "ref_url": "Stepan_Shahumyan", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270508", "title": "The Sixth Extinction (The X-Files)", "sentences": ["\"The Sixth Extinction\" is the first episode of the seventh season of the science fiction television series \"The X-Files\".", "It was first shown on the Fox network on November 7, 1999, in the United States.", "The episode was written by Chris Carter and directed by Kim Manners.", "\"The Sixth Extinction\" earned a Nielsen household rating of 10.6, being watched by 17.82 million people in its initial broadcast.", "The episode received mixed to positive reviews from critics.", "The show centers on FBI special agents Fox Mulder (David Duchovny) and Dana Scully (Gillian Anderson) who work on cases linked to the paranormal, called X-Files.", "Mulder is a believer in the paranormal, while the skeptical Scully has been assigned to debunk his work.", "In the episode, Assistant Director Walter Skinner (Mitch Pileggi) and Michael Kritschgau (John Finn) work desperately in an attempt to discover what is wrong with Mulder, whose abnormal brain activity has rendered him imprisoned in his own head, but they are unaware of Agent Diana Fowley’s (Mimi Rogers) duplicity.", "In the meanwhile, Scully is hunting for an ancient artifact in Africa.", "\"The Sixth Extinction\" helped to explore new aspects of the series' overarching mythology and was the second episode in a trilogy of episodes featuring Mulder's severe reaction to the appearance of an alien artifact.", "The episode was written due to series creator Chris Carter's fascination with the possibility that extraterrestrials were involved in the great extinctions that had happened millions of years ago."], "mentions": [{"id": 0, 
"start": 51, "end": 65, "ref_url": "The_X-Files_(season_7)", "ref_ids": ["940067"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 88, "ref_url": "Science_fiction", "ref_ids": ["26787"], "sent_idx": 0}, {"id": 2, "start": 89, "end": 106, "ref_url": "Television_series", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 108, "end": 119, "ref_url": "The_X-Files", "ref_ids": ["30304"], "sent_idx": 0}, {"id": 4, "start": 26, "end": 37, "ref_url": "Fox_Broadcasting_Company", "ref_ids": ["46252"], "sent_idx": 1}, {"id": 5, "start": 46, "end": 58, "ref_url": "Chris_Carter_(screenwriter)", "ref_ids": ["224481"], "sent_idx": 10}, {"id": 6, "start": 56, "end": 67, "ref_url": "Kim_Manners", "ref_ids": ["10228635"], "sent_idx": 2}, {"id": 7, "start": 20, "end": 23, "ref_url": "Federal_Bureau_of_Investigation", "ref_ids": ["11127"], "sent_idx": 5}, {"id": 8, "start": 39, "end": 49, "ref_url": "Fox_Mulder", "ref_ids": ["261343"], "sent_idx": 5}, {"id": 9, "start": 51, "end": 65, "ref_url": "David_Duchovny", "ref_ids": ["21189337"], "sent_idx": 5}, {"id": 10, "start": 71, "end": 82, "ref_url": "Dana_Scully", "ref_ids": ["389780"], "sent_idx": 5}, {"id": 11, "start": 84, "end": 100, "ref_url": "Gillian_Anderson", "ref_ids": ["42238"], "sent_idx": 5}, {"id": 12, "start": 153, "end": 159, "ref_url": "X-File", "ref_ids": null, "sent_idx": 5}, {"id": 13, "start": 35, "end": 49, "ref_url": "Walter_Skinner", "ref_ids": ["938316"], "sent_idx": 7}, {"id": 14, "start": 51, "end": 64, "ref_url": "Mitch_Pileggi", "ref_ids": ["1078887"], "sent_idx": 7}, {"id": 15, "start": 70, "end": 88, "ref_url": "List_of_minor_The_X-Files_characters", "ref_ids": null, "sent_idx": 7}, {"id": 16, "start": 90, "end": 99, "ref_url": "John_Finn", "ref_ids": ["1565921"], "sent_idx": 7}, {"id": 17, "start": 276, "end": 288, "ref_url": "Diana_Fowley", "ref_ids": null, "sent_idx": 7}, {"id": 18, "start": 292, "end": 303, "ref_url": "Mimi_Rogers", "ref_ids": ["374184"], "sent_idx": 7}, {"id": 19, "start": 80, "end": 
89, "ref_url": "Mythology_of_The_X-Files", "ref_ids": ["23749226"], "sent_idx": 9}, {"id": 20, "start": 46, "end": 58, "ref_url": "Chris_Carter_(screenwriter)", "ref_ids": ["224481"], "sent_idx": 10}, {"id": 21, "start": 99, "end": 116, "ref_url": "Colonist_(The_X-Files)", "ref_ids": ["26846650"], "sent_idx": 10}]} +{"id": "18270510", "title": "Shahumyan, Ararat", "sentences": ["Shahumyan (, formerly Yuva), is a village in the Ararat Province of Armenia.", "It was named after Stepan Shahumyan, a Bolshevik commissar."], "mentions": [{"id": 0, "start": 49, "end": 64, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 19, "end": 35, "ref_url": "Stepan_Shahumyan", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270512", "title": "Saunders Davies", "sentences": ["Francis James Saunders Davies (30 December 1937 – 30 March 2018) was the Anglican Bishop of Bangor from 2000 until 2004.", "Davies was educated at the University College of North Wales and Selwyn College, Cambridge.", "Ordained in 1964, he began his ministry as a curate at Holyhead before being appointed a minor canon of Bangor Cathedral.", "From 1969 to 1975 he was Rector at Llanllyfni then Vicar of Gorseinon and the rural dean from 1983.", "He became the Archdeacon of Meirionnydd in 1993 before his ordination to the episcopate.", "He died on 30 March 2018."], "mentions": [{"id": 0, "start": 73, "end": 81, "ref_url": "Anglican", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 82, "end": 98, "ref_url": "Bishop_of_Bangor", "ref_ids": ["3254704"], "sent_idx": 0}, {"id": 2, "start": 27, "end": 60, "ref_url": "University_College_of_North_Wales", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 65, "end": 90, "ref_url": "Selwyn_College,_Cambridge", "ref_ids": ["145454"], "sent_idx": 1}, {"id": 4, "start": 45, "end": 51, "ref_url": "Curate", "ref_ids": ["185250"], "sent_idx": 2}, 
{"id": 5, "start": 55, "end": 63, "ref_url": "Holyhead", "ref_ids": ["181846"], "sent_idx": 2}, {"id": 6, "start": 104, "end": 120, "ref_url": "Bangor_Cathedral", "ref_ids": ["3262385"], "sent_idx": 2}, {"id": 7, "start": 25, "end": 31, "ref_url": "Rector_(ecclesiastical)", "ref_ids": ["36115576"], "sent_idx": 3}, {"id": 8, "start": 35, "end": 45, "ref_url": "Llanllyfni", "ref_ids": ["7890498"], "sent_idx": 3}, {"id": 9, "start": 51, "end": 56, "ref_url": "Vicar", "ref_ids": ["177711"], "sent_idx": 3}, {"id": 10, "start": 60, "end": 69, "ref_url": "Gorseinon", "ref_ids": ["663576"], "sent_idx": 3}, {"id": 11, "start": 78, "end": 88, "ref_url": "Rural_dean", "ref_ids": ["4456836"], "sent_idx": 3}, {"id": 12, "start": 14, "end": 39, "ref_url": "Archdeacon_of_Meirionnydd", "ref_ids": null, "sent_idx": 4}, {"id": 13, "start": 77, "end": 87, "ref_url": "Episcopate", "ref_ids": null, "sent_idx": 4}]} +{"id": "18270514", "title": "Shahumyan, Armavir", "sentences": ["Shahumyan () formerly known as \"Molla Dursun\", is a village in the Armavir Province of Armenia.", "It was renamed after Stepan Shahumyan, an Armenian Bolshevik commissar."], "mentions": [{"id": 0, "start": 67, "end": 83, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 42, "end": 49, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 21, "end": 37, "ref_url": "Stepan_Shahumyan", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270516", "title": "Shahumyan, Yerevan", "sentences": ["Shahumyan (, also, Imeni Shaumyana, Imeni Beriya, Shaumyan, and Posëlok Imeni Shaumyana) is a town in the Yerevan Province of Armenia.", "The town was named after Stepan Shahumyan, a Bolshevik commissar."], "mentions": [{"id": 0, "start": 106, "end": 122, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 1, "start": 126, "end": 133, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 25, "end": 41, "ref_url": 
"Stepan_Shahumyan", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270521", "title": "Shaki, Armenia", "sentences": ["Shaki (), is a village and a rural community in the Syunik Province of Armenia, located north of Sisian.", "The 2011 Armenia census reported its population was 1,197, down from 1,390 at the 2001 census.", "The village's Holy Mother of God Church was opened in 2003.", "The Shaki Waterfall is located near the village."], "mentions": [{"id": 0, "start": 52, "end": 67, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 1, "start": 9, "end": 16, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 97, "end": 103, "ref_url": "Sisian", "ref_ids": ["7108989"], "sent_idx": 0}, {"id": 3, "start": 4, "end": 19, "ref_url": "Shaki_Waterfall", "ref_ids": ["24682008"], "sent_idx": 3}]} +{"id": "18270522", "title": "Alternative versions of the Human Torch", "sentences": ["The Human Torch is a Marvel Comics superhero and a member of the Fantastic Four.", "There have been many alternative versions of him over the years."], "mentions": [{"id": 0, "start": 4, "end": 15, "ref_url": "Human_Torch", "ref_ids": ["235423"], "sent_idx": 0}, {"id": 1, "start": 21, "end": 34, "ref_url": "Marvel_Comics", "ref_ids": ["20966"], "sent_idx": 0}, {"id": 2, "start": 35, "end": 44, "ref_url": "Superhero", "ref_ids": ["43076"], "sent_idx": 0}, {"id": 3, "start": 65, "end": 79, "ref_url": "Fantastic_Four", "ref_ids": ["11664"], "sent_idx": 0}]} +{"id": "18270523", "title": "Shamakhyan", "sentences": ["Shamakhyan is a former village in Armenia, currently absorbed by the town of Dilijan in the Tavush Province.", "It was not listed in the 2011 official Armenian Census, and is likely included in the census count of the town of Dilijan."], "mentions": [{"id": 0, "start": 39, "end": 46, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 1, "start": 92, "end": 107, "ref_url": "Tavush_Province", "ref_ids": ["470516"], 
"sent_idx": 0}, {"id": 2, "start": 114, "end": 121, "ref_url": "Dilijan", "ref_ids": ["2697717"], "sent_idx": 1}]} +{"id": "18270530", "title": "Shamb", "sentences": ["Shamb () is a village and part of the Darbas community in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 38, "end": 44, "ref_url": "Darbas", "ref_ids": ["18256353"], "sent_idx": 0}, {"id": 1, "start": 62, "end": 77, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 88, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270531", "title": "Shamiram, Armenia", "sentences": ["Shamiram (), is a village in the Aragatsotn Province of Armenia.", "It is mostly populated by Yazidis."], "mentions": [{"id": 0, "start": 33, "end": 52, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 56, "end": 63, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 26, "end": 33, "ref_url": "Yazidis", "ref_ids": ["20557247"], "sent_idx": 1}]} +{"id": "18270533", "title": "Budralazine", "sentences": ["Budralazine (INN) is a vasodilator."], "mentions": [{"id": 0, "start": 13, "end": 16, "ref_url": "International_Nonproprietary_Name", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 23, "end": 34, "ref_url": "Vasodilator", "ref_ids": null, "sent_idx": 0}]} +{"id": "18270534", "title": "Need (disambiguation)", "sentences": ["A need is something actually, or perceived as being, necessary.", "It can also refer to:"], "mentions": [{"id": 0, "start": 2, "end": 6, "ref_url": "Need", "ref_ids": ["622545"], "sent_idx": 0}]} +{"id": "18270537", "title": "Kendall Jagdeosingh", "sentences": ["Kendall Jagdeosingh (born May 30, 1986 in Manzanilla) is a Trinidadian footballer who currently plays for Ayutthaya in Thai League 3"], "mentions": [{"id": 0, "start": 42, "end": 52, "ref_url": "Manzanilla,_Trinidad_and_Tobago", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 59, 
"end": 70, "ref_url": "Trinidad_and_Tobago", "ref_ids": ["3565457"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 81, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 3, "start": 106, "end": 115, "ref_url": "Ayutthaya_F.C.", "ref_ids": ["22263533"], "sent_idx": 0}, {"id": 4, "start": 119, "end": 132, "ref_url": "Thai_League_3", "ref_ids": ["51733915"], "sent_idx": 0}]} +{"id": "18270541", "title": "Shamut", "sentences": ["Shamut (), is a village in the Lori Province of Armenia.", "It belongs to the municipality of Tumanyan."], "mentions": [{"id": 0, "start": 31, "end": 44, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 55, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 34, "end": 42, "ref_url": "Tumanyan,_Armenia", "ref_ids": ["18607935"], "sent_idx": 1}]} +{"id": "18270543", "title": "Shatin, Armenia", "sentences": ["Shatin (; formerly, \"Shatik\", \"Hesan Kand\", \"Gasankend\", and \"Hasankand\"), is a village in the Vayots Dzor Province of Armenia.", "It is home to the only mountain goat observation point in Armenia.", "The observation point was constructed with the help of the Norwegian and Armenian governments, the World Wildlife Fund and Safari organizations.", "Tourists can come to see the mountain goats from these observation points.", "The village also has several archeological sites from various periods in Armenian history."], "mentions": [{"id": 0, "start": 95, "end": 115, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 80, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 4}]} +{"id": "18270545", "title": "Shatjrek", "sentences": ["Shatjrek (, also Romanized as Shatjreq; formerly, Koshabulakh, Koshabulag, and Ghoshabulagh) is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 
0}, {"id": 1, "start": 110, "end": 130, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 134, "end": 141, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270548", "title": "Shatvan", "sentences": ["Shatvan (; also, Shatavan; formerly, Narimanlu, Agalu, and Guseyn Kuli) is a village in the Gegharkunik Province of Armenia.", "The local cemetery dates to the 15-16th centuries."], "mentions": [{"id": 0, "start": 92, "end": 112, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 116, "end": 123, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270553", "title": "Shavarut", "sentences": ["Shavarut is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 46, "end": 53, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270555", "title": "Shenatagh", "sentences": ["Shenatagh (, also Romanized as Shenat’agh and Shenatag; formerly, Shinagat, Lernashen, and Lerrnashen) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The Statistical Committee of Armenia reported its population was 422 in 2010, up from 390 at the 2001 census."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 120, "end": 135, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 158, "end": 173, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 29, "end": 36, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 36, "ref_url": "Statistical_Committee_of_Armenia", "ref_ids": ["24984172"], "sent_idx": 1}]} +{"id": "18270560", "title": "Shenavan, Aragatsotn", "sentences": ["Shenavan (; formerly, Chotavet, Blkher and Bulkheyr) 
is a village in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 73, "end": 92, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 96, "end": 103, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270575", "title": "Shenavan, Armavir", "sentences": ["Shenavan (; until 1946, Kyalagarkh and Kolagarkh) is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 67, "end": 83, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 87, "end": 94, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270578", "title": "B. americanus", "sentences": ["B. americanus may refer to:"], "mentions": []} +{"id": "18270579", "title": "Josef Csaplár", "sentences": ["Josef Csaplár (born 29 October 1962 in Ostrov nad Ohří) is a Czech football manager and former player.", "He was lastly the head coach of FC Fastav Zlín."], "mentions": [{"id": 0, "start": 39, "end": 54, "ref_url": "Ostrov_nad_Ohří", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 61, "end": 66, "ref_url": "Czech_Republic", "ref_ids": ["5321"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 75, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 3, "start": 76, "end": 83, "ref_url": "Coach_(sport)", "ref_ids": ["626967"], "sent_idx": 0}, {"id": 4, "start": 32, "end": 46, "ref_url": "FC_Fastav_Zlín", "ref_ids": ["5234405"], "sent_idx": 1}]} +{"id": "18270582", "title": "Shenavan, Lori", "sentences": ["Shenavan (; formerly, Kzloran and Kyzloran) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 61, "end": 74, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 85, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270586", "title": "Preston Richards", "sentences": ["Preston D. 
Richards (born September 15, 1881) was an assistant solicitor for the state department of the United States under J. Reuben Clark during the Taft Administration.", "He was also a leader in The Church of Jesus Christ of Latter-day Saints (LDS Church).", "In 1907, Richards wrote a biography of early Mormon leader Willard Richards.", "Early on in his life, Richards served as a high school principal.", "In 1908, Richards was a delegate to the Republican National Convention.", "He then studied law at the University of Chicago.", "From there he became assistant solicitor of the state department and later formed a private law firm with Clark.", "Hugh B. Brown would later work for this law firm.", "In 1920, Richards was a member of the general board of the LDS Church's Young Men's Mutual Improvement Association."], "mentions": [{"id": 0, "start": 105, "end": 118, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 1, "start": 125, "end": 140, "ref_url": "J._Reuben_Clark", "ref_ids": ["2019696"], "sent_idx": 0}, {"id": 2, "start": 152, "end": 156, "ref_url": "William_Howard_Taft", "ref_ids": ["33522"], "sent_idx": 0}, {"id": 3, "start": 24, "end": 71, "ref_url": "The_Church_of_Jesus_Christ_of_Latter-day_Saints", "ref_ids": ["5935"], "sent_idx": 1}, {"id": 4, "start": 45, "end": 51, "ref_url": "Mormon", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 59, "end": 75, "ref_url": "Willard_Richards", "ref_ids": ["1074627"], "sent_idx": 2}, {"id": 6, "start": 40, "end": 70, "ref_url": "Republican_National_Convention", "ref_ids": ["509001"], "sent_idx": 4}, {"id": 7, "start": 27, "end": 48, "ref_url": "University_of_Chicago", "ref_ids": ["32127"], "sent_idx": 5}, {"id": 8, "start": 0, "end": 13, "ref_url": "Hugh_B._Brown", "ref_ids": ["1072726"], "sent_idx": 7}, {"id": 9, "start": 72, "end": 114, "ref_url": "Young_Men_(organization)", "ref_ids": ["10845858"], "sent_idx": 8}]} +{"id": "18270587", "title": "Shenkani", "sentences": ["Shenkani (; until 1978, 
Korbulakh, Korbulag, Kr'oyigegh, and K'yorbulagh) is a town in the Aragatsotn Province of Armenia.", "The town is mostly populated by Yazidis."], "mentions": [{"id": 0, "start": 91, "end": 110, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 114, "end": 121, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 39, "ref_url": "Yazidis", "ref_ids": ["20557247"], "sent_idx": 1}]} +{"id": "18270588", "title": "Shgharshik, Aragatsotn", "sentences": ["Shgharshik (, also Romanized as Shgarshik; until 1935, Sheikh Haji and Shkhadzhi) is a town in the Aragatsotn Province of Armenia.", "There is a memorial in the city remembering the Armenian Genocide of 1915."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 99, "end": 118, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 48, "end": 55, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 48, "end": 65, "ref_url": "Armenian_Genocide", "ref_ids": ["140376"], "sent_idx": 1}]} +{"id": "18270598", "title": "Shgharshik, Syunik", "sentences": ["Shgharshik (also, Shgardzhik) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 47, "end": 62, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 66, "end": 73, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270602", "title": "Shikahogh", "sentences": ["Shikahogh (), is a village and rural community (municipality) in the Syunik Province of Armenia.", "The name originated from () meaning Red land.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 189 in 2010, down from 272 at the 2001 census."], "mentions": [{"id": 0, "start": 31, "end": 46, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 69, "end": 84, "ref_url": 
"Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 2}]} +{"id": "18270611", "title": "Shikhlar", "sentences": ["Shikhlar (also, Shkhlar) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 42, "end": 57, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 61, "end": 68, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270615", "title": "The Sixth Extinction II: Amor Fati", "sentences": ["\"The Sixth Extinction II: Amor Fati\" is the second episode of the seventh season of the American science fiction television series \"The X-Files\".", "It was directed by Michael Watkins and written by lead actor David Duchovny and series creator Chris Carter.", "The installment explores the series' overarching mythology and concludes a trilogy of episodes revolving around Fox Mulder's (Duchovny) severe reaction to an alien artifact.", "Originally aired by the Fox network on November 14, 1999, \"The Sixth Extinction II: Amor Fati\" received a Nielsen rating of 10.1 and was seen by 16.15 million viewers.", "Initial reviews were mixed, and the plot and dialogue attracted criticism.", "Later critics viewed the episode in a more positive light, and several writers named it among the best in the series.", "\"The X-Files\" centers on Federal Bureau of Investigation (FBI) special agents Mulder and Dana Scully (Gillian Anderson), who work on cases linked to the paranormal, called X-Files.", "Mulder is a believer in the paranormal, and the skeptical Scully was initially assigned to debunk his work, but the two have developed a deep friendship.", "In this episode, Scully returns from Africa to discover Mulder in a coma induced by exposure to shards from an alien spaceship 
wreck.", "After Mulder disappears from the hospital, Scully joins former government employee Michael Kritschgau (John Finn) and her boss Walter Skinner (Mitch Pileggi) to search for him.", "Meanwhile, in a dream, The Smoking Man (William B. Davis) offers Mulder a new life and a fresh start.", "After conferring with a vision of Scully, Mulder awakens from his coma and realizes his duty to prevent alien colonization.", "Carter was interested in the possibility that extraterrestrials were involved in ancient mass extinctions on Earth and used these themes in the episode.", "Much of the episode was also inspired by Nikos Kazantzakis's novel \"The Last Temptation of Christ\", and a scene showing an operation on Mulder has been thematically compared to the Crucifixion of Jesus.", "For the dream sequences, casting director Rick Millikan brought back many actors and actresses who had been absent from the show for several years, including Jerry Hardin as Deep Throat, Rebecca Toolan as Teena Mulder, and Megan Leitch as Samantha Mulder."], "mentions": [{"id": 0, "start": 66, "end": 80, "ref_url": "The_X-Files_(season_7)", "ref_ids": ["940067"], "sent_idx": 0}, {"id": 1, "start": 1, "end": 12, "ref_url": "The_X-Files", "ref_ids": ["30304"], "sent_idx": 6}, {"id": 2, "start": 19, "end": 34, "ref_url": "Michael_W._Watkins", "ref_ids": ["7326430"], "sent_idx": 1}, {"id": 3, "start": 61, "end": 75, "ref_url": "David_Duchovny", "ref_ids": ["21189337"], "sent_idx": 1}, {"id": 4, "start": 95, "end": 107, "ref_url": "Chris_Carter_(screenwriter)", "ref_ids": ["224481"], "sent_idx": 1}, {"id": 5, "start": 49, "end": 58, "ref_url": "Mythology_of_The_X-Files", "ref_ids": ["23749226"], "sent_idx": 2}, {"id": 6, "start": 112, "end": 122, "ref_url": "Fox_Mulder", "ref_ids": ["261343"], "sent_idx": 2}, {"id": 7, "start": 24, "end": 27, "ref_url": "Fox_Broadcasting_Company", "ref_ids": ["46252"], "sent_idx": 3}, {"id": 8, "start": 106, "end": 120, "ref_url": "Nielsen_ratings", "ref_ids": 
["236591"], "sent_idx": 3}, {"id": 9, "start": 25, "end": 56, "ref_url": "Federal_Bureau_of_Investigation", "ref_ids": ["11127"], "sent_idx": 6}, {"id": 10, "start": 89, "end": 100, "ref_url": "Dana_Scully", "ref_ids": ["389780"], "sent_idx": 6}, {"id": 11, "start": 102, "end": 118, "ref_url": "Gillian_Anderson", "ref_ids": ["42238"], "sent_idx": 6}, {"id": 12, "start": 5, "end": 11, "ref_url": "X-File", "ref_ids": null, "sent_idx": 6}, {"id": 13, "start": 83, "end": 101, "ref_url": "List_of_The_X-Files_characters", "ref_ids": ["5471615"], "sent_idx": 9}, {"id": 14, "start": 103, "end": 112, "ref_url": "John_Finn", "ref_ids": ["1565921"], "sent_idx": 9}, {"id": 15, "start": 127, "end": 141, "ref_url": "Walter_Skinner", "ref_ids": ["938316"], "sent_idx": 9}, {"id": 16, "start": 143, "end": 156, "ref_url": "Mitch_Pileggi", "ref_ids": ["1078887"], "sent_idx": 9}, {"id": 17, "start": 23, "end": 38, "ref_url": "The_Smoking_Man", "ref_ids": null, "sent_idx": 10}, {"id": 18, "start": 40, "end": 56, "ref_url": "William_B._Davis", "ref_ids": ["673994"], "sent_idx": 10}, {"id": 19, "start": 46, "end": 77, "ref_url": "Ancient_astronauts", "ref_ids": ["301719"], "sent_idx": 12}, {"id": 20, "start": 89, "end": 105, "ref_url": "Extinction_event", "ref_ids": ["9813"], "sent_idx": 12}, {"id": 21, "start": 41, "end": 58, "ref_url": "Nikos_Kazantzakis", "ref_ids": ["51418"], "sent_idx": 13}, {"id": 22, "start": 68, "end": 97, "ref_url": "The_Last_Temptation_of_Christ", "ref_ids": ["74296"], "sent_idx": 13}, {"id": 23, "start": 181, "end": 201, "ref_url": "Crucifixion_of_Jesus", "ref_ids": ["22852566"], "sent_idx": 13}, {"id": 24, "start": 158, "end": 170, "ref_url": "Jerry_Hardin", "ref_ids": ["2599244"], "sent_idx": 14}, {"id": 25, "start": 174, "end": 185, "ref_url": "Deep_Throat_(The_X-Files)", "ref_ids": ["939789"], "sent_idx": 14}, {"id": 26, "start": 187, "end": 201, "ref_url": "Rebecca_Toolan", "ref_ids": ["13965270"], "sent_idx": 14}, {"id": 27, "start": 205, "end": 217, 
"ref_url": "List_of_minor_The_X-Files_characters", "ref_ids": null, "sent_idx": 14}, {"id": 28, "start": 223, "end": 235, "ref_url": "Megan_Leitch", "ref_ids": ["5576810"], "sent_idx": 14}, {"id": 29, "start": 239, "end": 254, "ref_url": "Samantha_Mulder", "ref_ids": ["2510623"], "sent_idx": 14}]} +{"id": "18270623", "title": "Boutaya", "sentences": ["Boutaya is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 740."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 64, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270626", "title": "Shirak, Armenia", "sentences": ["Shirak (; formerly, Kunakhkran and Konakhkran) is a village in the Shirak Province of Armenia, belongs to the Marmashen community."], "mentions": [{"id": 0, "start": 67, "end": 82, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 86, "end": 93, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270629", "title": "Diarra-Betongo", "sentences": ["Diarra-Betongo is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 840."], "mentions": [{"id": 0, "start": 35, "end": 51, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 71, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 89, "end": 101, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270633", "title": "Diella", "sentences": ["Diella is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 476."], "mentions": 
[{"id": 0, "start": 27, "end": 43, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 63, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270634", "title": "Shirakamut", "sentences": ["Shirakamut (; also, mistakenly, Shirakarnut; formerly, Nalband) is a town in the Lori Province of Armenia.", "The town was the epicenter of 1988 Armenian earthquake."], "mentions": [{"id": 0, "start": 81, "end": 94, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 35, "end": 42, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 17, "end": 26, "ref_url": "Epicenter", "ref_ids": ["173900"], "sent_idx": 1}, {"id": 3, "start": 30, "end": 54, "ref_url": "1988_Armenian_earthquake", "ref_ids": ["1352291"], "sent_idx": 1}]} +{"id": "18270640", "title": "Shirakavan, Armenia", "sentences": ["Shirakavan (; formerly, Tavshankishlag) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 75, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270642", "title": "Gnekouneta", "sentences": ["Gnekouneta is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 175."], "mentions": [{"id": 0, "start": 31, "end": 47, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 67, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 97, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270644", "title": "Shishkert", "sentences": ["Shishkert () is a village in the rural community of Tsav, Syunik Province, 
Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 342 at the 2001 census."], "mentions": [{"id": 0, "start": 33, "end": 48, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 52, "end": 56, "ref_url": "Tsav,_Armenia", "ref_ids": ["18271135"], "sent_idx": 0}, {"id": 2, "start": 58, "end": 73, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270648", "title": "Shorlu", "sentences": ["Shorlu (also, Mets Shorlu Demurchi and Bol’shiye Demurchi) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 76, "end": 91, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 102, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270649", "title": "Kareta", "sentences": ["Kareta is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 841."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 63, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270652", "title": "Patrick O'Driscoll", "sentences": ["Patrick Florence O'Driscoll (14 January 1878 – 8 August 1949) was an Irish Clann na Talmhan politician.", "A farmer by profession, he was first elected to Dáil Éireann as a Clann na Talmhan Teachta Dála (TD) for the Cork West constituency at the 1943 general election.", "He was re-elected at the 1944 general election and did not contest the 1948 
general election."], "mentions": [{"id": 0, "start": 66, "end": 82, "ref_url": "Clann_na_Talmhan", "ref_ids": ["1101439"], "sent_idx": 1}, {"id": 1, "start": 48, "end": 60, "ref_url": "Dáil_Éireann", "ref_ids": ["40038072"], "sent_idx": 1}, {"id": 2, "start": 83, "end": 95, "ref_url": "Teachta_Dála", "ref_ids": ["157086"], "sent_idx": 1}, {"id": 3, "start": 109, "end": 118, "ref_url": "Cork_West_(Dáil_constituency)", "ref_ids": ["5167633"], "sent_idx": 1}, {"id": 4, "start": 139, "end": 160, "ref_url": "1943_Irish_general_election", "ref_ids": ["400169"], "sent_idx": 1}, {"id": 5, "start": 25, "end": 46, "ref_url": "1944_Irish_general_election", "ref_ids": ["400165"], "sent_idx": 2}, {"id": 6, "start": 71, "end": 92, "ref_url": "1948_Irish_general_election", "ref_ids": ["400155"], "sent_idx": 2}]} +{"id": "18270654", "title": "Korgoreya", "sentences": ["Korgoreya is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 424."], "mentions": [{"id": 0, "start": 30, "end": 46, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 66, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 96, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270666", "title": "Koungou, Burkina Faso", "sentences": ["Koungou is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 821."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 64, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270673", "title": "Landre", "sentences": ["Landre is a village in the Zonsé Department of Boulgou 
Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 209."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 63, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270679", "title": "Mangare", "sentences": ["Mangare is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 441."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 64, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270682", "title": "Shoghakat, Armenia", "sentences": ["Shorgaklat (), known as Shorzha () until November 2017 (formerly known as Nadezhdino and Shordzhalu), is a village and a rural community located at Lake Sevan, northwest of the Artanish Peninsula in the Gegharkunik Province of Armenia.", "After administrative reforms in November 2017 the name of the village was changed from Shorzha to Shoghakat and it was chosen as administrative center of same-name rural municipality including also villages"], "mentions": [{"id": 0, "start": 148, "end": 158, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 0}, {"id": 1, "start": 177, "end": 195, "ref_url": "Artanish_Peninsula", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 203, "end": 223, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 3, "start": 227, "end": 234, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270685", "title": "Shrvenantz", "sentences": ["Shrvenantz (, also Romanized as Shrvenants and 
Shrvenants’; formerly, Daymadaglu) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 76 in 2010, down from 72 at the 2001 census."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 99, "end": 114, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 137, "end": 152, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270686", "title": "Possodo", "sentences": ["Possodo is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 836."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 64, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270687", "title": "Shurnukh", "sentences": ["Shurnukh () is a village and rural community (municipality) in the Syunik Province of Armenia.", "The community includes the villages of Shurnukh, Vanand, Aghbulagh, and Dzorak.", "The community's population was 207 in 2011, down from 224 in 2010.", "The village had a population of 207 at the 2011 census, up from 148 at the 2001 census."], "mentions": [{"id": 0, "start": 29, "end": 44, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 67, "end": 82, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 86, "end": 93, "ref_url": "Armenia", 
"ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 49, "end": 55, "ref_url": "Vanand,_Syunik", "ref_ids": ["18256747"], "sent_idx": 1}, {"id": 4, "start": 57, "end": 66, "ref_url": "Aghbulagh", "ref_ids": ["18255477"], "sent_idx": 1}, {"id": 5, "start": 72, "end": 78, "ref_url": "Dzorak,_Syunik", "ref_ids": ["41030813"], "sent_idx": 1}]} +{"id": "18270690", "title": "Samprabissa", "sentences": ["Samprabissa is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 104."], "mentions": [{"id": 0, "start": 32, "end": 48, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 68, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 86, "end": 98, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270692", "title": "Sipan, Armenia", "sentences": ["Sipan (; until 1978, Pamb Kurdskiy, P'amb Kurd, P'ambak, and Pamb) is a town in the Aragatsotn Province of Armenia.", "The town is mostly populated by Yezidis."], "mentions": [{"id": 0, "start": 84, "end": 103, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 107, "end": 114, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 39, "ref_url": "Yazidis", "ref_ids": ["20557247"], "sent_idx": 1}]} +{"id": "18270693", "title": "Sipanik", "sentences": ["Sipanik (, also Romanized as Sip’anik) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 56, "end": 71, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 75, "end": 82, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270695", "title": "Sangou-Nazela", "sentences": ["Sangou-Nazela is a village in the Zonsé Department of Boulgou Province in 
south-eastern Burkina Faso.", "As of 2005, the village has a population of 71."], "mentions": [{"id": 0, "start": 34, "end": 50, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 70, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 88, "end": 100, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270697", "title": "Sis, Armenia", "sentences": ["Sis (; formerly, Uliya Sarvanlar and Sarvanlar) is a town in the Ararat Province of Armenia.", "The Sis territorial union refers to the seat of the archbishop of Sis, the capital of Armenian Cilicia."], "mentions": [{"id": 0, "start": 65, "end": 80, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 86, "end": 93, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}]} +{"id": "18270704", "title": "Sisavan", "sentences": ["Sisavan (; formerly, Yengidzha) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 49, "end": 64, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270707", "title": "Saoupo", "sentences": ["Saoupo is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 650.", "This village is known for the annual festival on water management."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 63, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270708", "title": "Francis Carolus Eeles", "sentences": ["Francis Carolus Eeles (1876 – 17 August 1954, Dunster) was an English liturgical scholar and ecclesiastical 
historian.", "Eeles was on the Advisory Committee of the Warham Guild, established in 1912.", "He gave the Rhind Lectures in 1914, on \"The Liturgy and Ceremonial of The Mediaeval Church in Scotland\".", "Eeles was the first secretary of the Central Council for the Care of Churches, serving as honorary secretary from 1917 and paid secretary from 1926 until his death in 1954.", "He was made OBE in 1938.", "In 1939 he began systematically collecting details of English parish churches in order that they might be restored after wartime damage.", "Eeles bequeathed his books to form the nucleus of the library of the Council for the Care of Churches.", "His papers are held at the Church of England Record Centre.", "He is buried in the churchyard of All Saints at Selworthy, Somerset."], "mentions": [{"id": 0, "start": 46, "end": 53, "ref_url": "Dunster", "ref_ids": ["1090088"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 55, "ref_url": "Warham_Guild", "ref_ids": ["43238540"], "sent_idx": 1}, {"id": 2, "start": 12, "end": 26, "ref_url": "Rhind_Lectures", "ref_ids": ["29769334"], "sent_idx": 2}, {"id": 3, "start": 37, "end": 77, "ref_url": "Central_Council_for_the_Care_of_Churches", "ref_ids": null, "sent_idx": 3}, {"id": 4, "start": 48, "end": 57, "ref_url": "Selworthy", "ref_ids": ["15385487"], "sent_idx": 8}]} +{"id": "18270709", "title": "Sizavet", "sentences": ["Sizavet (; formerly, Korakhpyur, Koragbyur, Korbulag, and Tsakhkashen) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 91, "end": 106, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 110, "end": 117, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270712", "title": "Siznak", "sentences": ["Siznak is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 24, "end": 39, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 43, "end": 50, "ref_url": "Armenia", 
"ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270713", "title": "Alternative versions of the Thing", "sentences": ["The Thing is a Marvel Comics superhero and a member of the Fantastic Four.", "He has many alternative counterparts throughout the Marvel Multiverse."], "mentions": [{"id": 0, "start": 4, "end": 9, "ref_url": "Thing_(comics)", "ref_ids": ["75827"], "sent_idx": 0}, {"id": 1, "start": 15, "end": 28, "ref_url": "Marvel_Comics", "ref_ids": ["20966"], "sent_idx": 0}, {"id": 2, "start": 29, "end": 38, "ref_url": "Superhero", "ref_ids": ["43076"], "sent_idx": 0}, {"id": 3, "start": 59, "end": 73, "ref_url": "Fantastic_Four", "ref_ids": ["11664"], "sent_idx": 0}, {"id": 4, "start": 52, "end": 58, "ref_url": "Marvel_Universe", "ref_ids": ["20986"], "sent_idx": 1}, {"id": 5, "start": 59, "end": 69, "ref_url": "Multiverse_(Marvel_Comics)", "ref_ids": ["3740960"], "sent_idx": 1}]} +{"id": "18270717", "title": "Soboya", "sentences": ["Soboya is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 360."], "mentions": [{"id": 0, "start": 27, "end": 43, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 63, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 81, "end": 93, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270718", "title": "Nzhdeh, Armenia", "sentences": ["Nzhdeh (); formerly known as \"Pusak\" () and \"Sofulu\", \"Soflu\" (), is a village and rural community in the Syunik Province of Armenia.", "It is named after the Armenian military leader Garegin Nzhdeh.", "The community of Nzhdeh includes the villages of Nzhdeh and Tsghuni.", "The Statistical Committee of Armenia reported the community's population as 188 in 2010, up from 185 at the 2001 census.", "The village's population was 92 at the 2011 census, down from 114 at the 2001 census."], "mentions": 
[{"id": 0, "start": 83, "end": 98, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 106, "end": 121, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 29, "end": 36, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 3}, {"id": 3, "start": 47, "end": 61, "ref_url": "Garegin_Nzhdeh", "ref_ids": ["4388648"], "sent_idx": 1}, {"id": 4, "start": 60, "end": 67, "ref_url": "Tsghuni", "ref_ids": ["18271141"], "sent_idx": 2}, {"id": 5, "start": 4, "end": 36, "ref_url": "Statistical_Committee_of_Armenia", "ref_ids": ["24984172"], "sent_idx": 3}]} +{"id": "18270721", "title": "Sorik", "sentences": ["Sorik (; until 1935, Zorba and Dzorba) is a town in the Aragatsotn Province of Armenia.", "The town is mostly populated by Yazidis."], "mentions": [{"id": 0, "start": 56, "end": 75, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 39, "ref_url": "Yazidis", "ref_ids": ["20557247"], "sent_idx": 1}]} +{"id": "18270722", "title": "Soper, Burkina Faso", "sentences": ["Soper is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 727."], "mentions": [{"id": 0, "start": 26, "end": 42, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 62, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 80, "end": 92, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270723", "title": "Sotk", "sentences": ["Sotk (, until 1991 Zod) is a village in the Gegharkunik Province of Armenia, well known for its gold mines."], "mentions": [{"id": 0, "start": 44, "end": 64, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", 
"ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270726", "title": "Alashkert, Armenia", "sentences": ["Alashkert (); known as \"Kyarimarkh\" until 1935, is a village in the Armavir Province of Armenia.", "It was known as Sovetakan between 1935 and 2008.", "In 2008, the village was renamed Alashkert, after the historic Western Armenian town of Alashkert."], "mentions": [{"id": 0, "start": 68, "end": 84, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 78, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 2, "start": 63, "end": 78, "ref_url": "Western_Armenia", "ref_ids": ["3492582"], "sent_idx": 2}, {"id": 3, "start": 33, "end": 42, "ref_url": "Eleşkirt", "ref_ids": ["4104145"], "sent_idx": 2}]} +{"id": "18270734", "title": "Yerba-Peulh", "sentences": ["Yerba-Peulh is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2005, the village has a population of 165."], "mentions": [{"id": 0, "start": 32, "end": 48, "ref_url": "Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 68, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 86, "end": 98, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270736", "title": "Paghaghbyur", "sentences": ["Paghaghbyur (; formerly, Sovugbulakh and Sovukbulagh - meaning \"cold spring\") is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 95, "end": 108, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 112, "end": 119, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270741", "title": "Yergoya", "sentences": ["Yergoya is a village in the Zonsé Department of Boulgou Province in south-eastern Burkina Faso.", "As of 2019, the village has a population of 2,725."], "mentions": [{"id": 0, "start": 28, "end": 44, "ref_url": 
"Zonsé_Department", "ref_ids": ["16925872"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 64, "ref_url": "Boulgou_Province", "ref_ids": ["2874530"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 94, "ref_url": "Burkina_Faso", "ref_ids": ["3470"], "sent_idx": 0}]} +{"id": "18270742", "title": "Spandaryan, Shirak", "sentences": ["Spandaryan (; until 1946, Gyullija and Kulidzhan) is a village in the Shirak Province of Armenia.", "The village was renamed in 1946 in honor of Armenian revolutionary Suren Spandaryan."], "mentions": [{"id": 0, "start": 70, "end": 85, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 51, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 67, "end": 83, "ref_url": "Suren_Spandaryan", "ref_ids": ["40855522"], "sent_idx": 1}]} +{"id": "18270747", "title": "Spandaryan, Syunik", "sentences": ["Spandaryan (; formerly, Meliklar, Maliklar, Meliklu, and Kalachik) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 486 in 2010, up from 446 at the 2001 census.", "The area contains the Spandaryan Hydro Power Plant, one of Armenia's largest hydro power plants and the Spandaryan Reservoir to the northwest of the village itself."], "mentions": [{"id": 0, "start": 84, "end": 99, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 122, "end": 137, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 59, "end": 66, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 22, "end": 50, "ref_url": "Spandaryan_Hydro_Power_Plant", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 77, "end": 95, "ref_url": "Hydroelectricity", 
"ref_ids": ["381399"], "sent_idx": 2}, {"id": 6, "start": 104, "end": 124, "ref_url": "Spandaryan_Reservoir", "ref_ids": null, "sent_idx": 2}]} +{"id": "18270750", "title": "Wayne D. Wright", "sentences": ["Wayne Danforth Wright (August 21, 1916 – March 11, 2003) was an American Hall of Fame and National Champion Thoroughbred horse racing jockey who won all three of the Triple Crown races in different years.", "Wayne Wright began riding in his small town of Rexburg, Idaho and by age ten was riding in area fairground races.", "He began his professional career at age fourteen at a racetrack in Reno, Nevada where he got his first win on July 15, 1931.", "Soon his skills saw him move to the race with the best on the New York State racing circuit where in 1934 he was the United States Champion Jockey by earnings and won the first of his three Triple Crown races.", "In 1936 he again led all American jockeys in earnings, winning stakes races on the U.S. East Coast plus in California where he won the West's most prestigious races at Santa Anita Park, the Santa Anita Derby and Santa Anita Handicap.", "In 1939, Wright appeared in the Columbia Pictures film, \"Columbia World of Sports: Jockeys Up\" in which future National Radio Hall of Fame and American Sportscasters Hall of Fame inductee Bill Stern went to Santa Anita Park and spent the day visiting the stables and meeting with several jockeys, trainers, and horses."], "mentions": [{"id": 0, "start": 126, "end": 138, "ref_url": "National_Museum_of_Racing_and_Hall_of_Fame", "ref_ids": ["429691"], "sent_idx": 5}, {"id": 1, "start": 90, "end": 107, "ref_url": "United_States_Champion_Jockey_by_earnings", "ref_ids": ["9482535"], "sent_idx": 0}, {"id": 2, "start": 108, "end": 133, "ref_url": "Thoroughbred_horse_race", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 288, "end": 294, "ref_url": "Jockey", "ref_ids": ["284545"], "sent_idx": 5}, {"id": 4, "start": 190, "end": 202, "ref_url": "Triple_Crown_of_Thoroughbred_Racing", 
"ref_ids": ["58596"], "sent_idx": 3}, {"id": 5, "start": 47, "end": 61, "ref_url": "Rexburg,_Idaho", "ref_ids": ["110747"], "sent_idx": 1}, {"id": 6, "start": 67, "end": 79, "ref_url": "Reno,_Nevada", "ref_ids": ["26388"], "sent_idx": 2}, {"id": 7, "start": 62, "end": 76, "ref_url": "New_York_(state)", "ref_ids": ["8210131"], "sent_idx": 3}, {"id": 8, "start": 117, "end": 158, "ref_url": "United_States_Champion_Jockey_by_earnings", "ref_ids": ["9482535"], "sent_idx": 3}, {"id": 9, "start": 83, "end": 98, "ref_url": "East_Coast_of_the_United_States", "ref_ids": ["89126"], "sent_idx": 4}, {"id": 10, "start": 107, "end": 117, "ref_url": "California", "ref_ids": ["5407"], "sent_idx": 4}, {"id": 11, "start": 135, "end": 141, "ref_url": "West_Coast_of_the_United_States", "ref_ids": ["174579"], "sent_idx": 4}, {"id": 12, "start": 207, "end": 223, "ref_url": "Santa_Anita_Park", "ref_ids": ["434184"], "sent_idx": 5}, {"id": 13, "start": 190, "end": 207, "ref_url": "Santa_Anita_Derby", "ref_ids": ["2871008"], "sent_idx": 4}, {"id": 14, "start": 212, "end": 232, "ref_url": "Santa_Anita_Handicap", "ref_ids": ["1734563"], "sent_idx": 4}, {"id": 15, "start": 32, "end": 49, "ref_url": "Columbia_Pictures", "ref_ids": ["175634"], "sent_idx": 5}, {"id": 16, "start": 50, "end": 54, "ref_url": "Film", "ref_ids": ["21555729"], "sent_idx": 5}, {"id": 17, "start": 111, "end": 138, "ref_url": "National_Radio_Hall_of_Fame", "ref_ids": ["2009545"], "sent_idx": 5}, {"id": 18, "start": 143, "end": 178, "ref_url": "American_Sportscasters_Association", "ref_ids": ["28345740"], "sent_idx": 5}, {"id": 19, "start": 188, "end": 198, "ref_url": "Bill_Stern", "ref_ids": ["1659101"], "sent_idx": 5}]} +{"id": "18270756", "title": "Srashen", "sentences": ["Srashen (; formerly, Kilisakyand) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 91 in 2010, down from 105 at the 
2001 census."], "mentions": [{"id": 0, "start": 51, "end": 66, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 89, "end": 104, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270759", "title": "Suser", "sentences": ["Suser (; until 1946, Klichatakh and Ghlijatagh) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 65, "end": 84, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 88, "end": 95, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270761", "title": "Svarants", "sentences": ["Svarants (, also Romanized as Svarants’) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 270 in 2010, down from 360 at the 2001 census."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 58, "end": 73, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 96, "end": 111, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270766", "title": "Sverdlov, Armenia", "sentences": ["Sverdlov (; formerly, Aydarbek) is a village in the Lori Province of Armenia.", "Sverdlov is situated on the Urut River, 12 km north-east of Alaverdi and 48 km from Vanadzor.", "The village dates back to 18th century, 6 km on north-east from Manstev 
monastery (located in Teghut village).", "Although the village is much older than assumed, because it was a center of tuff and basalt mining from 5th to 7th century.", "Sverdlov hosts the 6th century St. George Church, which was demolished and rebuilt in 19th century, then renovated and re-consecrated in 2010.", "The chapel of the church is dated to the 13th century.", "There are also preserved cemetery ruins.", "Sverdlov has a mountain climate, with severely cold winters, cool summers and frequent rainfall and hail.", "Residents are mainly engaged in livestock breeding and growing grains, potatoes, melons and pumpkin.", "Meadows and pastures are located at around 1900–2400 meters above the sea level.", "Medium steep slopes with alpine or subalpine climate are favorable conditions for livestock pasture."], "mentions": [{"id": 0, "start": 52, "end": 65, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 28, "end": 38, "ref_url": "Urut_River", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 60, "end": 68, "ref_url": "Alaverdi,_Armenia", "ref_ids": ["971084"], "sent_idx": 1}, {"id": 4, "start": 84, "end": 92, "ref_url": "Vanadzor", "ref_ids": ["2090978"], "sent_idx": 1}, {"id": 5, "start": 76, "end": 80, "ref_url": "Tuff", "ref_ids": ["44481"], "sent_idx": 3}, {"id": 6, "start": 85, "end": 91, "ref_url": "Basalt", "ref_ids": ["43534"], "sent_idx": 3}, {"id": 7, "start": 25, "end": 31, "ref_url": "Alpine_climate", "ref_ids": ["496730"], "sent_idx": 10}, {"id": 8, "start": 35, "end": 52, "ref_url": "Subalpine_climate", "ref_ids": null, "sent_idx": 10}]} +{"id": "18270769", "title": "Cultural Center / State Center station", "sentences": ["Cultural Center station is a Baltimore Light Rail station in Baltimore, Maryland.", "It is served by all three services that the Baltimore Light Rail operates.", "There is currently no free public 
parking at this station.", "Connections can be made to 9 of MTA Maryland's buses from here.", "The Cultural Center stop is located within a close walk of the Baltimore Metro Subway's State Center station, and is one of two locations where it is possible to transfer between the light rail and the subway.", "When Metro trains stop at State Center, \"State Center/Cultural Center\" is announced to let riders know of the transfer option."], "mentions": [{"id": 0, "start": 44, "end": 64, "ref_url": "Baltimore_Light_Rail", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 61, "end": 80, "ref_url": "Baltimore,_Maryland", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 32, "end": 44, "ref_url": "MTA_Maryland", "ref_ids": null, "sent_idx": 3}, {"id": 3, "start": 63, "end": 85, "ref_url": "Baltimore_Metro_Subway", "ref_ids": null, "sent_idx": 4}, {"id": 4, "start": 88, "end": 108, "ref_url": "State_Center_station", "ref_ids": null, "sent_idx": 4}]} +{"id": "18270770", "title": "Syunik (village)", "sentences": ["Syunik (, also Romanized as Syunik’; formerly, Siznak and Syznak) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The community of Syunik contains the villages of Syunik, Bargushat, Ditsmayri, Khordzor, and Sznak.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported that the community's population was 1,294 in 2010, up from 1,023 at the 2001 census.", "The population of the village of Syunik was 791 at the 2011 census."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 83, "end": 98, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 121, "end": 136, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 4, "start": 57, "end": 66, "ref_url": "Bargushat", "ref_ids": ["18269915"], 
"sent_idx": 1}, {"id": 5, "start": 68, "end": 77, "ref_url": "Ditsmayri", "ref_ids": ["18256404"], "sent_idx": 1}, {"id": 6, "start": 79, "end": 87, "ref_url": "Khordzor", "ref_ids": ["18260439"], "sent_idx": 1}, {"id": 7, "start": 93, "end": 98, "ref_url": "Sznak", "ref_ids": ["18813303"], "sent_idx": 1}, {"id": 8, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 2}]} +{"id": "18270775", "title": "Goghovit", "sentences": ["Goghovit (; until 1978, Taknalu and Taknali) is a village and rural community (municipality) in the Shirak Province of Armenia.", "The town's church dates from 1860.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 389 in 2010, down from 396 at the 2001 census."], "mentions": [{"id": 0, "start": 62, "end": 77, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 100, "end": 115, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 2}]} +{"id": "18270792", "title": "Madrona Valley, Seattle", "sentences": ["Madrona Valley is a predominantly residential neighborhood in Seattle south of Madison Valley, west of Madrona, north of the Central District, and east of Cherry Hill.", "Madrona Valley is one of Seattle's fastest gentrifying neighborhoods."], "mentions": [{"id": 0, "start": 34, "end": 45, "ref_url": "Residential", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 25, "end": 32, "ref_url": "Seattle", "ref_ids": ["11388236"], "sent_idx": 1}, {"id": 2, "start": 79, "end": 93, "ref_url": "Madison_Valley,_Seattle,_Washington", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 0, "end": 7, "ref_url": "Madrona,_Seattle,_Washington", "ref_ids": null, 
"sent_idx": 1}, {"id": 4, "start": 125, "end": 141, "ref_url": "Central_District,_Seattle,_Washington", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 155, "end": 166, "ref_url": "Cherry_Hill,_Seattle,_Washington", "ref_ids": null, "sent_idx": 0}]} +{"id": "18270809", "title": "Alternative versions of Mister Fantastic", "sentences": ["Mister Fantastic is a fictional comic book character, who has had many alternate versions that have appeared in Marvel Comics publications."], "mentions": [{"id": 0, "start": 0, "end": 16, "ref_url": "Mister_Fantastic", "ref_ids": ["151154"], "sent_idx": 0}, {"id": 1, "start": 112, "end": 125, "ref_url": "Marvel_Comics", "ref_ids": ["20966"], "sent_idx": 0}]} +{"id": "18270840", "title": "Tanahat", "sentences": ["Tanahat (, also Romanized as T’anahat; formerly, Dzhomardlu and Jomardlu) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 76 in 2010, up from 42 at the 2001 census.", "Prior to the Nagorno-Karabakh war, Tanahat was home to 70 Azerbaijani families and had a regular bus service to Baku."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 91, "end": 106, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 129, "end": 144, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 13, "end": 33, "ref_url": "Nagorno-Karabakh_War", "ref_ids": ["4020775"], "sent_idx": 2}, {"id": 6, "start": 112, "end": 116, "ref_url": "Baku", "ref_ids": ["4566"], "sent_idx": 2}]} +{"id": "18270846", "title": "Tandzatap", "sentences": ["Tandzatap () is a 
village and rural community in the Syunik Province of Armenia.", "The Statistical Committee of Armenia reported its population was 100 in 2010, up from 99 at the 2001 census."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 53, "end": 68, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 29, "end": 36, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 36, "ref_url": "Statistical_Committee_of_Armenia", "ref_ids": ["24984172"], "sent_idx": 1}]} +{"id": "18270851", "title": "Machanayim", "sentences": ["Machanayim is a game similar to dodge ball that is frequently played by in Orthodox Jewish schools and summer camps.", "The name Machanayim (sometimes spelled Machanaim) comes from the Hebrew word meaning \"two encampments\" or, in this case, two teams.", "Game Play:\nPlayers are divided into two encampments and the room is split in two with the teams facing each other.", "The playing area does not extend all the way to the back of the room or court - the two far ends are left empty, and one volunteer, typically one of the better players, from each team is placed behind the opposing team.", "This player is called \"the captain\".", "A ball is thrown into play at the start, although it is not activated (see below).", "The game is similar to Dodgeball, in that players try and throw the ball at opponents.", "When a player is hit, they are out.", "The difference between Machanayim and Dodgeball is that when a player is out in Machanayim, they are still part of the game.", "Rather than leaving the court, the player goes to the end area behind the opposing team, joining the original volunteer from their team.", "Activation:", "The ball is only eligible for use in getting others out once it has been \"activated\".", "To activate a ball, a player must throw it to any other player, on either team, without it 
hitting the ground.", "The ball is announced \"alive\" when it has been activated.", "As soon as the ball hits the ground, it is pronounced \"dead\" and needs resuscitation.", "Ball Types:\nAlmost any spherical mass can be used to play Machanayim, the only rule being that it must be easily catchable by most players.", "To that extent, very small balls (like a pea) or very large ones (like a weather balloon) should not be used.", "Basketballs are generally considered too heavy and can cause injury.", "Soccer balls and volleyballs are the most popular.", "Winning:", "When one team runs out of players (they are all behind the opposing team), the captain goes into the middle.", "The captain has more than one \"life\" (can be up to 3) Some games allow the captain to give over one of his lives to another team player to give the captain respite.", "Once the captain is out 3 times, the other team wins."], "mentions": [{"id": 0, "start": 39, "end": 48, "ref_url": "Machanaim", "ref_ids": ["5710044"], "sent_idx": 1}, {"id": 1, "start": 38, "end": 47, "ref_url": "Dodgeball", "ref_ids": ["217291"], "sent_idx": 8}]} +{"id": "18270855", "title": "Message (disambiguation)", "sentences": ["A message is an object of communication.", "Message or Messages may also refer to"], "mentions": [{"id": 0, "start": 2, "end": 9, "ref_url": "Message", "ref_ids": ["41367"], "sent_idx": 0}]} +{"id": "18270883", "title": "Gaspereau Vineyards", "sentences": ["Gaspereau Vineyards is a small winery located in the Gaspereau River Valley of Nova Scotia.", "The vineyard encompasses and is 3 kilometres from downtown Wolfville.", "It is one of several wineries in Nova Scotia, representing a blooming industry in the province.", "The winery produces a number of red and white wines, available in dry, off dry, and semi dry.", "Additionally, icewine and maple wine are made."], "mentions": [{"id": 0, "start": 53, "end": 68, "ref_url": "Gaspereau_River", "ref_ids": ["40313604"], "sent_idx": 0}, {"id": 1, 
"start": 33, "end": 44, "ref_url": "Nova_Scotia", "ref_ids": ["21184"], "sent_idx": 2}, {"id": 2, "start": 59, "end": 68, "ref_url": "Wolfville", "ref_ids": ["33716"], "sent_idx": 1}, {"id": 3, "start": 21, "end": 44, "ref_url": "Nova_Scotia_wine", "ref_ids": ["24164416"], "sent_idx": 2}, {"id": 4, "start": 14, "end": 21, "ref_url": "Icewine", "ref_ids": null, "sent_idx": 4}]} +{"id": "18270891", "title": "Johnson County Courthouse (Wyoming)", "sentences": ["The Johnson County Courthouse in Buffalo, Wyoming was built in 1884.", "The Italianate style building adjoins the former Johnson County Library, which is also listed on the National Register of Historic Places."], "mentions": [{"id": 0, "start": 33, "end": 49, "ref_url": "Buffalo,_Wyoming", "ref_ids": ["140118"], "sent_idx": 0}, {"id": 1, "start": 4, "end": 14, "ref_url": "Italianate", "ref_ids": null, "sent_idx": 1}, {"id": 2, "start": 49, "end": 71, "ref_url": "Carnegie_Public_Library_(Buffalo,_Wyoming)", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 101, "end": 137, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 1}]} +{"id": "18270892", "title": "Michigan–Wacker Historic District", "sentences": ["The Michigan–Wacker Historic District is a National Register of Historic Places District that includes parts of the Chicago Loop and Near North Side community areas in Chicago, Illinois, United States.", "The district is known for the Chicago River, two bridges that cross it, and eleven high rise and skyscraper buildings erected in the 1920s.", "Among the contributing properties are the following Chicago Landmark structures:\nOther notable sites include Pioneer Court the Jean Baptiste Point Du Sable Homesite (401 North Michigan), which as the site of Chicago's first permanent residence is a National Historic Landmark, and the Wrigley Building (410 North Michigan).", "Across the Michigan Avenue Bridge is the former site of Fort Dearborn, the US Army post established in 
1803.", "To the west is the Heald Square Monument, a statue of George Washington and the financiers of the American Revolution.", "The district includes contributing properties with addresses on North Michigan Avenue, East Wacker Drive, North Wabash Avenue and East South Water Street.", "Other streets in the district are Rush Street, Hubbard, Illinois and Kinzie.", "The majority of these properties are on Michigan, with addresses ranging from 230 North Michigan to 505 North Michigan.", "The district also includes parts of Michigan, Wacker and East South Water, which are all among the many multilevel streets in Chicago.", "Most of its contributing high-rise buildings and skyscrapers are of either Gothic or Baroque architecture, in addition to Art Deco.", "The district is north of the Historic Michigan Boulevard District.", "It was listed as on the National Register of Historic Places on November 15, 1978."], "mentions": [{"id": 0, "start": 24, "end": 60, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 11}, {"id": 1, "start": 116, "end": 128, "ref_url": "Chicago_Loop", "ref_ids": ["77773"], "sent_idx": 0}, {"id": 2, "start": 133, "end": 148, "ref_url": "Near_North_Side,_Chicago", "ref_ids": ["761806"], "sent_idx": 0}, {"id": 3, "start": 149, "end": 164, "ref_url": "Community_areas_of_Chicago", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 168, "end": 185, "ref_url": "Chicago,_Illinois", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 30, "end": 43, "ref_url": "Chicago_River", "ref_ids": ["41962"], "sent_idx": 1}, {"id": 6, "start": 83, "end": 92, "ref_url": "High_rise", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 49, "end": 59, "ref_url": "Skyscraper", "ref_ids": ["29485"], "sent_idx": 9}, {"id": 8, "start": 52, "end": 68, "ref_url": "Chicago_Landmark", "ref_ids": null, "sent_idx": 2}, {"id": 9, "start": 109, "end": 122, "ref_url": "Pioneer_Court", "ref_ids": ["12155332"], "sent_idx": 2}, {"id": 10, "start": 127, 
"end": 164, "ref_url": "Jean_Baptiste_Point_Du_Sable_Homesite", "ref_ids": ["11174231"], "sent_idx": 2}, {"id": 11, "start": 249, "end": 275, "ref_url": "National_Historic_Landmark", "ref_ids": ["404013"], "sent_idx": 2}, {"id": 12, "start": 285, "end": 301, "ref_url": "Wrigley_Building", "ref_ids": ["70736"], "sent_idx": 2}, {"id": 13, "start": 56, "end": 69, "ref_url": "Fort_Dearborn", "ref_ids": ["40417"], "sent_idx": 3}, {"id": 14, "start": 19, "end": 40, "ref_url": "Heald_Square_Monument", "ref_ids": ["11362105"], "sent_idx": 4}, {"id": 15, "start": 54, "end": 71, "ref_url": "George_Washington", "ref_ids": ["11968"], "sent_idx": 4}, {"id": 16, "start": 70, "end": 85, "ref_url": "Michigan_Avenue_(Chicago)", "ref_ids": ["503172"], "sent_idx": 5}, {"id": 17, "start": 92, "end": 104, "ref_url": "Wacker_Drive", "ref_ids": ["1492190"], "sent_idx": 5}, {"id": 18, "start": 34, "end": 45, "ref_url": "Rush_Street_(Chicago)", "ref_ids": ["13222299"], "sent_idx": 6}, {"id": 19, "start": 104, "end": 133, "ref_url": "Multilevel_streets_in_Chicago", "ref_ids": ["1493685"], "sent_idx": 8}, {"id": 20, "start": 25, "end": 34, "ref_url": "High-rise", "ref_ids": null, "sent_idx": 9}, {"id": 21, "start": 49, "end": 59, "ref_url": "Skyscraper", "ref_ids": ["29485"], "sent_idx": 9}, {"id": 22, "start": 75, "end": 81, "ref_url": "Gothic_architecture", "ref_ids": ["54044"], "sent_idx": 9}, {"id": 23, "start": 85, "end": 105, "ref_url": "Baroque_architecture", "ref_ids": ["344140"], "sent_idx": 9}, {"id": 24, "start": 122, "end": 130, "ref_url": "Art_Deco", "ref_ids": ["1881"], "sent_idx": 9}, {"id": 25, "start": 29, "end": 65, "ref_url": "Historic_Michigan_Boulevard_District", "ref_ids": ["11221991"], "sent_idx": 10}, {"id": 26, "start": 24, "end": 60, "ref_url": "National_Register_of_Historic_Places", "ref_ids": ["64065"], "sent_idx": 11}]} +{"id": "18270937", "title": "Dennis Cocke", "sentences": ["Dennis Geoffrey Cocke (June 2, 1924 – July 2, 2008) sat as a Member of the British 
Columbia Legislature as a New Democratic Party member from 1969 to 1986 for the seat of New Westminster.", "He grew up on a farm in Athabasca, Alberta and served in the Royal Canadian Air Force during World War II.", "Cocke served as Minister of Health under Dave Barrett from 1972 to 1975.", "In that capacity he created the BC Ambulance Service, replacing a patchwork of private and municipal enterprise with a province-wide system, professional standards and certification, and with newly designed and purpose-built ambulance vehicles.", "He also led the building of Queen's Park Hospital, and the reconstruction of Royal Columbian Hospital.", "He died at the age of 84 at the Royal Columbian Hospital after suffering a stroke."], "mentions": [{"id": 0, "start": 75, "end": 103, "ref_url": "British_Columbia_Legislature", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 109, "end": 129, "ref_url": "British_Columbia_New_Democratic_Party", "ref_ids": ["168484"], "sent_idx": 0}, {"id": 2, "start": 171, "end": 186, "ref_url": "New_Westminster_(provincial_electoral_district)", "ref_ids": ["1865646"], "sent_idx": 0}, {"id": 3, "start": 24, "end": 42, "ref_url": "Athabasca,_Alberta", "ref_ids": ["675985"], "sent_idx": 1}, {"id": 4, "start": 61, "end": 85, "ref_url": "Royal_Canadian_Air_Force", "ref_ids": ["931406"], "sent_idx": 1}, {"id": 5, "start": 93, "end": 105, "ref_url": "World_War_II", "ref_ids": ["32927"], "sent_idx": 1}, {"id": 6, "start": 41, "end": 53, "ref_url": "Dave_Barrett", "ref_ids": ["308670"], "sent_idx": 2}, {"id": 7, "start": 32, "end": 56, "ref_url": "Royal_Columbian_Hospital", "ref_ids": ["27146037"], "sent_idx": 5}, {"id": 8, "start": 75, "end": 81, "ref_url": "Stroke", "ref_ids": ["625404"], "sent_idx": 5}]} +{"id": "18270938", "title": "Gowerton RFC", "sentences": ["Gowerton Rugby Football Club is a Welsh rugby union club based in Gowerton near Swansea, Wales; officially founded in 1884.", "Gowerton RFC is a member of the Welsh Rugby Union and is a 
feeder club for the Ospreys.", "Gowerton RFC presently run a Senior XV, Seconds XV, youth XV and Junior teams from under 7's to under 16's."], "mentions": [{"id": 0, "start": 32, "end": 37, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 1}, {"id": 1, "start": 40, "end": 51, "ref_url": "Rugby_union", "ref_ids": ["25405"], "sent_idx": 0}, {"id": 2, "start": 0, "end": 8, "ref_url": "Gowerton", "ref_ids": ["2592445"], "sent_idx": 2}, {"id": 3, "start": 80, "end": 87, "ref_url": "Swansea", "ref_ids": ["46394"], "sent_idx": 0}, {"id": 4, "start": 89, "end": 94, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 5, "start": 32, "end": 49, "ref_url": "Welsh_Rugby_Union", "ref_ids": ["862966"], "sent_idx": 1}, {"id": 6, "start": 79, "end": 86, "ref_url": "Ospreys_(rugby_union)", "ref_ids": ["844800"], "sent_idx": 1}]} +{"id": "18270950", "title": "Home equity protection", "sentences": ["Home price protection is an agreement that pays the homeowner if a particular home price index declines in value over a period of time after the protection is purchased.", "The protection is for a new or existing homeowner that wishes to protect the value of their home from future market declines."], "mentions": []} +{"id": "18270968", "title": "Danthala Venkata Meher Baba", "sentences": ["Danthala Venkata Meher Baba is a former Indian cricketer; he represented Andhra Pradesh in the Ranji Throphy Cricket Championship from 1971 to 1989, he also represented the Hyderabad Ranji Trophy Team in 1978;", "He was a left-hand batsman and left-arm spinner with 16-year first-class career spanning from 1970 to 1987.", "He made 2649 runs, with a highest of 134 not out against Goa, He also scored 20 half centuries and took 173 wickets with his slow left arm orthodox bowling with five for 32 being the best bowling figures, he had a total of five five-wicket hauls."], "mentions": [{"id": 0, "start": 95, "end": 129, "ref_url": "Ranji_Throphy_Cricket_Championship", "ref_ids": null, 
"sent_idx": 0}, {"id": 1, "start": 173, "end": 182, "ref_url": "Hyderabad,_Andhra_Pradesh", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 57, "end": 60, "ref_url": "Goa", "ref_ids": ["40010153"], "sent_idx": 2}]} +{"id": "18270973", "title": "2004 League of Ireland Cup", "sentences": ["The League of Ireland Cup 2004 was the 31st staging of the League of Ireland Cup, which was won by Longford Town, the club's first victory in the competition.", "The 2004 League Cup kicked off in May.", "It featured two teams representing the Kerry and Mayo Leagues plus the 10 teams from the Premier Division and the 12 from the First Division.", "There was 24 teams drawn into eight groups of three.", "Each team played the other two in their group.", "The winner of each group progressed to the quarter-finals."], "mentions": [{"id": 0, "start": 4, "end": 25, "ref_url": "League_of_Ireland_Cup", "ref_ids": ["1525616"], "sent_idx": 0}, {"id": 1, "start": 99, "end": 112, "ref_url": "Longford_Town_F.C.", "ref_ids": ["827890"], "sent_idx": 0}, {"id": 2, "start": 39, "end": 44, "ref_url": "Kerry_League_F.C.", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 49, "end": 60, "ref_url": "Mayo_League_F.C.", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 89, "end": 105, "ref_url": "2004_League_of_Ireland_Premier_Division", "ref_ids": ["15044966"], "sent_idx": 2}, {"id": 5, "start": 126, "end": 140, "ref_url": "2004_League_of_Ireland_First_Division", "ref_ids": ["51089039"], "sent_idx": 2}]} +{"id": "18270976", "title": "Michael Probst", "sentences": ["Michael Probst (born 11 November 1962) is a German former footballer who played as a goalkeeper.", "He played two Bundesliga games for Bayern Munich in the 1995–96 season, and was on the bench for the 1996 UEFA Cup Final."], "mentions": [{"id": 0, "start": 44, "end": 50, "ref_url": "Germany", "ref_ids": ["11867"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 68, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, 
{"id": 2, "start": 85, "end": 95, "ref_url": "Goalkeeper_(association_football)", "ref_ids": ["5348091"], "sent_idx": 0}, {"id": 3, "start": 14, "end": 24, "ref_url": "Bundesliga", "ref_ids": ["686142"], "sent_idx": 1}, {"id": 4, "start": 35, "end": 48, "ref_url": "FC_Bayern_Munich", "ref_ids": ["172326"], "sent_idx": 1}, {"id": 5, "start": 101, "end": 120, "ref_url": "1996_UEFA_Cup_Final", "ref_ids": ["11710538"], "sent_idx": 1}]} +{"id": "18270978", "title": "Tandzaver", "sentences": ["Tandzaver () is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 212 in 2010, down from 263 at the 2001 census."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 68, "end": 83, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18270987", "title": "Tandzut, Armavir", "sentences": ["Tandzut ( formerly, \"Armutlu\") is a village in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 51, "end": 67, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 71, "end": 78, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270989", "title": "Tandzut, Tavush", "sentences": ["Tandzut () is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 31, "end": 46, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 50, "end": 57, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270992", "title": "Geghasar", "sentences": ["Geghasar (; formerly, Tapanli and Tapan) is a town in the Lori Province 
of Armenia."], "mentions": [{"id": 0, "start": 58, "end": 71, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 75, "end": 82, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18270997", "title": "Taronik", "sentences": ["Taronik (), formerly known as Zeyva Turkakan (\"Turkish Zeyva\") and Verin Zeyva, is a village in the Armavir Province of Armenia.", "The Iron Age settlement of Metsamor site is located only 1 km southwest of Taronik."], "mentions": [{"id": 0, "start": 100, "end": 116, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 120, "end": 127, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 27, "end": 40, "ref_url": "Metsamor_site", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271002", "title": "Andrew Miller (cricketer, born 1963)", "sentences": ["Andrew John Trevor Miller, born at Chesham, Buckinghamshire on 30 May 1963, was a cricketer who played first-class cricket for Oxford University and Middlesex in the mid-1980s.", "Miller was a left-handed opening batsman and an occasional bowler.", "Having made his debut in one match for Oxford University in 1982, he was highly successful for both the university and his county in the 1983 season, making in total 1,002 runs in just 15 matches at a batting average of more than 43.", "He won a Blue in that year and in the two following seasons, when he played less frequently.", "His only full season of county cricket was 1986, when he made 963 runs, but he left the first-class game at the end of the following season.", "He later played Minor Counties cricket for Hertfordshire."], "mentions": [{"id": 0, "start": 35, "end": 42, "ref_url": "Chesham", "ref_ids": ["240577"], "sent_idx": 0}, {"id": 1, "start": 44, "end": 59, "ref_url": "Buckinghamshire", "ref_ids": ["55688"], "sent_idx": 0}, {"id": 2, "start": 82, "end": 91, "ref_url": "Cricketer", "ref_ids": null, "sent_idx": 0}, {"id": 3, 
"start": 103, "end": 122, "ref_url": "First-class_cricket", "ref_ids": ["11040"], "sent_idx": 0}, {"id": 4, "start": 39, "end": 56, "ref_url": "Oxford_University_Cricket_Club", "ref_ids": ["1622255"], "sent_idx": 2}, {"id": 5, "start": 149, "end": 158, "ref_url": "Middlesex_County_Cricket_Club", "ref_ids": ["822103"], "sent_idx": 0}, {"id": 6, "start": 9, "end": 13, "ref_url": "University_sporting_blue", "ref_ids": null, "sent_idx": 3}, {"id": 7, "start": 16, "end": 30, "ref_url": "Minor_Counties", "ref_ids": null, "sent_idx": 5}, {"id": 8, "start": 43, "end": 56, "ref_url": "Hertfordshire_County_Cricket_Club", "ref_ids": ["3711946"], "sent_idx": 5}]} +{"id": "18271004", "title": "Taratumb", "sentences": ["Taratumb (, also Romanized as Tarratumb) is a town in the Vayots Dzor Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 58, "end": 78, "ref_url": "Vayots_Dzor", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 82, "end": 89, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271009", "title": "Tarsachay", "sentences": ["Tarsachay is an abandoned village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 41, "end": 56, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 60, "end": 67, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271013", "title": "Tashtun", "sentences": ["Tashtun () is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 134 in 2010, down from 170 at the 2001 census."], "mentions": [{"id": 0, "start": 28, "end": 43, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 66, "end": 81, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, 
"ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271016", "title": "Tasik", "sentences": ["Tasik (, also Romanized as T’asik; formerly, Tazagyukh, T’azagyugh, Gushch’i, T’azak’end, Kushch’i, Kushch’i-T’azak’end, and Ghushch’i) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 291 in 2010, up from 274 at the 2001 census."], "mentions": [{"id": 0, "start": 14, "end": 22, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 153, "end": 168, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 191, "end": 206, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271019", "title": "Tatev (village)", "sentences": ["Tatev ( or Տաթեւ \"Tat′ev\") is a village and rural community in the Syunik Province of Armenia.", "It is home to the Tatev monastery.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 892 in 2010, down from 1,042 at the 2001 census.", "The village hosts a station of the Wings of Tatev; the world's longest non-stop double track aerial tramway."], "mentions": [{"id": 0, "start": 44, "end": 59, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 67, "end": 82, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 44, "end": 49, "ref_url": "Tatev", "ref_ids": 
null, "sent_idx": 3}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 35, "end": 49, "ref_url": "Wings_of_Tatev", "ref_ids": ["30240805"], "sent_idx": 3}]} +{"id": "18271023", "title": "Tavshut", "sentences": ["Tavshut (, also Romanized as T’avshut; formerly, Tazakend and Tazagyukh) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 93, "end": 108, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 112, "end": 119, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271025", "title": "1989 FIFA U-16 World Championship squads", "sentences": [], "mentions": []} +{"id": "18271026", "title": "MJB", "sentences": ["MJB may refer to:"], "mentions": []} +{"id": "18271027", "title": "I'll Be Yours", "sentences": ["I'll Be Yours is a 1947 American musical comedy film directed by William A. 
Seiter and starring Deanna Durbin.", "Based on the play\" A jó tündér\" by Ferenc Molnár, the film is about a small- town girl who tells a fib to a wealthy businessman, which then creates complications.", "The play had earlier been adapted for the 1935 film\" The Good Fairy\" by Preston Sturges."], "mentions": [{"id": 0, "start": 33, "end": 52, "ref_url": "Musical_film", "ref_ids": ["19029"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 82, "ref_url": "William_A._Seiter", "ref_ids": ["11735439"], "sent_idx": 0}, {"id": 2, "start": 96, "end": 109, "ref_url": "Deanna_Durbin", "ref_ids": ["537229"], "sent_idx": 0}, {"id": 3, "start": 19, "end": 30, "ref_url": "The_Good_Fairy_(play)", "ref_ids": ["18016862"], "sent_idx": 1}, {"id": 4, "start": 35, "end": 48, "ref_url": "Ferenc_Molnár", "ref_ids": ["2111110"], "sent_idx": 1}, {"id": 5, "start": 53, "end": 67, "ref_url": "The_Good_Fairy_(film)", "ref_ids": ["18013931"], "sent_idx": 2}, {"id": 6, "start": 72, "end": 87, "ref_url": "Preston_Sturges", "ref_ids": ["401133"], "sent_idx": 2}]} +{"id": "18271030", "title": "Tazagyukh, Ararat", "sentences": ["Tazagyukh (also, Trazagyukh) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 46, "end": 61, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 65, "end": 72, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271033", "title": "Teghenik", "sentences": ["Teghenik (, formerly, Tkhit) is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 49, "end": 64, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271035", "title": "Teghut, Tavush", "sentences": ["Teghut (, also Romanized as T’eghut), is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, 
"sent_idx": 0}, {"id": 1, "start": 58, "end": 73, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 84, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271037", "title": "Jim Gatchell Memorial Museum", "sentences": ["The Jim Gatchell Memorial Museum is an American West museum in Buffalo, Wyoming, housed in a 1909 Carnegie Library building."], "mentions": [{"id": 0, "start": 39, "end": 52, "ref_url": "American_West", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 63, "end": 79, "ref_url": "Buffalo,_Wyoming", "ref_ids": ["140118"], "sent_idx": 0}, {"id": 2, "start": 98, "end": 114, "ref_url": "Carnegie_Library", "ref_ids": null, "sent_idx": 0}]} +{"id": "18271038", "title": "Tegher, Armenia", "sentences": ["Tegher (, also Romanized as Tekher, Dgyr or Dgher; meaning \"medicine\") is a village in the Aragatsotn Province of Armenia on the southern slope of Mount Aragats.", "It earns its name from the large assortment of healing herbs that are found in the surrounding vicinity.", "The town contains the Monastery of Tegher built in 1213.", "The ruins of the 9th century village of Tegher (Old Tegher) sit a short distance walk from the monastery.", "Numerous foundations may be seen, along with the remains of a Tukh Manuk funerary chapel of the 5th century.", "Nearby is also a medieval to 19th century cemetery with some mausoleums and khachkars.", "Nearby is also a large radio telescope as well as an unfinished solar power plant, both from the Soviet era."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 91, "end": 110, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 114, "end": 121, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 147, "end": 160, "ref_url": "Mount_Aragats", "ref_ids": ["1015981"], "sent_idx": 0}, {"id": 4, "start": 22, "end": 41, "ref_url": 
"Tegher_Monastery", "ref_ids": ["18062809"], "sent_idx": 2}, {"id": 5, "start": 61, "end": 70, "ref_url": "Mausoleum", "ref_ids": ["231168"], "sent_idx": 5}, {"id": 6, "start": 76, "end": 84, "ref_url": "Khachkar", "ref_ids": ["1100953"], "sent_idx": 5}]} +{"id": "18271041", "title": "Teghut, Lori", "sentences": ["Teghut (), is a village in the Lori Province of Armenia.", "Teghut is located 70 kilometres northeastward from Vanadzor.", "The village is at the right side of Shnogh river, which is the stream of Debed river, 18 kilometers from Alaverdi in a timbered area.", "Teghut was established by residents of Shnogh village at the beginning of the 20th century.", "Some of villagers' ancestors are Lori region aboriginals, others came from Artsakh, Javakhk, Syunik, Sevan in 17th- 18th centuries.", "The village is named Teghut in connection with an old village in the central part of present-day Teghut which was plundered by Lezgins.", "The residents of the old village were captured and murdered.", "Other villages located near the Shnogh river had the same fortune.", "(Dukanadzor, Dzorigegh, Giligegh, Manstev, Akhetk)."], "mentions": [{"id": 0, "start": 31, "end": 44, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 55, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271042", "title": "Charde Houston", "sentences": ["Charde Lakishia Houston (born April 10, 1986) is a professional basketball player in the WNBA, last played for the New York Liberty.", "Born in Oceanside, California, Houston played high school basketball at San Diego High from 2000–2004, where she set the California state scoring record of 3,837 points.", "After becoming the first in her family to graduate high school in 2004, Houston signed a letter of intent to play college basketball at UConn.", "Houston graduated in 2008 with a degree in sociology.", "Following her collegiate career, she was selected in the 3rd round (30th 
overall) of the 2008 WNBA Draft by the Minnesota Lynx.", "Houston was signed by the WNBA's New York Liberty in June 2014 after spending the previous season with the Phoenix Mercury."], "mentions": [{"id": 0, "start": 26, "end": 30, "ref_url": "WNBA", "ref_ids": null, "sent_idx": 5}, {"id": 1, "start": 33, "end": 49, "ref_url": "New_York_Liberty", "ref_ids": ["180637"], "sent_idx": 5}, {"id": 2, "start": 8, "end": 29, "ref_url": "Oceanside,_California", "ref_ids": ["108053"], "sent_idx": 1}, {"id": 3, "start": 72, "end": 86, "ref_url": "San_Diego_High", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 136, "end": 141, "ref_url": "University_of_Connecticut", "ref_ids": ["239846"], "sent_idx": 2}, {"id": 5, "start": 89, "end": 104, "ref_url": "2008_WNBA_Draft", "ref_ids": null, "sent_idx": 4}, {"id": 6, "start": 112, "end": 126, "ref_url": "Minnesota_Lynx", "ref_ids": ["237106"], "sent_idx": 4}, {"id": 7, "start": 107, "end": 122, "ref_url": "Phoenix_Mercury", "ref_ids": ["85694"], "sent_idx": 5}]} +{"id": "18271045", "title": "Tolors", "sentences": ["Tolors (; also, Tolor) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 450 in 2010, up from 406 at the 2001 census."], "mentions": [{"id": 0, "start": 40, "end": 55, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 78, "end": 93, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271047", "title": "Torfavan", "sentences": ["Torfavan (; formerly Kamishlu) is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 48, "end": 68, "ref_url": "Gegharkunik_Province", "ref_ids": 
["470502"], "sent_idx": 0}, {"id": 1, "start": 72, "end": 79, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271048", "title": "Torosgyugh", "sentences": ["Torosgyugh (, also Romanized as T’orosgyugh and Torosgyukh) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 80, "end": 95, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 99, "end": 106, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271050", "title": "Torunik", "sentences": ["Torunik (, also Romanized as Torunik’ and Toruniq; formerly, Shikhlar, Kizlshafak, Kizilshafak, Kizilshafag) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 138 in 2010, down from 181 at the 2001 census."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 126, "end": 141, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 164, "end": 179, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271054", "title": "Tavush (village)", "sentences": ["Tavush (); formerly known as \"Tovuz\", is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 58, "end": 73, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 77, "end": 84, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271056", "title": "Tretuk", "sentences": ["Tretuk (, also Romanized as Tretuk’, 
previously \"Inakdagh\" and \"Inakdag\", in 1978-88 known as \"Yenikend\") is a small village in the Gegharkunik Province of Armenia, located near the border with Azerbaijan."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 132, "end": 152, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 156, "end": 163, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 194, "end": 204, "ref_url": "Azerbaijan", "ref_ids": ["746"], "sent_idx": 0}]} +{"id": "18271059", "title": "Tsaghkaber", "sentences": ["Tsaghkaber (, also romanized as Tsakhkaber and Tzaghkaber; formerly, Tsakhkashen and Avdibek) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 19, "end": 28, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 111, "end": 124, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 128, "end": 135, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271062", "title": "Tsaghkahovit", "sentences": ["Tsaghkahovit (), known as Haji Khalil until 1946, is a village in the Aragatsotn Province of Armenia.", "There are many Late Bronze", "Age remains which are in the process of being excavated."], "mentions": [{"id": 0, "start": 70, "end": 89, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 93, "end": 100, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271064", "title": "Tsaghkalanj", "sentences": ["Tsaghkalanj (, also Romanized as Tsakhkalandzh; until 1978, Agdzhakala and Aghjaghala) is a village in the Armavir Province of Armenia.", "The village's church, dedicated to Saint George (Surb Gevorg), dates to the 1870s.", "It rests on an earlier circular foundation.", "Nearby, is the whitewashed Amenaprkich Church of an unknown date.", "There is also a 
Neolithic-Chalcolithic tell in the village and Bronze Age burial mounds, while the ruins of Amenaprkich, a medieval settlement are nearby."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 107, "end": 123, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 127, "end": 134, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 35, "end": 47, "ref_url": "Saint_George", "ref_ids": ["29010"], "sent_idx": 1}, {"id": 4, "start": 16, "end": 25, "ref_url": "Neolithic", "ref_ids": ["21189"], "sent_idx": 4}, {"id": 5, "start": 26, "end": 38, "ref_url": "Chalcolithic", "ref_ids": ["7446"], "sent_idx": 4}, {"id": 6, "start": 63, "end": 73, "ref_url": "Bronze_Age", "ref_ids": ["4620"], "sent_idx": 4}]} +{"id": "18271074", "title": "Nerkin Tsaghkavan", "sentences": ["Nerkin Tsaghkavan (); formerly known as \"Melikgyugh\" or \"Melikgegh\", is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 89, "end": 104, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 108, "end": 115, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271078", "title": "Verin Tsaghkavan", "sentences": ["Verin Tsaghkavan (); formerly known as \"Veligegh\" or \"Veli\", is a village in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 81, "end": 96, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 100, "end": 107, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271098", "title": "Tsaghkunk, Armavir", "sentences": ["Tsaghkunk (, also Romanized as Tsaghkunk’, Tsaghkunq, and Tsakhkunk; until 1946, Abdurahman; formerly, Verkhnyaya Aylanlu and Verin Aylanlu) is a town in the Armavir Province of Armenia.", "The town's church dates from the 19th century."], "mentions": [{"id": 0, "start": 18, "end": 26, 
"ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 158, "end": 174, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 178, "end": 185, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271100", "title": "Tsaghkunk, Gegharkunik", "sentences": ["Tsaghkunk (, also Romanized as Tsaghkunk’, and Tsakhkunk, Tsaghkunq, and Tzaghkunk) is a village in the Gegharkunik Province of Armenia.", "It is situated at the right bank of the Hrazdan River, 40 km north-west from the capital city of the Kotayk Province, Hrazdan, 60 km from Yerevan and 7 km north-west of Sevan, Armenia.", "It is bordered by Ddmashen to the west, Geghamavan to the south, Tsovagyugh to the south-east and Dilijan National Reserve to the north-west.", "Tsaghkunk is located at the foot of Pambak Mountain's S. Hovhannes, Abazants, Khachidosh Rocks and Tapidosh Hill."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 104, "end": 124, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 176, "end": 183, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 40, "end": 53, "ref_url": "Hrazdan_River", "ref_ids": ["2090936"], "sent_idx": 1}, {"id": 4, "start": 101, "end": 116, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 1}, {"id": 5, "start": 40, "end": 47, "ref_url": "Hrazdan", "ref_ids": ["1368894"], "sent_idx": 1}, {"id": 6, "start": 138, "end": 145, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 1}, {"id": 7, "start": 169, "end": 183, "ref_url": "Sevan,_Armenia", "ref_ids": ["1014141"], "sent_idx": 1}, {"id": 8, "start": 18, "end": 26, "ref_url": "Ddmashen", "ref_ids": ["18256380"], "sent_idx": 2}, {"id": 9, "start": 40, "end": 50, "ref_url": "Geghamavan", "ref_ids": ["18256615"], "sent_idx": 2}, {"id": 10, "start": 65, "end": 75, "ref_url": "Tsovagyugh", 
"ref_ids": ["16051743"], "sent_idx": 2}, {"id": 11, "start": 98, "end": 122, "ref_url": "Dilijan_National_Park", "ref_ids": ["23956831"], "sent_idx": 2}, {"id": 12, "start": 36, "end": 51, "ref_url": "Pambak_Mountain", "ref_ids": null, "sent_idx": 3}]} +{"id": "18271103", "title": "Rivergate Tower", "sentences": ["The Rivergate Tower, also known as the Sykes building or the Beercan building, is a skyscraper in Tampa, Florida.", "With 31 floors, it is the sixth tallest building in Tampa.", "Rivergate Tower's principal tenant is Sykes Enterprises, a publicly traded company that operates technical help and customer support centers internationally."], "mentions": [{"id": 0, "start": 52, "end": 57, "ref_url": "Tampa", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 105, "end": 112, "ref_url": "Florida", "ref_ids": ["18933066"], "sent_idx": 0}, {"id": 2, "start": 38, "end": 55, "ref_url": "Sykes_Enterprises", "ref_ids": ["11819061"], "sent_idx": 2}]} +{"id": "18271105", "title": "Tsaghkut", "sentences": ["Tsaghkut (; formerly, Gyullija), is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 53, "end": 68, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 72, "end": 79, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271111", "title": "Tsaghkasar", "sentences": ["Tsaghkasar (, also Romanized as Tsakhkasar; formerly, Verkhniy Pirtikan, Verin Pirtikan, and Pirtikyan) is a town in the Aragatsotn Province of Armenia.", "The town has a shrine dedicated to Tadevos the Apostle and ruins of a cycoplean fort."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 121, "end": 140, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 144, "end": 151, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 35, "end": 54, "ref_url": "Jude_the_Apostle", "ref_ids": 
["842854"], "sent_idx": 1}, {"id": 4, "start": 70, "end": 79, "ref_url": "Cyclopean_architecture", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271112", "title": "Tsaghkashat", "sentences": ["Tsaghkashat (, also romanized as Tsakhkashat; formerly, Khachidur) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 29, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 84, "end": 97, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 101, "end": 108, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271114", "title": "Tsaghkashen, Aragatsotn", "sentences": ["Tsaghkashen (, also Romanized as Tsakhkashen; until 1950, Takyarlu and Takiarli) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 98, "end": 117, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 121, "end": 128, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271118", "title": "Tsaghkashen, Gegharkunik", "sentences": ["Tsaghkashen (; also Romanized as Tsakhkashen and Tsaghkachen; formerly Kyarimkend) is a village in the Gegharkunik Province of Armenia, founded in 1859.", "The village church dedicated to S. 
Hovhannes dates to the 9th-10th century."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 103, "end": 123, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 127, "end": 134, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271121", "title": "Tsakkar", "sentences": ["Tsakkar (, also Romanized as Tsakk’ar, Tsakqar, and Tzakkar; formerly Dalikdash) is a village in the Gegharkunik Province of Armenia.", "Tsakkar is where Armenians live and enjoy their lives and it is close to the west side of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 101, "end": 121, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 17, "end": 24, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}]} +{"id": "18271123", "title": "Tsamakaberd", "sentences": ["Tsamakaberd, is a residential neighborhood in the town of Sevan of Gegharkunik Province, Armenia.", "It is located to the north east of the town centre.", "It is home to a cyclopean fortress, and the historic district of \"Mashtotsner\"."], "mentions": [{"id": 0, "start": 58, "end": 63, "ref_url": "Sevan,_Armenia", "ref_ids": ["1014141"], "sent_idx": 0}, {"id": 1, "start": 67, "end": 87, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 89, "end": 96, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 16, "end": 25, "ref_url": "Cyclopean_architecture", "ref_ids": null, "sent_idx": 2}]} +{"id": "18271124", "title": "Tsamakasar", "sentences": ["Tsamakasar (, also Romanized as Ts’amak’asar and Tsamaqasar; formerly, Susuz) is a town in the Aragatsotn Province of Armenia.", "The town is the site of Bronze Age burials being excavated."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": 
"Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 114, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 118, "end": 125, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271127", "title": "Tsapatagh", "sentences": ["Tsapatagh (, also Romanized as Tsap’at’agh; formerly, Babajan, Babadzhan, Kzylkend, Kzylk’end, and Kizilkend), is a small resort-village in the Gegharkunik Province of Armenia, on the north-eastern shore of Lake Sevan.", "Since 2017 it is part of Shoghakat municipality, which includes several other nearby villages too.", "Postcode: 1611."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 144, "end": 164, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 168, "end": 175, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 207, "end": 217, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 0}, {"id": 4, "start": 25, "end": 34, "ref_url": "Shoghakat,_Armenia", "ref_ids": ["18270682"], "sent_idx": 1}]} +{"id": "18271131", "title": "Tsater", "sentences": ["Tsater (, also romanized as Tsat’er) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 15, "end": 24, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 67, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 78, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271135", "title": "Tsav, Armenia", "sentences": ["Tsav (, also Romanized as Tzav) is a village and rural community (municipality) in the Syunik Province of Armenia.", "It has fishfarm, school and clinic.", "The community of Tsav includes the villages of Tsav and Shishkert.", "The Statistical Committee of Armenia reported the community's population as 351 in 2010, down from 499 
at the 2001 census.", "The village's population was 74 at the 2011 census, down from 157 at the 2001 census."], "mentions": [{"id": 0, "start": 13, "end": 21, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 49, "end": 64, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 87, "end": 102, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 3, "start": 29, "end": 36, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 3}, {"id": 4, "start": 56, "end": 65, "ref_url": "Shishkert", "ref_ids": ["18270644"], "sent_idx": 2}, {"id": 5, "start": 4, "end": 36, "ref_url": "Statistical_Committee_of_Armenia", "ref_ids": ["24984172"], "sent_idx": 3}]} +{"id": "18271138", "title": "Tsghuk", "sentences": ["Tsghuk (, formerly known as Borisovka), is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 451 in 2010, up from 427 at the 2001 census."], "mentions": [{"id": 0, "start": 57, "end": 72, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 95, "end": 110, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271141", "title": "Tsghuni", "sentences": ["Tsghuni (, formerly \"Murkhuz\") is a village in the rural community of Nzhdeh in the Syunik Province of Armenia.", "The population was 71 at the 2001 census."], "mentions": [{"id": 0, "start": 51, "end": 66, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 70, "end": 76, "ref_url": "Nzhdeh,_Armenia", "ref_ids": ["18270718"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 99, "ref_url": 
"Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 3, "start": 103, "end": 110, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271142", "title": "Nicky Anosike", "sentences": ["Nkolika \"Nicky\" Nonyelum Anosike (born February 27, 1986) is an American professional basketball player in the WNBA, most recently with the Los Angeles Sparks."], "mentions": [{"id": 0, "start": 111, "end": 115, "ref_url": "Women's_National_Basketball_Association", "ref_ids": ["57721"], "sent_idx": 0}, {"id": 1, "start": 140, "end": 158, "ref_url": "Los_Angeles_Sparks", "ref_ids": ["179829"], "sent_idx": 0}]} +{"id": "18271144", "title": "Tsiatsan", "sentences": ["Tsiatsan ( – meaning \"rainbow\"; until 1978, \"Grampa\") is a village in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 74, "end": 90, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 94, "end": 101, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271146", "title": "Tsilkar", "sentences": ["Tsilkar () is a village in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 31, "end": 50, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 61, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271148", "title": "Tsoghamarg", "sentences": ["Tsoghamarg (, also Romanized as Ts’oghamarg and Tsokhamarg; formerly, Chizikhlar) is a village in the Shirak Province of Armenia.", "It was founded by migrants from Sebastea."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 102, "end": 117, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 121, "end": 128, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 32, "end": 40, "ref_url": "Sivas", "ref_ids": ["1376207"], "sent_idx": 1}]} 
+{"id": "18271150", "title": "Tsovazard", "sentences": ["Tsovazard (; ; until 1978 Mukhan and Gadzhi Mukhan) is a village in the Gegharkunik Province of Armenia.", "There are Bronze Age burial sites and a church rebuilt in the 19th century in the vicinity."], "mentions": [{"id": 0, "start": 72, "end": 92, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 96, "end": 103, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271152", "title": "Tsovinar, Armenia", "sentences": ["Tsovinar (; formerly, Kolakran) is a village in the Gegharkunik Province of Armenia, located south or Lake Sevan.", "The name of the village is derived from Tsovinar the Armenian goddess of water, sea, and rain.", "East of the village is the Urartian site of Teyseba (Odzaberd) founded by Rusa I around 735-713 BC, with a lengthy but worn cuneiform inscription nearby recounting his conquest of twenty-three nations.", "It is considered to be the best-preserved Urartian fortification in the Sevan Basin.", "Below the hill of Teyseba is the point where the Arpa-Sevan Tunnel empties into Lake Sevan.", "On the hill south of the road that is adjacent to the fortress, are boulders that form walls that are especially visible along the southern boundary.", "Just outside the village headed southeast are numerous stones scattered across the side of one of the hills.", "It is all that remains of an old village destroyed during by war long ago.", "Some human bones and old coins have been found in this area by villagers from Tsovinar, attesting to the story of what had once taken place there.", "In close proximity are two small single-nave churches.", "The church of Topi Galugh sits on a hill southeast between Tsovinar and Artsvanist, while the church of Surb Sarkis (1100-1200) sits south of the village of Tsovinar on a promontory overlooking a small gorge.", "Next to Surb Sarkis is a small cemetery, and just below the church in the gorge is a 
small spring and cave.", "Further south are the remains of the ancient village of Karmrashen, which contains the church of Hnevank, the monument of Yot Verk Matur, numerous khachkars, and some tombs."], "mentions": [{"id": 0, "start": 52, "end": 72, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 60, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 80, "end": 90, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 4}, {"id": 3, "start": 59, "end": 67, "ref_url": "Tsovinar_(goddess)", "ref_ids": ["9928724"], "sent_idx": 10}, {"id": 4, "start": 53, "end": 69, "ref_url": "Armenian_mythology", "ref_ids": ["2533249"], "sent_idx": 1}, {"id": 5, "start": 42, "end": 50, "ref_url": "Urartian", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 18, "end": 25, "ref_url": "Teyseba", "ref_ids": null, "sent_idx": 4}, {"id": 7, "start": 53, "end": 61, "ref_url": "Odzaberd", "ref_ids": ["24047362"], "sent_idx": 2}, {"id": 8, "start": 74, "end": 80, "ref_url": "Rusa_I", "ref_ids": ["4715382"], "sent_idx": 2}, {"id": 9, "start": 49, "end": 66, "ref_url": "Arpa-Sevan_Tunnel", "ref_ids": null, "sent_idx": 4}, {"id": 10, "start": 72, "end": 82, "ref_url": "Artsvanist", "ref_ids": ["16051504"], "sent_idx": 10}, {"id": 11, "start": 8, "end": 19, "ref_url": "Saint_Sargis_Church_of_Tsovinar", "ref_ids": null, "sent_idx": 11}, {"id": 12, "start": 56, "end": 66, "ref_url": "Khrber", "ref_ids": ["24078346"], "sent_idx": 12}, {"id": 13, "start": 97, "end": 104, "ref_url": "Hnevank_of_Karmrashen", "ref_ids": null, "sent_idx": 12}, {"id": 14, "start": 122, "end": 136, "ref_url": "Yot_Verk_Matur", "ref_ids": ["24077004"], "sent_idx": 12}, {"id": 15, "start": 147, "end": 155, "ref_url": "Khachkar", "ref_ids": ["1100953"], "sent_idx": 12}]} +{"id": "18271155", "title": "Ttujur, Gegharkunik", "sentences": ["Ttujur (; meaning \"sour water\", previously also known as Qoturbulaq), is a village in the 
Gegharkunik Province of Armenia.", "It is home to the medieval \"Kotrats Church\" and the nearby ruined settlement of Tsak Kar.", "On May 6, 2010, the \"Monument of Glory and Immortality\" was erected in the village dedicated to the German-Soviet War, where 61 Armenians form Ttujur were killed during World War II.", "The ceremony was conducted within the frames of the 65th anniversary of the Soviet victory over the Nazi Germans.", "The inauguration ceremony was attended by the Russian ambassador to Armenia."], "mentions": [{"id": 0, "start": 90, "end": 110, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 4}, {"id": 2, "start": 100, "end": 117, "ref_url": "German-Soviet_War", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 169, "end": 181, "ref_url": "World_War_II", "ref_ids": ["32927"], "sent_idx": 2}]} +{"id": "18271157", "title": "Ad exchange", "sentences": ["An ad exchange is a technology platform that facilitates the buying and selling of media advertising inventory from multiple ad networks.", "Prices for the inventory are determined through bidding.", "The approach is technology-driven as opposed to the historical approach of negotiating price on media inventory.", "This represents a field beyond ad networks as defined by the Interactive Advertising Bureau (IAB), and by advertising trade publications such as Advertising Age.", "The major ad exchanges include:"], "mentions": [{"id": 0, "start": 89, "end": 110, "ref_url": "Advertising_inventory", "ref_ids": ["46188498"], "sent_idx": 0}, {"id": 1, "start": 31, "end": 42, "ref_url": "Advertising_network", "ref_ids": ["1549666"], "sent_idx": 3}, {"id": 2, "start": 61, "end": 91, "ref_url": "Interactive_Advertising_Bureau", "ref_ids": ["16760695"], "sent_idx": 3}, {"id": 3, "start": 145, "end": 160, "ref_url": "Advertising_Age", "ref_ids": null, "sent_idx": 3}]} +{"id": "18271158", "title": 
"Ttujur, Aragatsotn", "sentences": ["Ttujur (, also Romanized as T’t’ujur and Ttudzhur; until 1950, Imrlu) is a village in the Aragatsotn Province of Armenia.", "The village 's church is dedicated to Saint Harutyun.", "The village also contains a 17th-century shrine called \"Karmir Vank\" (meaning \"red church\")."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 90, "end": 109, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 113, "end": 120, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271160", "title": "Tufashen", "sentences": ["Tufashen (, also Romanized as Toufashen; formerly, Armutlu) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 80, "end": 95, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 99, "end": 106, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271166", "title": "Turdzhan", "sentences": ["Turdzhan is a town in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 26, "end": 41, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 45, "end": 52, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271170", "title": "Lusakunk", "sentences": ["Lusakunk (, also Romanized as Lusakunq; formerly, Tuskyulu) is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 17, "end": 26, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 77, "end": 97, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 101, "end": 108, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271173", "title": "Ughedzor", "sentences": ["Ughedzor (); formerly known as \"Kochbek\", is a 
village in the Vayots Dzor Province of Armenia.", "It belongs to the community of Saravan."], "mentions": [{"id": 0, "start": 62, "end": 82, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 1, "start": 86, "end": 93, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 31, "end": 38, "ref_url": "Saravan,_Armenia", "ref_ids": ["18270433"], "sent_idx": 1}]} +{"id": "18271176", "title": "Ujan", "sentences": ["Ujan (, also Romanized as Udzhan and Udjan) is a town in the Aragatsotn Province of Armenia.", "Ujan is home to the first ever statue of General Andranik erected in Armenia.", "It was secretly erected at night on 4 June 1967."], "mentions": [{"id": 0, "start": 13, "end": 21, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 61, "end": 80, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 49, "end": 57, "ref_url": "Andranik", "ref_ids": ["4161993"], "sent_idx": 1}]} +{"id": "18271179", "title": "Urasar", "sentences": ["Urasar () is a town in Armenia's Lori Province."], "mentions": [{"id": 0, "start": 23, "end": 30, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 1, "start": 33, "end": 46, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}]} +{"id": "18271182", "title": "Urut", "sentences": ["Urut (, also romanized as Urrut) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 13, "end": 22, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 63, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 74, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271184", "title": "Urtsalanj", "sentences": ["Urtsalanj () is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 
30, "end": 45, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 49, "end": 56, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271188", "title": "Ushi, Armenia", "sentences": ["Ushi () is a village in the Aragatsotn Province of Armenia.", "It contains a 10th-century chapel, and approximately 1 km outside the village are the ruins of Saint Sargis Monastery of the 7th to 13th centuries.", "Under the Chapel of Saint Sargis (the only structure still standing), is the grave and re-interred remains of the aforementioned saint.", "On top of the hill adjacent to the monastery complex are the collapsed remains of an Iron Age fortress."], "mentions": [{"id": 0, "start": 28, "end": 47, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 51, "end": 58, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 95, "end": 117, "ref_url": "Saint_Sargis_Monastery_of_Ushi", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 85, "end": 102, "ref_url": "Ushiberd", "ref_ids": ["24400157"], "sent_idx": 3}]} +{"id": "18271192", "title": "Uyts", "sentences": ["Uyts (, also Romanized as Uz and Uits) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 586 in 2010, compared to 453 at the 2001 census.", "Uyts lies roughly 3.5 kilometers away from the nearby city of Sisian.", "Many Uyts residents are farmers or sheepherders, with agriculture playing an important role in village life.", "Uyts has a small school, with roughly 70 students enrolled at any given time.", "Many teachers commute from neighboring Sisian to work in the village."], "mentions": [{"id": 0, "start": 13, "end": 21, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 56, "end": 71, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, 
"start": 94, "end": 109, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 39, "end": 45, "ref_url": "Sisian", "ref_ids": ["7108989"], "sent_idx": 5}]} +{"id": "18271195", "title": "Miss Tierra República Dominicana 2007", "sentences": ["The Miss Tierra República Dominicana 2007 pageant was held on November 1, 2007.", "This year, 36 candidates competed for the national crown.", "The winner represented the Dominican Republic at the Miss Earth 2007, which was held in Manila."], "mentions": [{"id": 0, "start": 53, "end": 68, "ref_url": "Miss_Earth_2007", "ref_ids": ["10151579"], "sent_idx": 2}, {"id": 1, "start": 88, "end": 94, "ref_url": "Manila", "ref_ids": ["184334"], "sent_idx": 2}]} +{"id": "18271197", "title": "Ujanis", "sentences": ["Ujanis (, also Romanized as Uzhanis) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 87 in 2010, down from 136 at the 2001 census."], "mentions": [{"id": 0, "start": 15, "end": 23, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 54, "end": 69, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 92, "end": 107, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271200", "title": "Abdullahi Afrah", "sentences": ["Abdullahi Ali Afrah, born in Somalia, was a Canadian immigrant who returned to Mogadishu as a leader in the Union of 
Islamic Courts (UIC).", "He was killed 1 July 2008 in a battle with the Ethiopian troops in Matabaan, Hiiraan, Somalia."], "mentions": [{"id": 0, "start": 86, "end": 93, "ref_url": "Somalia", "ref_ids": ["27358"], "sent_idx": 1}, {"id": 1, "start": 44, "end": 52, "ref_url": "Canadians", "ref_ids": ["19851291"], "sent_idx": 0}, {"id": 2, "start": 79, "end": 88, "ref_url": "Mogadishu", "ref_ids": ["60154"], "sent_idx": 0}, {"id": 3, "start": 108, "end": 131, "ref_url": "Union_of_Islamic_Courts", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 67, "end": 75, "ref_url": "Matabaan", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 77, "end": 84, "ref_url": "Hiiraan", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 86, "end": 93, "ref_url": "Somalia", "ref_ids": ["27358"], "sent_idx": 1}]} +{"id": "18271201", "title": "Vaghatin", "sentences": ["Vaghatin (); formerly, Vagudi and Vagudy) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 756 in 2010, up from 631 at the 2001 census."], "mentions": [{"id": 0, "start": 59, "end": 74, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 97, "end": 112, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271204", "title": "Vaghatur", "sentences": ["Vaghatur (, also Romanized as Vagatur; also known in the past as Bayandur) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 476 in 2010, up from 458 at the 2001 census."], "mentions": [{"id": 0, "start": 17, "end": 25, 
"ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 92, "end": 107, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 130, "end": 145, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271211", "title": "Vahramaberd", "sentences": ["Vahramaberd (, also Romanized as Vagramaberd; formerly, Nerkin Kanlidzha) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 94, "end": 109, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 113, "end": 120, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271216", "title": "Vahagni", "sentences": ["Vahagni (, also romanized as Vaagni; formerly, Shagali) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 25, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 86, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, "start": 90, "end": 97, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271218", "title": "Vahan, Armenia", "sentences": ["Vahan (; formerly, Ordzhonikidze or Orjonikidze) is a village in the Gegharkunik Province of Armenia.", "When the village was founded in 1925, it was named in honor of Soviet politician and politburo member, Sergo Ordzhonikidze.", "Nearby upon a hill towards the eastern end of the village is an early Iron Age cyclopean fort."], "mentions": [{"id": 0, "start": 69, "end": 89, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 93, "end": 100, 
"ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 103, "end": 122, "ref_url": "Sergo_Ordzhonikidze", "ref_ids": ["1429626"], "sent_idx": 1}, {"id": 3, "start": 79, "end": 88, "ref_url": "Cyclopean_masonry", "ref_ids": ["2094245"], "sent_idx": 2}]} +{"id": "18271222", "title": "Vahravar", "sentences": ["Vahravar (, also Romanized as Vagravar) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 47 in 2010, down from 57 at the 2001 census.", "The inhabitants speak the dialect of Kakavaberd."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 57, "end": 72, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 95, "end": 110, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 37, "end": 47, "ref_url": "Kakavaberd_dialect_(Armenian)", "ref_ids": null, "sent_idx": 2}]} +{"id": "18271223", "title": "Francesco Schiavone", "sentences": ["Francesco Schiavone (; born March 3, 1954) is a member of the Camorra, the Neapolitan organized crime syndicate, and the head of the Casalesi clan from Casal di Principe in the province of Caserta.", "He has been dubbed \"Sandokan\" after a popular 1970s television series starring Kabir Bedi because of his thick, dark beard."], "mentions": [{"id": 0, "start": 62, "end": 69, "ref_url": "Camorra", "ref_ids": ["44945"], "sent_idx": 0}, {"id": 1, "start": 75, "end": 85, "ref_url": "Naples", "ref_ids": ["55880"], "sent_idx": 0}, {"id": 2, "start": 133, "end": 146, "ref_url": "Casalesi_clan", "ref_ids": ["19457382"], 
"sent_idx": 0}, {"id": 3, "start": 152, "end": 169, "ref_url": "Casal_di_Principe", "ref_ids": ["6768788"], "sent_idx": 0}, {"id": 4, "start": 177, "end": 196, "ref_url": "Province_of_Caserta", "ref_ids": ["1418102"], "sent_idx": 0}, {"id": 5, "start": 20, "end": 28, "ref_url": "Sandokan", "ref_ids": ["6895746"], "sent_idx": 1}, {"id": 6, "start": 79, "end": 89, "ref_url": "Kabir_Bedi", "ref_ids": ["16930"], "sent_idx": 1}]} +{"id": "18271224", "title": "Vanevan", "sentences": ["Vanevan (; formerly, Shafak) is a small village in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 55, "end": 75, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271226", "title": "Vank, Armenia", "sentences": ["Vank (վանք) is a village in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 32, "end": 47, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 51, "end": 58, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271228", "title": "Varagavan", "sentences": ["Varagavan () is a town in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 49, "end": 56, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271230", "title": "Louis St. Laurent School", "sentences": ["Louis St. 
Laurent Catholic School is a fine arts oriented Junior and Senior High School in the Edmonton Catholic School District, located in south western Edmonton.", "The school averages a student body of approximately 1,000 or more pupils each year, spanning grades 7–12.", "As of 2009, the entire school participates in the International Baccalaureate programme."], "mentions": [{"id": 0, "start": 95, "end": 128, "ref_url": "Edmonton_Catholic_School_District", "ref_ids": ["17253887"], "sent_idx": 0}, {"id": 1, "start": 95, "end": 103, "ref_url": "Edmonton", "ref_ids": ["95405"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 77, "ref_url": "International_Baccalaureate", "ref_ids": ["504546"], "sent_idx": 2}]} +{"id": "18271245", "title": "Archie McPherson (footballer)", "sentences": ["Archie McPherson (born 10 February 1909 in Buchanan, Stirling, Scotland, died 1969) was a Scottish footballer.", "McPherson began his career with Rangers before moving south of the border to join Liverpool, after just one year.", "He was to be a regular in Liverpool's first-team for the next five years, playing as an inside-left, and forming a partnership with Fred Hopkin.", "Described as a skilful player and an accurate passer, he moved on to Sheffield United, where he was employed as a wing-half.", "He featured on the losing side in the 1936 FA Cup Final, before returning to Scotland to spend one year at Falkirk, after which he retired.", "He later managed Alloa Athletic", "He also played cricket, as a batsman, for Clackmannan County in the Scottish Counties Championship."], "mentions": [{"id": 0, "start": 43, "end": 61, "ref_url": "Buchanan,_Stirling", "ref_ids": ["15633470"], "sent_idx": 0}, {"id": 1, "start": 32, "end": 39, "ref_url": "Rangers_F.C.", "ref_ids": ["69713"], "sent_idx": 1}, {"id": 2, "start": 54, "end": 73, "ref_url": "England", "ref_ids": ["9316"], "sent_idx": 1}, {"id": 3, "start": 26, "end": 35, "ref_url": "Liverpool_F.C.", "ref_ids": ["18119"], "sent_idx": 2}, {"id": 4, 
"start": 132, "end": 143, "ref_url": "Fred_Hopkin", "ref_ids": ["6115718"], "sent_idx": 2}, {"id": 5, "start": 69, "end": 85, "ref_url": "Sheffield_United_F.C.", "ref_ids": ["314933"], "sent_idx": 3}, {"id": 6, "start": 38, "end": 55, "ref_url": "1936_FA_Cup_Final", "ref_ids": ["9165866"], "sent_idx": 4}, {"id": 7, "start": 107, "end": 114, "ref_url": "Falkirk_F.C.", "ref_ids": ["560087"], "sent_idx": 4}, {"id": 8, "start": 17, "end": 31, "ref_url": "Alloa_Athletic_F.C.", "ref_ids": ["451237"], "sent_idx": 5}]} +{"id": "18271248", "title": "Cornelius J. Barton", "sentences": ["Cornelius J. Barton (born 1936) is an American metallurgical engineer, businessman and the acting president of Rensselaer Polytechnic Institute from April 1998 until July 1999.", "He received bachelor's, master's and Ph.D. degrees in metallurgical engineering from Rensselaer Polytechnic Institute.", "He is a member of the engineering honor society Sigma Xi and is a brother of the Delta Phi social fraternity.", "After his undergraduate work, from 1958–1961, he was employed as a Metallurgical Process Engineer at Olin's Nuclear Fuel Division, which manufactured nuclear reactor cores for the U. S. Navy.", "In 1961, Barton returned to R.P.I. for graduate work in Engineering.", "Upon earning an M.S. and Ph.D. in metallurgical engineering, he joined US Steel's Research Laboratory in Monroeville, Pennsylvania, as a research manager in the Advanced Applied Research Division.", "While at U.S.S., he authored several research papers in refereed journals, submitted patent applications for novel steel compositions, and completed a project sponsored by the U. S. Air Force for improvements and problem solving in a complex high-strength, high-performance family of steels.", "Barton then joined the Chase Brass & Copper Co. 
in 1969, a subsidiary of Kennecott Copper, as Director of Research and Development.", "From 1975-1980, he was General Manager of Chase Nuclear Inc.", "In 1981, he returned to Chase Headquarters in Shaker Heights, Ohio, to serve as Vice President, Technology, of Kennecott Engineered Systems Co., (KESCO).", "When Standard Oil of Ohio acquired Kennecott, the technologies of several Standard Oil manufacturing subsidiaries joined KESCO Technologies, and Barton's unit was renamed the Standard oil Chemicals and Industrial Products Co. Technology Group.", "During the M&A activities engaged by Kennecott, Dorr Oliver Incorporated was acquired.", "Dorr Oliver's business was twofold: the separation of liquids from solids, for instance by centrifugation, and the application of Fluid-Bed technology to a variety of process engineering, and incineration activities, including coal-fired boilers with sulphur capture.", "Dorr Oliver was an International Corporation with 16 subsidiaries in Europe, Asia, North and South America and Licensees in Japan and South Africa.", "In 1986, Barton was appointed President of Dorr Oliver.", "Shortly thereafter, British Petroleum fully acquired Standard Oil and re-shaped and re-organized the energy activities.", "Dorr Oliver was sold, and an LBO group bought the company; Barton remained as President and CEO of Dorr Oliver.", "The Management of Dorr Oliver subsequently performed an MBO to take the Company and manage it for growth.", "A significant period of world-wide growth occurred, leading ultimately to an offer of purchase for the Company\nIn 1992, Harriman bought a 20 percent stake in Dorr Oliver.", "Dorr Oliver was then sold to a German company in 1995, and Barton retired as President after more than twelve years in that position.", "He has been a member of the Rensselaer Board of Trustees beginning in 1991 until the present (as an active trustee from May 1991 through December 2012, and subsequently a trustee emeritus), and was the interim 
president of RPI from April 1998 until July 1999, until the current President, Shirley Ann Jackson was recruited.", "Barton Hall, a residence hall on the RPI campus that opened in 2000, was named in his honor."], "mentions": [{"id": 0, "start": 38, "end": 46, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 1, "start": 34, "end": 56, "ref_url": "Metallurgy", "ref_ids": ["19722"], "sent_idx": 5}, {"id": 2, "start": 85, "end": 117, "ref_url": "Rensselaer_Polytechnic_Institute", "ref_ids": ["194026"], "sent_idx": 1}, {"id": 3, "start": 48, "end": 56, "ref_url": "Sigma_Xi", "ref_ids": ["1064914"], "sent_idx": 2}, {"id": 4, "start": 81, "end": 90, "ref_url": "Delta_Phi", "ref_ids": ["1673261"], "sent_idx": 2}, {"id": 5, "start": 71, "end": 79, "ref_url": "US_Steel", "ref_ids": null, "sent_idx": 5}, {"id": 6, "start": 105, "end": 130, "ref_url": "Monroeville,_Pennsylvania", "ref_ids": ["131143"], "sent_idx": 5}, {"id": 7, "start": 73, "end": 89, "ref_url": "Kennecott_Copper", "ref_ids": null, "sent_idx": 7}, {"id": 8, "start": 46, "end": 66, "ref_url": "Shaker_Heights,_Ohio", "ref_ids": ["129120"], "sent_idx": 9}, {"id": 9, "start": 91, "end": 105, "ref_url": "Centrifugation", "ref_ids": ["286021"], "sent_idx": 12}, {"id": 10, "start": 289, "end": 308, "ref_url": "Shirley_Ann_Jackson", "ref_ids": ["170464"], "sent_idx": 20}]} +{"id": "18271250", "title": "Vardablur, Lori", "sentences": ["Vardablur () is a village in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 33, "end": 46, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 57, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271258", "title": "The Legend of Chu Liuxiang (2007 TV series)", "sentences": ["The Legend of Chu Liuxiang is a Chinese television series adapted from the three novels in the \"Chu Liuxiang Chuanqi\" segment of Gu Long's \"Chu Liuxiang\" novel series.", "The series was first 
broadcast on CCTV-8 in December 2007 in China."], "mentions": [{"id": 0, "start": 96, "end": 116, "ref_url": "Chu_Liuxiang", "ref_ids": ["19859405"], "sent_idx": 0}, {"id": 1, "start": 129, "end": 136, "ref_url": "Gu_Long", "ref_ids": ["2370332"], "sent_idx": 0}, {"id": 2, "start": 14, "end": 26, "ref_url": "Chu_Liuxiang", "ref_ids": ["19859405"], "sent_idx": 0}, {"id": 3, "start": 34, "end": 40, "ref_url": "CCTV-8", "ref_ids": ["7343984"], "sent_idx": 1}]} +{"id": "18271264", "title": "Anita Elberse", "sentences": ["Anita Elberse is a professor of business administration at Harvard Business School, specializing in the entertainment, media and sports sectors.", "According to the \"Wall Street Journal\", she \"takes the same statistically rigorous approach to entertainment and cultural industries that sabermetricians do to baseball.\""], "mentions": [{"id": 0, "start": 59, "end": 82, "ref_url": "Harvard_Business_School", "ref_ids": ["18998741"], "sent_idx": 0}, {"id": 1, "start": 138, "end": 153, "ref_url": "Sabermetricians", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271265", "title": "Newfoundland School Society", "sentences": ["The Newfoundland School Society (N.S.S) was established on June 30, 1823 by a merchant named Samuel Codner.", "Codner first came to Newfoundland in 1788 and periodically traveled back to England where he was influenced by the Evangelical Revival occurring there during this time.", "He was inspired to help Christians in neglected British colonies by the Premier of England, Lord Liverpool in 1821.", "Knowing the dire conditions in Newfoundland, he formed \"The Society for Educating the Poor of Newfoundland\" which had its first annual meeting at the London Coffee House on July 13, 1824.", "It was the ability of the N.S.S. to deal with the difficult conditions in Newfoundland that led to their great success on the Island.", "The first N.S.S. school was set up in St. 
John's in 1823 and within two years, the demand spread rapidly to rural Newfoundland where petitions and applications for schools began pouring out.", "By 1825, five schoolmasters had been sent between St. John’s, Quidi Vidi, Harbour Grace, Carbonear, and Petty Harbour.", "Within just ten years, 43 N.S.S. schools had been established on the Island, with an enrolment of approximately 6945 children in daily schools and 4714 in Sunday schools.", "The N.S.S. began to dissipate in the late 19th century when Newfoundland Legislature established an Education Board and set up Board Schools across the Island, and especially after the Education Act of 1891.", "In 1923, what still existed as the N.S.S. merged into a denominational school system known as the Church of England schools."], "mentions": [{"id": 0, "start": 60, "end": 72, "ref_url": "Newfoundland_and_Labrador", "ref_ids": ["21980"], "sent_idx": 8}, {"id": 1, "start": 108, "end": 115, "ref_url": "England", "ref_ids": ["9316"], "sent_idx": 9}, {"id": 2, "start": 115, "end": 134, "ref_url": "Revivalism", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 24, "end": 34, "ref_url": "Christianity", "ref_ids": ["5211"], "sent_idx": 2}, {"id": 4, "start": 92, "end": 106, "ref_url": "Lord_Liverpool", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 38, "end": 48, "ref_url": "St._John’s,_Newfoundland_and_Labrador", "ref_ids": null, "sent_idx": 5}, {"id": 6, "start": 62, "end": 72, "ref_url": "Quidi_Vidi", "ref_ids": ["3500533"], "sent_idx": 6}, {"id": 7, "start": 74, "end": 87, "ref_url": "Harbour_Grace", "ref_ids": ["2443924"], "sent_idx": 6}, {"id": 8, "start": 89, "end": 98, "ref_url": "Carbonear", "ref_ids": ["1904331"], "sent_idx": 6}, {"id": 9, "start": 104, "end": 117, "ref_url": "Petty_Harbour-Maddox_Cove,_Newfoundland_and_Labrador", "ref_ids": null, "sent_idx": 6}]} +{"id": "18271277", "title": "Vardablur, Aragatsotn", "sentences": ["Vardablur (, also Romanized as Vartablur and Vardaolur; until 1950, Dzhangi and 
Jangi) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 104, "end": 123, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 127, "end": 134, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271285", "title": "R284 road (Ireland)", "sentences": ["The R284 road is a regional road in Ireland linking Sligo to Leitrim village in County Leitrim.", "En route it passes through Ballygawley, Ballyfarnan, Geevagh and Keadue.", "The road is long."], "mentions": [{"id": 0, "start": 19, "end": 32, "ref_url": "Regional_road_(Ireland)", "ref_ids": ["2271642"], "sent_idx": 0}, {"id": 1, "start": 36, "end": 43, "ref_url": "Republic_of_Ireland", "ref_ids": ["14560"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 57, "ref_url": "Sligo", "ref_ids": ["584663"], "sent_idx": 0}, {"id": 3, "start": 61, "end": 76, "ref_url": "Leitrim,_County_Leitrim", "ref_ids": ["2117503"], "sent_idx": 0}, {"id": 4, "start": 80, "end": 94, "ref_url": "County_Leitrim", "ref_ids": ["52817"], "sent_idx": 0}, {"id": 5, "start": 27, "end": 38, "ref_url": "Ballygawley,_County_Sligo", "ref_ids": ["14056162"], "sent_idx": 1}, {"id": 6, "start": 40, "end": 51, "ref_url": "Ballyfarnan", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 53, "end": 60, "ref_url": "Geevagh", "ref_ids": ["7306371"], "sent_idx": 1}, {"id": 8, "start": 65, "end": 71, "ref_url": "Keadue", "ref_ids": ["7307712"], "sent_idx": 1}]} +{"id": "18271293", "title": "Vardahovit", "sentences": ["Vardahovit (; formerly, Gyulliduz Otselok, Gyulliduz, Gharaghaya, and Gyadikvank) is a town in the Vayots Dzor Province of Armenia."], "mentions": [{"id": 0, "start": 99, "end": 119, "ref_url": "Vayots_Dzor", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 123, "end": 130, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271296", "title": "John 
Hedgecoe", "sentences": ["John Hedgecoe (24 March 1932 – 3 June 2010) was a British photographer and author of over 30 books on photography.", "He established the photography department in 1965 at the Royal College of Art, where he was Professor from 1975 to 1994 and Professor Emeritus until his death.", "He was also Pro-Rector of the college from 1981 to 1994.", "His photographs appear in permanent collections at the New York Museum of Modern Art and London's National Portrait Gallery."], "mentions": [{"id": 0, "start": 57, "end": 77, "ref_url": "Royal_College_of_Art", "ref_ids": ["269757"], "sent_idx": 1}, {"id": 1, "start": 64, "end": 84, "ref_url": "Museum_of_Modern_Art", "ref_ids": ["66107"], "sent_idx": 3}, {"id": 2, "start": 98, "end": 123, "ref_url": "National_Portrait_Gallery_(London)", "ref_ids": null, "sent_idx": 3}]} +{"id": "18271300", "title": "Vardakar", "sentences": ["Vardakar (; formerly, Tomartash) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 53, "end": 68, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 72, "end": 79, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271304", "title": "Vardaghbyur", "sentences": ["Vardaghbyur (, also Romanized as Vartakhpyur, Vardagbyur, and Vardakhpyur; formerly, Gyullibulag Armyanskiy) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 129, "end": 144, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 148, "end": 155, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271306", "title": "Vardanashen", "sentences": ["Vardanashen (; formerly, Chibukhchi) is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 54, "end": 70, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 
74, "end": 81, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271310", "title": "Vardanidzor", "sentences": ["Vardanidzor () is a village and rural community (municipality) in the Syunik Province of Armenia.", "The community of Vardanidzor consists of the villages of Vardanidzor, Aygedzor, and Tkhkut.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 292 in 2010, up from 263 at the 2001 census.", "The population of the village of Vardanidzor was 228 at the 2011 census, up from 197 at the 2001 census."], "mentions": [{"id": 0, "start": 32, "end": 47, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 70, "end": 85, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 70, "end": 78, "ref_url": "Aygedzor,_Syunik", "ref_ids": ["18256114"], "sent_idx": 1}, {"id": 4, "start": 84, "end": 90, "ref_url": "Tkhkut", "ref_ids": ["18260751"], "sent_idx": 1}, {"id": 5, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 2}]} +{"id": "18271313", "title": "Vardashat", "sentences": ["Vardashat (; formerly, Kashka and Ghashka) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 75, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271318", "title": "Vardashen, Ararat", "sentences": ["Vardashen (; formerly, Megrablu) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 50, "end": 65, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271322", "title": "Vardashen, 
Yerevan", "sentences": ["Vardashen () is a town in the Yerevan Province of Armenia."], "mentions": [{"id": 0, "start": 30, "end": 46, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 57, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271327", "title": "Vardenis, Aragatsotn", "sentences": ["Vardenis (; until 1969, Gyullidzha and Gyulluja) is a town in the Aragatsotn Province of Armenia.", "The town has a 19th-century church."], "mentions": [{"id": 0, "start": 66, "end": 85, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 89, "end": 96, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271328", "title": "Vardenut", "sentences": ["Vardenut (; formerly, Shirakala) is a town in the Aragatsotn Province of Armenia.", "The town was settled in 1829 by emigrants from Persia as part of the exchange of populations under the Treaty of Turkmenchay."], "mentions": [{"id": 0, "start": 50, "end": 69, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 73, "end": 80, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 47, "end": 53, "ref_url": "Persia", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 103, "end": 124, "ref_url": "Treaty_of_Turkmenchay", "ref_ids": ["1340560"], "sent_idx": 1}]} +{"id": "18271331", "title": "Lake Hotel (disambiguation)", "sentences": ["Lake Hotel is used as the name of several hotels including:"], "mentions": []} +{"id": "18271335", "title": "Varser", "sentences": ["Varser (; until 1946, Chirchir and Chrchr) is a village in the Gegharkunik Province of Armenia.", "The village is first mentioned in the 9th century as a gift of Ashot II Bagratuni to an \"Apostles' Church\".", "Most of the Armenians in the village are descendants of migrants from Bitlis and Maku from western Armenia"], "mentions": [{"id": 0, "start": 63, "end": 83, "ref_url": "Gegharkunik_Province", "ref_ids": 
["470502"], "sent_idx": 0}, {"id": 1, "start": 12, "end": 19, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 2, "start": 63, "end": 71, "ref_url": "Ashot_II", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271342", "title": "Vazashen", "sentences": ["Vazashen () is a town in the Tavush Province of Armenia.", "It is the site of ancient Xałxał (), where Saint Vardan won his first military victory.", "Until recently, it was known as Lala Geł (), then Lali Gyuł ().", "The name of Vazashen was chosen to reflect the local vineyard production (վազ/vaz 'grapevine' and շեն/shen 'village')."], "mentions": [{"id": 0, "start": 29, "end": 44, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 48, "end": 55, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 43, "end": 55, "ref_url": "Saint_Vardan", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 74, "end": 93, "ref_url": "Wikt:վազ", "ref_ids": null, "sent_idx": 3}, {"id": 4, "start": 98, "end": 116, "ref_url": "Wikt:շեն", "ref_ids": null, "sent_idx": 3}]} +{"id": "18271343", "title": "Jakar Dzong", "sentences": ["Jakar Dzong or Jakar Yugyal Dzong is the dzong of the Bumthang District in central Bhutan.", "It is located on a ridge above Jakar town in the Chamkhar valley of Bumthang.", "It is built on the site of an earlier temple established by the Ralung hierarch Yongzin Ngagi Wangchuk (1517–1554) when he came to Bhutan.", "Jakar Dzong may be the largest dzong in Bhutan, with a circumference of more than .", "The name Jakar is derived from the word \"bjakhab\", meaning \"white bird\", in reference to Jakar's foundation myth, according to which a roosting white bird signaled the proper and auspicious location to found a monastery around 1549."], "mentions": [{"id": 0, "start": 31, "end": 36, "ref_url": "Dzong", "ref_ids": null, "sent_idx": 3}, {"id": 1, "start": 54, "end": 71, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, 
{"id": 2, "start": 40, "end": 46, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 3}, {"id": 3, "start": 9, "end": 14, "ref_url": "Jakar", "ref_ids": ["7145216"], "sent_idx": 4}]} +{"id": "18271345", "title": "Verin Akhtala", "sentences": ["Verin Akhtala (; also, Akhtala and Russified as Verkhnyaya Akhtala) is a village in the Lori Province of Armenia.", "In 1995 it became part of the nearby community of Shamlugh."], "mentions": [{"id": 0, "start": 88, "end": 101, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 105, "end": 112, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 58, "ref_url": "Shamlugh", "ref_ids": ["8393482"], "sent_idx": 1}]} +{"id": "18271348", "title": "Verin Bazmaberd", "sentences": ["Verin Bazmaberd( formerly, Verin Agdzhakala, Aghjaghala Ulia, and Verkhnyaya Agdzhakala) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 106, "end": 125, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 129, "end": 136, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271349", "title": "Dhur", "sentences": ["Dhur or D°ur (Dzongkha: དུར་; Wylie: \"dur\") is a town in western Chhoekhor Gewog, Bumthang District in central Bhutan.", "Dhur is the main area where Brokkat, one of the endangered languages of Bhutan, is spoken."], "mentions": [{"id": 0, "start": 14, "end": 22, "ref_url": "Dzongkha", "ref_ids": ["371313"], "sent_idx": 0}, {"id": 1, "start": 30, "end": 35, "ref_url": "Wylie_transliteration", "ref_ids": ["558550"], "sent_idx": 0}, {"id": 2, "start": 49, "end": 53, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 3, "start": 65, "end": 80, "ref_url": "Chhoekhor_Gewog", "ref_ids": ["30019673"], "sent_idx": 0}, {"id": 4, "start": 82, "end": 99, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 5, "start": 72, "end": 78, "ref_url": "Bhutan", 
"ref_ids": ["2421391"], "sent_idx": 1}, {"id": 6, "start": 28, "end": 35, "ref_url": "Brokkat_language", "ref_ids": ["30870405"], "sent_idx": 1}, {"id": 7, "start": 59, "end": 78, "ref_url": "Languages_of_Bhutan", "ref_ids": ["15735312"], "sent_idx": 1}]} +{"id": "18271354", "title": "Verin Charbakh", "sentences": ["Verin Charbakh (, also, Verin Ch’arbakh and Charbakh) is a part of Shengavit District in Yerevan, Armenia."], "mentions": [{"id": 0, "start": 67, "end": 85, "ref_url": "Shengavit_District", "ref_ids": ["19059867"], "sent_idx": 0}, {"id": 1, "start": 89, "end": 96, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 2, "start": 98, "end": 105, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271356", "title": "Gyetsa", "sentences": ["Gyetsa is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 12, "end": 16, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 37, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 49, "end": 55, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271358", "title": "Verin Dvin", "sentences": ["Verin Dvin (; formerly known as Aysori Dvin and Verkhniy Dvin, literally means \"Upper Dvin\"), is a village in the Ararat Province of Armenia located 30 kilometers south of Yerevan.", "The largest Assyrian community in Armenia is in Verin Dvin, where around 2,000 out of the 2,700 residents in the village are ethnic Assyrians.", "The village is home to 2 Assyrian churches including the church of Mar Tuma (Saint Thomas) dating back to 1828.", "The village is built near the ruins of the ancient city of Dvin.", "The secondary school of the village has a majority of ethnic Assyrian students and provides lessons in Assyrian history and language."], "mentions": [{"id": 0, "start": 114, "end": 129, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, 
"start": 34, "end": 41, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 172, "end": 179, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 3, "start": 12, "end": 41, "ref_url": "Assyrians_in_Armenia", "ref_ids": ["7126020"], "sent_idx": 1}, {"id": 4, "start": 132, "end": 141, "ref_url": "Assyrian_people", "ref_ids": ["266350"], "sent_idx": 1}, {"id": 5, "start": 59, "end": 63, "ref_url": "Dvin_(ancient_city)", "ref_ids": ["4997140"], "sent_idx": 3}]} +{"id": "18271361", "title": "Kurjey", "sentences": ["Kurjey is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 12, "end": 16, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 37, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 49, "end": 55, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271362", "title": "Verin Dzhrapi", "sentences": ["Verin Dzhrapi (also, Kegach) is a town in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 46, "end": 61, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 72, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271364", "title": "Verin Getashen", "sentences": ["Verin Getashen ( - meaning \"Upper Getashen\"; prior to 1945, Verin Adyaman - meaning \"Upper Adyaman\") is a major village in the Gegharkunik Province of Armenia, located just southwest of Lake Sevan.", "The village lies to the south of Nerkin Getashen (Lower Getashen) and was founded 1828-29 by migrants from Mush and Alashkert, in present-day Eastern Turkey.", "In the village are the churches of S. Astvatsatsin and S. 
Sargis."], "mentions": [{"id": 0, "start": 127, "end": 147, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 151, "end": 158, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 186, "end": 196, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 0}, {"id": 3, "start": 33, "end": 48, "ref_url": "Nerkin_Getashen", "ref_ids": ["6585189"], "sent_idx": 1}, {"id": 4, "start": 107, "end": 111, "ref_url": "Muş", "ref_ids": ["3838643"], "sent_idx": 1}, {"id": 5, "start": 116, "end": 125, "ref_url": "Eleşkirt", "ref_ids": ["4104145"], "sent_idx": 1}, {"id": 6, "start": 142, "end": 156, "ref_url": "Eastern_Anatolia_Region", "ref_ids": ["8318358"], "sent_idx": 1}]} +{"id": "18271367", "title": "Verin Giratagh", "sentences": ["Verin Giratagh (), is an abandoned village in the Kajaran Municipality of Syunik Province, Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported that both the village of Verin Giratagh and the former community were uninhabited at the 2001 and 2011 censuses."], "mentions": [{"id": 0, "start": 50, "end": 70, "ref_url": "Kajaran_Municipality", "ref_ids": ["56586214"], "sent_idx": 0}, {"id": 1, "start": 74, "end": 89, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271368", "title": "Moore's Fort", "sentences": ["Moore's Fort is a twin dogtrot type blockhouse in Round Top, Texas.", "Built by John Henry Moore in 1828, it is the oldest building in Fayette County.", "It was originally located where La Grange is today, as a shelter for settlers from Comanche raids.", "Later it was moved to Round Top.", "A historical marker sits at the original location in La Grange."], "mentions": 
[{"id": 0, "start": 23, "end": 30, "ref_url": "Dogtrot", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 36, "end": 46, "ref_url": "Blockhouse", "ref_ids": ["741071"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 66, "ref_url": "Round_Top,_Texas", "ref_ids": ["135873"], "sent_idx": 0}, {"id": 3, "start": 9, "end": 25, "ref_url": "John_Henry_Moore_(Texas)", "ref_ids": ["35761321"], "sent_idx": 1}, {"id": 4, "start": 64, "end": 78, "ref_url": "Fayette_County,_Texas", "ref_ids": ["91592"], "sent_idx": 1}, {"id": 5, "start": 53, "end": 62, "ref_url": "La_Grange,_Texas", "ref_ids": ["16829837"], "sent_idx": 4}, {"id": 6, "start": 83, "end": 91, "ref_url": "Comanche", "ref_ids": ["54001"], "sent_idx": 2}]} +{"id": "18271370", "title": "Vardavank", "sentences": ["Vardavanq (, also Romanized as Vardavank; formerly, Verin Gyodak’lu, Verin Gyodaklu, Yukhari Gedaklyu, Verin Gedaklu) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 91 in 2010, down from 120 at the 2001 census."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 135, "end": 150, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 173, "end": 188, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271373", "title": "Verin Hand", "sentences": ["Verin Hand (also, Verin And) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 46, "end": 61, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 65, "end": 72, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 
0}]} +{"id": "18271377", "title": "Verin Jrashen", "sentences": ["Verin Jrashen (), is a neighbourhood in the Erebuni District of the Armenian capital Yerevan.", "It is very close by Argishti."], "mentions": [{"id": 0, "start": 44, "end": 60, "ref_url": "Erebuni_District", "ref_ids": ["18256482"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 85, "end": 92, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 3, "start": 20, "end": 28, "ref_url": "Argishti,_Yerevan", "ref_ids": ["18255966"], "sent_idx": 1}]} +{"id": "18271380", "title": "Verin Karmiraghbyur", "sentences": ["Verin Karmiraghbyur (, also Romanized as Verin Karmirakhpyur and Verin Karmir aghpyur; Russified as Verkhniy Karmragbyur) is a town in the Tavush Province of Armenia."], "mentions": [{"id": 0, "start": 28, "end": 36, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 139, "end": 154, "ref_url": "Tavush", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 158, "end": 165, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271384", "title": "Lhedang", "sentences": ["Lhedang is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 13, "end": 17, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 21, "end": 38, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 56, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271387", "title": "1974–75 DFB-Pokal", "sentences": ["The 1974–75 DFB-Pokal was the 32nd season of the annual German football cup competition.", "The DFB-Pokal is considered the second-most important club title in German football after the Bundesliga championship.", "128 teams competed in the tournament of seven rounds which began on 7 September 1974 and ended on 21 June 1975.", "In the final Eintracht Frankfurt 
defeated MSV Duisburg 1–0, thereby defending their title from the previous season.", "It was Frankfurt's second victory in the cup."], "mentions": [{"id": 0, "start": 4, "end": 13, "ref_url": "DFB-Pokal", "ref_ids": ["675126"], "sent_idx": 1}, {"id": 1, "start": 75, "end": 83, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 1}, {"id": 2, "start": 94, "end": 104, "ref_url": "Bundesliga", "ref_ids": ["686142"], "sent_idx": 1}, {"id": 3, "start": 13, "end": 32, "ref_url": "Eintracht_Frankfurt", "ref_ids": ["862147"], "sent_idx": 3}, {"id": 4, "start": 42, "end": 54, "ref_url": "MSV_Duisburg", "ref_ids": ["1586579"], "sent_idx": 3}]} +{"id": "18271390", "title": "Verin Kelanlu", "sentences": ["Armavir gyux (also, Արմավիր գյուղ, Aralykh, Verin Armavir) is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 76, "end": 92, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 96, "end": 103, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271392", "title": "Naspe", "sentences": ["Naspe is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 11, "end": 15, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 36, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 48, "end": 54, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271395", "title": "Zorakan", "sentences": ["Zorakan (); formerly known as \"Verin Kyorpluu\", is a village in the Tavush Province of Armenia.", "The ancestors are from the Shakhl of Ghazakh region and other villages.", "Many Armenians were deported from Chardakhlu village in 1988-1989."], "mentions": [{"id": 0, "start": 68, "end": 83, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 5, "end": 12, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}]} +{"id": "18271397", "title": 
"Rife, Bhutan", "sentences": ["Rife, Bhutan is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 18, "end": 22, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 26, "end": 43, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 6, "end": 12, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271400", "title": "Verin Khotanan", "sentences": ["Verin Khotanan (; also, Verkhniy Khotanan and Verev Khotanan) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 199 in 2010, down from 295 at the 2001 census."], "mentions": [{"id": 0, "start": 79, "end": 94, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 117, "end": 132, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271404", "title": "Verin Nedzhirlu", "sentences": ["Verin Nedzhirlu (also, Verkhniy Nedzhirlu) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 75, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271405", "title": "Shuri, Bhutan", "sentences": ["Shuri, Bhutan is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 19, "end": 23, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 27, "end": 44, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 7, "end": 13, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} 
+{"id": "18271408", "title": "Verin Sasnashen", "sentences": ["Verin Sasnashen (); formerly known as \"Verin Karakoymaz\" and \"Verin Sasunashen\", is a village in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 101, "end": 120, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 124, "end": 131, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271410", "title": "Verin Shengavit", "sentences": ["Verin Shengavit (, also, Verin Shengavit’, Verkhniy Shinkovit, Verkhniy Shengavit, Shinkovit, Shingaīt, and Shengavit) is a part of Shengavit District in Yerevan, Armenia."], "mentions": [{"id": 0, "start": 132, "end": 150, "ref_url": "Shengavit_District", "ref_ids": ["19059867"], "sent_idx": 0}, {"id": 1, "start": 154, "end": 161, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 0}, {"id": 2, "start": 163, "end": 170, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271411", "title": "Wangchukling", "sentences": ["Wangchukling is a town in Bumthang District in central Bhutan."], "mentions": [{"id": 0, "start": 18, "end": 22, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 26, "end": 43, "ref_url": "Bumthang_District", "ref_ids": ["766282"], "sent_idx": 0}, {"id": 2, "start": 55, "end": 61, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271412", "title": "Verin Shorzha", "sentences": ["Verin Shorzha (; also Romanized as Verin Shorja, meaning \"Upper Shorzha\"; Russified as Verkhnyaya Shorzha; also simply Shorzha) is a small hamlet in the Gegharkunik Province of Armenia.", "Verin Shorzha and the nearby Nerkin Shorzha are both reached by a short drive from Ayrk."], "mentions": [{"id": 0, "start": 22, "end": 30, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 139, "end": 145, "ref_url": "Hamlet_(place)", "ref_ids": ["396466"], "sent_idx": 0}, {"id": 2, "start": 153, "end": 173, 
"ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 3, "start": 177, "end": 184, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 4, "start": 29, "end": 43, "ref_url": "Nerkin_Shorzha", "ref_ids": ["18270000"], "sent_idx": 1}, {"id": 5, "start": 83, "end": 87, "ref_url": "Ayrk", "ref_ids": ["18256155"], "sent_idx": 1}]} +{"id": "18271414", "title": "Verin Vachagan", "sentences": ["Verin Vachagan (also, Verkhniy Vachagan) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 58, "end": 73, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 77, "end": 84, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271425", "title": "Verin-Chambarak", "sentences": ["Verin-Chambarak (also, Verkhniy Chambarak) is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 80, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 84, "end": 91, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271431", "title": "Kilisa", "sentences": ["Kilisa (; also, Verin-Kilisa), is a village in the Lori Province of Armenia, belonging to the community of Halavar.", "Kilisa means \"church\" in Turkish."], "mentions": [{"id": 0, "start": 51, "end": 64, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 68, "end": 75, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 107, "end": 114, "ref_url": "Halavar", "ref_ids": ["18256915"], "sent_idx": 0}, {"id": 3, "start": 25, "end": 32, "ref_url": "Turkish_language", "ref_ids": ["29992"], "sent_idx": 1}]} +{"id": "18271432", "title": "Verin-Kulibeklu", "sentences": ["Verin-Kulibeklu (also, Verkhniy Kulibeklu) is a town in the Armavir Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 76, "ref_url": "Armavir_(province)", "ref_ids": null, "sent_idx": 
0}, {"id": 1, "start": 80, "end": 87, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271436", "title": "Verkhniy Aluchalu", "sentences": ["Verkhniy Aluchalu is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 35, "end": 55, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271437", "title": "Verkhniy Kalakut", "sentences": ["Verkhniy Kalakut (also, Verin Kalakut), is an abandoned settlement in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 74, "end": 93, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 97, "end": 104, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271447", "title": "Verkhnyaya Gezaldara", "sentences": ["Verkhnyaya Gezaldara is a town in the Gegharkunik Province of Armenia."], "mentions": [{"id": 0, "start": 38, "end": 58, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 62, "end": 69, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271449", "title": "Michael Hansen (footballer)", "sentences": ["Michael Hansen (born 22 September 1971) is a Danish former professional football player, who played in the midfielder position.", "He played 40 games and scored two goals for various Danish youth national teams, and competed with the Denmark national under-21 football team at the 1992 Summer Olympics.", "He played for a number of Danish teams, including Silkeborg IF, Odense Boldklub, Esbjerg fB, and FC Midtjylland.", "He ended his playing career in 2006.", "As a youngster he played for B1938.", "He shifted to B1901 - Nykøbing in 1985, where he immediately showed his skills.", "He was offered a professional contract a few years later and made his debut in the first team when he was only 16.", "When he was 
18 he joined Næstved a team in the Danish Superliga.", "He was the manager of the 1. division club Skive from 2008 to 2013.", "He replaced Ove Pedersen as manager of FC Vestsjælland on 1 July 2014.", "His first season as manager of FC Vestsjælland ended in relegation from the Danish Superliga.", "Following af poor start to the 2015-16 Danish 1st Division he was sacked on 1 September 2015.", "In August 2018 he became manager of Silkeborg IF until the end of the 2018-19-season."], "mentions": [{"id": 0, "start": 39, "end": 45, "ref_url": "Denmark", "ref_ids": ["76972"], "sent_idx": 11}, {"id": 1, "start": 129, "end": 137, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 1}, {"id": 2, "start": 107, "end": 117, "ref_url": "Midfielder_(association_football)", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 103, "end": 142, "ref_url": "Denmark_national_under-21_football_team", "ref_ids": ["3810060"], "sent_idx": 1}, {"id": 4, "start": 150, "end": 170, "ref_url": "1992_Summer_Olympics", "ref_ids": ["82755"], "sent_idx": 1}, {"id": 5, "start": 36, "end": 48, "ref_url": "Silkeborg_IF", "ref_ids": ["3026399"], "sent_idx": 12}, {"id": 6, "start": 64, "end": 79, "ref_url": "Odense_Boldklub", "ref_ids": ["2061696"], "sent_idx": 2}, {"id": 7, "start": 81, "end": 91, "ref_url": "Esbjerg_fB", "ref_ids": ["2228165"], "sent_idx": 2}, {"id": 8, "start": 97, "end": 111, "ref_url": "FC_Midtjylland", "ref_ids": ["2424594"], "sent_idx": 2}, {"id": 9, "start": 12, "end": 24, "ref_url": "Ove_Pedersen", "ref_ids": ["14200093"], "sent_idx": 9}, {"id": 10, "start": 31, "end": 46, "ref_url": "FC_Vestsjælland", "ref_ids": ["18176968"], "sent_idx": 10}, {"id": 11, "start": 76, "end": 92, "ref_url": "Danish_Superliga", "ref_ids": ["1984091"], "sent_idx": 10}, {"id": 12, "start": 31, "end": 58, "ref_url": "2015-16_Danish_1st_Division", "ref_ids": null, "sent_idx": 11}, {"id": 13, "start": 36, "end": 48, "ref_url": "Silkeborg_IF", "ref_ids": ["3026399"], "sent_idx": 12}, 
{"id": 14, "start": 70, "end": 84, "ref_url": "2018-19_Danish_1st_Division", "ref_ids": null, "sent_idx": 12}]} +{"id": "18271450", "title": "Vernashen", "sentences": ["Vernashen (); formerly known as \"Bashkend\", is a village in the Vayots Dzor Province of Armenia.", "The 13th-century fortress of Proshaberd (also known as Boloraberd) and the Spitakavor Monastery of 1321 are located near Vernashen."], "mentions": [{"id": 0, "start": 64, "end": 84, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 1, "start": 88, "end": 95, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 29, "end": 39, "ref_url": "Proshaberd", "ref_ids": ["18480381"], "sent_idx": 1}, {"id": 3, "start": 75, "end": 95, "ref_url": "Spitakavor_Monastery", "ref_ids": ["39613715"], "sent_idx": 1}]} +{"id": "18271453", "title": "Voghjaberd", "sentences": ["Voghjaberd (, also Romanized as Vokhchaberd) is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 19, "end": 27, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 65, "end": 80, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 2, "start": 84, "end": 91, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271455", "title": "Voghji, Shirak", "sentences": ["Voghji is a village in the Shirak Province of Armenia.", "Until 1991 the village's name was Okhchogly or Okhchoglu."], "mentions": [{"id": 0, "start": 27, "end": 42, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 46, "end": 53, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271458", "title": "Voghji, Syunik", "sentences": ["Voghji (), is a former village in Syunik Province of Armenia, currently part of town of Kajaran."], "mentions": [{"id": 0, "start": 34, "end": 49, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 1, "start": 53, "end": 60, 
"ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 88, "end": 95, "ref_url": "Kajaran", "ref_ids": ["32540636"], "sent_idx": 0}]} +{"id": "18271464", "title": "Chapchha", "sentences": ["Chapchha is a town in Chukha District in northern Bhutan."], "mentions": [{"id": 0, "start": 14, "end": 18, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 37, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 56, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271465", "title": "Vorotan (Goris)", "sentences": ["Vorotan (; formerly, Tatevges), is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 307 in 2010, up from 271 at the 2001 census."], "mentions": [{"id": 0, "start": 49, "end": 64, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 87, "end": 102, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271468", "title": "Vorotnavan", "sentences": ["Vorotan (; Vorotn or Urut), is a village in the Syunik Province of Armenia, belongs to the Sisian Municipality.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population was 282 in 2010, down from 283 at the 2001 census."], "mentions": [{"id": 0, "start": 48, "end": 63, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 91, "end": 110, "ref_url": "Sisian_Municipality", "ref_ids": ["56637669"], "sent_idx": 0}, 
{"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271472", "title": "Daphu", "sentences": ["Daphu is a town in Chukha District in southwestern Bhutan.", "At the 2005 census, its population was 1,666."], "mentions": [{"id": 0, "start": 11, "end": 15, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 34, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 51, "end": 57, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271474", "title": "Voskehask", "sentences": ["Voskehask( also Romanized as Voskeask; formerly, Musakan and Molla Musa) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 93, "end": 108, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 112, "end": 119, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271476", "title": "Euan Wemyss", "sentences": ["Euan Wemyss is currently a singer and was previously a Scottish broadcast journalist working for \"STV News\" in the Northern Scotland.", "Wemyss, the lead singer of Aberdeen rock band, Captain Face, used to also contribute to the music section of the station's website, stv.tv.", "From June 2008, Wemyss was an occasional presenter of short news bulletins and a reporter for \"North Tonight\" on STV.", "In August 2009, he moved to STV Central in Glasgow to become the dedicated reporter for the \"North\" section of the STV News website.", "Wemyss then moved back to STV North in Aberdeen."], "mentions": [{"id": 0, "start": 17, "end": 23, "ref_url": "Singer", "ref_ids": null, "sent_idx": 1}, {"id": 1, "start": 74, "end": 84, "ref_url": "Journalist", "ref_ids": ["50100"], "sent_idx": 0}, {"id": 2, "start": 115, "end": 123, "ref_url": 
"STV_News_at_Six", "ref_ids": null, "sent_idx": 3}, {"id": 3, "start": 115, "end": 132, "ref_url": "Northern_Scotland", "ref_ids": ["1475671"], "sent_idx": 0}, {"id": 4, "start": 47, "end": 59, "ref_url": "Captain_Face", "ref_ids": ["20646165"], "sent_idx": 1}, {"id": 5, "start": 132, "end": 138, "ref_url": "Stv.tv", "ref_ids": ["5354240"], "sent_idx": 1}, {"id": 6, "start": 95, "end": 108, "ref_url": "North_Tonight", "ref_ids": ["3980687"], "sent_idx": 2}]} +{"id": "18271485", "title": "Voskehat, Aragatsotn", "sentences": ["Voskehat (, formerly known as Patrinj), is a village in the Aragatsotn Province of Armenia, located on the right bank of Amberd river, 7 km southwest of the provincial capital Ashtarak.", "It is surrounded by the villages of Agarak from the north, Voskevaz from the east and Lernamerdz from the south.", "It has an average elevation of 1025 meters above sea level."], "mentions": [{"id": 0, "start": 60, "end": 79, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 83, "end": 90, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 176, "end": 184, "ref_url": "Ashtarak", "ref_ids": ["1067111"], "sent_idx": 0}, {"id": 3, "start": 36, "end": 42, "ref_url": "Agarak,_Aragatsotn", "ref_ids": ["18255431"], "sent_idx": 1}, {"id": 4, "start": 59, "end": 67, "ref_url": "Voskevaz", "ref_ids": ["16051769"], "sent_idx": 1}, {"id": 5, "start": 86, "end": 96, "ref_url": "Lernamerdz", "ref_ids": ["7832944"], "sent_idx": 1}]} +{"id": "18271491", "title": "Dungna", "sentences": ["\"Not to be confused with\":", "Dungna Gewog\nDungna is a town in Chukha District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 0, "end": 12, "ref_url": "Dungna_Gewog", "ref_ids": ["28545791"], "sent_idx": 1}, {"id": 1, "start": 25, "end": 29, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 1}, {"id": 2, "start": 33, "end": 48, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 1}, {"id": 
3, "start": 65, "end": 71, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 1}]} +{"id": "18271493", "title": "Bishop of Sydney", "sentences": ["Bishop of Sydney may refer to:"], "mentions": []} +{"id": "18271496", "title": "Vosketas", "sentences": ["Vosketas (; until 1935, Kuldarvish and Ghuldervish) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 69, "end": 88, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 92, "end": 99, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271498", "title": "Voskevan", "sentences": ["Voskevan (; formerly, Koshkotan), is a village in the Tavush Province of Armenia.", "The majority of the people are engaged in agriculture.", "There are numerous forests, animals, mountains, canyons and some sights in the village.", "There are no Internet providers and internet is provided only through SIM-cards."], "mentions": [{"id": 0, "start": 54, "end": 69, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 80, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271506", "title": "Khitokha", "sentences": ["Khitokha is a town in Chukha District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 14, "end": 18, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 37, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 54, "end": 60, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271507", "title": "Yaghdan", "sentences": ["Yaghdan (), is a village in the Lori Province of Armenia.", "It has a majority of Greeks."], "mentions": [{"id": 0, "start": 32, "end": 45, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 56, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 21, "end": 27, "ref_url": 
"Greeks_in_Armenia", "ref_ids": ["7636896"], "sent_idx": 1}]} +{"id": "18271511", "title": "Nshkhark", "sentences": ["Nshkhark (); formerly known with its Russian name Yanykhskiy Sovkhoz and Yanegh, is an abandoned village in the Gegharkunik Province of Armenia.", "It was part of the Geghhovit community."], "mentions": [{"id": 0, "start": 112, "end": 132, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 136, "end": 143, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 19, "end": 28, "ref_url": "Geghhovit", "ref_ids": ["16051558"], "sent_idx": 1}]} +{"id": "18271519", "title": "Nancy Drew: The Haunting of Castle Malloy", "sentences": ["The Haunting of Castle Malloy is the 19th installment in the Nancy Drew point-and-click adventure game series by Her Interactive.", "The game is available for play on Microsoft Windows platforms.", "It has an ESRB rating of E for moments of mild violence and peril.", "Players take on the first-person view of fictional amateur sleuth Nancy Drew and must solve the mystery through interrogation of suspects, solving puzzles, and discovering clues.", "There are two levels of gameplay, Junior and Senior detective modes, each offering a different difficulty level of puzzles and hints, however neither of these changes affects the actual plot of the game.", "The game is loosely based on a book entitled \"The Bike Tour Mystery\" (2002)."], "mentions": [{"id": 0, "start": 66, "end": 76, "ref_url": "Nancy_Drew", "ref_ids": ["54832"], "sent_idx": 3}, {"id": 1, "start": 72, "end": 102, "ref_url": "Point-and-click_adventure_game", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 113, "end": 128, "ref_url": "Her_Interactive", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 66, "end": 76, "ref_url": "Nancy_Drew", "ref_ids": ["54832"], "sent_idx": 3}, {"id": 4, "start": 46, "end": 67, "ref_url": "The_Bike_Tour_Mystery", "ref_ids": ["28178870"], "sent_idx": 5}]} +{"id": 
"18271520", "title": "Lchavan", "sentences": ["Lchavan (; until 1967, Yarpuzlu and Yariuzlu) is a village located southeast of Lake Sevan in the Gegharkunik Province of Armenia.", "The village has Bronze Age tombs and a church dating to the 13th-14th century with khachkars."], "mentions": [{"id": 0, "start": 80, "end": 90, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 0}, {"id": 1, "start": 98, "end": 118, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 122, "end": 129, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 16, "end": 26, "ref_url": "Bronze_Age", "ref_ids": ["4620"], "sent_idx": 1}, {"id": 4, "start": 83, "end": 91, "ref_url": "Khachkar", "ref_ids": ["1100953"], "sent_idx": 1}]} +{"id": "18271523", "title": "Yegheg", "sentences": ["Yegheg (, formerly, \"Shabadin\") is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 113 in 2010, down from 166 at the 2001 census."], "mentions": [{"id": 0, "start": 49, "end": 64, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 87, "end": 102, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 3, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271529", "title": "Yeghegis", "sentences": ["Yeghegis (, also Romanized as Eghegis, Yekhegis; formerly, Alayaz, Alagyaz, and Erdapin) is a town in the Vayots Dzor Province of Armenia.", "It has rich historical past, with medieval Tsakhats Kar Monastery and Smbataberd fortress located in the vicinity of Yeghegis.", "The name of the town originated from the Armenian word which means reed."], "mentions": [{"id": 0, "start": 17, 
"end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 106, "end": 126, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 2, "start": 41, "end": 48, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 2}, {"id": 3, "start": 43, "end": 65, "ref_url": "Tsakhats_Kar_Monastery", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 70, "end": 80, "ref_url": "Smbataberd", "ref_ids": ["27344215"], "sent_idx": 1}, {"id": 5, "start": 41, "end": 49, "ref_url": "Armenian_language", "ref_ids": ["2217"], "sent_idx": 2}, {"id": 6, "start": 67, "end": 72, "ref_url": "Reed_(plant)", "ref_ids": ["346491"], "sent_idx": 2}]} +{"id": "18271531", "title": "Lobnig", "sentences": ["Lobnig is a town in Chukha District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 12, "end": 16, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 35, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 58, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271534", "title": "Yeghegnavan", "sentences": ["Yeghegnavan (, also Romanized as Eghegnavan; formerly, Shidlu) is a village in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 83, "end": 98, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 102, "end": 109, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271539", "title": "Yeghegnut, Lori", "sentences": ["Yeghegnut (, also Romanized as Yekheknut, Yekhegnut, Yegegnut, and Eghegnut; formerly, Kamyshkut) is a town in the Lori Province of Armenia."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 115, "end": 128, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 2, 
"start": 132, "end": 139, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271540", "title": "Pachu", "sentences": ["Pachu is a town in Chukha District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 11, "end": 15, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 34, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 51, "end": 57, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271541", "title": "Yeghegnut, Armavir", "sentences": ["Yeghegnut (, until 1947 Ghamishlu; before 1947 Sefiabad, Molla Badal, and Badal), is a village in the Armavir Province of Armenia.", "The majority of the village are Armenians (70%) with 663 (around 30%) of the Yazidi minority."], "mentions": [{"id": 0, "start": 102, "end": 118, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 32, "end": 39, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 77, "end": 83, "ref_url": "Yazidis_in_Armenia", "ref_ids": ["6878423"], "sent_idx": 1}]} +{"id": "18271544", "title": "Yeghipatrush", "sentences": ["Yeghipatrush (, until 1945, Tanjrlu, from 1945–1992, Mravyan), is a village in the Aragatsotn Province of Armenia.", "During the Soviet period, the town was renamed in honor of Askanaz Mravyan, Soviet Armenian Cultural Commissar.", "The town has a 10th- to 13th-century church of Surb Astvatsatsin.", "Some 100 meters beyond the church is an early cemetery with one corner of an allegedly 5th-century basilica as well as a khachkar shrine."], "mentions": [{"id": 0, "start": 83, "end": 102, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 83, "end": 90, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 59, "end": 74, "ref_url": "Askanaz_Mravyan", "ref_ids": ["14385040"], "sent_idx": 1}, {"id": 3, "start": 47, "end": 64, "ref_url": 
"Yeghipatrush_Church", "ref_ids": ["29618268"], "sent_idx": 2}]} +{"id": "18271549", "title": "Yeghnajur", "sentences": ["Yeghnajur (, also Romanized as Eghnajur; formerly, Chivinli and Ch’invinli) is a village in the rural community of Garnarich in the Shirak Province of Armenia.", "Its population was 28 at the 2001 census."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 96, "end": 111, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 115, "end": 124, "ref_url": "Garnarich", "ref_ids": ["18256528"], "sent_idx": 0}, {"id": 3, "start": 132, "end": 147, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 4, "start": 151, "end": 158, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271552", "title": "Eghvard, Syunik", "sentences": ["Yeghvard (, also Romanized as Yegvart) is a village and rural community (municipality) in the Syunik Province of Armenia.", "The National Statistical Service of the Republic of Armenia (ARMSTAT) reported its population as 272 in 2010, up from 270 at the 2001 census."], "mentions": [{"id": 0, "start": 17, "end": 25, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 56, "end": 71, "ref_url": "Community_(Armenia)", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 94, "end": 109, "ref_url": "Syunik_Province", "ref_ids": ["470515"], "sent_idx": 0}, {"id": 3, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 4, "start": 4, "end": 59, "ref_url": "National_Statistical_Service_of_the_Republic_of_Armenia", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271559", "title": "Yeghnik", "sentences": ["Yeghnik (, also Romanized as Eghnik and Yekhnik; until 1946, Dadalu) is a village in the Aragatsotn Province of Armenia.", "The village's church, Saint Nshan, dates from 1866."], "mentions": [{"id": 0, "start": 16, "end": 24, 
"ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 89, "end": 108, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 112, "end": 119, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271563", "title": "Yemazlu", "sentences": ["Yemazlu (also, Yemazli) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 41, "end": 56, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 60, "end": 67, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271572", "title": "Goravan", "sentences": ["Goravan (, formerly Yenikend), is a village in the Ararat Province of Armenia, just south of the Vedi river, which separates the town of Vedi from Goravan."], "mentions": [{"id": 0, "start": 51, "end": 66, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 77, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 97, "end": 101, "ref_url": "Vedi", "ref_ids": ["3719448"], "sent_idx": 0}]} +{"id": "18271577", "title": "Holy Haunted House", "sentences": ["Holy Haunted House is a live recording released by Gov't Mule in June 2008.", "It includes the entire 2007 Halloween performance (Oct 31, 2007 at The O'Shaughnessy Theatre in St. 
Paul, MN) including their cover of Led Zeppelin's album \"Houses of the Holy\"."], "mentions": [{"id": 0, "start": 51, "end": 61, "ref_url": "Gov't_Mule", "ref_ids": ["891757"], "sent_idx": 0}, {"id": 1, "start": 96, "end": 108, "ref_url": "Saint_Paul,_Minnesota", "ref_ids": ["40469"], "sent_idx": 1}, {"id": 2, "start": 135, "end": 147, "ref_url": "Led_Zeppelin", "ref_ids": ["17909"], "sent_idx": 1}, {"id": 3, "start": 157, "end": 175, "ref_url": "Houses_of_the_Holy", "ref_ids": ["50243"], "sent_idx": 1}]} +{"id": "18271579", "title": "Yenokavan", "sentences": ["Yenokavan (); known as \"Krdevan\" until 1935, is a village and summer resort in the Tavush Province of Armenia.", "The village was renamed in 1935 by the Soviets after Yenok Mkrtumian, who founded the first Communist party cell in the region.", "The village is a few kilometers north of the regional capital of Ijevan, close to the main highway.", "The canyon behind the village is lush with forest, river and has caves with interesting carvings.", "A group of adventurers called \"Yell Extreme Park\" team, are planning to install the longest zip-line in Yenokavan, with an approximate cost of US$ 200,000."], "mentions": [{"id": 0, "start": 83, "end": 98, "ref_url": "Tavush_Province", "ref_ids": ["470516"], "sent_idx": 0}, {"id": 1, "start": 102, "end": 109, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 53, "end": 68, "ref_url": "Yenok_Mkrtumian", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 65, "end": 71, "ref_url": "Ijevan", "ref_ids": ["581506"], "sent_idx": 2}, {"id": 4, "start": 31, "end": 48, "ref_url": "Yell_Extreme_Park", "ref_ids": ["61189724"], "sent_idx": 4}]} +{"id": "18271582", "title": "Yeranos", "sentences": ["Yeranos (; also Romanized), is a major village in the Gegharkunik Province of Armenia.", "It has a church of S. Astvatsatsin dating back to 1215, as well as Tukh Manuk and S. 
Sofia shrines.", "Following the Gavar-Martuni road, near the chicken farm are the remains of a cyclopean fort."], "mentions": [{"id": 0, "start": 16, "end": 25, "ref_url": "Romanization", "ref_ids": ["173533"], "sent_idx": 0}, {"id": 1, "start": 54, "end": 74, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 85, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 77, "end": 86, "ref_url": "Cyclopean_masonry", "ref_ids": ["2094245"], "sent_idx": 2}]} +{"id": "18271586", "title": "Yeraskh", "sentences": ["Yeraskh (, formerly, Arazdayan) is a village in the Ararat Province of Armenia.", "Yeraskh is the last Armenian village on the closed border with Azerbaijan's exclave of Nakhichevan, what used to be the main road and rail connections between Nakhichevan and Azerbaijan proper.", "The rail line also was the only direct connection between the former USSR and Iran."], "mentions": [{"id": 0, "start": 52, "end": 67, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 27, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 63, "end": 73, "ref_url": "Azerbaijan", "ref_ids": ["746"], "sent_idx": 1}, {"id": 3, "start": 87, "end": 98, "ref_url": "Nakhichivan_Autonomous_Republic", "ref_ids": null, "sent_idx": 1}]} +{"id": "18271588", "title": "Yeraskhahun", "sentences": ["Yeraskhahun (, until 1950, Kuru-Araz) is a village in the Armavir Province of Armenia.", "Half of the population (around 930) of the village are Yazidis."], "mentions": [{"id": 0, "start": 58, "end": 74, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 85, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 55, "end": 62, "ref_url": "Yazidis_in_Armenia", "ref_ids": ["6878423"], "sent_idx": 1}]} +{"id": "18271590", "title": "Yerazgavors, Armenia", "sentences": 
["Yerazgavors (, formerly \"Aralykh\") is a village in the Shirak Province of Armenia.", "It is named after the historic name of the ancient city of Shirakavan.", "The current village of Yerazgavors is located a few kilometers east of the historic city of Shirakavan-Yerazgavors.."], "mentions": [{"id": 0, "start": 55, "end": 70, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 74, "end": 81, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 92, "end": 102, "ref_url": "Shirakavan_(ancient_city)", "ref_ids": ["8547383"], "sent_idx": 2}]} +{"id": "18271595", "title": "Yeremes", "sentences": ["Yeremes (also, Irimis and Aramis) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 51, "end": 66, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 70, "end": 77, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271597", "title": "Yerizak", "sentences": ["Yerizak (; formerly, Ibish), is an abandoned village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 60, "end": 75, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 79, "end": 86, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271599", "title": "Yernjatap", "sentences": ["Yernjatap (, also Romanized as Yernjatap’, Yerinjatap, Yerndzhatap, Yerindzhatap, and Ernjatap; until 1949, Karabulag and Ghrabulagh) is a town in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 18, "end": 26, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 151, "end": 170, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 174, "end": 181, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271601", "title": "Zangakatun", "sentences": ["Zangakatun (; until 1948, Chanakhchi and Russified as Nizhniye Chanakhchi, from 
1948-1992, Sovetashen) is a village in the Ararat Province of Armenia.", "Zangakatun is the birthplace and burial site of Paruyr Sevak; his house is a museum.", "The village is also home to a 10th-century chapel."], "mentions": [{"id": 0, "start": 123, "end": 138, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 142, "end": 149, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 48, "end": 60, "ref_url": "Paruyr_Sevak", "ref_ids": ["4680249"], "sent_idx": 1}]} +{"id": "18271602", "title": "Sultanzade Sabahaddin", "sentences": ["Prince Sabahaddin de Neuchâtel (born Sultanzade Mehmed Sabâhaddin; 13 February 1877 in Constantinople — 30 June 1948 in Neuchâtel, Switzerland) was an Ottoman sociologist and thinker.", "Because of his threat to the ruling House of Osman (the Ottoman dynasty), of which he was a member, in the late 19th and early 20th centuries due to his political activity and push for democracy in the Empire, he was exiled.", "Although part of the ruling Ottoman dynasty himself, through his mother, Sultanzade Sabahaddin was known as a Young Turk and thus opposed to the absolute rule of the dynasty.", "As a follower of Émile Durkheim, Prince Sabahaddin is considered to be one of the founders of sociology in Turkey.", "He established the Private Enterprise and Decentralization Association () in 1902."], "mentions": [{"id": 0, "start": 73, "end": 83, "ref_url": "Sultanzade", "ref_ids": ["36191734"], "sent_idx": 2}, {"id": 1, "start": 87, "end": 101, "ref_url": "Istanbul", "ref_ids": ["3391396"], "sent_idx": 0}, {"id": 2, "start": 21, "end": 30, "ref_url": "Neuchâtel", "ref_ids": ["494581"], "sent_idx": 0}, {"id": 3, "start": 131, "end": 142, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 0}, {"id": 4, "start": 28, "end": 35, "ref_url": "Ottoman_Empire", "ref_ids": ["22278"], "sent_idx": 2}, {"id": 5, "start": 159, "end": 170, "ref_url": "Sociology", "ref_ids": ["18717981"], 
"sent_idx": 0}, {"id": 6, "start": 36, "end": 50, "ref_url": "House_of_Osman", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 110, "end": 120, "ref_url": "Young_Turk", "ref_ids": null, "sent_idx": 2}, {"id": 8, "start": 17, "end": 31, "ref_url": "Émile_Durkheim", "ref_ids": ["38218"], "sent_idx": 3}, {"id": 9, "start": 94, "end": 113, "ref_url": "Sociology_in_Turkey", "ref_ids": ["43534183"], "sent_idx": 3}, {"id": 10, "start": 19, "end": 70, "ref_url": "Private_Enterprise_and_Decentralization_Association", "ref_ids": null, "sent_idx": 4}]} +{"id": "18271605", "title": "Zarishat", "sentences": ["Zarishat (; formerly, \"Qoncalı\") is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 53, "end": 68, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 72, "end": 79, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271607", "title": "Zarinja", "sentences": ["Zarinja (; also Romanized as Zarindzha, Zarndzha, and Zarnja) is a town in the Aragatsotn Province of Armenia.", "The town contains the seventh-century church of Saint Khach, rebuilt in the tenth century."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 79, "end": 98, "ref_url": "Aragatsotn", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 102, "end": 109, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271609", "title": "Zar, Armenia", "sentences": ["Zar (), is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 28, "end": 43, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 54, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271617", "title": "Zaritap", "sentences": ["Zaritap (; also Romanized as Zarrit’ap’ and Zarritap; formerly Azizbekov, Pashalu and Pashaghu) is a village in the Vayots Dzor Province of 
Armenia.", "For a time the village was renamed in honor of Meshadi Azizbekov, an early Bolshevik and one of the 26 Baku Commissars.", "In the vicinity are 13th-century khachkars and the traces of an old fort."], "mentions": [{"id": 0, "start": 16, "end": 24, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 116, "end": 136, "ref_url": "Vayots_Dzor", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 140, "end": 147, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 47, "end": 64, "ref_url": "Meshadi_Azizbekov", "ref_ids": ["3588332"], "sent_idx": 1}, {"id": 4, "start": 100, "end": 118, "ref_url": "26_Baku_Commissars", "ref_ids": ["7855935"], "sent_idx": 1}, {"id": 5, "start": 33, "end": 41, "ref_url": "Khachkar", "ref_ids": ["1100953"], "sent_idx": 2}]} +{"id": "18271619", "title": "Zartonk", "sentences": ["Zartonk (), is a village in the Armavir Province of Armenia.", "Almost 42% (around 980 individuals) of the population are from the Yazidi minority."], "mentions": [{"id": 0, "start": 32, "end": 48, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 59, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 73, "ref_url": "Yazidis_in_Armenia", "ref_ids": ["6878423"], "sent_idx": 1}]} +{"id": "18271622", "title": "Zedea", "sentences": ["Zedea (; formerly, Zeyta and Zeita) is a small mountain hamlet in the Vayots Dzor Province of Armenia.", "There are a few khachkars in the vicinity."], "mentions": [{"id": 0, "start": 56, "end": 62, "ref_url": "Hamlet_(place)", "ref_ids": ["396466"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 90, "ref_url": "Vayots_Dzor_Province", "ref_ids": ["470518"], "sent_idx": 0}, {"id": 2, "start": 94, "end": 101, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 16, "end": 24, "ref_url": "Khachkar", "ref_ids": ["1100953"], "sent_idx": 1}]} +{"id": "18271623", 
"title": "Leak Bros", "sentences": ["Leak Bros is an American hip hop duo consisting of New York rapper Cage and New Jersey rapper Tame One."], "mentions": [{"id": 0, "start": 25, "end": 32, "ref_url": "Hip_hop", "ref_ids": ["2632114"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 59, "ref_url": "New_York_City", "ref_ids": ["645042"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 71, "ref_url": "Cage_(rapper)", "ref_ids": ["2326624"], "sent_idx": 0}, {"id": 3, "start": 76, "end": 86, "ref_url": "New_Jersey", "ref_ids": ["21648"], "sent_idx": 0}, {"id": 4, "start": 94, "end": 102, "ref_url": "Tame_One", "ref_ids": ["12157161"], "sent_idx": 0}]} +{"id": "18271626", "title": "Zeyva", "sentences": ["Zeyva or Dzeyva or Zeyvə may refer to:"], "mentions": []} +{"id": "18271628", "title": "Mayisyan, Armavir", "sentences": ["Mayisyan (), is a village in the Armavir Province of Armenia.", "It was founded as a collective farm (sovkhoz) and named \"Sovkhoz No.2\".", "Later it became to be known as \"Imeni Beriya\", named after Lavrentiy Beria.", "In 1953, it was renamed \"Imeni Zhdanova\"or \"Zhdanov\" after Andrei Zhdanov.", "In 2006, the village was renamed \"Mayisyan\", in memory of the Armenian victory over the Turks during the battles of Abaran and Sardarabad in May 1918."], "mentions": [{"id": 0, "start": 33, "end": 49, "ref_url": "Armavir_Province", "ref_ids": ["470500"], "sent_idx": 0}, {"id": 1, "start": 62, "end": 69, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 4}, {"id": 2, "start": 37, "end": 44, "ref_url": "Sovkhoz", "ref_ids": ["645495"], "sent_idx": 1}, {"id": 3, "start": 59, "end": 74, "ref_url": "Lavrentiy_Beria", "ref_ids": ["18390"], "sent_idx": 2}, {"id": 4, "start": 59, "end": 73, "ref_url": "Andrei_Zhdanov", "ref_ids": ["524393"], "sent_idx": 3}, {"id": 5, "start": 116, "end": 122, "ref_url": "Battle_of_Abaran", "ref_ids": ["5522133"], "sent_idx": 4}, {"id": 6, "start": 127, "end": 137, "ref_url": "Battle_of_Sardarabad", "ref_ids": 
["1828183"], "sent_idx": 4}]} +{"id": "18271637", "title": "Zhdanov, Lori", "sentences": ["Zhdanov (also, Kishlag) is a town in the Lori Province of Armenia.", "The town was renamed in honor of Andrei Zhdanov."], "mentions": [{"id": 0, "start": 41, "end": 54, "ref_url": "Lori_Province", "ref_ids": ["470506"], "sent_idx": 0}, {"id": 1, "start": 58, "end": 65, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 2, "start": 33, "end": 47, "ref_url": "Andrei_Zhdanov", "ref_ids": ["524393"], "sent_idx": 1}]} +{"id": "18271644", "title": "Zorak, Armenia", "sentences": ["Zorak (), is a village in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 30, "end": 45, "ref_url": "Ararat_Province", "ref_ids": ["470497"], "sent_idx": 0}, {"id": 1, "start": 49, "end": 56, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271646", "title": "Sleuth (video game)", "sentences": ["Sleuth is a text-based \"whodunit\" video game created by Eric N. Miller of Norland Software (now defunct).", "It was first released in 1983."], "mentions": [{"id": 0, "start": 24, "end": 32, "ref_url": "Whodunit", "ref_ids": ["69923"], "sent_idx": 0}, {"id": 1, "start": 34, "end": 44, "ref_url": "Video_game", "ref_ids": ["5363"], "sent_idx": 0}]} +{"id": "18271650", "title": "Zorakert", "sentences": ["Zorakert (; formerly,Balikli) is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 50, "end": 65, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271655", "title": "Zoravan", "sentences": ["Zoravan (; formerly, Ghargavank and Pokravan; historically and prior to 1972-80, Khacho) is a village situated along the lower slopes of Mount Ara in the Kotayk Province of Armenia.", "The village was established in 1972-80, during which time it was called Pokravan, for the purpose of developing a large 
stockyard or feedlot that would ultimately be utilized to breed a target of eleven-thousand animals.", "After reaching capacity, the village was renamed to Zoravan after the nearby Zoravar Church, also known as Gharghavank, built between 661 and 685 by Prince Grigor Mamikonian.", "The community currently has a school and a kindergarten.", "Drinking water for the village comes from a source near Karenis, while water for irrigation comes from Lake Sevan."], "mentions": [{"id": 0, "start": 137, "end": 146, "ref_url": "Mount_Ara", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 154, "end": 169, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 2, "start": 173, "end": 180, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}, {"id": 3, "start": 133, "end": 140, "ref_url": "Feedlot", "ref_ids": ["560807"], "sent_idx": 1}, {"id": 4, "start": 107, "end": 118, "ref_url": "Gharghavank", "ref_ids": ["24866252"], "sent_idx": 2}, {"id": 5, "start": 156, "end": 173, "ref_url": "Grigor_Mamikonian", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 0, "end": 14, "ref_url": "Drinking_water", "ref_ids": ["198725"], "sent_idx": 4}, {"id": 7, "start": 56, "end": 63, "ref_url": "Karenis", "ref_ids": ["18260237"], "sent_idx": 4}, {"id": 8, "start": 81, "end": 91, "ref_url": "Irrigation", "ref_ids": ["42261"], "sent_idx": 4}, {"id": 9, "start": 103, "end": 113, "ref_url": "Lake_Sevan", "ref_ids": ["792215"], "sent_idx": 4}]} +{"id": "18271656", "title": "Juneau Police Department", "sentences": ["The Juneau Police Department (JPD) is a law enforcement agency which serves Juneau, Alaska.", "The department consists of two divisions: Administrative Support Services and Operations.", "Within these divisions there are five units: Patrol, Investigations, Community Service, Records and Dispatch.", "The department includes specialists in SWAT, bomb disposal and hostage negotiation.", "The Chief is Ed Mercer, the Capital's First Alaskan Native Police 
Chief.", "In April 2008, the police reversed its previous policy of not reporting rapes as sex crimes in its daily briefing report.", "In July 2008, the police department reported an increase in robberies.", "Juneau Crime Line, a non-profit organization, offers rewards for anonymous crime tips in cooperation with the police department.", "On December 1, 2009 Juneau PD became the first agency accredited by Alaska Law Enforcement Agency Accreditation Commission ([ALEAAC]http://www.aacop.org/ALEAAC.htm)."], "mentions": [{"id": 0, "start": 40, "end": 62, "ref_url": "Police_force", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 20, "end": 26, "ref_url": "Juneau,_Alaska", "ref_ids": ["87469"], "sent_idx": 8}, {"id": 2, "start": 68, "end": 74, "ref_url": "Alaska", "ref_ids": ["624"], "sent_idx": 8}, {"id": 3, "start": 39, "end": 43, "ref_url": "SWAT", "ref_ids": ["146702"], "sent_idx": 3}, {"id": 4, "start": 4, "end": 9, "ref_url": "Chief_of_police", "ref_ids": ["764281"], "sent_idx": 4}]} +{"id": "18271659", "title": "Zovaber, Gegharkunik", "sentences": ["Zovaber (; formerly, Yaydzhi or Yayji) is a village in the Gegharkunik Province of Armenia.", "It was founded in 1830 by emigrants from Maku.", "The village has a church of S. 
Stepanos, built in 1860."], "mentions": [{"id": 0, "start": 59, "end": 79, "ref_url": "Gegharkunik_Province", "ref_ids": ["470502"], "sent_idx": 0}, {"id": 1, "start": 83, "end": 90, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271662", "title": "Zovaber, Syunik", "sentences": ["Zovaber (; also, Yaydzhi, Yaudzhi, and Yayji) is a town in the Syunik Province of Armenia."], "mentions": [{"id": 0, "start": 63, "end": 78, "ref_url": "Syunik_Region", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 82, "end": 89, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271664", "title": "Zovasar", "sentences": ["Zovasar (; until 1978, Aghakchik), is a village in the Aragatsotn Province of Armenia."], "mentions": [{"id": 0, "start": 55, "end": 74, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 78, "end": 85, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271670", "title": "Uhrenmuseum Beyer", "sentences": ["The Uhrenmuseum Beyer (Beyer Watch and Clock Museum) is located in the heart of the city of Zürich, Switzerland and is one of the world's leading private museums dedicated to horology.", "The museum is located on the lower level of Bahnhofstrasse 31, the main shopping boulevard.", "It is affiliated with Chronometrie Beyer, a high grade watch retailer run by the same family for generations.", "The core of the museum was acquired during the life of Theodore 'Teddy' Beyer, a pioneer in collecting antique timekeepers.", "The collection is made up of premechanical timekeepers (sundials, sandglasses, water and fire clocks) as well as clocks and watches from around the world and covering all eras.", "The collection is particularly strong regarding early clocks and watches, including several pieces from the gothic and renaissance era, as well as complicated pieces with many complications.", "Many of the displayed pieces are unique and/or significant in 
the history of watchmaking, and therefore are often loaned out to major museums around the world.", "The collection includes one of the early marine chronometers by Ferdinand Berthoud, a pendule sympathique by Breguet, a pocket watch with astronomical indications by Auch, several bespoke late 20th century watches by George Daniels, one of the few reproductions of the astrarium by De Dondi to name just a few highlights.", "Furthermore, there are superb Geneva made enameled pocket watches, and a most instructive timeline illustrating the history of the Neuchâtel pendule.", "Additionally there is a good small display of locally made clocks and watches including such Zurich makers as Bachoffner, Liechti and Ochsner.", "Similar museums:"], "mentions": [{"id": 0, "start": 29, "end": 34, "ref_url": "Watch", "ref_ids": ["60883"], "sent_idx": 0}, {"id": 1, "start": 39, "end": 44, "ref_url": "Clock", "ref_ids": ["6449"], "sent_idx": 0}, {"id": 2, "start": 92, "end": 98, "ref_url": "Zürich", "ref_ids": ["40334603"], "sent_idx": 0}, {"id": 3, "start": 100, "end": 111, "ref_url": "Switzerland", "ref_ids": ["26748"], "sent_idx": 0}, {"id": 4, "start": 8, "end": 14, "ref_url": "Museum", "ref_ids": ["37585"], "sent_idx": 10}, {"id": 5, "start": 175, "end": 183, "ref_url": "Horology", "ref_ids": ["332592"], "sent_idx": 0}, {"id": 6, "start": 108, "end": 114, "ref_url": "Gothic_art", "ref_ids": ["894938"], "sent_idx": 5}, {"id": 7, "start": 119, "end": 130, "ref_url": "Renaissance", "ref_ids": ["25532"], "sent_idx": 5}, {"id": 8, "start": 176, "end": 189, "ref_url": "Complication_(horology)", "ref_ids": ["1696678"], "sent_idx": 5}, {"id": 9, "start": 41, "end": 59, "ref_url": "Marine_chronometer", "ref_ids": ["10553773"], "sent_idx": 7}, {"id": 10, "start": 64, "end": 82, "ref_url": "Ferdinand_Berthoud", "ref_ids": ["1177324"], "sent_idx": 7}, {"id": 11, "start": 109, "end": 116, "ref_url": "Breguet_(watch)", "ref_ids": null, "sent_idx": 7}, {"id": 12, "start": 217, "end": 231, 
"ref_url": "George_Daniels_(watchmaker)", "ref_ids": ["3586326"], "sent_idx": 7}, {"id": 13, "start": 269, "end": 278, "ref_url": "Astrarium", "ref_ids": ["6296791"], "sent_idx": 7}, {"id": 14, "start": 282, "end": 290, "ref_url": "Giovanni_Dondi_dell'Orologio", "ref_ids": ["7105875"], "sent_idx": 7}, {"id": 15, "start": 30, "end": 36, "ref_url": "Geneva", "ref_ids": ["12521"], "sent_idx": 8}, {"id": 16, "start": 42, "end": 50, "ref_url": "Vitreous_enamel", "ref_ids": ["314629"], "sent_idx": 8}, {"id": 17, "start": 51, "end": 63, "ref_url": "Pocket_watch", "ref_ids": ["767894"], "sent_idx": 8}, {"id": 18, "start": 131, "end": 140, "ref_url": "Neuchâtel", "ref_ids": ["494581"], "sent_idx": 8}]} +{"id": "18271678", "title": "C. americanus", "sentences": ["C. americanus may refer to:"], "mentions": []} +{"id": "18271681", "title": "Zovashen (Dzhannatlu), Ararat", "sentences": ["Zovashen (also, Dzhannatlu) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 45, "end": 60, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 64, "end": 71, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271689", "title": "SunTrust Financial Centre", "sentences": ["The SunTrust Financial Centre is a 525 ft (160m) skyscraper in Tampa, Florida.", "It was completed in 1992 and has 36 floors.", "Cooper Carry designed the building, which is the 4th tallest in Tampa.", "It was designed to take a 110 mph wind load.", "The iconic pyramid roof has variable lighting set to the season or events.", "Website -"], "mentions": [{"id": 0, "start": 49, "end": 59, "ref_url": "Skyscraper", "ref_ids": ["29485"], "sent_idx": 0}, {"id": 1, "start": 64, "end": 69, "ref_url": "Tampa", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 70, "end": 77, "ref_url": "Florida", "ref_ids": ["18933066"], "sent_idx": 0}, {"id": 3, "start": 0, "end": 12, "ref_url": "Cooper_Carry", "ref_ids": ["8208218"], "sent_idx": 2}]} +{"id": 
"18271692", "title": "Zovashen (Keshishveran), Ararat", "sentences": ["Zovashen (also, Keshishveran) is a town in the Ararat Province of Armenia."], "mentions": [{"id": 0, "start": 47, "end": 62, "ref_url": "Ararat_(province)", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 66, "end": 73, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271710", "title": "Zovashen, Ararat", "sentences": ["Zovashen, Ararat may refer to:"], "mentions": []} +{"id": "18271712", "title": "Tala, Bhutan", "sentences": ["Tala is a town in Chukha District in southwestern Bhutan, known for the Tala Hydroelectricity Project."], "mentions": [{"id": 0, "start": 10, "end": 14, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 18, "end": 33, "ref_url": "Chukha_District", "ref_ids": ["766285"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 56, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}, {"id": 3, "start": 72, "end": 101, "ref_url": "Tala_Hydroelectricity_Project", "ref_ids": null, "sent_idx": 0}]} +{"id": "18271719", "title": "Zovashen, Kotayk", "sentences": ["Zovashen (, formerly, \"Dallaklu\") is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 54, "end": 69, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 73, "end": 80, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271721", "title": "Zovk", "sentences": ["Zovk (, formerly Kyulludzha), is a village in the Kotayk Province of Armenia."], "mentions": [{"id": 0, "start": 50, "end": 65, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 76, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271726", "title": "Zovuni, Aragatsotn", "sentences": ["Zovuni (also, Molla Kasum), is an abandoned village in the Aragatsotn Province of Armenia.", "It was formed during the 1828 by Armenian migrants from the village of 
Akori in Western Armenia.", "The villages was abandoned in 1965 and its ruins now lie beneath the Aparan Reservoir.", "The settlement is famous for the mausoleum of Vardan Mamikonian, the Poghos-Petros Church and the Tukh Manuk shrine.", "The Zovuni cemetery sits to the east of the church, across a ravine.", "The monuments sit in close proximity to the village of Jrambar.", "After the abandonment of the village, the population was moved to a new village called Zovuni, built at the northwestern suburbs of Yerevan, currently part of the Kotayk Province."], "mentions": [{"id": 0, "start": 59, "end": 78, "ref_url": "Aragatsotn_Province", "ref_ids": ["470494"], "sent_idx": 0}, {"id": 1, "start": 33, "end": 40, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 1}, {"id": 2, "start": 80, "end": 95, "ref_url": "Western_Armenia", "ref_ids": ["3492582"], "sent_idx": 1}, {"id": 3, "start": 33, "end": 42, "ref_url": "Mausoleum", "ref_ids": ["231168"], "sent_idx": 3}, {"id": 4, "start": 46, "end": 63, "ref_url": "Vardan_Mamikonian", "ref_ids": ["3067351"], "sent_idx": 3}, {"id": 5, "start": 69, "end": 89, "ref_url": "Saint_Paul_and_Peter_Church,_Zovuni", "ref_ids": null, "sent_idx": 3}, {"id": 6, "start": 55, "end": 62, "ref_url": "Jrambar", "ref_ids": ["18257115"], "sent_idx": 5}, {"id": 7, "start": 87, "end": 93, "ref_url": "Zovuni", "ref_ids": ["16051784"], "sent_idx": 6}, {"id": 8, "start": 132, "end": 139, "ref_url": "Yerevan", "ref_ids": ["34352"], "sent_idx": 6}, {"id": 9, "start": 163, "end": 178, "ref_url": "Kotayk_Province", "ref_ids": ["470504"], "sent_idx": 6}]} +{"id": "18271731", "title": "List of Minnesota North Stars draft picks", "sentences": ["This is a complete list of ice hockey players who were drafted in the National Hockey League Entry Draft by the Minnesota North Stars franchise.", "It includes every player the franchise drafted prior to their move to Dallas, from 1967 to 1992, regardless of whether they played for the team."], "mentions": 
[{"id": 0, "start": 70, "end": 92, "ref_url": "National_Hockey_League", "ref_ids": ["21809"], "sent_idx": 0}, {"id": 1, "start": 93, "end": 104, "ref_url": "NHL_Entry_Draft", "ref_ids": ["858982"], "sent_idx": 0}, {"id": 2, "start": 112, "end": 133, "ref_url": "Minnesota_North_Stars", "ref_ids": ["270840"], "sent_idx": 0}, {"id": 3, "start": 70, "end": 76, "ref_url": "Dallas_Stars", "ref_ids": ["73135"], "sent_idx": 1}, {"id": 4, "start": 83, "end": 87, "ref_url": "1967_NHL_Amateur_Draft", "ref_ids": ["956350"], "sent_idx": 1}, {"id": 5, "start": 91, "end": 95, "ref_url": "1992_NHL_Entry_Draft", "ref_ids": ["859594"], "sent_idx": 1}]} +{"id": "18271733", "title": "Zuygaghbyur", "sentences": ["Zuygaghbyur (, also Romanized as Zuygaghpyur and Zuygakhpyur; formerly, \"Chiftali\") is a village in the Shirak Province of Armenia."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Romanize", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 104, "end": 119, "ref_url": "Shirak_Province", "ref_ids": ["470511"], "sent_idx": 0}, {"id": 2, "start": 123, "end": 130, "ref_url": "Armenia", "ref_ids": ["10918072"], "sent_idx": 0}]} +{"id": "18271735", "title": "Northern Co-operative Society", "sentences": ["Northern Co-operative Society Limited( abbreviated to Norco), previously named Northern Co-operative Company Limited, was a local consumer co-operative trading in Aberdeen, Scotland, from 1861 to 1993.", "It operated supermarkets and other businesses throughout Aberdeenshire, and employed 2000 people in 1992.", "It was put into the hands of a receiver for liquidation in 1993, as a result of financial difficulties that\" The Guardian\" newspaper attributed to\" an over- ambitious building and development programme\" and failed attempts to dispose of the entire business as a going concern, despite the successful sale of its dairy, five pharmacies, and then several supermarkets to Argyll Stores and the Co-operative Wholesale Society( CWS)."], "mentions": [{"id": 0, 
"start": 130, "end": 151, "ref_url": "Consumer_co-operative", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 163, "end": 171, "ref_url": "Aberdeen", "ref_ids": ["1627"], "sent_idx": 0}, {"id": 2, "start": 173, "end": 181, "ref_url": "Scotland", "ref_ids": ["26994"], "sent_idx": 0}, {"id": 3, "start": 31, "end": 39, "ref_url": "Receiver_(legal)", "ref_ids": null, "sent_idx": 2}, {"id": 4, "start": 44, "end": 55, "ref_url": "Liquidation", "ref_ids": ["565034"], "sent_idx": 2}, {"id": 5, "start": 262, "end": 275, "ref_url": "Going_concern", "ref_ids": ["3423312"], "sent_idx": 2}, {"id": 6, "start": 369, "end": 382, "ref_url": "Argyll_Stores", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 391, "end": 421, "ref_url": "The_Co-operative_Group", "ref_ids": ["1434174"], "sent_idx": 2}]} +{"id": "18271760", "title": "Bachap", "sentences": ["Bachap is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 12, "end": 16, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 35, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 52, "end": 58, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271775", "title": "Chalaika", "sentences": ["Chalaika is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 14, "end": 18, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 37, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 54, "end": 60, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271788", "title": "Daga, Bhutan", "sentences": ["Daga , also officially referred to as Dagana, is a town in Goshi Gewog, Dagana District in southwestern Bhutan.", "It is the administrative capital, Dzongkhag Thromde, of the district.", "In 2005, Daga had a population of 1,146."], "mentions": [{"id": 0, "start": 51, "end": 55, "ref_url": 
"Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 70, "ref_url": "Goshi_Gewog", "ref_ids": ["30019568"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 87, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 3, "start": 104, "end": 110, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}, {"id": 4, "start": 44, "end": 51, "ref_url": "Thromde", "ref_ids": ["30636280"], "sent_idx": 1}]} +{"id": "18271789", "title": "Citizens Bank Building (Tampa, Florida)", "sentences": ["Citizens Bank Building was a building in Tampa, Florida.", "It is considered Tampa's first high rise and it was 145 ft (44m) tall.", "The building was completed in 1913 with 10 stories, demolished in 1978, by which time two more floors had been added and had 12 floors.", "Tampa architect Fred J. James had an office in the building."], "mentions": [{"id": 0, "start": 0, "end": 5, "ref_url": "Tampa", "ref_ids": null, "sent_idx": 3}, {"id": 1, "start": 48, "end": 55, "ref_url": "Florida", "ref_ids": ["18933066"], "sent_idx": 0}, {"id": 2, "start": 16, "end": 29, "ref_url": "Fred_J._James", "ref_ids": ["41226121"], "sent_idx": 3}]} +{"id": "18271793", "title": "Covarrubias (surname)", "sentences": ["Covarrubias is a surname in the Spanish language.", "It is a surname that indicates place of origin, in particular, in the village and municipality of Covarrubias (province of Burgos, Spain), village that was founded in the 7th century by the visigothic king Chindasuinth.", "It is compounded of the words \"cova\" (cave), \"ruber\" (red) and the suffix \"ia\" (that has the quality of), meaning \"the cave that is red\" or simply \"red cave\"."], "mentions": [{"id": 0, "start": 8, "end": 15, "ref_url": "Surname", "ref_ids": ["72243"], "sent_idx": 1}, {"id": 1, "start": 32, "end": 48, "ref_url": "Spanish_language", "ref_ids": ["26825"], "sent_idx": 0}, {"id": 2, "start": 98, "end": 109, "ref_url": "Covarrubias,_Spain", "ref_ids": null, "sent_idx": 1}, 
{"id": 3, "start": 111, "end": 129, "ref_url": "Province_of_Burgos", "ref_ids": ["143642"], "sent_idx": 1}, {"id": 4, "start": 131, "end": 136, "ref_url": "Spain", "ref_ids": ["26667"], "sent_idx": 1}, {"id": 5, "start": 190, "end": 200, "ref_url": "Visigothic", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 206, "end": 218, "ref_url": "Chindasuinth", "ref_ids": ["30864826"], "sent_idx": 1}, {"id": 7, "start": 67, "end": 73, "ref_url": "Suffix", "ref_ids": ["5564386"], "sent_idx": 2}]} +{"id": "18271798", "title": "Telling Right From Wrong", "sentences": ["Telling Right From Wrong is a book by Timothy J. Cooney.", "Cooney was not able to get his first two books published, so in an attempt to legitimize his third book, he submitted a falsified letter to Random House.", "They ultimately reversed their plan to publish the book after learning of the true origins of the letter.", "\"Telling Right From Wrong\" was later published by Prometheus Books."], "mentions": [{"id": 0, "start": 140, "end": 152, "ref_url": "Random_House", "ref_ids": ["182947"], "sent_idx": 1}, {"id": 1, "start": 50, "end": 66, "ref_url": "Prometheus_Books", "ref_ids": ["2216375"], "sent_idx": 3}]} +{"id": "18271800", "title": "Dam, Bhutan", "sentences": ["Dam, Bhutan is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 17, "end": 21, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 25, "end": 40, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 5, "end": 11, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271810", "title": "Pinsoperi", "sentences": ["Pinsoperi is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 15, "end": 19, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 23, "end": 38, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 55, "end": 61, "ref_url": 
"Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271821", "title": "Thumgaon", "sentences": ["Thumgaon is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 14, "end": 18, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 22, "end": 37, "ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 54, "end": 60, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271826", "title": "Maria Åkerblom", "sentences": ["Ida Maria Åkerblom (September 14, 1898 – February 25, 1981) was the leader of the Finnish Åkerblom Movement, an evangelical movement sometimes called a \"cult\".", "She had also served some time in prison.", "Her movement deemed her a prophet and began in the 1920s.", "Maria Åkerblom is characterized by Aarni Voipio as a \"sleeping preacher\", that is, a person who peaches in a state of trance."], "mentions": [{"id": 0, "start": 82, "end": 89, "ref_url": "Finland", "ref_ids": ["10577"], "sent_idx": 0}, {"id": 1, "start": 90, "end": 107, "ref_url": "Åkerblom_Movement", "ref_ids": ["20710515"], "sent_idx": 0}, {"id": 2, "start": 112, "end": 123, "ref_url": "Evangelical", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 153, "end": 157, "ref_url": "Cult", "ref_ids": ["1008588"], "sent_idx": 0}, {"id": 4, "start": 33, "end": 39, "ref_url": "Prison", "ref_ids": ["19008450"], "sent_idx": 1}, {"id": 5, "start": 26, "end": 33, "ref_url": "Prophet", "ref_ids": ["24805"], "sent_idx": 2}, {"id": 6, "start": 54, "end": 71, "ref_url": "Sleeping_preacher", "ref_ids": ["54252969"], "sent_idx": 3}, {"id": 7, "start": 118, "end": 124, "ref_url": "Trance", "ref_ids": ["28867511"], "sent_idx": 3}]} +{"id": "18271831", "title": "Usak, Bhutan", "sentences": ["Usak is a town in Dagana District in southwestern Bhutan."], "mentions": [{"id": 0, "start": 10, "end": 14, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 18, "end": 33, 
"ref_url": "Dagana_District", "ref_ids": ["766289"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 56, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271849", "title": "Isan people", "sentences": ["The Isan people (, , ; ) or Northeastern Thai people are an ethno-regional group native to Northeastern Thailand (\"Isan\") with an estimated population of about 22 million.", "Like Thais (Siamese) and Lao, they belong to the linguistic family of Tai peoples.", "In a broader sense, everyone who comes from the 20 northeastern provinces of Thailand may be called \"khon isan\".", "In the narrower sense, the term refers only to the ethnic Lao who make up the majority population in most parts of the region.", "Following the separation of Isan from the state of Laos, its integration into the Thai nation state and the central government's policy of \"Thaification\", they have developed a distinct regional identity that differs both from the Laotians of Laos and the Thais of Central Thailand.", "Alternative terms for this group are \"T(h)ai Isan\", \"Thai-Lao\", \"Lao Isan\", or \"Isan Lao\".", "Almost all inhabitants of Thailand's Northeast are Thai nationals.", "Yet a majority of them (approximately 80%) are ethnically Lao and speak a variant of the Lao language when at home (the Lao dialects spoken in Northeastern Thailand are summarized as Lao-Isan language).", "To avoid being subjected to derogatory stereotypes and perceptions associated with Lao-speaking people, most prefer to call themselves \"khon isan\"."], "mentions": [{"id": 0, "start": 60, "end": 65, "ref_url": "Ethnic_group", "ref_ids": ["105004"], "sent_idx": 0}, {"id": 1, "start": 186, "end": 194, "ref_url": "Regionalism_(politics)", "ref_ids": ["868284"], "sent_idx": 4}, {"id": 2, "start": 143, "end": 164, "ref_url": "Isan", "ref_ids": ["203860"], "sent_idx": 7}, {"id": 3, "start": 5, "end": 20, "ref_url": "Thai_people", "ref_ids": ["2670504"], "sent_idx": 1}, {"id": 4, "start": 83, 
"end": 86, "ref_url": "Lao_people", "ref_ids": ["197158"], "sent_idx": 8}, {"id": 5, "start": 70, "end": 81, "ref_url": "Tai_peoples", "ref_ids": ["32411786"], "sent_idx": 1}, {"id": 6, "start": 51, "end": 61, "ref_url": "Lao_people", "ref_ids": ["197158"], "sent_idx": 3}, {"id": 7, "start": 140, "end": 152, "ref_url": "Thaification", "ref_ids": ["879147"], "sent_idx": 4}, {"id": 8, "start": 89, "end": 101, "ref_url": "Lao_language", "ref_ids": ["140914"], "sent_idx": 7}, {"id": 9, "start": 183, "end": 200, "ref_url": "Isan_language", "ref_ids": ["501222"], "sent_idx": 7}]} +{"id": "18271855", "title": "Leon Despres", "sentences": ["Leon Mathis Despres (February 2, 1908 – May 6, 2009) was an American author, attorney and politician.", "He was best known as a long-time alderman in Chicago, where he regularly disagreed with then-mayor Richard J. Daley, often engaging in loud arguments and debates on the assembly floor."], "mentions": [{"id": 0, "start": 60, "end": 68, "ref_url": "Americans", "ref_ids": ["19792942"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 75, "ref_url": "Author", "ref_ids": ["914"], "sent_idx": 0}, {"id": 2, "start": 77, "end": 85, "ref_url": "Lawyer", "ref_ids": ["17541"], "sent_idx": 0}, {"id": 3, "start": 90, "end": 100, "ref_url": "Politician", "ref_ids": ["55959"], "sent_idx": 0}, {"id": 4, "start": 33, "end": 41, "ref_url": "Alderman", "ref_ids": ["286925"], "sent_idx": 1}, {"id": 5, "start": 45, "end": 52, "ref_url": "Chicago", "ref_ids": ["6886"], "sent_idx": 1}, {"id": 6, "start": 99, "end": 115, "ref_url": "Richard_J._Daley", "ref_ids": ["40221"], "sent_idx": 1}]} +{"id": "18271859", "title": "Rawlston Masaniai", "sentences": ["Rawlston\" Rawley\" Masaniai( born May 13, 1983) is an American Samoan soccer player who plays as a midfielder.", "He was one of the first players from American Samoa to play at international level."], "mentions": [{"id": 0, "start": 53, "end": 67, "ref_url": "American_Samoa", "ref_ids": ["20611195"], 
"sent_idx": 0}, {"id": 1, "start": 69, "end": 75, "ref_url": "Association_football", "ref_ids": ["10568"], "sent_idx": 0}, {"id": 2, "start": 98, "end": 108, "ref_url": "Midfielder", "ref_ids": ["548981"], "sent_idx": 0}]} +{"id": "18271860", "title": "Laura Miller (journalist)", "sentences": ["Laura Miller is a Scottish broadcast journalist, currently working for BBC Scotland.", "Miller is the main presenter of \"Reporting Scotland\" and is also a presenter on \"The Nine\".", "She was the former main presenter for the East Central Scotland edition of \"STV News at Six\".", "Miller studied broadcast journalism at University College Falmouth in Cornwall, where she produced an award-winning documentary on the 2004 Asian tsunami.", "In 2007, she joined STV, as part of \"Scotland Today\".", "Miller took maternity leave in May 2016, replaced by Lucy Whyte acting as \"STV News at Six\" presenter in the East region.", "She returned to the Edinburgh newsdesk on 15 May 2017.", "In 2018, Miller made a small credited appearance in \"\", where she played a news reporter who was reporting on an alien attack on New York.", "Laura Miller became one of the main presenters of \"Reporting Scotland\" in October 2019.", "She fronts the programme Monday to Wednesday."], "mentions": [{"id": 0, "start": 71, "end": 83, "ref_url": "BBC_Scotland", "ref_ids": ["569201"], "sent_idx": 0}, {"id": 1, "start": 51, "end": 69, "ref_url": "Reporting_Scotland", "ref_ids": ["1065846"], "sent_idx": 8}, {"id": 2, "start": 81, "end": 89, "ref_url": "The_Nine_(BBC_Scotland)", "ref_ids": ["59936028"], "sent_idx": 1}, {"id": 3, "start": 75, "end": 90, "ref_url": "STV_News", "ref_ids": ["22081590"], "sent_idx": 5}, {"id": 4, "start": 39, "end": 66, "ref_url": "University_College_Falmouth", "ref_ids": null, "sent_idx": 3}, {"id": 5, "start": 70, "end": 78, "ref_url": "Cornwall", "ref_ids": ["5648"], "sent_idx": 3}, {"id": 6, "start": 135, "end": 153, "ref_url": "2004_Asian_tsunami", "ref_ids": null, 
"sent_idx": 3}, {"id": 7, "start": 37, "end": 51, "ref_url": "Scotland_Today", "ref_ids": ["3980806"], "sent_idx": 4}, {"id": 8, "start": 53, "end": 63, "ref_url": "Lucy_Whyte", "ref_ids": ["50521809"], "sent_idx": 5}, {"id": 9, "start": 51, "end": 69, "ref_url": "Reporting_Scotland", "ref_ids": ["1065846"], "sent_idx": 8}]} +{"id": "18271870", "title": "Gasa, Bhutan", "sentences": ["\"Not to be confused with\": Gasa Dzong \"or\" Gasa District\nGasa is a town near Gasa Dzong in Gasa District in northwestern Bhutan.", "At the 2005 census, its population was 3,116."], "mentions": [{"id": 0, "start": 27, "end": 37, "ref_url": "Gasa_Dzong", "ref_ids": ["7145241"], "sent_idx": 0}, {"id": 1, "start": 43, "end": 56, "ref_url": "Gasa_District", "ref_ids": ["766291"], "sent_idx": 0}, {"id": 2, "start": 67, "end": 71, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 3, "start": 121, "end": 127, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271886", "title": "Kencho, Bhutan", "sentences": ["Kencho is a town in Gasa District in northwestern Bhutan."], "mentions": [{"id": 0, "start": 12, "end": 16, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 20, "end": 33, "ref_url": "Gasa_District", "ref_ids": ["766291"], "sent_idx": 0}, {"id": 2, "start": 50, "end": 56, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271916", "title": "Laya, Bhutan", "sentences": ["Laya, Bhutan is a town in Laya Gewog in Gasa District in northwestern Bhutan.", "It is inhabited by the indigenous Layap people, and is the highest settlement in the country."], "mentions": [{"id": 0, "start": 18, "end": 22, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 26, "end": 36, "ref_url": "Laya_Gewog", "ref_ids": ["30019591"], "sent_idx": 0}, {"id": 2, "start": 40, "end": 53, "ref_url": "Gasa_District", "ref_ids": ["766291"], "sent_idx": 0}, {"id": 3, "start": 6, "end": 12, "ref_url": "Bhutan", 
"ref_ids": ["2421391"], "sent_idx": 0}, {"id": 4, "start": 34, "end": 39, "ref_url": "Layap", "ref_ids": ["1516765"], "sent_idx": 1}]} +{"id": "18271931", "title": "Lunana", "sentences": ["Lunana is a remote village in Gasa District in northwestern Bhutan.", "It is the capital of Lunana Gewog.", "It's also a Minecraft Bedrock Edition seed for a snow village.", "Just type \"LUNANA\" on the seed and then you're done."], "mentions": [{"id": 0, "start": 54, "end": 61, "ref_url": "Village", "ref_ids": ["53509"], "sent_idx": 2}, {"id": 1, "start": 30, "end": 43, "ref_url": "Gasa_District", "ref_ids": ["766291"], "sent_idx": 0}, {"id": 2, "start": 60, "end": 66, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}, {"id": 3, "start": 21, "end": 33, "ref_url": "Lunana_Gewog", "ref_ids": ["30019592"], "sent_idx": 1}, {"id": 4, "start": 12, "end": 37, "ref_url": "Minecraft_Pocket_Edition", "ref_ids": null, "sent_idx": 2}]} +{"id": "18271970", "title": "Tamji", "sentences": ["Tamji is a town in Gasa District in northwestern Bhutan."], "mentions": [{"id": 0, "start": 11, "end": 15, "ref_url": "Town", "ref_ids": ["52911"], "sent_idx": 0}, {"id": 1, "start": 19, "end": 32, "ref_url": "Gasa_District", "ref_ids": ["766291"], "sent_idx": 0}, {"id": 2, "start": 49, "end": 55, "ref_url": "Bhutan", "ref_ids": ["2421391"], "sent_idx": 0}]} +{"id": "18271973", "title": "M. 
Joseph Conroy", "sentences": ["Michael Joseph \"Joe\" Conroy (September 20, 1874September 5, 1946) was the second Mayor of Anchorage, Alaska, serving from 1923 to 1924."], "mentions": [{"id": 0, "start": 81, "end": 86, "ref_url": "Mayor_of_Anchorage", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 90, "end": 107, "ref_url": "Anchorage,_Alaska", "ref_ids": ["7004698"], "sent_idx": 0}]} +{"id": "18271986", "title": "Confused flour beetle", "sentences": ["The confused flour beetle (\"Tribolium confusum\"), a type of darkling beetle known as a flour beetle, is a common pest insect known for attacking and infesting stored flour and grain.", "They are one of the most common and most destructive insect pests for grain and other food products stored in silos, warehouses, grocery stores, and homes."], "mentions": [{"id": 0, "start": 60, "end": 75, "ref_url": "Darkling_beetle", "ref_ids": ["797888"], "sent_idx": 0}, {"id": 1, "start": 13, "end": 25, "ref_url": "Flour_beetle", "ref_ids": ["14563934"], "sent_idx": 0}, {"id": 2, "start": 13, "end": 18, "ref_url": "Flour", "ref_ids": ["56232"], "sent_idx": 0}, {"id": 3, "start": 70, "end": 75, "ref_url": "Grain", "ref_ids": ["27988307"], "sent_idx": 1}]} +{"id": "18272022", "title": "Martin Sherson", "sentences": ["Martin Sherson (1563–1588) was an English Roman Catholic priest.", "A native of Yorkshire, he matriculated at St John's College, Oxford in 1575 at the age of twelve, becoming \"a poor scholar of George Mannering who taught Rhetoric there\".", "He arrived at the English College at Reims, 1 April 1580.", "He was confirmed by Bishop Goldwell, 11 June 1580; left for Rome, 20 March; and entered the English College, Rome 8 May 1581, aged eighteen, where \"through an over-zealous application to study and prayer he began to spit blood\".", "He returned to Reims, 22 June 1585; and was ordained: sub-deacon in the chapel of the Holy Cross in Reims Cathedral, 21 September by Mgr Louis de Brezé, Bishop of Meaux; deacon at 
Laon, 14 March; and priest at Laon, 5 April 1586.", "He left for England, 16 June, and was imprisoned in the Marshalsea before 22 December 1586.", "He was still there in March 1588, and died there soon after, aged twenty-five."], "mentions": [{"id": 0, "start": 42, "end": 56, "ref_url": "Roman_Catholic", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 12, "end": 21, "ref_url": "Yorkshire", "ref_ids": ["36637"], "sent_idx": 1}, {"id": 2, "start": 42, "end": 67, "ref_url": "St_John's_College,_Oxford", "ref_ids": ["128382"], "sent_idx": 1}, {"id": 3, "start": 15, "end": 20, "ref_url": "Reims", "ref_ids": ["48845"], "sent_idx": 4}, {"id": 4, "start": 20, "end": 35, "ref_url": "Bishop_Goldwell", "ref_ids": null, "sent_idx": 3}, {"id": 5, "start": 92, "end": 113, "ref_url": "English_College,_Rome", "ref_ids": ["6140167"], "sent_idx": 3}, {"id": 6, "start": 100, "end": 115, "ref_url": "Reims_Cathedral", "ref_ids": ["33622280"], "sent_idx": 4}, {"id": 7, "start": 137, "end": 151, "ref_url": "Louis_de_Brezé", "ref_ids": null, "sent_idx": 4}, {"id": 8, "start": 153, "end": 168, "ref_url": "Bishop_of_Meaux", "ref_ids": null, "sent_idx": 4}, {"id": 9, "start": 180, "end": 184, "ref_url": "Laon", "ref_ids": ["83171"], "sent_idx": 4}, {"id": 10, "start": 56, "end": 66, "ref_url": "Marshalsea", "ref_ids": ["14876329"], "sent_idx": 5}]} +{"id": "18272029", "title": "Minna Craucher", "sentences": ["Minna Craucher (23 August 1891 in Pirkkala – 8 March 1932 in Helsinki) was the false name of Maria Vilhelmiina Lindell, a Finnish socialite and spy.", "Her home was a noted salon for various writers and artist.", "She also did espionage, originally for the Cheka, and was arrested three times for fraud.", "She also had connections to the right-wing Lapua Movement.", "She became the subject of several books and stories.", "In 1932 she was murdered with a shot to the head."], "mentions": [{"id": 0, "start": 34, "end": 42, "ref_url": "Pirkkala", "ref_ids": ["536899"], "sent_idx": 0}, 
{"id": 1, "start": 61, "end": 69, "ref_url": "Helsinki", "ref_ids": ["13696"], "sent_idx": 0}, {"id": 2, "start": 43, "end": 48, "ref_url": "Cheka", "ref_ids": ["6752"], "sent_idx": 2}, {"id": 3, "start": 43, "end": 57, "ref_url": "Lapua_Movement", "ref_ids": ["256603"], "sent_idx": 3}]} +{"id": "18272041", "title": "Beaconhouse National University", "sentences": ["Beaconhouse National University (BNU) is a private liberal arts university located in Lahore, in the province of Punjab, Pakistan.", "Founded in 2003 by its parent company Beaconhouse School System, it is located at Raiwind, a subdivision of Lahore District; the BNU campus is adjacent to the Bahria Town.", "BNU offers study programmes in visual design and arts, architecture, liberal arts, computer information technology, psychology and mass communication.", "It is a member of the Higher Education Commission and the Association of Commonwealth Universities.", "Beaconhouse National University also organises Pakistan's largest university level festival, commonly known as 'Bestival'."], "mentions": [{"id": 0, "start": 43, "end": 50, "ref_url": "Private_university", "ref_ids": ["1699468"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 81, "ref_url": "Liberal_arts", "ref_ids": null, "sent_idx": 2}, {"id": 2, "start": 108, "end": 114, "ref_url": "Lahore", "ref_ids": ["125315"], "sent_idx": 1}, {"id": 3, "start": 113, "end": 119, "ref_url": "Punjab_(Pakistan)", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 47, "end": 55, "ref_url": "Pakistan", "ref_ids": ["23235"], "sent_idx": 4}, {"id": 5, "start": 38, "end": 63, "ref_url": "Beaconhouse_School_System", "ref_ids": ["20333916"], "sent_idx": 1}, {"id": 6, "start": 82, "end": 89, "ref_url": "Raiwind", "ref_ids": ["3181563"], "sent_idx": 1}, {"id": 7, "start": 108, "end": 123, "ref_url": "Lahore_District", "ref_ids": ["988668"], "sent_idx": 1}, {"id": 8, "start": 159, "end": 170, "ref_url": "Bahria_Town", "ref_ids": ["15719290"], "sent_idx": 1}, {"id": 9, 
"start": 22, "end": 49, "ref_url": "Higher_Education_Commission_(Pakistan)", "ref_ids": ["3471971"], "sent_idx": 3}, {"id": 10, "start": 58, "end": 98, "ref_url": "Association_of_Commonwealth_Universities", "ref_ids": ["948787"], "sent_idx": 3}]} +{"id": "18272050", "title": "Lower Pond (Kaliningrad)", "sentences": ["The Lower Pond () is a large artificial pond in northern Kaliningrad, Russia.", "It was known as the Schlossteich while part of Königsberg, Germany, until 1945.", "The pond is about one kilometre long, north to south.", "Along its length, its width varies between about 50 and 100 metres.", "The source of the water is from the north.", "The water eventually drains underground down to the river Pregel to the south.", "During the winter months, the pond can freeze over."], "mentions": [{"id": 0, "start": 30, "end": 34, "ref_url": "Pond", "ref_ids": ["18842299"], "sent_idx": 6}, {"id": 1, "start": 57, "end": 68, "ref_url": "Kaliningrad", "ref_ids": ["40387679"], "sent_idx": 0}, {"id": 2, "start": 70, "end": 76, "ref_url": "Russia", "ref_ids": ["25391"], "sent_idx": 0}, {"id": 3, "start": 47, "end": 57, "ref_url": "Königsberg", "ref_ids": ["15413504"], "sent_idx": 1}, {"id": 4, "start": 59, "end": 66, "ref_url": "Germany", "ref_ids": ["11867"], "sent_idx": 1}, {"id": 5, "start": 58, "end": 64, "ref_url": "Pregolya", "ref_ids": null, "sent_idx": 5}]} +{"id": "18272082", "title": "Residuated mapping", "sentences": ["In mathematics, the concept of a residuated mapping arises in the theory of partially ordered sets.", "It refines the concept of a monotone function.", "If \"A\", \"B\" are posets, a function \"f\": \"A\" → \"B\" is defined to be monotone if it is order-preserving: that is, if \"x\" ≤ \"y\" implies \"f\"(\"x\") ≤ \"f\"(\"y\").", "This is equivalent to the condition that the preimage under \"f\" of every down-set of \"B\" is a down-set of \"A\".", "We define a principal down-set to be one of the form ↓{\"b\"} = { \"b\"<nowiki>'</nowiki> ∈ \"B\" : 
\"b\"<nowiki>'</nowiki> ≤ \"b\" }.", "In general the preimage under \"f\" of a principal down-set need not be a principal down-set.", "If it is, \"f\" is called residuated.", "The notion of residuated map can be generalized to a binary operator (or any higher arity) via component-wise residuation.", "This approach gives rise to notions of left and right division in a partially ordered magma, additionally endowing it with a quasigroup structure.", "(One speaks only of residuated algebra for higher arities).", "A binary (or higher arity) residuated map is usually \"not\" residuated as a unary map."], "mentions": [{"id": 0, "start": 76, "end": 97, "ref_url": "Partially_ordered_set", "ref_ids": ["23572"], "sent_idx": 0}, {"id": 1, "start": 28, "end": 45, "ref_url": "Monotonic_function", "ref_ids": ["48260"], "sent_idx": 1}, {"id": 2, "start": 16, "end": 22, "ref_url": "Partially_ordered_sets", "ref_ids": null, "sent_idx": 2}, {"id": 3, "start": 15, "end": 23, "ref_url": "Preimage", "ref_ids": null, "sent_idx": 5}, {"id": 4, "start": 49, "end": 57, "ref_url": "Down-set", "ref_ids": null, "sent_idx": 5}, {"id": 5, "start": 39, "end": 57, "ref_url": "Principal_down-set", "ref_ids": null, "sent_idx": 5}, {"id": 6, "start": 53, "end": 68, "ref_url": "Binary_operator", "ref_ids": null, "sent_idx": 7}, {"id": 7, "start": 20, "end": 25, "ref_url": "Arity", "ref_ids": ["42301"], "sent_idx": 10}, {"id": 8, "start": 86, "end": 91, "ref_url": "Magma_(algebra)", "ref_ids": ["141916"], "sent_idx": 8}, {"id": 9, "start": 125, "end": 135, "ref_url": "Quasigroup", "ref_ids": ["25223"], "sent_idx": 8}]} +{"id": "18272104", "title": "Ruthenian nobility", "sentences": ["Ruthenian nobility (, ) refers to the nobility of Kievan Rus and Galicia–Volhynia, which found itself in the Grand Duchy of Lithuania, Ruthenia, Samogitia, Polish-Lithuanian Commonwealth and later Russian and Austrian Empires, and became increasingly polonized and later russified, while retaining a separate, cultural 
identity.", "Ruthenian nobility, originally characterized as East Slavic language speaking and Orthodox, found itself ruled by the expanding Grand Duchy of Lithuania, where it rose from second class status to equal partners of the Lithuanian nobility.", "Following the Polish-Lithuanian union of the 14th century, the Ruthenian nobles became increasingly polonized, adopting the Polish language and religion (which increasingly meant converting from the Orthodox faith to Roman Catholicism).", "Ruthenian nobility, however, retained a distinct identity within the body of the Polish-Lithuanian szlachta, leading to the Latin expression \"gente Ruthenus, natione Polonus\" or \"gente Rutheni, natione Poloni\" (translated as \"of Polish nationality, but Ruthenian origin\", \"of Ruthenia race and Polish nation\", or in various similar veins), although the extent to which they retained and maintained this separate identity is still debated by scholars, and varied based on time and place.", "Eventually, following the Union of Lublin in 1569, most of the territories of Ruthenia became part of the Crown of the Polish Kingdom in the Polish-Lithuanian Commonwealth.", "The transfer of Ruthenian lands from the Grand Duchy to Poland occurred with a strong support of the Ruthenian nobility, who were attracted to the Polish culture and desired the privileges of the Polish nobility.", "Thus the Ruthenian nobility gravitated from the Lithuanian noble tradition towards the Polish noble one, described by Stone as a change from \"wealth without legal rights\" to \"defined individual and corporate rights\".", "The Lithuanian, Polish and Ruthenian nobility gradually became more and more unified, particularly with regards to their standing as a socio-political class.", "By the 19th and 20th centuries, the Ruthenian aristocracy became so heavily polonized, that the eventual national resurgence of Belarus and Ukraine was mostly spurred by middle and lower classes of the nobility, that later was 
joined by the growing national consciousness of the new middle class, rather than of the former upper class of Ruthenian nobility.", "Despite polonisation in Lithuania and Ruthenia in the XVII-XVIII centuries, a large part of the lower szlachta managed to retain their cultural identity in various ways.", "According to Polish estimates from the 1930s, 300,000 members of the common nobles -\"szlachta zagrodowa\" - inhabited the subcarpathian region of the Second Polish Republic out of 800,000 in the whole country.", "90% of them were Ukrainian-speaking and 80% were Ukrainian Greek Catholics.", "In other parts of the Ukraine with a significant szlachta population, such as the Bar or the Ovruch regions, the situation was similar despite russification and earlier polonization.", "Some of the major Ruthenian noble families (all of which became polonized to a significant extent) included the Czartoryski family, Ostrogski family, Sanguszko family, Sapieha family, Wiśniowiecki family, Zasławski family and the Zbaraski family."], "mentions": [{"id": 0, "start": 50, "end": 60, "ref_url": "Kievan_Rus", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 65, "end": 81, "ref_url": "Kingdom_of_Galicia–Volhynia", "ref_ids": ["639283"], "sent_idx": 0}, {"id": 2, "start": 109, "end": 154, "ref_url": "Grand_Duchy_of_Lithuania", "ref_ids": ["380252"], "sent_idx": 0}, {"id": 3, "start": 141, "end": 171, "ref_url": "Polish-Lithuanian_Commonwealth", "ref_ids": null, "sent_idx": 4}, {"id": 4, "start": 197, "end": 204, "ref_url": "Russian_Empire", "ref_ids": ["20611504"], "sent_idx": 0}, {"id": 5, "start": 209, "end": 224, "ref_url": "Austrian_Empire", "ref_ids": ["266894"], "sent_idx": 0}, {"id": 6, "start": 64, "end": 73, "ref_url": "Polonized", "ref_ids": null, "sent_idx": 13}, {"id": 7, "start": 271, "end": 280, "ref_url": "Russified", "ref_ids": null, "sent_idx": 0}, {"id": 8, "start": 48, "end": 68, "ref_url": "East_Slavic_language", "ref_ids": null, "sent_idx": 1}, {"id": 9, 
"start": 199, "end": 207, "ref_url": "Eastern_Orthodox_Church", "ref_ids": ["10186"], "sent_idx": 2}, {"id": 10, "start": 218, "end": 237, "ref_url": "Lithuanian_nobility", "ref_ids": ["475283"], "sent_idx": 1}, {"id": 11, "start": 14, "end": 37, "ref_url": "Polish-Lithuanian_union", "ref_ids": null, "sent_idx": 2}, {"id": 12, "start": 124, "end": 139, "ref_url": "Polish_language", "ref_ids": ["22975"], "sent_idx": 2}, {"id": 13, "start": 217, "end": 234, "ref_url": "Roman_Catholicism", "ref_ids": null, "sent_idx": 2}, {"id": 14, "start": 49, "end": 57, "ref_url": "Szlachta", "ref_ids": ["29050"], "sent_idx": 12}, {"id": 15, "start": 124, "end": 129, "ref_url": "Latin_language", "ref_ids": null, "sent_idx": 3}, {"id": 16, "start": 18, "end": 27, "ref_url": "Ruthenians", "ref_ids": ["262838"], "sent_idx": 13}, {"id": 17, "start": 26, "end": 41, "ref_url": "Union_of_Lublin", "ref_ids": ["30875660"], "sent_idx": 4}, {"id": 18, "start": 18, "end": 26, "ref_url": "Ruthenia", "ref_ids": ["81189"], "sent_idx": 13}, {"id": 19, "start": 106, "end": 133, "ref_url": "Crown_of_the_Polish_Kingdom", "ref_ids": null, "sent_idx": 4}, {"id": 20, "start": 147, "end": 161, "ref_url": "Polish_culture", "ref_ids": null, "sent_idx": 5}, {"id": 21, "start": 178, "end": 211, "ref_url": "Golden_Freedoms", "ref_ids": null, "sent_idx": 5}, {"id": 22, "start": 135, "end": 156, "ref_url": "Social_class", "ref_ids": ["29174"], "sent_idx": 7}, {"id": 23, "start": 128, "end": 135, "ref_url": "Belarus", "ref_ids": ["3457"], "sent_idx": 8}, {"id": 24, "start": 22, "end": 29, "ref_url": "Ukraine", "ref_ids": ["31750"], "sent_idx": 12}, {"id": 25, "start": 8, "end": 20, "ref_url": "Polonization", "ref_ids": ["1330156"], "sent_idx": 9}, {"id": 26, "start": 18, "end": 26, "ref_url": "Ruthenia", "ref_ids": ["81189"], "sent_idx": 13}, {"id": 27, "start": 121, "end": 134, "ref_url": "Outer_Subcarpathia", "ref_ids": ["889573"], "sent_idx": 10}, {"id": 28, "start": 149, "end": 171, "ref_url": 
"Second_Polish_Republic", "ref_ids": ["14245"], "sent_idx": 10}, {"id": 29, "start": 59, "end": 74, "ref_url": "Greek_Catholic_Church", "ref_ids": ["54630472"], "sent_idx": 11}, {"id": 30, "start": 82, "end": 85, "ref_url": "Bar,_Vinnytsia_Oblast", "ref_ids": ["810256"], "sent_idx": 12}, {"id": 31, "start": 93, "end": 106, "ref_url": "Ovruch", "ref_ids": ["2936682"], "sent_idx": 12}, {"id": 32, "start": 143, "end": 156, "ref_url": "Russification", "ref_ids": ["511201"], "sent_idx": 12}, {"id": 33, "start": 112, "end": 130, "ref_url": "Czartoryski_family", "ref_ids": null, "sent_idx": 13}, {"id": 34, "start": 132, "end": 148, "ref_url": "Ostrogski_family", "ref_ids": ["1072851"], "sent_idx": 13}, {"id": 35, "start": 150, "end": 166, "ref_url": "Sanguszko_family", "ref_ids": null, "sent_idx": 13}, {"id": 36, "start": 168, "end": 182, "ref_url": "Sapieha", "ref_ids": ["1890941"], "sent_idx": 13}, {"id": 37, "start": 184, "end": 203, "ref_url": "Wiśniowiecki_family", "ref_ids": null, "sent_idx": 13}, {"id": 38, "start": 205, "end": 221, "ref_url": "Zasławski_family", "ref_ids": null, "sent_idx": 13}, {"id": 39, "start": 230, "end": 245, "ref_url": "Zbaraski_family", "ref_ids": null, "sent_idx": 13}]} +{"id": "18272124", "title": "Michel Godbout", "sentences": ["Michel Godbout is a Canadian television news anchor.", "From 2006 to 2009 he anchored \"CBC News: Montreal at Six\" for CBMT, since taking over for Dennis Trudeau in 2006.", "He studied journalism in New Brunswick where he also worked in community and private radio then joined Radio-Canada Télévision and played for the Moncton Blue Eagles.", "He moved to Manitoba, before returning to Montreal in 1999 where he joined CBC Television.", "In September 2009, with the revamping of the newscast to a 90-minute format, Godbout became the senior correspondent for CBC News Montreal, working in the field as a special assignment reporter.", "In 2010 he moved to sports as CBC's sports journalist and anchor.", "In August 2011, 
Godbout joined the newly created TVA Sports.", "He is head anchor for the sports news at 11:00pm."], "mentions": [{"id": 0, "start": 20, "end": 28, "ref_url": "Canadians", "ref_ids": ["19851291"], "sent_idx": 0}, {"id": 1, "start": 29, "end": 39, "ref_url": "Television", "ref_ids": ["29831"], "sent_idx": 0}, {"id": 2, "start": 40, "end": 51, "ref_url": "News_anchor", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 31, "end": 56, "ref_url": "CBC_News_at_Six", "ref_ids": null, "sent_idx": 1}, {"id": 4, "start": 62, "end": 66, "ref_url": "CBMT", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 90, "end": 104, "ref_url": "Dennis_Trudeau", "ref_ids": ["1271463"], "sent_idx": 1}, {"id": 6, "start": 11, "end": 21, "ref_url": "Journalism", "ref_ids": ["15928"], "sent_idx": 2}, {"id": 7, "start": 25, "end": 38, "ref_url": "New_Brunswick", "ref_ids": ["21182"], "sent_idx": 2}, {"id": 8, "start": 63, "end": 72, "ref_url": "Community_radio", "ref_ids": ["630006"], "sent_idx": 2}, {"id": 9, "start": 103, "end": 126, "ref_url": "Télévision_de_Radio-Canada", "ref_ids": null, "sent_idx": 2}, {"id": 10, "start": 146, "end": 165, "ref_url": "Moncton_Aigles_Bleu", "ref_ids": null, "sent_idx": 2}, {"id": 11, "start": 12, "end": 20, "ref_url": "Manitoba", "ref_ids": ["18926"], "sent_idx": 3}, {"id": 12, "start": 130, "end": 138, "ref_url": "Montreal", "ref_ids": ["7954681"], "sent_idx": 4}, {"id": 13, "start": 75, "end": 89, "ref_url": "CBC_Television", "ref_ids": ["1288819"], "sent_idx": 3}, {"id": 14, "start": 49, "end": 59, "ref_url": "TVA_Sports", "ref_ids": ["32563480"], "sent_idx": 6}]} +{"id": "18272157", "title": "Siobhan Paton", "sentences": ["Siobhan Bethany Paton, OAM (born 28 August 1983) is an Australian Paralympic swimmer who was born in Sydney.", "Paton has had an intellectual disability from birth which was caused as a result of a lack of oxygen.", "Paton decided to become a swimmer after finding out she has a connective tissue disorder and that swimming would 
assist in the strengthening of her joints.", "Siobhan initially began competing with able-bodied athletes and only in 1997 did she compete in a competition for athletes with disabilities, where she won seven gold medals and one silver medal.", "As of 2004, she holds thirteen world records in her disability class of S14.", "Paton represented Australia at the 2000 Summer Paralympics in Sydney, where she won six gold medals, for which she received a Medal of the Order of Australia, and set world records on nine occasions in the process.", "In recognition of her achievement, the Australian Paralympic Committee named her \"Paralympian of the Year\", and she was honoured on a postage stamp.", "She was also awarded an Australian Sports Medal before the 2000 games.", "In 2013, she was inducted into the ACT Sport Hall of Fame."], "mentions": [{"id": 0, "start": 23, "end": 26, "ref_url": "Order_of_Australia", "ref_ids": ["153398"], "sent_idx": 0}, {"id": 1, "start": 50, "end": 60, "ref_url": "Paralympic", "ref_ids": null, "sent_idx": 6}, {"id": 2, "start": 62, "end": 68, "ref_url": "Sydney", "ref_ids": ["27862"], "sent_idx": 5}, {"id": 3, "start": 17, "end": 40, "ref_url": "Intellectual_disability", "ref_ids": ["18567040"], "sent_idx": 1}, {"id": 4, "start": 72, "end": 75, "ref_url": "S14_(classification)", "ref_ids": ["36779273"], "sent_idx": 4}, {"id": 5, "start": 35, "end": 58, "ref_url": "2000_Summer_Paralympics", "ref_ids": ["333674"], "sent_idx": 5}, {"id": 6, "start": 126, "end": 157, "ref_url": "Medal_of_the_Order_of_Australia", "ref_ids": null, "sent_idx": 5}]} +{"id": "18272173", "title": "Pieter de Groot", "sentences": ["Pieter de Groot (March 28, 1615 – June 2, 1678) was a Dutch regent and diplomat during the First Stadtholderless Period of the Dutch Republic.", "He led the Dutch delegation that vainly tried to negotiate the Dutch capitulation to king Louis XIV of France during the Year of Disaster, 1672."], "mentions": [{"id": 0, "start": 11, "end": 16, 
"ref_url": "Netherlands", "ref_ids": ["21148"], "sent_idx": 1}, {"id": 1, "start": 60, "end": 66, "ref_url": "Regenten", "ref_ids": ["12478098"], "sent_idx": 0}, {"id": 2, "start": 71, "end": 79, "ref_url": "Diplomat", "ref_ids": ["82433"], "sent_idx": 0}, {"id": 3, "start": 91, "end": 119, "ref_url": "First_Stadtholderless_Period", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 127, "end": 141, "ref_url": "Dutch_Republic", "ref_ids": ["52626"], "sent_idx": 0}, {"id": 5, "start": 90, "end": 109, "ref_url": "Louis_XIV_of_France", "ref_ids": ["18553"], "sent_idx": 1}, {"id": 6, "start": 121, "end": 137, "ref_url": "Rampjaar", "ref_ids": ["1027184"], "sent_idx": 1}]} +{"id": "18272185", "title": "36th parallel south", "sentences": ["The 36th parallel south is a circle of latitude that is 36 degrees south of the Earth's equatorial plane.", "It crosses the Atlantic Ocean, the Indian Ocean, Australasia, the Pacific Ocean and South America."], "mentions": [{"id": 0, "start": 29, "end": 47, "ref_url": "Circle_of_latitude", "ref_ids": ["142999"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Degree_(angle)", "ref_ids": ["1195294"], "sent_idx": 0}, {"id": 2, "start": 18, "end": 23, "ref_url": "South", "ref_ids": ["77173"], "sent_idx": 0}, {"id": 3, "start": 80, "end": 87, "ref_url": "Earth", "ref_ids": ["9228"], "sent_idx": 0}, {"id": 4, "start": 88, "end": 104, "ref_url": "Equator", "ref_ids": ["20611356"], "sent_idx": 0}, {"id": 5, "start": 15, "end": 29, "ref_url": "Atlantic_Ocean", "ref_ids": ["698"], "sent_idx": 1}, {"id": 6, "start": 35, "end": 47, "ref_url": "Indian_Ocean", "ref_ids": ["14580"], "sent_idx": 1}, {"id": 7, "start": 49, "end": 60, "ref_url": "Australasia", "ref_ids": ["21492915"], "sent_idx": 1}, {"id": 8, "start": 66, "end": 79, "ref_url": "Pacific_Ocean", "ref_ids": ["23070"], "sent_idx": 1}, {"id": 9, "start": 84, "end": 97, "ref_url": "South_America", "ref_ids": ["26769"], "sent_idx": 1}]} +{"id": "18272303", "title": "42nd 
parallel south", "sentences": ["The 42nd parallel south is a circle of latitude that is 42 degrees south of the Earth's equatorial plane.", "It crosses the Atlantic Ocean, the Indian Ocean, Australasia, the Pacific Ocean and South America.", "At this latitude the sun is visible for 15 hours, 15 minutes during the December solstice and 9 hours, 7 minutes during the June solstice.", "The alcohol brand 42 Below was named in part because the 42nd parallel South passes through New Zealand."], "mentions": [{"id": 0, "start": 29, "end": 47, "ref_url": "Circle_of_latitude", "ref_ids": ["142999"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Degree_(angle)", "ref_ids": ["1195294"], "sent_idx": 0}, {"id": 2, "start": 18, "end": 23, "ref_url": "South", "ref_ids": ["77173"], "sent_idx": 0}, {"id": 3, "start": 80, "end": 87, "ref_url": "Earth", "ref_ids": ["9228"], "sent_idx": 0}, {"id": 4, "start": 88, "end": 104, "ref_url": "Equator", "ref_ids": ["20611356"], "sent_idx": 0}, {"id": 5, "start": 15, "end": 29, "ref_url": "Atlantic_Ocean", "ref_ids": ["698"], "sent_idx": 1}, {"id": 6, "start": 35, "end": 47, "ref_url": "Indian_Ocean", "ref_ids": ["14580"], "sent_idx": 1}, {"id": 7, "start": 49, "end": 60, "ref_url": "Australasia", "ref_ids": ["21492915"], "sent_idx": 1}, {"id": 8, "start": 66, "end": 79, "ref_url": "Pacific_Ocean", "ref_ids": ["23070"], "sent_idx": 1}, {"id": 9, "start": 84, "end": 97, "ref_url": "South_America", "ref_ids": ["26769"], "sent_idx": 1}, {"id": 10, "start": 21, "end": 24, "ref_url": "Sun", "ref_ids": ["26751"], "sent_idx": 2}, {"id": 11, "start": 72, "end": 89, "ref_url": "Summer_solstice", "ref_ids": ["11890785"], "sent_idx": 2}, {"id": 12, "start": 124, "end": 137, "ref_url": "Winter_solstice", "ref_ids": ["8521120"], "sent_idx": 2}, {"id": 13, "start": 18, "end": 26, "ref_url": "42_Below", "ref_ids": null, "sent_idx": 3}]} +{"id": "18272324", "title": "Black Castle, East Lothian", "sentences": ["Black Castle, East Lothian is an 
Iron Age hillfort with a number of defensive banks, located south-east of Gifford, East Lothian, Scotland.", "It is south of the B6355 road, between Darent House and Green Castle hillfort.", "The fort is on the summit of a hillock, at .", "It measures about .", "It has an inner and an outer rampart, and two entrances marked by causeways.", "To the west is a plantation named Black Castle wood.", "It is a designated scheduled ancient monument."], "mentions": [{"id": 0, "start": 33, "end": 41, "ref_url": "Iron_Age", "ref_ids": ["14711"], "sent_idx": 0}, {"id": 1, "start": 69, "end": 77, "ref_url": "Hillfort", "ref_ids": ["156693"], "sent_idx": 1}, {"id": 2, "start": 107, "end": 114, "ref_url": "Gifford,_East_Lothian", "ref_ids": ["4401697"], "sent_idx": 0}, {"id": 3, "start": 14, "end": 26, "ref_url": "East_Lothian", "ref_ids": ["106386"], "sent_idx": 0}, {"id": 4, "start": 130, "end": 138, "ref_url": "Scotland", "ref_ids": ["26994"], "sent_idx": 0}, {"id": 5, "start": 19, "end": 29, "ref_url": "B6355_road", "ref_ids": null, "sent_idx": 1}, {"id": 6, "start": 56, "end": 68, "ref_url": "Green_Castle,_East_Lothian", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 17, "end": 27, "ref_url": "Plantation", "ref_ids": ["142473"], "sent_idx": 5}, {"id": 8, "start": 19, "end": 45, "ref_url": "Scheduled_ancient_monument", "ref_ids": null, "sent_idx": 6}]} +{"id": "18272330", "title": "Bavarian D IV", "sentences": ["The little D IV was one of the most frequently seen tank locomotives in the stations of the Royal Bavarian State Railways (\"Königlich Bayerische Staatsbahn\").", "The Deutsche Reichsbahn took over almost all of them, 124 in total, of which 24 were from the Palatinate (\"Pfalz\")."], "mentions": [{"id": 0, "start": 52, "end": 68, "ref_url": "Tank_locomotives", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 92, "end": 121, "ref_url": "Royal_Bavarian_State_Railways", "ref_ids": ["17940544"], "sent_idx": 0}, {"id": 2, "start": 4, "end": 23, "ref_url": 
"Deutsche_Reichsbahn-Gesellschaft", "ref_ids": null, "sent_idx": 1}, {"id": 3, "start": 94, "end": 104, "ref_url": "Palatinate_(region)", "ref_ids": ["38849"], "sent_idx": 1}]} +{"id": "18272351", "title": "List of California Golden Seals draft picks", "sentences": ["The California Golden Seals were a professional ice hockey franchise based in Oakland, California from 1967–76.", "They played their first seven seasons in the West Division and their final two seasons in the Adams Division.", "During their time in Oakland the Golden Seals drafted 72 players and participated in ten National Hockey League Amateur Drafts before the franchise relocated to Cleveland, Ohio on July 14, 1976.", "This list features every player drafted by the Golden Seals and his regular season stats for his career.", "The Golden Seals' first draft pick was Ken Hicks who was selected third overall in the 1967 NHL Amateur Draft.", "The highest that California ever drafted was third overall, which they did three times, selecting Ken Hicks (1967), Rick Hampton (1974) and Ralph Klassen (1975).", "No Golden Seals' draft pick ever played in over 1,000 NHL games, the closest any player selected by the team came to this mark was Ron Stackhouse who played in 889 regular season games.", "To date no Golden Seals' draft pick has ever been selected to the Hockey Hall of Fame."], "mentions": [{"id": 0, "start": 4, "end": 27, "ref_url": "California_Golden_Seals", "ref_ids": ["274452"], "sent_idx": 0}, {"id": 1, "start": 48, "end": 58, "ref_url": "Ice_hockey", "ref_ids": ["14790"], "sent_idx": 0}, {"id": 2, "start": 78, "end": 97, "ref_url": "Oakland,_California", "ref_ids": ["50548"], "sent_idx": 0}, {"id": 3, "start": 109, "end": 113, "ref_url": "1967–68_NHL_season", "ref_ids": ["2458670"], "sent_idx": 5}, {"id": 4, "start": 191, "end": 193, "ref_url": "1975–76_NHL_season", "ref_ids": ["2945254"], "sent_idx": 2}, {"id": 5, "start": 45, "end": 58, "ref_url": "West_Division_(NHL)", "ref_ids": ["13996246"], 
"sent_idx": 1}, {"id": 6, "start": 94, "end": 108, "ref_url": "Adams_Division", "ref_ids": ["4527860"], "sent_idx": 1}, {"id": 7, "start": 89, "end": 111, "ref_url": "National_Hockey_League", "ref_ids": ["21809"], "sent_idx": 2}, {"id": 8, "start": 112, "end": 126, "ref_url": "NHL_Entry_Draft", "ref_ids": ["858982"], "sent_idx": 2}, {"id": 9, "start": 161, "end": 170, "ref_url": "Cleveland", "ref_ids": ["5951"], "sent_idx": 2}, {"id": 10, "start": 172, "end": 176, "ref_url": "Ohio", "ref_ids": ["22199"], "sent_idx": 2}, {"id": 11, "start": 98, "end": 107, "ref_url": "Ken_Hicks", "ref_ids": ["39471897"], "sent_idx": 5}, {"id": 12, "start": 87, "end": 109, "ref_url": "1967_NHL_Amateur_Draft", "ref_ids": ["956350"], "sent_idx": 4}, {"id": 13, "start": 116, "end": 128, "ref_url": "Rick_Hampton", "ref_ids": ["10290053"], "sent_idx": 5}, {"id": 14, "start": 130, "end": 134, "ref_url": "1974_NHL_Amateur_Draft", "ref_ids": ["5613438"], "sent_idx": 5}, {"id": 15, "start": 140, "end": 153, "ref_url": "Ralph_Klassen", "ref_ids": ["10817113"], "sent_idx": 5}, {"id": 16, "start": 155, "end": 159, "ref_url": "1975_NHL_Amateur_Draft", "ref_ids": ["5613907"], "sent_idx": 5}, {"id": 17, "start": 131, "end": 145, "ref_url": "Ron_Stackhouse", "ref_ids": ["3547274"], "sent_idx": 6}, {"id": 18, "start": 66, "end": 85, "ref_url": "Hockey_Hall_of_Fame", "ref_ids": ["74098"], "sent_idx": 7}]} +{"id": "18272352", "title": "Australia at the 2000 Summer Paralympics", "sentences": ["Australia was the host nation for the 2000 Summer Paralympics which was held in Sydney.", "Australia competed in the games between the 18 and 29 October.", "The team consisted of 285 athletes in 18 sports with 148 officials.", "It was the country's largest ever Paralympic delegation to a Games.", "Australia has participated at every Summer Paralympic Games since its inception.", "Australia finished at the top of the medal tally with 63 gold, 39 silver and 47 bronze medals to total 149 medals for the games.", "This 
was the first time and the only time to date that Australia has finished on top of either an Olympic or Paralympic medal tally.", "The most successful sports were athletics, cycling, equestrian, swimming and wheelchair tennis.", "Notable Australian performances were:"], "mentions": [{"id": 0, "start": 8, "end": 17, "ref_url": "Australia", "ref_ids": ["4689264"], "sent_idx": 8}, {"id": 1, "start": 38, "end": 61, "ref_url": "2000_Summer_Paralympics", "ref_ids": ["333674"], "sent_idx": 0}, {"id": 2, "start": 80, "end": 86, "ref_url": "Sydney", "ref_ids": ["27862"], "sent_idx": 0}, {"id": 3, "start": 36, "end": 59, "ref_url": "Summer_Paralympic_Games", "ref_ids": ["18670273"], "sent_idx": 4}, {"id": 4, "start": 77, "end": 94, "ref_url": "Wheelchair_tennis", "ref_ids": ["432890"], "sent_idx": 7}]} +{"id": "18272372", "title": "Ruslan Yeremenko", "sentences": ["Ruslan Yeremenko (born 31 July 1978) is a Ukrainian pole vaulter.", "His personal best jump is 5.70 metres, achieved in July 2001 in Kiev.", "He had 5.84 metres on the indoor track, achieved in January 2005 in Stuttgart."], "mentions": [{"id": 0, "start": 42, "end": 51, "ref_url": "Ukraine", "ref_ids": ["31750"], "sent_idx": 0}, {"id": 1, "start": 52, "end": 62, "ref_url": "Pole_vault", "ref_ids": ["24082"], "sent_idx": 0}, {"id": 2, "start": 64, "end": 68, "ref_url": "Kiev", "ref_ids": ["585629"], "sent_idx": 1}, {"id": 3, "start": 68, "end": 77, "ref_url": "Stuttgart", "ref_ids": ["28565"], "sent_idx": 2}]} +{"id": "18272419", "title": "Hungry (The X-Files)", "sentences": ["\"Hungry\" is the third episode of the seventh season of the science fiction television series \"The X-Files\".", "It premiered on the Fox network in the United States on November 21, 1999.", "It was written by Vince Gilligan, directed by Kim Manners, and featured a guest appearance by Chad Donella.", "The episode is a \"Monster-of-the-Week\" story, unconnected to the series' wider mythology.", "However, unlike previous Monster-of-the-Week 
stories, \"Hungry\" is told from the monster's perspective.", "\"Hungry\" earned a Nielsen household rating of 9.6, being watched by 16.17 million people in its initial broadcast.", "The episode received mixed to positive reviews from critics.", "The show centers on FBI special agents Fox Mulder (David Duchovny) and Dana Scully (Gillian Anderson) who work on cases linked to the paranormal, called X-Files.", "Mulder is a believer in the paranormal, while the skeptical Scully has been assigned to debunk his work.", "In this episode, a fast-food employee with unusual cravings becomes the focus of an FBI investigation under the direction of Mulder and Scully.", "The victims appear with no brain and a suction hole in the forehead.", "Gilligan wanted to try a \"different\" approach to \"The X-Files\" with \"Hungry\" by telling the main story through the eyes of the monster.", "Actor Chad Donella, who portrayed the monster, was chosen because he possessed a \"subtle, interesting quality,\" according to casting director Rick Millikan.", "Manners was pleased with Donella's performance, calling him a \"great little actor.\"", "Because both David Duchovny and Gillian Anderson were filming movies, \"Return to Me\" and \"The House of Mirth\" respectively, the production company decided to film \"Hungry\" before any of the other episodes, even though it would be aired third, after the conclusion of \"The Sixth Extinction\" arc."], "mentions": [{"id": 0, "start": 37, "end": 51, "ref_url": "The_X-Files_(season_7)", "ref_ids": ["940067"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 74, "ref_url": "Science_fiction", "ref_ids": ["26787"], "sent_idx": 0}, {"id": 2, "start": 75, "end": 92, "ref_url": "Television_series", "ref_ids": null, "sent_idx": 0}, {"id": 3, "start": 50, "end": 61, "ref_url": "The_X-Files", "ref_ids": ["30304"], "sent_idx": 11}, {"id": 4, "start": 20, "end": 31, "ref_url": "Fox_Broadcasting_Company", "ref_ids": ["46252"], "sent_idx": 1}, {"id": 5, "start": 18, 
"end": 32, "ref_url": "Vince_Gilligan", "ref_ids": ["16533683"], "sent_idx": 2}, {"id": 6, "start": 46, "end": 57, "ref_url": "Kim_Manners", "ref_ids": ["10228635"], "sent_idx": 2}, {"id": 7, "start": 6, "end": 18, "ref_url": "Chad_Donella", "ref_ids": ["4280902"], "sent_idx": 12}, {"id": 8, "start": 79, "end": 88, "ref_url": "Mythology_of_The_X-Files", "ref_ids": ["23749226"], "sent_idx": 3}, {"id": 9, "start": 84, "end": 87, "ref_url": "Federal_Bureau_of_Investigation", "ref_ids": ["11127"], "sent_idx": 9}, {"id": 10, "start": 39, "end": 49, "ref_url": "Fox_Mulder", "ref_ids": ["261343"], "sent_idx": 7}, {"id": 11, "start": 13, "end": 27, "ref_url": "David_Duchovny", "ref_ids": ["21189337"], "sent_idx": 14}, {"id": 12, "start": 71, "end": 82, "ref_url": "Dana_Scully", "ref_ids": ["389780"], "sent_idx": 7}, {"id": 13, "start": 32, "end": 48, "ref_url": "Gillian_Anderson", "ref_ids": ["42238"], "sent_idx": 14}, {"id": 14, "start": 54, "end": 60, "ref_url": "X-File", "ref_ids": null, "sent_idx": 11}, {"id": 15, "start": 71, "end": 83, "ref_url": "Return_to_Me", "ref_ids": ["3105223"], "sent_idx": 14}, {"id": 16, "start": 90, "end": 108, "ref_url": "The_House_of_Mirth_(2000_film)", "ref_ids": ["15670779"], "sent_idx": 14}, {"id": 17, "start": 268, "end": 288, "ref_url": "The_Sixth_Extinction_(The_X-Files)", "ref_ids": ["18270508"], "sent_idx": 14}]} +{"id": "18272432", "title": "Ricemarch Psalter", "sentences": ["The Ricemarch Psalter is an 11th-century Welsh illuminated psalter, in a late Insular style, that has been described as \"Hiberno-Danish\", instead of the usual \"Hiberno-Saxon\", as it reflects Viking influence.", "Its 159 pages are vellum, and include the following sections: Letter of St. 
Jerome to Chromatius and Elidorus; Breviarius Apostolorum; Martyrologium Hieronymianum, and Various Tables.", "It is one of two surviving manuscripts from the scriptorium at Llanbadarn Fawr in Wales, established by the father of the scribe and the first owner.", "The other is a manuscript of St. Augustine's De Trinitate in Cambridge, by the same scribe.", "The psalter is now at Trinity College Dublin as MS 50."], "mentions": [{"id": 0, "start": 41, "end": 46, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 0}, {"id": 1, "start": 47, "end": 58, "ref_url": "Illuminated_manuscript", "ref_ids": ["92310"], "sent_idx": 0}, {"id": 2, "start": 4, "end": 11, "ref_url": "Psalter", "ref_ids": ["390503"], "sent_idx": 4}, {"id": 3, "start": 78, "end": 85, "ref_url": "Insular_art", "ref_ids": ["9206479"], "sent_idx": 0}, {"id": 4, "start": 160, "end": 173, "ref_url": "Hiberno-Saxon", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 191, "end": 197, "ref_url": "Viking", "ref_ids": null, "sent_idx": 0}, {"id": 6, "start": 18, "end": 24, "ref_url": "Vellum", "ref_ids": ["32435"], "sent_idx": 1}, {"id": 7, "start": 86, "end": 96, "ref_url": "Chromatius", "ref_ids": ["8339434"], "sent_idx": 1}, {"id": 8, "start": 101, "end": 109, "ref_url": "Heliodorus_of_Altino", "ref_ids": ["17689187"], "sent_idx": 1}, {"id": 9, "start": 63, "end": 78, "ref_url": "Llanbadarn_Fawr,_Ceredigion", "ref_ids": ["7402834"], "sent_idx": 2}, {"id": 10, "start": 82, "end": 87, "ref_url": "Wales", "ref_ids": ["69894"], "sent_idx": 2}, {"id": 11, "start": 29, "end": 42, "ref_url": "St._Augustine", "ref_ids": null, "sent_idx": 3}, {"id": 12, "start": 45, "end": 57, "ref_url": "De_Trinitate", "ref_ids": null, "sent_idx": 3}, {"id": 13, "start": 22, "end": 44, "ref_url": "Trinity_College_Dublin", "ref_ids": ["142298"], "sent_idx": 4}]} +{"id": "18272455", "title": "Vladyslav Revenko", "sentences": ["Vladyslav Revenko (born 15 November 1984 in Kiev Oblast, Soviet Union) is a Ukrainian pole vaulter.", "He 
won the silver medal at the 2002 World Junior Championships.", "He also competed at the 2005 European Indoor Championships and the 2005 World Championships without reaching the final.", "His personal best jump is 5.80 metres, achieved in June 2005 in Leiria."], "mentions": [{"id": 0, "start": 44, "end": 55, "ref_url": "Kiev_Oblast", "ref_ids": ["494884"], "sent_idx": 0}, {"id": 1, "start": 57, "end": 69, "ref_url": "Soviet_Union", "ref_ids": ["26779"], "sent_idx": 0}, {"id": 2, "start": 76, "end": 85, "ref_url": "Ukraine", "ref_ids": ["31750"], "sent_idx": 0}, {"id": 3, "start": 86, "end": 96, "ref_url": "Pole_vault", "ref_ids": ["24082"], "sent_idx": 0}, {"id": 4, "start": 31, "end": 62, "ref_url": "2002_World_Junior_Championships_in_Athletics", "ref_ids": ["2429115"], "sent_idx": 1}, {"id": 5, "start": 24, "end": 58, "ref_url": "2005_European_Indoor_Championships_in_Athletics", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 67, "end": 91, "ref_url": "2005_World_Championships_in_Athletics_-_Men's_Pole_Vault", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 64, "end": 70, "ref_url": "Leiria", "ref_ids": ["307625"], "sent_idx": 3}]} +{"id": "18272473", "title": "Russ Marin", "sentences": ["Russ Marin (May 1, 1934 – March 6, 2005) was an American film and television actor active from the early 1970s to the early 1990s."], "mentions": []} +{"id": "18272485", "title": "Bavarian D III", "sentences": ["The Class D III engines of the Royal Bavarian State Railways (\"Königlich Bayerische Staatsbahn\") were tank locomotives designed for shunting and \"Vizinalbahn\" service.", "Georg Krauss had exhibited a locomotive of this type, which had been developed on the same design principles as the Bavarian B VII, at the 1873 Vienna World Exposition and sold it to the Bebra-Hanau Railway.", "The Bavarian State Railway also decided to procured the locomotive and order six for the same duties as those of the Maffei-built Bavarian D I class, which had been introduced in 
1871.", "The D I was equipped with an outside Allan valve gear and an enclosed driver's cab.", "Its coal and water tanks jutted out over the rear driving axle.", "The locomotives were used partly for branch line service and partly for light shunting duties \"inter alia\" in Munich and Ingolstadt, and also at Lindau working with the ferries on Lake Constance.", "Because the engines could not attain the performance of the Bavarian D IV due to their simpler design, they were retired between 1894 and 1897.", "One engine was transferred to the Röthenbach–Weiler branch line and one to the Bavarian Cement Works at Marienstein, the rest were scrapped.", "One engine was sold, the remainder had been retired and scrapped by 1897."], "mentions": [{"id": 0, "start": 31, "end": 60, "ref_url": "Royal_Bavarian_State_Railways", "ref_ids": ["17940544"], "sent_idx": 0}, {"id": 1, "start": 102, "end": 118, "ref_url": "Tank_locomotives", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 78, "end": 86, "ref_url": "Shunting_(rail)", "ref_ids": ["2784315"], "sent_idx": 5}, {"id": 3, "start": 146, "end": 157, "ref_url": "Vizinalbahn", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 0, "end": 12, "ref_url": "Georg_Krauss", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 116, "end": 130, "ref_url": "Bavarian_B_VII", "ref_ids": ["18457141"], "sent_idx": 1}, {"id": 6, "start": 139, "end": 167, "ref_url": "1873_Vienna_World_Exposition", "ref_ids": null, "sent_idx": 1}, {"id": 7, "start": 187, "end": 206, "ref_url": "Bebra-Hanau_Railway", "ref_ids": null, "sent_idx": 1}, {"id": 8, "start": 117, "end": 123, "ref_url": "Joseph_Anton_von_Maffei", "ref_ids": ["18243382"], "sent_idx": 2}, {"id": 9, "start": 60, "end": 72, "ref_url": "Bavarian_D_I", "ref_ids": ["18285676"], "sent_idx": 6}, {"id": 10, "start": 37, "end": 53, "ref_url": "Allan_valve_gear", "ref_ids": null, "sent_idx": 3}, {"id": 11, "start": 50, "end": 62, "ref_url": "Driving_axle", "ref_ids": null, "sent_idx": 4}, {"id": 12, 
"start": 110, "end": 116, "ref_url": "Munich", "ref_ids": ["19058"], "sent_idx": 5}, {"id": 13, "start": 121, "end": 131, "ref_url": "Ingolstadt", "ref_ids": ["208251"], "sent_idx": 5}, {"id": 14, "start": 145, "end": 151, "ref_url": "Lindau", "ref_ids": ["239502"], "sent_idx": 5}, {"id": 15, "start": 60, "end": 73, "ref_url": "Bavarian_D_IV", "ref_ids": ["18272330"], "sent_idx": 6}, {"id": 16, "start": 104, "end": 115, "ref_url": "Marienstein", "ref_ids": ["22235440"], "sent_idx": 7}]} +{"id": "18272505", "title": "John Hewitt (priest)", "sentences": ["John Hewitt or Hewett (alias Weldon, alias Savell)\n(date of birth unknown; executed at Mile End Green, 6 October 1588) was an English Roman Catholic priest.", "He is a Catholic martyr, beatified in 1929."], "mentions": [{"id": 0, "start": 87, "end": 101, "ref_url": "Mile_End_Green", "ref_ids": null, "sent_idx": 0}, {"id": 1, "start": 134, "end": 148, "ref_url": "Roman_Catholic", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 25, "end": 34, "ref_url": "Beatified", "ref_ids": null, "sent_idx": 1}]} +{"id": "18272509", "title": "Duward Crow", "sentences": ["Duward L. Crow (June 26, 1919 – October 29, 1997) was a United States Air Force lieutenant general.", "Crow graduated from DeKalb County High School in 1936 and entered the University of Alabama.", "He subsequently received an appointment to the U.S. 
Military Academy, in West Point, New York, graduating in 1941, and joined what was then the Army Air Corps.", "He served in the China-Burma-India theater of operations during World War II .", "Crow was involved in planning and executing airborne resupply operations over the Himalayan Mountains to Chinese Nationalist and other allied forces, commonly known as The Hump operation.", "Following the war, Crow enrolled at Harvard University, where he was awarded a master of business administration degree in 1948.", "He then spent several years in various posts dealing with supply, procurement, logistics, finance and personnel matters.", "He attended the Air War College at Maxwell Air Force Base in Montgomery, Alabama, in 1957-58.", "On October 1, 1973, Crow was appointed Assistant Vice Chief of Staff of the Air Force.", "He retired from the Air Force on August 1, 1974.", "He was subsequently named Associate Deputy Administrator of NASA in 1975.", "During his military career, Crow was awarded the Distinguished Service Medal, Legion of Merit, Bronze Star Medal, and the Army Commendation Medal.", "He was subsequently awarded the NASA Distinguished Service Medal in 1978.", "Crow died on October 29, 1997."], "mentions": [{"id": 0, "start": 56, "end": 79, "ref_url": "United_States_Air_Force", "ref_ids": ["32090"], "sent_idx": 0}, {"id": 1, "start": 80, "end": 98, "ref_url": "Lieutenant_general_(United_States)", "ref_ids": ["3820104"], "sent_idx": 0}, {"id": 2, "start": 20, "end": 33, "ref_url": "DeKalb_County,_Alabama", "ref_ids": ["77827"], "sent_idx": 1}, {"id": 3, "start": 70, "end": 91, "ref_url": "University_of_Alabama", "ref_ids": ["327950"], "sent_idx": 1}, {"id": 4, "start": 47, "end": 68, "ref_url": "U.S._Military_Academy", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 144, "end": 158, "ref_url": "United_States_Army_Air_Corps", "ref_ids": ["23869026"], "sent_idx": 2}, {"id": 6, "start": 64, "end": 76, "ref_url": "World_War_II", "ref_ids": ["32927"], "sent_idx": 
3}, {"id": 7, "start": 82, "end": 101, "ref_url": "Himalayan_Mountains", "ref_ids": null, "sent_idx": 4}, {"id": 8, "start": 168, "end": 176, "ref_url": "The_Hump", "ref_ids": ["904187"], "sent_idx": 4}, {"id": 9, "start": 36, "end": 54, "ref_url": "Harvard_University", "ref_ids": ["18426501"], "sent_idx": 5}, {"id": 10, "start": 79, "end": 112, "ref_url": "Master_of_business_administration", "ref_ids": null, "sent_idx": 5}, {"id": 11, "start": 16, "end": 31, "ref_url": "Air_War_College", "ref_ids": ["5957769"], "sent_idx": 7}, {"id": 12, "start": 35, "end": 57, "ref_url": "Maxwell_Air_Force_Base", "ref_ids": ["1250840"], "sent_idx": 7}, {"id": 13, "start": 61, "end": 80, "ref_url": "Montgomery,_Alabama", "ref_ids": ["57690"], "sent_idx": 7}, {"id": 14, "start": 54, "end": 85, "ref_url": "Chief_of_Staff_of_the_United_States_Air_Force", "ref_ids": ["736842"], "sent_idx": 8}, {"id": 15, "start": 32, "end": 36, "ref_url": "NASA", "ref_ids": ["18426568"], "sent_idx": 12}, {"id": 16, "start": 37, "end": 64, "ref_url": "Air_Force_Distinguished_Service_Medal", "ref_ids": ["832034"], "sent_idx": 12}, {"id": 17, "start": 78, "end": 93, "ref_url": "Legion_of_Merit", "ref_ids": ["18691"], "sent_idx": 11}, {"id": 18, "start": 95, "end": 112, "ref_url": "Bronze_Star_Medal", "ref_ids": ["4972"], "sent_idx": 11}, {"id": 19, "start": 122, "end": 145, "ref_url": "Army_Commendation_Medal", "ref_ids": null, "sent_idx": 11}, {"id": 20, "start": 32, "end": 64, "ref_url": "NASA_Distinguished_Service_Medal", "ref_ids": ["998615"], "sent_idx": 12}]} +{"id": "18272529", "title": "Damiel Dossévi", "sentences": ["Damiel Dossévi (born 3 February 1983 in Chambray-lès-Tours) is a French pole vaulter.", "He finished fifteenth at the 2006 European Championships.", "He also competed at the 2005 European Indoor Championships, the 2005 World Championships and the 2007 World Championships without reaching the final.", "His personal best jump is 5.75 metres, which he achieved in July 2005 in 
Erfurt."], "mentions": [{"id": 0, "start": 40, "end": 58, "ref_url": "Chambray-lès-Tours", "ref_ids": ["5496163"], "sent_idx": 0}, {"id": 1, "start": 65, "end": 71, "ref_url": "France", "ref_ids": ["5843419"], "sent_idx": 0}, {"id": 2, "start": 72, "end": 82, "ref_url": "Pole_vault", "ref_ids": ["24082"], "sent_idx": 0}, {"id": 3, "start": 12, "end": 21, "ref_url": "Fifteenth", "ref_ids": ["4558380"], "sent_idx": 1}, {"id": 4, "start": 29, "end": 56, "ref_url": "2006_European_Championships_in_Athletics", "ref_ids": null, "sent_idx": 1}, {"id": 5, "start": 24, "end": 58, "ref_url": "2005_European_Indoor_Championships_in_Athletics", "ref_ids": null, "sent_idx": 2}, {"id": 6, "start": 64, "end": 88, "ref_url": "2005_World_Championships_in_Athletics_-_Men's_Pole_Vault", "ref_ids": null, "sent_idx": 2}, {"id": 7, "start": 97, "end": 121, "ref_url": "2007_World_Championships_in_Athletics_–_Men's_pole_vault", "ref_ids": ["13048907"], "sent_idx": 2}, {"id": 8, "start": 73, "end": 79, "ref_url": "Erfurt", "ref_ids": ["9481"], "sent_idx": 3}]} +{"id": "18272535", "title": "Kiyoshi Kawakubo", "sentences": ["Kiyoshi was part of 81 Produce."], "mentions": [{"id": 0, "start": 20, "end": 30, "ref_url": "81_Produce", "ref_ids": ["3170038"], "sent_idx": 0}]} +{"id": "18272543", "title": "John Hewett", "sentences": ["John Hewett may refer to:"], "mentions": []} +{"id": "18272547", "title": "Janice Biala", "sentences": ["Janice Biala (September 11, 1903 – September 24, 2000) was an artist whose work, spanning seven decades, is well regarded both in France and the United States.", "Known for her \"impeccable taste and remarkable intelligence\", as well as her \"intuitive feeling for composition and her orchestration of color\", she made paintings of intimate interiors, still lifes, portraits of her friends, and cityscapes of the places she traveled.", "Her work, which defies easy classification, lies between figuration and abstraction.", "One of the great modernists, she transformed 
her subjects into shape and color using \"unexpected color relationships and a relaxed approach to interpreting realism.\""], "mentions": [{"id": 0, "start": 17, "end": 27, "ref_url": "Modernism", "ref_ids": ["19547"], "sent_idx": 3}]} +{"id": "18272563", "title": "46th parallel south", "sentences": ["The 46th parallel south is a circle of latitude that is 46 degrees south of the Earth's equatorial plane.", "It crosses the Atlantic Ocean, the Indian Ocean, Australasia, the Pacific Ocean and South America.", "At this latitude the sun is visible for 15 hours, 45 minutes during the December solstice and 8 hours, 38 minutes during the June solstice.", "The largest city south of the 46th parallel is Punta Arenas."], "mentions": [{"id": 0, "start": 29, "end": 47, "ref_url": "Circle_of_latitude", "ref_ids": ["142999"], "sent_idx": 0}, {"id": 1, "start": 59, "end": 66, "ref_url": "Degree_(angle)", "ref_ids": ["1195294"], "sent_idx": 0}, {"id": 2, "start": 17, "end": 22, "ref_url": "South", "ref_ids": ["77173"], "sent_idx": 3}, {"id": 3, "start": 80, "end": 87, "ref_url": "Earth", "ref_ids": ["9228"], "sent_idx": 0}, {"id": 4, "start": 88, "end": 104, "ref_url": "Equator", "ref_ids": ["20611356"], "sent_idx": 0}, {"id": 5, "start": 15, "end": 29, "ref_url": "Atlantic_Ocean", "ref_ids": ["698"], "sent_idx": 1}, {"id": 6, "start": 35, "end": 47, "ref_url": "Indian_Ocean", "ref_ids": ["14580"], "sent_idx": 1}, {"id": 7, "start": 49, "end": 60, "ref_url": "Australasia", "ref_ids": ["21492915"], "sent_idx": 1}, {"id": 8, "start": 66, "end": 79, "ref_url": "Pacific_Ocean", "ref_ids": ["23070"], "sent_idx": 1}, {"id": 9, "start": 84, "end": 97, "ref_url": "South_America", "ref_ids": ["26769"], "sent_idx": 1}, {"id": 10, "start": 21, "end": 24, "ref_url": "Sun", "ref_ids": ["26751"], "sent_idx": 2}, {"id": 11, "start": 72, "end": 89, "ref_url": "Summer_solstice", "ref_ids": ["11890785"], "sent_idx": 2}, {"id": 12, "start": 125, "end": 138, "ref_url": "Winter_solstice", "ref_ids": 
["8521120"], "sent_idx": 2}, {"id": 13, "start": 47, "end": 59, "ref_url": "Punta_Arenas", "ref_ids": ["8857962"], "sent_idx": 3}]} +{"id": "18272579", "title": "Lucien Olivier", "sentences": ["Lucien Olivier () (1838–14 November 1883) was a Russian chef of Belgian and French descent, and owner of Hermitage restaurant in the center of Moscow, Russian Empire, in the early 1860s.", "Olivier is known for the creation of Olivier salad, also known as \"Russian salad\".", "The secret of the recipe was not disclosed until his death.", "Lucien Olivier died in Moscow at age 45 in 1883 and was buried at Vvedenskoye Cemetery.", "His tomb was lost until 2008.", "The current salad has numerous variations which are a mixture of every component Olivier used to add to his famous dish as well as ingredients that he did not use himself, with a mayonnaise dressing."], "mentions": [{"id": 0, "start": 56, "end": 60, "ref_url": "Chef", "ref_ids": ["279684"], "sent_idx": 0}, {"id": 1, "start": 64, "end": 71, "ref_url": "Belgian_people", "ref_ids": null, "sent_idx": 0}, {"id": 2, "start": 76, "end": 82, "ref_url": "French_people", "ref_ids": ["962731"], "sent_idx": 0}, {"id": 3, "start": 105, "end": 125, "ref_url": "Hermitage_restaurant", "ref_ids": null, "sent_idx": 0}, {"id": 4, "start": 23, "end": 29, "ref_url": "Moscow", "ref_ids": ["19004"], "sent_idx": 3}, {"id": 5, "start": 151, "end": 165, "ref_url": "Russian_Empire", "ref_ids": ["20611504"], "sent_idx": 0}, {"id": 6, "start": 37, "end": 50, "ref_url": "Olivier_salad", "ref_ids": ["34231242"], "sent_idx": 1}, {"id": 7, "start": 66, "end": 86, "ref_url": "Vvedenskoye_Cemetery", "ref_ids": ["10790776"], "sent_idx": 3}]} +{"id": "18272605", "title": "Kim Yoo-suk", "sentences": ["Kim Yoo-suk (, born 19 January 1982) is a South Korean pole vaulter.", "He finished eighth at the 2003 Universiade.", "He also competed at the 2004 Olympic Games and the 2005 World Championships without reaching the final.", "His personal best jump is 5.66 
metres, achieved in July 2005 in Livermore."], "mentions": [{"id": 0, "start": 42, "end": 53, "ref_url": "South_Korea", "ref_ids": ["27019"], "sent_idx": 0}, {"id": 1, "start": 55, "end": 65, "ref_url": "Pole_vault", "ref_ids": ["24082"], "sent_idx": 0}, {"id": 2, "start": 26, "end": 42, "ref_url": "Athletics_at_the_2003_Summer_Universiade", "ref_ids": ["12305146"], "sent_idx": 1}, {"id": 3, "start": 24, "end": 42, "ref_url": "Athletics_at_the_2004_Summer_Olympics_–_Men's_pole_vault", "ref_ids": ["913319"], "sent_idx": 2}, {"id": 4, "start": 51, "end": 75, "ref_url": "2005_World_Championships_in_Athletics_-_Men's_Pole_Vault", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 64, "end": 73, "ref_url": "Livermore,_California", "ref_ids": ["107337"], "sent_idx": 3}]} +{"id": "18272624", "title": "Coffee production in Vietnam", "sentences": ["Coffee production has been a major source of income for Vietnam since the early 20th century.", "First introduced by the French in 1857, the Vietnamese coffee industry developed through the plantation system, becoming a major economic force in the country.", "After an interruption during and immediately following the Vietnam War, production rose once again after Đổi mới economic reforms, making coffee second only to rice in value of agricultural products exported from Vietnam.", "However, despite being one of the world's most competitive producer of Robusta coffee, the coffee industry has become a site of contestations, both real and imagined, within the local and global spaces of interactions.", "In particular, these tensions are a response to the risks associated with the fluctuating coffee economy which then intensified social inequality and environmental degradation.", "Due to the perceived macro level failures on the part of the government to mitigate these risks, Vietnamese have, on the micro and group level, produce their own strategies to not only address these effects but to also actively participate in the discourses 
surrounding the production of knowledge relating to livelihoods."], "mentions": [{"id": 0, "start": 97, "end": 104, "ref_url": "Vietnam", "ref_ids": ["202354"], "sent_idx": 5}, {"id": 1, "start": 24, "end": 30, "ref_url": "France", "ref_ids": ["5843419"], "sent_idx": 1}, {"id": 2, "start": 93, "end": 103, "ref_url": "Plantation", "ref_ids": ["142473"], "sent_idx": 1}, {"id": 3, "start": 59, "end": 70, "ref_url": "Vietnam_War", "ref_ids": ["32611"], "sent_idx": 2}, {"id": 4, "start": 105, "end": 112, "ref_url": "Đổi_mới", "ref_ids": null, "sent_idx": 2}, {"id": 5, "start": 71, "end": 85, "ref_url": "Robusta_coffee", "ref_ids": ["42994187"], "sent_idx": 3}]} +{"id": "18272634", "title": "Sierra Leoneans in the United Kingdom", "sentences": ["Sierra Leoneans in the United Kingdom are citizens or residents of the United Kingdom who are of Sierra Leonean descent.", "In 2001, there were 17,048 Sierra Leonean-born residents of the UK."], "mentions": [{"id": 0, "start": 23, "end": 37, "ref_url": "United_Kingdom", "ref_ids": ["31717"], "sent_idx": 0}, {"id": 1, "start": 27, "end": 41, "ref_url": "Sierra_Leonean", "ref_ids": null, "sent_idx": 1}]} +{"id": "18272655", "title": "Rebecca Wing", "sentences": ["Rebecca Wing (born 15 July 1992) is a British artistic gymnast from Farnborough, Hampshire.", "She was a member of the British 2008 Summer Olympics artistic gymnastics team and a member of the British team that came 6th at the 2007 World Championships in Stuttgart.", "She studied at Cove Secondary School and Farnborough Sixth Form College where she achieved straight A grades in her A Levels of Maths, Psychology, Biology and PE.", "After originally obtaining a place at the University of Exeter, Wing decided to take a gap year before obtaining a scholarship to Stanford University to compete for their NCAA gymnastics team as Class of 2015."], "mentions": [{"id": 0, "start": 53, "end": 69, "ref_url": "Artistic_gymnastics", "ref_ids": ["231482"], "sent_idx": 1}, {"id": 1, 
"start": 68, "end": 90, "ref_url": "Farnborough,_Hampshire", "ref_ids": ["232477"], "sent_idx": 0}, {"id": 2, "start": 32, "end": 52, "ref_url": "2008_Summer_Olympics", "ref_ids": ["77745"], "sent_idx": 1}, {"id": 3, "start": 132, "end": 156, "ref_url": "2007_World_Artistic_Gymnastics_Championships", "ref_ids": ["13062605"], "sent_idx": 1}, {"id": 4, "start": 160, "end": 169, "ref_url": "Stuttgart", "ref_ids": ["28565"], "sent_idx": 1}, {"id": 5, "start": 42, "end": 62, "ref_url": "University_of_Exeter", "ref_ids": ["33719893"], "sent_idx": 3}, {"id": 6, "start": 130, "end": 149, "ref_url": "Stanford_University", "ref_ids": ["26977"], "sent_idx": 3}]} +{"id": "18272669", "title": "Munson Line", "sentences": ["The Munson Steamship Line, frequently shortened to the Munson Line, was an American steamship company that operated in the Atlantic Ocean primarily between U.S. ports and ports in the Caribbean and South America.", "The line was founded in 1899 as a freight line, added passenger service in 1919, and went out of business in 1937."], "mentions": [{"id": 0, "start": 75, "end": 83, "ref_url": "United_States", "ref_ids": ["3434750"], "sent_idx": 0}, {"id": 1, "start": 184, "end": 193, "ref_url": "Caribbean", "ref_ids": ["18956035"], "sent_idx": 0}, {"id": 2, "start": 198, "end": 211, "ref_url": "South_America", "ref_ids": ["26769"], "sent_idx": 0}]} +{"id": "18272676", "title": "Bambuco Bridge", "sentences": ["The Bambuco Bridge was a temporary outdoor sculpture in the form of a simple suspension bridge spanning the River Tyne, England, made entirely from bamboo wood.", "The public art was designed and built for the \"SummerTyne\" festival, part of the NewcastleGateshead initiative."], "mentions": [{"id": 0, "start": 43, "end": 52, "ref_url": "Sculpture", "ref_ids": ["26714"], "sent_idx": 0}, {"id": 1, "start": 70, "end": 94, "ref_url": "Simple_suspension_bridge", "ref_ids": ["1480565"], "sent_idx": 0}, {"id": 2, "start": 108, "end": 118, "ref_url": "River_Tyne", 
"ref_ids": ["145607"], "sent_idx": 0}, {"id": 3, "start": 120, "end": 127, "ref_url": "England", "ref_ids": ["9316"], "sent_idx": 0}, {"id": 4, "start": 148, "end": 159, "ref_url": "Bamboo_wood", "ref_ids": null, "sent_idx": 0}, {"id": 5, "start": 4, "end": 14, "ref_url": "Public_art", "ref_ids": ["318901"], "sent_idx": 1}, {"id": 6, "start": 81, "end": 99, "ref_url": "NewcastleGateshead", "ref_ids": ["10426006"], "sent_idx": 1}]} diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index 66eb8db2..00000000 --- a/docs/.gitignore +++ /dev/null @@ -1,9 +0,0 @@ -/.quarto/ -_sidebar.yml -reference/ -^reference/index.qmd -_site -*.quarto_ipynb -*_files/execute-results -*_files/figure-html -objects.json \ No newline at end of file diff --git a/docs/_extensions/machow/interlinks/.gitignore b/docs/_extensions/machow/interlinks/.gitignore deleted file mode 100644 index 5a1bf0b4..00000000 --- a/docs/_extensions/machow/interlinks/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.html -*.pdf -*_files/ diff --git a/docs/_extensions/machow/interlinks/_extension.yml b/docs/_extensions/machow/interlinks/_extension.yml deleted file mode 100644 index c8a81213..00000000 --- a/docs/_extensions/machow/interlinks/_extension.yml +++ /dev/null @@ -1,7 +0,0 @@ -title: Interlinks -author: Michael Chow -version: 1.1.0 -quarto-required: ">=1.2.0" -contributes: - filters: - - interlinks.lua diff --git a/docs/_extensions/machow/interlinks/interlinks.lua b/docs/_extensions/machow/interlinks/interlinks.lua deleted file mode 100644 index 47aa61fa..00000000 --- a/docs/_extensions/machow/interlinks/interlinks.lua +++ /dev/null @@ -1,254 +0,0 @@ -local function read_inv_text(filename) - -- read file - local file = io.open(filename, "r") - if file == nil then - return nil - end - local str = file:read("a") - file:close() - - - local project = str:match("# Project: (%S+)") - local version = str:match("# Version: (%S+)") - - local data = {project = project, version = version, items = {}} - 
- local ptn_data = - "^" .. - "(.-)%s+" .. -- name - "([%S:]-):" .. -- domain - "([%S]+)%s+" .. -- role - "(%-?%d+)%s+" .. -- priority - "(%S*)%s+" .. -- uri - "(.-)\r?$" -- dispname - - - -- Iterate through each line in the file content - for line in str:gmatch("[^\r\n]+") do - if not line:match("^#") then - -- Match each line against the pattern - local name, domain, role, priority, uri, dispName = line:match(ptn_data) - - -- if name is nil, raise an error - if name == nil then - error("Error parsing line: " .. line) - end - - data.items[#data.items + 1] = { - name = name, - domain = domain, - role = role, - priority = priority, - uri = uri, - dispName = dispName - } - end - end - return data -end - -local function read_json(filename) - - local file = io.open(filename, "r") - if file == nil then - return nil - end - local str = file:read("a") - file:close() - - local decoded = quarto.json.decode(str) - return decoded -end - -local function read_inv_text_or_json(base_name) - local file = io.open(base_name .. ".txt", "r") - if file then - -- TODO: refactors so we don't just close the file immediately - io.close(file) - json = read_inv_text(base_name .. ".txt") - - else - json = read_json(base_name .. ".json") - end - - return json -end - -local inventory = {} - -local function lookup(search_object) - - local results = {} - for _, inv in ipairs(inventory) do - for _, item in ipairs(inv.items) do - -- e.g. 
:external+<inv_name>:<domain>:<role>:`<name>` - if item.inv_name and item.inv_name ~= search_object.inv_name then - goto continue - end - - if item.name ~= search_object.name then - goto continue - end - - if search_object.role and item.role ~= search_object.role then - goto continue - end - - if search_object.domain and item.domain ~= search_object.domain then - goto continue - else - if search_object.domain or item.domain == "py" then - table.insert(results, item) - end - - goto continue - end - - ::continue:: - end - end - - if #results == 1 then - return results[1] - end - if #results > 1 then - quarto.log.warning("Found multiple matches for " .. search_object.name .. ", using the first match.") - return results[1] - end - if #results == 0 then - quarto.log.warning("Found no matches for object:\n", search_object) - end - - return nil -end - -local function mysplit (inputstr, sep) - if sep == nil then - sep = "%s" - end - local t={} - for str in string.gmatch(inputstr, "([^"..sep.."]+)") do - table.insert(t, str) - end - return t -end - -local function normalize_role(role) - if role == "func" then - return "function" - end - return role -end - -local function build_search_object(str) - local starts_with_colon = str:sub(1, 1) == ":" - local search = {} - if starts_with_colon then - local t = mysplit(str, ":") - if #t == 2 then - -- e.g. :py:func:`my_func` - search.role = normalize_role(t[1]) - search.name = t[2]:match("%%60(.*)%%60") - elseif #t == 3 then - -- e.g. :py:func:`my_func` - search.domain = t[1] - search.role = normalize_role(t[2]) - search.name = t[3]:match("%%60(.*)%%60") - elseif #t == 4 then - -- e.g. :ext+inv:py:func:`my_func` - search.external = true - - search.inv_name = t[1]:match("external%+(.*)") - search.domain = t[2] - search.role = normalize_role(t[3]) - search.name = t[4]:match("%%60(.*)%%60") - else - quarto.log.warning("couldn't parse this link: " .. 
str) - return {} - end - else - search.name = str:match("%%60(.*)%%60") - end - - if search.name == nil then - quarto.log.warning("couldn't parse this link: " .. str) - return {} - end - - if search.name:sub(1, 1) == "~" then - search.shortened = true - search.name = search.name:sub(2, -1) - end - return search -end - -local function report_broken_link(link, search_object, replacement) - -- TODO: how to unescape html elements like [? - return pandoc.Code(pandoc.utils.stringify(link.content)) -end - -function Link(link) - -- do not process regular links ---- - if not link.target:match("%%60") then - return link - end - - -- lookup item ---- - local search = build_search_object(link.target) - local item = lookup(search) - - -- determine replacement, used if no link text specified ---- - local original_text = pandoc.utils.stringify(link.content) - local replacement = search.name - if search.shortened then - local t = mysplit(search.name, ".") - replacement = t[#t] - end - - -- set link text ---- - if original_text == "" and replacement ~= nil then - link.content = pandoc.Code(replacement) - end - - -- report broken links ---- - if item == nil then - return report_broken_link(link, search) - end - link.target = item.uri:gsub("%$$", search.name) - - - return link -end - -local function fixup_json(json, prefix) - for _, item in ipairs(json.items) do - item.uri = prefix .. item.uri - end - table.insert(inventory, json) -end - -return { - { - Meta = function(meta) - local json - local prefix - if meta.interlinks and meta.interlinks.sources then - for k, v in pairs(meta.interlinks.sources) do - local base_name = quarto.project.offset .. "/_inv/" .. k .. "_objects" - json = read_inv_text_or_json(base_name) - prefix = pandoc.utils.stringify(v.url) - if json ~= nil then - fixup_json(json, prefix) - end - end - end - json = read_inv_text_or_json(quarto.project.offset .. 
"/objects") - if json ~= nil then - fixup_json(json, "/") - end - end - }, - { - Link = Link - } -} diff --git a/docs/_quarto.yml b/docs/_quarto.yml deleted file mode 100644 index c3d1189b..00000000 --- a/docs/_quarto.yml +++ /dev/null @@ -1,97 +0,0 @@ -project: - type: website - -website: - title: Graph RAG - twitter-card: true - navbar: - background: primary - search: true - left: - - text: "Home" - href: index.qmd - - posts/index.qmd - - getting-started.qmd - - examples/index.qmd - - reference/index.qmd - right: - - icon: github - href: https://github.com/datastax/graph-rag - - icon: rss - href: index.xml - page-footer: - border: true - left: "© Copyright 2025, Graph RAG Contributors." - repo-url: https://github.com/datastax/graph-rag/ - repo-subdir: docs - repo-actions: - - edit - - source - site-url: https://datastax.github.io/graph-rag - - -metadata-files: - - _sidebar.yml - -format: - html: - theme: pulse - toc: true - anchor-sections: true - css: - - reference/_styles-quartodoc.css - - link-external-icon: true - link-external-newwindow: true - link-external-filter: '^(?:http:|https:)\/\/datastax.github.io\/graph-rag' - -quartodoc: - style: pkgdown - dir: reference - out_index: _api_index.qmd - css: reference/_styles-quartodoc.css - sidebar: - file: "_sidebar.yml" - style: docked - search: true - collapse-level: 2 - contents: - - text: "Home" - href: index.qmd - - href: getting-started.qmd - - examples/index.qmd - - section: "Reference" - contents: - - "{{ contents }}" - - package: langchain_graph_retriever - sections: - - title: LangChain Graph Retriever - desc: Graph `Retriever` implementation for LangChain `VectorStore`s. - contents: - - GraphRetriever - - title: Retrieval Strategies - desc: Strategies for use with the retrieval. - package: langchain_graph_retriever.strategies - contents: - - Strategy - - Eager - - Mmr - - title: Adapters - desc: Adapters for LangChain `VectorStore`s supporting Graph traversal. 
- package: langchain_graph_retriever.adapters - contents: - - Adapter - - astra.AstraAdapter - - cassandra.CassandraAdapter - - chroma.ChromaAdapter - - in_memory.InMemoryAdapter - - open_search.OpenSearchAdapter - -filters: - - interlinks - -interlinks: - sources: - python: - url: https://docs.python.org/3/ \ No newline at end of file diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/__init__.py b/docs/blog/index.md similarity index 100% rename from packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/__init__.py rename to docs/blog/index.md diff --git a/docs/posts/introducing-graph-rag.qmd b/docs/blog/posts/introducing-graph-rag.md similarity index 96% rename from docs/posts/introducing-graph-rag.qmd rename to docs/blog/posts/introducing-graph-rag.md index ffedb964..9d420053 100644 --- a/docs/posts/introducing-graph-rag.qmd +++ b/docs/blog/posts/introducing-graph-rag.md @@ -1,8 +1,9 @@ --- title: "Introducing Graph Retrievers: Smarter, Simpler Document Graphs for Vector Stores" +slug: introducing-graph-rag description: "Announcing the first release of Graph Retrievers, a powerful graph traversal retriever for your vector store!" author: "Ben Chambers" -date: "1/27/2025" +date: 2025-01-31 categories: - langchain - news @@ -12,6 +13,8 @@ We're excited to announce the release of **Graph Retrievers**, a powerful new to With Graph Retrievers, you can dynamically explore relationships between documents using metadata fields—no need for complex preprocessing or building an entire knowledge graph upfront. +<!-- more --> + ## A Brief History: Where We Started We originally developed [`GraphVectorStore`](https://www.datastax.com/blog/knowledge-graphs-for-rag-without-a-graphdb) to efficiently handle structured relationships between documents. 
This approach proved especially useful for [reducing costs in knowledge graph creation](https://hackernoon.com/how-to-save-$70k-building-a-knowledge-graph-for-rag-on-6m-wikipedia-pages). By lazily traversing metadata instead of building a full graph, we made real-time retrieval more efficient and cost-effective. @@ -65,7 +68,7 @@ Assuming you already have a LangChain project using a Vector Store, all you need # Define your graph traversal traversal = GraphRetriever( store=vector_store, - edges=[("mentions", "id"), "entities"], + edges=[("mentions", "id"), ("entities", "entites")], ) # Query the graph @@ -80,4 +83,4 @@ Reflecting these improvements, we've moved the implementation to a new [package] - **Documentation**: Learn how to get started in the [official documentation](https://datastax.github.io/graph-rag). - **Join the Community**: Share feedback or contribute by opening an issue or pull request in the [GitHub repo](https://github.com/datastax/graph-rag). -Give Graph Retrievers a try today and take your retrieval-augmented generation (RAG) workflows to the next level. We can’t wait to hear what you build! \ No newline at end of file +Give Graph Retrievers a try today and take your retrieval-augmented generation (RAG) workflows to the next level. We can’t wait to hear what you build! diff --git a/docs/data/historical_figures/ada_lovelace.html b/docs/data/historical_figures/ada_lovelace.html new file mode 100644 index 00000000..eec2055b --- /dev/null +++ b/docs/data/historical_figures/ada_lovelace.html @@ -0,0 +1,25 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Ada Lovelace</title> +</head> + +<body> + <h1>Ada Lovelace</h1> + <p>Augusta Ada King, Countess of Lovelace, was an English mathematician and writer, chiefly known for her work on + Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine. 
She was the first to + recognise that the machine had applications beyond pure calculation, and to have published the first algorithm + intended to be carried out by such a machine. As a result, she is often regarded as the first computer + programmer.</p> + <p>Lovelace's notes on the Analytical Engine include what is recognized as the first algorithm intended to be + processed by a machine. Her contributions to the field of computing were not fully recognized until the 20th + century.</p> + <p>Ada Lovelace was born on 10 December 1815 in London, England. She died on 27 November 1852 in Marylebone, London, + England.</p> + <p>Related figures: <a href="charles_babbage.html">Charles Babbage</a>, <a href="alan_turing.html">Alan Turing</a> + </p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/alan_turing.html b/docs/data/historical_figures/alan_turing.html new file mode 100644 index 00000000..c657257c --- /dev/null +++ b/docs/data/historical_figures/alan_turing.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Alan Turing</title> +</head> + +<body> + <h1>Alan Turing</h1> + <p>Alan Turing was an English mathematician, computer scientist, logician, cryptanalyst, philosopher, and + theoretical biologist. Turing was highly influential in the development of theoretical computer science, + providing a formalisation of the concepts of algorithm and computation with the Turing machine, which can be + considered a model of a general-purpose computer.</p> + <p>Turing is widely considered to be the father of theoretical computer science and artificial intelligence. During + World War II, Turing played a crucial role in cracking intercepted coded messages that enabled the Allies to + defeat the Nazis in many crucial engagements.</p> + <p>Alan Turing was born on 23 June 1912 in Maida Vale, London, England. 
He died on 7 June 1954 in Wilmslow, + Cheshire, England.</p> + <p>Related figures: <a href="charles_babbage.html">Charles Babbage</a>, <a href="ada_lovelace.html">Ada Lovelace</a> + </p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/albert_einstein.html b/docs/data/historical_figures/albert_einstein.html new file mode 100644 index 00000000..37162f01 --- /dev/null +++ b/docs/data/historical_figures/albert_einstein.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Albert Einstein</title> +</head> + +<body> + <h1>Albert Einstein</h1> + <p>Albert Einstein was a theoretical physicist who developed the theory of relativity, one of the two pillars of + modern physics (alongside quantum mechanics). His work is also known for its influence on the philosophy of + science.</p> + <p>Einstein is best known for his mass–energy equivalence formula E = mc², which has been dubbed "the world's most + famous equation". He received the 1921 Nobel Prize in Physics "for his services to theoretical physics, and + especially for his discovery of the law of the photoelectric effect", a pivotal step in the development of + quantum theory.</p> + <p>Einstein was born in the Kingdom of Württemberg in the German Empire on 14 March 1879. 
He died on 18 April 1955 + in Princeton, New Jersey, United States.</p> + <p>Related figures: <a href="isaac_newton.html">Isaac Newton</a>, <a href="nikola_tesla.html">Nikola Tesla</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/alexander_graham_bell.html b/docs/data/historical_figures/alexander_graham_bell.html new file mode 100644 index 00000000..5a167b11 --- /dev/null +++ b/docs/data/historical_figures/alexander_graham_bell.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Alexander Graham Bell</title> +</head> + +<body> + <h1>Alexander Graham Bell</h1> + <p>Alexander Graham Bell was a Scottish-born inventor, scientist, and engineer who is credited with inventing and + patenting the first practical telephone. He also co-founded the American Telephone and Telegraph Company (AT&T) + in 1885.</p> + <p>Bell's research on hearing and speech further led him to experiment with hearing devices, which eventually + culminated in the invention of the telephone. His work profoundly impacted communication technology and laid the + foundation for modern telecommunications.</p> + <p>Bell was born on 3 March 1847 in Edinburgh, Scotland. He died on 2 August 1922 in Baddeck, Nova Scotia, Canada. 
+ </p> + <p>Related figures: <a href="thomas_edison.html">Thomas Edison</a>, <a href="nikola_tesla.html">Nikola Tesla</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/alexander_the_great.html b/docs/data/historical_figures/alexander_the_great.html new file mode 100644 index 00000000..f495cfe8 --- /dev/null +++ b/docs/data/historical_figures/alexander_the_great.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Alexander the Great</title> +</head> + +<body> + <h1>Alexander the Great</h1> + <p>Alexander III of Macedon, commonly known as Alexander the Great, was a king of the ancient Greek kingdom of + Macedon and a member of the Argead dynasty. He was born in Pella in 356 BC and succeeded his father Philip II to + the throne at the age of 20. He spent most of his ruling years on an unprecedented military campaign through + Asia and northeast Africa, and by the age of 30, he had created one of the largest empires of the ancient world, + stretching from Greece to northwestern India.</p> + <p>Alexander was undefeated in battle and is widely considered one of history's most successful military commanders. + His campaigns greatly increased the cultural interactions and exchanges between the East and West, leading to + the Hellenistic period.</p> + <p>Alexander was born on 20 July 356 BC in Pella, Macedon. 
He died on 10/11 June 323 BC in the Palace of + Nebuchadnezzar II, Babylon, Mesopotamia (modern-day Iraq).</p> + <p>Related figures: <a href="aristotle.html">Aristotle</a>, <a href="philip_ii.html">Philip II of Macedon</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/alfred_russel_wallace.html b/docs/data/historical_figures/alfred_russel_wallace.html new file mode 100644 index 00000000..4173e0e6 --- /dev/null +++ b/docs/data/historical_figures/alfred_russel_wallace.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Alfred Russel Wallace</title> +</head> + +<body> + <h1>Alfred Russel Wallace</h1> + <p>Alfred Russel Wallace was a British naturalist, explorer, geographer, anthropologist, and biologist. He is best + known for independently conceiving the theory of evolution through natural selection. His paper on the subject + was jointly published with some of Charles Darwin's writings in 1858, which prompted Darwin to publish his own + theory.</p> + <p>Wallace was a prolific writer and a social activist. He was one of the leading evolutionary thinkers of the 19th + century and made significant contributions to the field of biogeography. He is sometimes called the "father of + biogeography".</p> + <p>Alfred Russel Wallace was born on 8 January 1823 in Llanbadoc, Monmouthshire, Wales. 
He died on 7 November 1913 + in Broadstone, Dorset, England.</p> + <p>Related figures: <a href="charles_darwin.html">Charles Darwin</a>, <a href="thomas_huxley.html">Thomas Huxley</a> + </p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/aristotle.html b/docs/data/historical_figures/aristotle.html new file mode 100644 index 00000000..1c43c852 --- /dev/null +++ b/docs/data/historical_figures/aristotle.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Aristotle</title> +</head> + +<body> + <h1>Aristotle</h1> + <p>Aristotle was a Greek philosopher and polymath during the Classical period in Ancient Greece. Taught by Plato, he + was the founder of the Lyceum, the Peripatetic school of philosophy, and the Aristotelian tradition. His + writings cover many subjects including physics, biology, zoology, metaphysics, logic, ethics, aesthetics, + poetry, theatre, music, rhetoric, psychology, linguistics, economics, politics, and government.</p> + <p>Aristotle provided a complex synthesis of the various philosophies existing prior to him. It was above all from + his teachings that the West inherited its intellectual lexicon, as well as problems and methods of inquiry. As a + result, his philosophy has exerted a unique influence on almost every form of knowledge in the West and it + continues to be a subject of contemporary philosophical discussion.</p> + <p>Aristotle was born in 384 BC in Stagira, Chalcidice, Greece. 
He died in 322 BC in Euboea, Greece.</p> + <p>Related figures: <a href="plato.html">Plato</a>, <a href="alexander_the_great.html">Alexander the Great</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/charles_babbage.html b/docs/data/historical_figures/charles_babbage.html new file mode 100644 index 00000000..f2b52a4b --- /dev/null +++ b/docs/data/historical_figures/charles_babbage.html @@ -0,0 +1,21 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Charles Babbage</title> +</head> + +<body> + <h1>Charles Babbage</h1> + <p>Charles Babbage was an English polymath. A mathematician, philosopher, inventor, and mechanical engineer, Babbage + originated the concept of a digital programmable computer. He is considered by some to be "the father of the + computer".</p> + <p>Babbage is credited with inventing the first mechanical computer that eventually led to more complex designs. His + Analytical Engine, although never completed, is considered a precursor to modern computers.</p> + <p>Charles Babbage was born on 26 December 1791 in London, England. He died on 18 October 1871 in London, England. + </p> + <p>Related figures: <a href="ada_lovelace.html">Ada Lovelace</a>, <a href="alan_turing.html">Alan Turing</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/charles_darwin.html b/docs/data/historical_figures/charles_darwin.html new file mode 100644 index 00000000..050fa072 --- /dev/null +++ b/docs/data/historical_figures/charles_darwin.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Charles Darwin</title> +</head> + +<body> + <h1>Charles Darwin</h1> + <p>Charles Robert Darwin was an English naturalist, geologist, and biologist, best known for his contributions to + the science of evolution. 
His proposition that all species of life have descended from common ancestors is now + widely accepted and considered a fundamental concept in science.</p> + <p>In a joint publication with Alfred Russel Wallace, he introduced his scientific theory that this branching + pattern of evolution resulted from a process that he called natural selection, in which the struggle for + existence has a similar effect to the artificial selection involved in selective breeding.</p> + <p>Darwin was born on 12 February 1809 in Shrewsbury, Shropshire, England. He died on 19 April 1882 in Down, Kent, + England.</p> + <p>Related figures: <a href="alfred_russel_wallace.html">Alfred Russel Wallace</a>, <a + href="thomas_huxley.html">Thomas Huxley</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/galileo_galilei.html b/docs/data/historical_figures/galileo_galilei.html new file mode 100644 index 00000000..0f3442cd --- /dev/null +++ b/docs/data/historical_figures/galileo_galilei.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Galileo Galilei</title> +</head> + +<body> + <h1>Galileo Galilei</h1> + <p>Galileo Galilei was an Italian astronomer, physicist, and engineer, sometimes described as a polymath. Galileo + has been called the "father of observational astronomy", the "father of modern physics", the "father of the + scientific method", and the "father of modern science".</p> + <p>His contributions to observational astronomy include the telescopic confirmation of the phases of Venus, the + discovery of the four largest satellites of Jupiter, and the observation and analysis of sunspots. Galileo also + worked in applied science and technology, inventing an improved military compass and other instruments.</p> + <p>Galileo was born on 15 February 1564 in Pisa, Duchy of Florence (present-day Italy). 
He died on 8 January 1642 in + Arcetri, Grand Duchy of Tuscany (present-day Italy).</p> + <p>Related figures: <a href="isaac_newton.html">Isaac Newton</a>, <a href="nicolaus_copernicus.html">Nicolaus + Copernicus</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/index.html b/docs/data/historical_figures/index.html new file mode 100644 index 00000000..9d144c5b --- /dev/null +++ b/docs/data/historical_figures/index.html @@ -0,0 +1,38 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Historical Figures</title> + <link rel="stylesheet" href="css/styles.css"> +</head> + +<body> + <h1>Historical Figures</h1> + <ul> + <li><a href="albert_einstein.html">Albert Einstein</a></li> + <li><a href="isaac_newton.html">Isaac Newton</a></li> + <li><a href="marie_curie.html">Marie Curie</a></li> + <li><a href="nikola_tesla.html">Nikola Tesla</a></li> + <li><a href="leonardo_da_vinci.html">Leonardo da Vinci</a></li> + <li><a href="galileo_galilei.html">Galileo Galilei</a></li> + <li><a href="charles_darwin.html">Charles Darwin</a></li> + <li><a href="ada_lovelace.html">Ada Lovelace</a></li> + <li><a href="aristotle.html">Aristotle</a></li> + <li><a href="alexander_the_great.html">Alexander the Great</a></li> + <li><a href="nicolaus_copernicus.html">Nicolaus Copernicus</a></li> + <li><a href="thomas_edison.html">Thomas Edison</a></li> + <li><a href="alexander_graham_bell.html">Alexander Graham Bell</a></li> + <li><a href="michelangelo.html">Michelangelo</a></li> + <li><a href="raphael.html">Raphael</a></li> + <li><a href="plato.html">Plato</a></li> + <li><a href="philip_ii.html">Philip II of Macedon</a></li> + <li><a href="charles_babbage.html">Charles Babbage</a></li> + <li><a href="alan_turing.html">Alan Turing</a></li> + <li><a href="alfred_russel_wallace.html">Alfred Russel Wallace</a></li> + <li><a href="thomas_huxley.html">Thomas Huxley</a></li> + </ul> + <script src="js/scripts.js"></script> 
+</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/isaac_newton.html b/docs/data/historical_figures/isaac_newton.html new file mode 100644 index 00000000..5930d065 --- /dev/null +++ b/docs/data/historical_figures/isaac_newton.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Isaac Newton</title> +</head> + +<body> + <h1>Isaac Newton</h1> + <p>Sir Isaac Newton was an English mathematician, physicist, astronomer, and author who is widely recognised as one + of the most influential scientists of all time and a key figure in the scientific revolution.</p> + <p>His book "Philosophiæ Naturalis Principia Mathematica" (Mathematical Principles of Natural Philosophy), first + published in 1687, laid the foundations of classical mechanics. Newton also made seminal contributions to optics + and shares credit with Gottfried Wilhelm Leibniz for developing the infinitesimal calculus.</p> + <p>Newton was born on 25 December 1642 in Woolsthorpe, Lincolnshire, England. 
He died on 20 March 1726/27 in + Kensington, Middlesex, England.</p> + <p>Related figures: <a href="albert_einstein.html">Albert Einstein</a>, <a href="galileo_galilei.html">Galileo + Galilei</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/leonardo_da_vinci.html b/docs/data/historical_figures/leonardo_da_vinci.html new file mode 100644 index 00000000..6b493d19 --- /dev/null +++ b/docs/data/historical_figures/leonardo_da_vinci.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Leonardo da Vinci</title> +</head> + +<body> + <h1>Leonardo da Vinci</h1> + <p>Leonardo da Vinci was an Italian polymath of the Renaissance whose areas of interest included invention, drawing, + painting, sculpture, architecture, science, music, mathematics, engineering, literature, anatomy, geology, + astronomy, botany, paleontology, and cartography.</p> + <p>He is widely considered one of the greatest painters of all time, despite fewer than 25 of his paintings having + survived. His most famous works include the Mona Lisa and The Last Supper. Leonardo's notebooks, which contain + drawings, scientific diagrams, and his thoughts on various subjects, are also highly regarded.</p> + <p>Leonardo was born on 15 April 1452 in Vinci, Republic of Florence (present-day Italy). 
He died on 2 May 1519 in + Amboise, Kingdom of France.</p> + <p>Related figures: <a href="michelangelo.html">Michelangelo</a>, <a href="raphael.html">Raphael</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/marie_curie.html b/docs/data/historical_figures/marie_curie.html new file mode 100644 index 00000000..219a55fe --- /dev/null +++ b/docs/data/historical_figures/marie_curie.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Marie Curie</title> +</head> + +<body> + <h1>Marie Curie</h1> + <p>Marie Skłodowska Curie was a Polish and naturalized-French physicist and chemist who conducted pioneering + research on radioactivity. She was the first woman to win a Nobel Prize, the first person and only woman to win + the Nobel Prize twice, and the only person to win the Nobel Prize in two different scientific fields.</p> + <p>Her achievements included the development of the theory of radioactivity (a term that she coined), techniques for + isolating radioactive isotopes, and the discovery of two elements, polonium and radium. Under her direction, the + world's first studies were conducted into the treatment of neoplasms (cancers) using radioactive isotopes.</p> + <p>Curie was born on 7 November 1867 in Warsaw, Poland. 
She died on 4 July 1934 in Passy, Haute-Savoie, France.</p> + <p>Related figures: <a href="albert_einstein.html">Albert Einstein</a>, <a href="nikola_tesla.html">Nikola Tesla</a> + </p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/michelangelo.html b/docs/data/historical_figures/michelangelo.html new file mode 100644 index 00000000..f73aa09a --- /dev/null +++ b/docs/data/historical_figures/michelangelo.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Michelangelo</title> +</head> + +<body> + <h1>Michelangelo</h1> + <p>Michelangelo di Lodovico Buonarroti Simoni, commonly known as Michelangelo, was an Italian sculptor, painter, + architect, and poet of the High Renaissance. He is widely considered one of the greatest artists of all time and + is known for his works such as the statue of David, the Sistine Chapel ceiling, and The Last Judgment.</p> + <p>Michelangelo's influence on the development of Western art is unparalleled. His work demonstrated a blend of + psychological insight, physical realism, and intensity never before seen. His contributions to the fields of + sculpture, painting, and architecture have left a lasting legacy.</p> + <p>Michelangelo was born on 6 March 1475 in Caprese, Republic of Florence (present-day Italy). 
He died on 18 + February 1564 in Rome, Papal States (present-day Italy).</p> + <p>Related figures: <a href="leonardo_da_vinci.html">Leonardo da Vinci</a>, <a href="raphael.html">Raphael</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/nicolaus_copernicus.html b/docs/data/historical_figures/nicolaus_copernicus.html new file mode 100644 index 00000000..3b22d3ce --- /dev/null +++ b/docs/data/historical_figures/nicolaus_copernicus.html @@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Nicolaus Copernicus</title> +</head> + +<body> + <h1>Nicolaus Copernicus</h1> + <p>Nicolaus Copernicus was a Renaissance-era mathematician and astronomer who formulated a model of the universe + that placed the Sun rather than Earth at its center. This heliocentric model was a major milestone in the + history of science and marked the beginning of the Copernican Revolution.</p> + <p>Copernicus's work "De revolutionibus orbium coelestium" (On the Revolutions of the Celestial Spheres), published + just before his death in 1543, is considered one of the most important works in the history of Western science. + It challenged the geocentric model that had dominated for centuries and laid the groundwork for future + astronomers like Galileo and Kepler.</p> + <p>Nicolaus Copernicus was born on 19 February 1473 in Toruń, Poland. He died on 24 May 1543 in Frombork, Poland. 
+ </p> + <p>Related figures: <a href="galileo_galilei.html">Galileo Galilei</a>, <a href="isaac_newton.html">Isaac Newton</a> + </p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/nikola_tesla.html b/docs/data/historical_figures/nikola_tesla.html new file mode 100644 index 00000000..9fccd2da --- /dev/null +++ b/docs/data/historical_figures/nikola_tesla.html @@ -0,0 +1,22 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Nikola Tesla</title> +</head> + +<body> + <h1>Nikola Tesla</h1> + <p>Nikola Tesla was a Serbian-American inventor, electrical engineer, mechanical engineer, and futurist who is best + known for his contributions to the design of the modern alternating current (AC) electricity supply system.</p> + <p>Tesla's work in the field of electrical engineering and electromagnetism formed the basis for many modern + technologies, including wireless communication, radar, and the development of the radio. He held over 300 + patents for his inventions.</p> + <p>Tesla was born on 10 July 1856 in Smiljan, Austrian Empire (modern-day Croatia). He died on 7 January 1943 in New + York City, United States.</p> + <p>Related figures: <a href="thomas_edison.html">Thomas Edison</a>, <a href="albert_einstein.html">Albert + Einstein</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/philip_ii.html b/docs/data/historical_figures/philip_ii.html new file mode 100644 index 00000000..35fa5875 --- /dev/null +++ b/docs/data/historical_figures/philip_ii.html @@ -0,0 +1,21 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Philip II of Macedon</title> +</head> + +<body> + <h1>Philip II of Macedon</h1> + <p>Philip II of Macedon was the king of the ancient Greek kingdom of Macedon from 359 BC until his assassination in + 336 BC. 
He was a member of the Argead dynasty and the father of Alexander the Great.</p> + <p>Philip II is credited with transforming Macedon into a powerful military state. He reformed the Macedonian army, + introducing the phalanx infantry corps, and expanded his kingdom through both diplomacy and military conquest. + His reign laid the groundwork for the future conquests of his son, Alexander the Great.</p> + <p>Philip II was born in 382 BC in Pella, Macedon. He was assassinated in 336 BC in Aegae, Macedon.</p> + <p>Related figures: <a href="alexander_the_great.html">Alexander the Great</a>, <a + href="aristotle.html">Aristotle</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/plato.html b/docs/data/historical_figures/plato.html new file mode 100644 index 00000000..087d07ca --- /dev/null +++ b/docs/data/historical_figures/plato.html @@ -0,0 +1,21 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Plato</title> +</head> + +<body> + <h1>Plato</h1> + <p>Plato was a philosopher in Classical Greece and the founder of the Academy in Athens, the first institution of + higher learning in the Western world. He is widely considered one of the most important figures in the + development of Western philosophy.</p> + <p>Plato's writings explored justice, beauty, and equality, and also contained discussions in aesthetics, political + philosophy, theology, cosmology, epistemology, and the philosophy of language. His most famous works include + "The Republic," "The Symposium," and "The Allegory of the Cave."</p> + <p>Plato was born in 428/427 or 424/423 BC in Athens, Greece. 
He died in 348/347 BC in Athens, Greece.</p> + <p>Related figures: <a href="aristotle.html">Aristotle</a>, <a href="socrates.html">Socrates</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/raphael.html b/docs/data/historical_figures/raphael.html new file mode 100644 index 00000000..80cd744f --- /dev/null +++ b/docs/data/historical_figures/raphael.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Raphael</title> +</head> + +<body> + <h1>Raphael</h1> + <p>Raffaello Sanzio da Urbino, known as Raphael, was an Italian painter and architect of the High Renaissance. His + work is admired for its clarity of form, ease of composition, and visual achievement of the Neoplatonic ideal of + human grandeur. Together with Michelangelo and Leonardo da Vinci, he forms the traditional trinity of great + masters of that period.</p> + <p>Raphael is best known for his Madonnas and for his large figure compositions in the Vatican. His frescoes in the + Raphael Rooms of the Vatican Palace are considered one of the pinnacles of Renaissance art.</p> + <p>Raphael was born on 6 April 1483 in Urbino, Duchy of Urbino (present-day Italy). 
He died on 6 April 1520 in Rome, + Papal States (present-day Italy).</p> + <p>Related figures: <a href="michelangelo.html">Michelangelo</a>, <a href="leonardo_da_vinci.html">Leonardo da + Vinci</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/socrates.html b/docs/data/historical_figures/socrates.html new file mode 100644 index 00000000..924893d4 --- /dev/null +++ b/docs/data/historical_figures/socrates.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Socrates</title> +</head> + +<body> + <h1>Socrates</h1> + <p>Socrates was a classical Greek philosopher credited as one of the founders of Western philosophy. He is an + enigmatic figure known chiefly through the accounts of classical writers, especially the writings of his + students Plato and Xenophon.</p> + <nav> + <ul> + <li><a href="aristotle.html">Aristotle</a></li> + <li><a href="plato.html">Plato</a></li> + </ul> + </nav> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/thomas_edison.html b/docs/data/historical_figures/thomas_edison.html new file mode 100644 index 00000000..bbb5dc1b --- /dev/null +++ b/docs/data/historical_figures/thomas_edison.html @@ -0,0 +1,25 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Thomas Edison</title> +</head> + +<body> + <h1>Thomas Edison</h1> + <p>Thomas Alva Edison was an American inventor and businessman who has been described as America's greatest + inventor. He developed many devices in fields such as electric power generation, mass communication, sound + recording, and motion pictures. 
These inventions, which include the phonograph, the motion picture camera, and + the long-lasting, practical electric light bulb, have had a widespread impact on the modern industrialized + world.</p> + <p>Edison was one of the first inventors to apply the principles of organized science and teamwork to the process of + invention, working with many researchers and employees. He established the first industrial research laboratory. + </p> + <p>Thomas Edison was born on 11 February 1847 in Milan, Ohio, United States. He died on 18 October 1931 in West + Orange, New Jersey, United States.</p> + <p>Related figures: <a href="nikola_tesla.html">Nikola Tesla</a>, <a href="alexander_graham_bell.html">Alexander + Graham Bell</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/data/historical_figures/thomas_huxley.html b/docs/data/historical_figures/thomas_huxley.html new file mode 100644 index 00000000..4f22c996 --- /dev/null +++ b/docs/data/historical_figures/thomas_huxley.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + +<head> + <meta charset="UTF-8"> + <title>Thomas Huxley</title> +</head> + +<body> + <h1>Thomas Huxley</h1> + <p>Thomas Henry Huxley was an English biologist and anthropologist specializing in comparative anatomy. He is known + as "Darwin's Bulldog" for his advocacy of Charles Darwin's theory of evolution. Huxley's vigorous defense of + evolutionary theory earned him the nickname.</p> + <p>Huxley was instrumental in developing scientific education in Britain and was a founding member of the X Club, a + group of scientists dedicated to promoting science and scientific thinking. He also coined the term "agnostic" + to describe his philosophical position on the existence of God.</p> + <p>Thomas Huxley was born on 4 May 1825 in Ealing, Middlesex, England. 
He died on 29 June 1895 in Eastbourne, + Sussex, England.</p> + <p>Related figures: <a href="charles_darwin.html">Charles Darwin</a>, <a href="alfred_russel_wallace.html">Alfred + Russel Wallace</a></p> +</body> + +</html> \ No newline at end of file diff --git a/docs/examples/.gitignore b/docs/examples/.gitignore index b502d643..33681cc0 100644 --- a/docs/examples/.gitignore +++ b/docs/examples/.gitignore @@ -1,2 +1,2 @@ -para_with_hyperlink.zip -load_2wikimultihop.jrnl \ No newline at end of file +*.jrnl +para_with_hyperlink.zip \ No newline at end of file diff --git a/docs/examples/code-generation.ipynb b/docs/examples/code-generation.ipynb new file mode 100644 index 00000000..cb2983f9 --- /dev/null +++ b/docs/examples/code-generation.ipynb @@ -0,0 +1,861 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide_cell" + ] + }, + "outputs": [], + "source": [ + "# ruff: noqa: T201" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Code Generation with GraphRAG\n", + "\n", + "## Introduction\n", + "\n", + "In this notebook, we demonstrate that **GraphRAG significantly outperforms standard vector-based retrieval** for generating working code from documentation. While traditional vector search retrieves relevant snippets, it often lacks the structured understanding needed to produce executable results. In contrast, **GraphRAG enables the LLM to follow logical relationships within documentation, leading to functional code generation**.\n", + "\n", + "We achieve this by leveraging a custom traversal strategy, selecting nodes that contain both **code examples and descriptive text**, allowing the LLM to assemble more complete responses.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started\n", + "\n", + "Below we will experiment with the AstraPy documentation to evaluate how well GraphRAG can generate working code. 
\n", + "\n", + "Using AstraDB as the vector store, we compare GraphRAG’s structured retrieval with standard vector search to solve a specific coding task. \n", + "The query we will be sending to the LLM is the following:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = \"\"\"\n", + "Generate a function for connecting to an AstraDB cluster using the AstraPy library,\n", + "and retrieve some rows from a collection. The number of rows to return should be a\n", + "parameter on the method. Use Token Authentication. Assume the cluster is hosted on\n", + "AstraDB. Include the necessary imports and any other necessary setup. The following\n", + "environment variables are available for your use:\n", + "\n", + "- `ASTRA_DB_API_ENDPOINT`: The Astra DB API endpoint.\n", + "- `ASTRA_DB_APPLICATION_TOKEN`: The Astra DB Application token.\n", + "- `ASTRA_DB_KEYSPACE`: The Astra DB keyspace.\n", + "- `ASTRA_DB_COLLECTION`: The Astra DB collection.\" \\\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following block will configure the environment from the Colab Secrets.\n", + "To run it, you should have the following Colab Secrets defined and accessible to this notebook:\n", + "\n", + "- `OPENAI_API_KEY`: The OpenAI key.\n", + "- `ASTRA_DB_API_ENDPOINT`: The Astra DB API endpoint.\n", + "- `ASTRA_DB_APPLICATION_TOKEN`: The Astra DB Application token.\n", + "- `LANGCHAIN_API_KEY`: Optional. If defined, will enable LangSmith tracing.\n", + "- `ASTRA_DB_KEYSPACE`: Optional. If defined, will specify the Astra DB keyspace. If not defined, will use the default.\n", + "\n", + "If you don't yet have access to an AstraDB database, or need to check your credentials, see the help [here](https://python.langchain.com/docs/integrations/vectorstores/astradb/#credentials)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install modules.\n", + "\n", + "%pip install \\\n", + " langchain-core \\\n", + " langchain-astradb \\\n", + " langchain-openai \\\n", + " langchain-graph-retriever \\\n", + " graph-rag-example-helpers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The last package -- `graph-rag-example-helpers` -- includes the helpers and example documents that we will use in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure import paths.\n", + "import os\n", + "import sys\n", + "\n", + "from langchain_core.documents import Document\n", + "\n", + "sys.path.append(\"../../\")\n", + "\n", + "# Initialize environment variables.\n", + "from graph_rag_example_helpers.env import Environment, initialize_environment\n", + "\n", + "initialize_environment(Environment.ASTRAPY)\n", + "\n", + "os.environ[\"LANGCHAIN_PROJECT\"] = \"code-generation\"\n", + "os.environ[\"ASTRA_DB_COLLECTION\"] = \"code_generation\"\n", + "\n", + "\n", + "def print_doc_ids(docs: list[Document]):\n", + " [print(f\"`{doc.id}` has example: {'example' in doc.metadata}\") for doc in docs]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 1: Loading Data\n", + "\n", + "First, we'll demonstrate how to load the example AstraPy documentation into `AstraDBVectorStore`. We will be creating a LangChain Document for every module, class, attribute, and function in the package. \n", + "\n", + "We will use the pydoc description field for the `page_content` field in the document. Note that not every item in the package has a description. Because of this, there will be many documents that have no page content. \n", + "\n", + "Besides the description, we will also include a bunch of extra information related to the item in the `metadata` field. 
This info can include the item's name, kind, parameters, return type, base class, etc.\n", + "\n", + "The item's `id` will be the items path in the package.\n", + "\n", + "Below are two example documents... One with page content and one without." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example doc with page content\n", + "\n", + "<details markdown><summary>Click to expand</summary>\n", + "\n", + "```yaml\n", + "id: astrapy.client.DataAPIClient\n", + "\n", + "page_content: |\n", + " A client for using the Data API. This is the main entry point and sits\n", + " at the top of the conceptual \"client -> database -> collection\" hierarchy.\n", + "\n", + " A client is created first, optionally passing it a suitable Access Token.\n", + " Starting from the client, then:\n", + " - databases (Database and AsyncDatabase) are created for working with data\n", + " - AstraDBAdmin objects can be created for admin-level work\n", + "\n", + "metadata:\n", + " name: DataAPIClient\n", + " kind: class\n", + " path: astrapy.client.DataAPIClient\n", + " parameters: \n", + " token: |\n", + " str | TokenProvider | None = None\n", + " an Access Token to the database. Example: `\"AstraCS:xyz...\"`.\n", + " This can be either a literal token string or a subclass of\n", + " `astrapy.authentication.TokenProvider`.\n", + " \n", + " environment: |\n", + " str | None = None\n", + " a string representing the target Data API environment.\n", + " It can be left unspecified for the default value of `Environment.PROD`;\n", + " other values include `Environment.OTHER`, `Environment.DSE`.\n", + " \n", + " callers: |\n", + " Sequence[CallerType] = []\n", + " a list of caller identities, i.e. 
applications, or frameworks,\n", + " on behalf of which Data API and DevOps API calls are performed.\n", + " These end up in the request user-agent.\n", + " Each caller identity is a (\"caller_name\", \"caller_version\") pair.\n", + "\n", + " example: |\n", + " >>> from astrapy import DataAPIClient\n", + " >>> my_client = DataAPIClient(\"AstraCS:...\")\n", + " >>> my_db0 = my_client.get_database(\n", + " ... \"https://01234567-....apps.astra.datastax.com\"\n", + " ... )\n", + " >>> my_coll = my_db0.create_collection(\"movies\", dimension=2)\n", + " >>> my_coll.insert_one({\"title\": \"The Title\", \"$vector\": [0.1, 0.3]})\n", + " >>> my_db1 = my_client.get_database(\"01234567-...\")\n", + " >>> my_db2 = my_client.get_database(\"01234567-...\", region=\"us-east1\")\n", + " >>> my_adm0 = my_client.get_admin()\n", + " >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)\n", + " >>> database_list = my_adm0.list_databases()\n", + "\n", + " references: \n", + " astrapy.client.DataAPIClient\n", + "\n", + " gathered_types: \n", + " astrapy.constants.CallerType\n", + " astrapy.authentication.TokenProvider\n", + "```\n", + "</details>\n", + "\n", + "This is the documentation for [`astrapy.client.DataAPIClient`](https://github.com/datastax/astrapy/blob/v1.5.2/astrapy/client.py#L50) class. The `page_content` field contains the description of the class, and the `metadata` field contains the rest of the details, including example code of how to use the class.\n", + "\n", + "The `references` metadata field contains the list of related items used in the example code block. The `gathered_types` field contains the list of types from the parameters section. In GraphRAG, we can use these fields to link to other documents." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example doc without page content\n", + "\n", + "<details markdown><summary>Click to expand</summary>\n", + "\n", + "```yaml\n", + "id: astrapy.admin.AstraDBAdmin.callers\n", + "\n", + "page_content: \"\"\n", + "\n", + "metadata:\n", + " name: callers\n", + " path: astrapy.admin.AstraDBAdmin.callers\n", + " kind: attribute\n", + "```\n", + "\n", + "</details>\n", + "\n", + "This is the documentation for `astrapy.admin.AstraDBAdmin.callers`. The `page_content` field is empty, and the `metadata` field contains the details.\n", + "\n", + "Despite having no page content, this document can still be useful for Graph RAG. We'll add a `parent` field to the metadata at vector store insertion time to link it to the parent document: `astrapy.admin.AstraDBAdmin`, and we can use this for traversal." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the AstraDBVectorStore\n", + "Next, we'll create the Vector Store we're going to load these documents into.\n", + "In our case, we'll use DataStax Astra DB with Open AI embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_astradb import AstraDBVectorStore\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "store = AstraDBVectorStore(\n", + " embedding=OpenAIEmbeddings(),\n", + " collection_name=os.getenv(\"ASTRA_DB_COLLECTION\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading Data\n", + "\n", + "Now its time to load the data into our Vector Store. We'll use a helper method to download already prepared documents from the `graph-rag-example-helpers` package. 
If you want to see how these documents were created from the AstraPy package, see details in the Appendix.\n", + "\n", + "We will use the [`ParentTransformer`](../../guide/transformers/#parenttransformer) to add a parent field to the metadata document field. This will allow us to traverse the graph from a child to its parent.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from graph_rag_example_helpers.datasets.astrapy import fetch_documents\n", + "from langchain_graph_retriever.transformers import ParentTransformer\n", + "\n", + "transformer = ParentTransformer(path_delimiter=\".\")\n", + "doc_ids = store.add_documents(transformer.transform_documents(fetch_documents()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can retrieve a sample document to check if the parent field was added correctly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "callers (attribute)\n", + "\n", + "path: \n", + "\tastrapy.admin.AstraDBAdmin.callers\n", + "\n", + "callers = callers_param\n", + "\n", + "parent: astrapy.admin.AstraDBAdmin\n" + ] + } + ], + "source": [ + "from graph_rag_example_helpers.examples.code_generation import format_document\n", + "\n", + "print(\n", + " format_document(\n", + " store.get_by_document_id(\"astrapy.admin.AstraDBAdmin.callers\"), debug=True\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this point, we've created a Vector Store with all the documents from the AstraPy documentation. 
Each document contains metadata about the module, class, attribute, or function, and the page content contains the description of the item.\n", + "\n", + "In the next section we'll see how to build relationships from the metadata in order to traverse through the documentation in a similar way to how a human would." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 2: Graph Traversal\n", + "\n", + "The GraphRAG library allows us to traverse through the documents in the Vector Store. By changing the [`Strategy`](../../guide/strategies/), we can control how the traversal is performed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Basic Traversal\n", + "\n", + "We'll start with the default [`Eager`](../../guide/strategies/#eager) strategy, which will traverse the graph in a breadth-first manner. In order to do this we need to set up the relationships between the documents. This is done by defining the \"edges\" between the documents.\n", + "\n", + "In our case we will connect the \"references\", \"gathered_types\", \"parent\", \"implemented_by\", and \"bases\" fields in the metadata to the \"id\" field of the document they reference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "edges = [\n", + " (\"gathered_types\", \"$id\"),\n", + " (\"references\", \"$id\"),\n", + " (\"parent\", \"$id\"),\n", + " (\"implemented_by\", \"$id\"),\n", + " (\"bases\", \"$id\"),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that edges are directional, and indicate metadata fields by default. The magic string `$id` is used to indicate the document's id.\n", + "\n", + "In the above `edges` list, any document id found in `gathered_types` will be connected to documents with the corresponding id. 
The other edges will work in a similar way.\n", + "\n", + "Let's use these edges to create a LangChain retriever and documents for our query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "`astrapy.core.db.AsyncAstraDB.collection` has example: False\n", + "`astrapy.core.db.AstraDB.collection` has example: False\n", + "`astrapy.admin.DataAPIDatabaseAdmin.list_keyspaces` has example: True\n", + "`astrapy.admin.DataAPIDatabaseAdmin` has example: True\n", + "`astrapy.core.db.AsyncAstraDB` has example: False\n", + "`astrapy.core.db.AstraDBCollection` has example: False\n" + ] + } + ], + "source": [ + "from langchain_graph_retriever import GraphRetriever\n", + "\n", + "default_retriever = GraphRetriever(store=store, edges=edges)\n", + "\n", + "print_doc_ids(default_retriever.invoke(query, select_k=6, start_k=3, max_depth=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notes on the extra keyword args:\n", + "- `select_k` in GraphRAG is equivalent to `k` in LangChain. It specifies the number of nodes to select during retrieval.\n", + "- `start_k` indicates the number of nodes to select using standard vector retrieval before moving onto graph traversal. \n", + "- `max_depth` is the maximum depth to traverse in the graph.\n", + "\n", + "With this configuration, we were only able to find 2 documents with example code." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom Strategy\n", + "\n", + "Now we will create a custom strategy that will traverse a larger portion of the graph and return the documents that contain code examples or descriptive text. 
\n", + "\n", + "To do this, we need to implement a class that inherits from the base [`Strategy`](../../reference/graph_retriever/strategies/#graph_retriever.strategies.Strategy) class and overrides [`iteration`](../../reference/graph_retriever/strategies/#graph_retriever.strategies.Strategy.iteration) method:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import dataclasses\n", + "from collections.abc import Iterable\n", + "\n", + "from graph_retriever.strategies import NodeTracker, Strategy\n", + "from graph_retriever.types import Node\n", + "\n", + "\n", + "@dataclasses.dataclass\n", + "class CodeExamples(Strategy):\n", + " # internal dictionary to store all nodes found during the traversal\n", + " _nodes: dict[str, Node] = dataclasses.field(default_factory=dict)\n", + "\n", + " def iteration(self, *, nodes: Iterable[Node], tracker: NodeTracker) -> None:\n", + " # save all newly found nodes to the internal node dictionary for later use\n", + " self._nodes.update({n.id: n for n in nodes})\n", + " # traverse the newly found nodes\n", + " new_count = tracker.traverse(nodes=nodes)\n", + "\n", + " # if no new nodes were found, we have reached the end of the traversal\n", + " if new_count == 0:\n", + " example_nodes = []\n", + " description_nodes = []\n", + "\n", + " # iterate over all nodes and separate nodes with examples from nodes with\n", + " # descriptions\n", + " for node in self._nodes.values():\n", + " if \"example\" in node.metadata:\n", + " example_nodes.append(node)\n", + " elif node.content != \"\":\n", + " description_nodes.append(node)\n", + "\n", + " # select the nodes with examples first and descriptions second\n", + " # note: the base `finalize_nodes` method will truncate the list to the\n", + " # `select_k` number of nodes\n", + " tracker.select(example_nodes)\n", + " tracker.select(description_nodes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As 
described in the comments above, this custom strategy will first try to select documents that contain code examples, and then will use documents that contain descriptive text.\n", + "\n", + "We can now use this custom strategy to build a custom retriever, and ask the query again:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "`astrapy.admin.DataAPIDatabaseAdmin.list_keyspaces` has example: True\n", + "`astrapy.admin.DataAPIDatabaseAdmin` has example: True\n", + "`astrapy.client.DataAPIClient` has example: True\n", + "`astrapy.database.AsyncDatabase` has example: True\n", + "`astrapy.database.Database` has example: True\n", + "`astrapy.authentication.UsernamePasswordTokenProvider` has example: True\n" + ] + } + ], + "source": [ + "custom_retriever = GraphRetriever(store=store, edges=edges, strategy=CodeExamples())\n", + "\n", + "print_doc_ids(custom_retriever.invoke(query, select_k=6, start_k=3, max_depth=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we have found 6 documents with code examples! That is a significant improvement over the default strategy." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part 3: Using GraphRAG to Generate Code\n", + "\n", + "We now use the `CodeExamples` strategy inside a LangChain pipeline to generate code snippets.\n", + "\n", + "We will also use a custom document formatter, which will format the document in a way that makes it look like standard documentation. In particular, it will format all the extra details stored in the metadata in a way that is easy to read. 
This will help the LLM use the information in the documents to generate code.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```python\n", + "import os\n", + "from astrapy.client import DataAPIClient\n", + "from astrapy.collection import Collection\n", + "\n", + "def connect_and_retrieve_rows(num_rows):\n", + " api_endpoint = os.getenv('ASTRA_DB_API_ENDPOINT')\n", + " application_token = os.getenv('ASTRA_DB_APPLICATION_TOKEN')\n", + " keyspace = os.getenv('ASTRA_DB_KEYSPACE')\n", + " collection_name = os.getenv('ASTRA_DB_COLLECTION')\n", + "\n", + " client = DataAPIClient(token=application_token)\n", + " database = client.get_database(api_endpoint)\n", + " collection = Collection(database=database, name=collection_name, keyspace=keyspace)\n", + "\n", + " rows = collection.find(limit=num_rows)\n", + " return list(rows)\n", + "```\n" + ] + } + ], + "source": [ + "from graph_rag_example_helpers.examples.code_generation import format_docs\n", + "from langchain.chat_models import init_chat_model\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "llm = init_chat_model(\"gpt-4o-mini\", model_provider=\"openai\")\n", + "\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"Generate a block of runnable python code using the following documentation as\n", + " guidance. Return only the code. 
Don't include any example usage.\n", + "\n", + " Each documentation page is separated by three dashes (---) on its own line.\n", + " If certain pages of the provided documentation aren't useful for answering the\n", + " question, feel free to ignore them.\n", + "\n", + " Question: {question}\n", + "\n", + " Related Documentation:\n", + "\n", + " {context}\n", + " \"\"\"\n", + ")\n", + "\n", + "graph_chain = (\n", + " {\"context\": custom_retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "print(graph_chain.invoke(query))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can try running this generated code to see if it works:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "skip-execution" + ] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "from astrapy.client import DataAPIClient\n", + "from astrapy.collection import Collection\n", + "\n", + "\n", + "def connect_and_retrieve_rows(num_rows):\n", + " api_endpoint = os.getenv(\"ASTRA_DB_API_ENDPOINT\")\n", + " application_token = os.getenv(\"ASTRA_DB_APPLICATION_TOKEN\")\n", + " keyspace = os.getenv(\"ASTRA_DB_KEYSPACE\")\n", + " collection_name = os.getenv(\"ASTRA_DB_COLLECTION\")\n", + "\n", + " client = DataAPIClient(token=application_token)\n", + " database = client.get_database(api_endpoint)\n", + " collection = Collection(database=database, name=collection_name, keyspace=keyspace)\n", + "\n", + " rows = collection.find(limit=num_rows)\n", + " return list(rows)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "skip_execution", + "keep_output", + "raises-exception" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'_id': 'astrapy.info.EmbeddingProviderAuthentication', 'content': 'A representation of an authentication mode for using an embedding 
model,\\nmodeling the corresponding part of the response returned by the\\n\\'findEmbeddingProviders\\' Data API endpoint (namely \"supportedAuthentication\").', 'metadata': {'kind': 'class', 'name': 'EmbeddingProviderAuthentication', 'path': 'astrapy.info.EmbeddingProviderAuthentication', 'parameters': [{'name': 'enabled', 'type': 'bool'}, {'name': 'tokens', 'type': 'list[EmbeddingProviderToken]'}], 'attributes': [{'name': 'enabled', 'type': 'bool', 'description': 'whether this authentication mode is available for a given model.'}, {'name': 'tokens', 'type': 'list[EmbeddingProviderToken]', 'description': 'a list of `EmbeddingProviderToken` objects,\\ndetailing the secrets required for the authentication mode.'}], 'gathered_types': ['EmbeddingProviderToken'], 'parent': 'astrapy.info'}}\n", + "{'_id': 'astrapy.defaults.DEV_OPS_RESPONSE_HTTP_CREATED', 'content': '', 'metadata': {'kind': 'attribute', 'name': 'DEV_OPS_RESPONSE_HTTP_CREATED', 'path': 'astrapy.defaults.DEV_OPS_RESPONSE_HTTP_CREATED', 'value': 'DEV_OPS_RESPONSE_HTTP_CREATED = 201', 'parent': 'astrapy.defaults'}}\n", + "{'_id': 'astrapy.info.CollectionInfo.full_name', 'content': '', 'metadata': {'kind': 'attribute', 'name': 'full_name', 'path': 'astrapy.info.CollectionInfo.full_name', 'value': 'full_name: str', 'parent': 'astrapy.info.CollectionInfo'}}\n", + "{'_id': 'astrapy.collection.Collection.full_name', 'content': 'The fully-qualified collection name within the database,\\nin the form \"keyspace.collection_name\".', 'metadata': {'kind': 'attribute', 'name': 'full_name', 'path': 'astrapy.collection.Collection.full_name', 'value': 'full_name: str', 'example': \">>> my_coll.full_name\\n'default_keyspace.my_v_collection'\", 'parent': 'astrapy.collection.Collection'}}\n", + "{'_id': 'astrapy.exceptions.DataAPIErrorDescriptor', 'content': 'An object representing a single error returned from the Data API,\\ntypically with an error code and a text message.\\nAn API request would return with an HTTP 200 
success error code,\\nbut contain a nonzero amount of these.\\n\\nA single response from the Data API may return zero, one or more of these.\\nMoreover, some operations, such as an insert_many, may partally succeed\\nyet return these errors about the rest of the operation (such as,\\nsome of the input documents could not be inserted).', 'metadata': {'kind': 'class', 'name': 'DataAPIErrorDescriptor', 'path': 'astrapy.exceptions.DataAPIErrorDescriptor', 'parameters': [{'name': 'error_dict', 'type': 'dict[str, str]'}], 'attributes': [{'name': 'error_code', 'type': 'str | None', 'description': 'a string code as found in the API \"error\" item.'}, {'name': 'message', 'type': 'str | None', 'description': 'the text found in the API \"error\" item.'}, {'name': 'attributes', 'type': 'dict[str, Any]', 'description': 'a dict with any further key-value pairs returned by the API.'}], 'parent': 'astrapy.exceptions'}}\n" + ] + } + ], + "source": [ + "for row in connect_and_retrieve_rows(5):\n", + " print(row)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "The results clearly demonstrate that **GraphRAG leads to functional code generation, while standard vector-based retrieval fails**. \n", + "\n", + "In contrast, attempts using **only an LLM** or **standard vector-based RAG** resulted in **incomplete or non-functional outputs**. The appendix includes examples illustrating these limitations.\n", + "\n", + "By structuring document relationships effectively, **GraphRAG improves retrieval quality, enabling more reliable LLM-assisted code generation**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Appendix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### LLM Alone\n", + "\n", + "Here we show how to use the LLM alone to generate code for the query. We will use the same query as before, but modify the prompt to not include any context." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "skip_execution", + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```python\n", + "import os\n", + "from astra import AstraClient\n", + "\n", + "def fetch_rows_from_astra_db(num_rows):\n", + " # Retrieve environment variables\n", + " api_endpoint = os.getenv(\"ASTRA_DB_API_ENDPOINT\")\n", + " application_token = os.getenv(\"ASTRA_DB_APPLICATION_TOKEN\")\n", + " keyspace = os.getenv(\"ASTRA_DB_KEYSPACE\")\n", + " collection = os.getenv(\"ASTRA_DB_COLLECTION\")\n", + " \n", + " # Initialize the Astra DB client\n", + " client = AstraClient(api_endpoint, application_token)\n", + " \n", + " # Retrieve rows from the specified collection\n", + " query = f'SELECT * FROM {keyspace}.{collection} LIMIT {num_rows}'\n", + " response = client.execute_statement(query)\n", + " \n", + " # Return the rows retrieved\n", + " return response['rows']\n", + "```\n" + ] + } + ], + "source": [ + "llm_only_prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"Generate a block of runnable python code. Return only the code.\n", + " Don't include any example usage.\n", + "\n", + " Question: {question}\n", + " \"\"\"\n", + ")\n", + "\n", + "llm_only_chain = (\n", + " {\"question\": RunnablePassthrough()} | llm_only_prompt | llm | StrOutputParser()\n", + ")\n", + "\n", + "print(llm_only_chain.invoke(query))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code is not functional. The package `astra` and the class `AstraClient` do not exist." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard RAG\n", + "\n", + "Here we show how to use the LLM with standard RAG to generate code for the query. We will use the same query and prompt as we did with GraphRAG." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "skip_execution", + "keep_output" + ] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```python\n", + "import os\n", + "from astra import AstraClient\n", + "\n", + "def fetch_rows_from_astradb(num_rows):\n", + " endpoint = os.getenv('ASTRA_DB_API_ENDPOINT')\n", + " token = os.getenv('ASTRA_DB_APPLICATION_TOKEN')\n", + " keyspace = os.getenv('ASTRA_DB_KEYSPACE')\n", + " collection = os.getenv('ASTRA_DB_COLLECTION')\n", + "\n", + " client = AstraClient(\n", + " endpoint=endpoint,\n", + " token=token\n", + " )\n", + "\n", + " query = f'SELECT * FROM {keyspace}.{collection} LIMIT {num_rows}'\n", + " response = client.execute(query)\n", + " return response['data']\n", + "```\n" + ] + } + ], + "source": [ + "rag_chain = (\n", + " {\n", + " \"context\": store.as_retriever(k=6) | format_docs,\n", + " \"question\": RunnablePassthrough(),\n", + " }\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "print(rag_chain.invoke(query))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code is also not functional. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting AstraPy Documentation\n", + "\n", + "The AstraPy documentation was converted into a JSONL format via some custom code that is not included in this notebook. However, the code is available in the `graph-rag-example-helpers` package [here](https://github.com/datastax/graph-rag/blob/main/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/converter.py)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/index.md b/docs/examples/index.md new file mode 100644 index 00000000..27e35c08 --- /dev/null +++ b/docs/examples/index.md @@ -0,0 +1,29 @@ +# Examples + +<div class="grid cards" markdown> +- :material-code-braces-box:{ .lg .middle } __Lazy Graph RAG__ + + --- + + Implements [LazyGraphRAG](https://www.microsoft.com/en-us/research/blog/lazygraphrag-setting-a-new-standard-for-quality-and-cost/) using LangChain and `langchain-graph-retriever`. + + It loads Wikipedia articles and traverses based on links ("mentions") and named entities (extracted from the content). It retrieves a large number of articles, groups them by community, and extracts claims from each community. The best claims are used to answer the question. + + [:material-fast-forward: Lazy Graph RAG Example](lazy-graph-rag.ipynb) + +- :material-code-braces-box:{ .lg .middle } __Code Generation__ + + --- + This example notebook shows how to load documentation for python packages into a + vector store so that it can be used to provide context to an LLM for code generation. + + It uses LangChain and `langchain-graph-retriever` with a custom traversal Strategy + in order to improve LLM generated code output. It shows that using GraphRAG can + provide a significant increase in quality over using either an LLM alone or standard + RAG. + + GraphRAG traverses cross references in the documentation like a software engineer + would, in order to determine how to solve a coding problem. 
+ + [:material-fast-forward: Code Generation Example](code-generation.ipynb) +</div> diff --git a/docs/examples/index.qmd b/docs/examples/index.qmd deleted file mode 100644 index cf2c505e..00000000 --- a/docs/examples/index.qmd +++ /dev/null @@ -1,7 +0,0 @@ ---- -title: Examples ---- - -# Examples - -* [Lazy GraphRAG](./lazy-graph-rag.ipynb) shows how to load Wikipedia data into a LangChain VectorStore with citation and entity information in the metadata for graph traversal, as well as chains for extracting the claims from each community and selecting the most useful claims. \ No newline at end of file diff --git a/docs/examples/lazy-graph-rag.ipynb b/docs/examples/lazy-graph-rag.ipynb index 92a16c44..218e1b31 100644 --- a/docs/examples/lazy-graph-rag.ipynb +++ b/docs/examples/lazy-graph-rag.ipynb @@ -1,30 +1,12 @@ { "cells": [ - { - "cell_type": "raw", - "metadata": { - "vscode": { - "languageId": "raw" - } - }, - "source": [ - "---\n", - "title: \"Retrieval Beyond Similarity: Lazy GraphRAG in LangChain\"\n", - "author: Ben Chambers\n", - "execute:\n", - " output: false\n", - " warning: false\n", - " error: false\n", - "---" - ] - }, { "cell_type": "markdown", "metadata": { "id": "gqhMgAGwmLXc" }, "source": [ - "# Retrieval Beyond Similarity: LazyGraphRAG in LangChain\n", + "# LazyGraphRAG in LangChain\n", "\n", "## Introduction\n", "\n", @@ -60,92 +42,25 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "cellView": "form", - "id": "r0-5VJWGsBM3" + "id": "r0-5VJWGsBM3", + "tags": [ + "hide_output" + ] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: langchain-core in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (0.3.31)\n", - "Requirement already satisfied: langchain-astradb in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (0.5.2)\n", - "Requirement already satisfied: langchain-openai in 
/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (0.3.1)\n", - "Requirement already satisfied: langchain-graph-retriever in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (0.1.0)\n", - "Requirement already satisfied: graph-rag-example-helpers in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (0.1.0)\n", - "Requirement already satisfied: PyYAML>=5.3 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (6.0.2)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (1.33)\n", - "Requirement already satisfied: langsmith<0.4,>=0.1.125 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (0.2.10)\n", - "Requirement already satisfied: packaging<25,>=23.2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (24.2)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (2.10.4)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (9.0.0)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-core) (4.12.2)\n", - "Requirement already satisfied: astrapy<2.0.0,>=1.5.2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-astradb) (1.5.2)\n", - "Requirement already satisfied: langchain-community>=0.3.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-astradb) (0.3.14)\n", - "Requirement already satisfied: 
numpy<2.0.0,>=1.26.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-astradb) (1.26.4)\n", - "Requirement already satisfied: openai<2.0.0,>=1.58.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-openai) (1.60.0)\n", - "Requirement already satisfied: tiktoken<1,>=0.7 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-openai) (0.8.0)\n", - "Requirement already satisfied: networkx>=3.4.2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-graph-retriever) (3.4.2)\n", - "Requirement already satisfied: backoff>=2.2.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from graph-rag-example-helpers) (2.2.1)\n", - "Requirement already satisfied: httpx>=0.28.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from graph-rag-example-helpers) (0.28.1)\n", - "Requirement already satisfied: python-dotenv>=1.0.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from graph-rag-example-helpers) (1.0.1)\n", - "Requirement already satisfied: simsimd>=6.2.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from graph-rag-example-helpers) (6.2.1)\n", - "Requirement already satisfied: tqdm>=4.67.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from graph-rag-example-helpers) (4.67.1)\n", - "Requirement already satisfied: deprecation<2.2.0,>=2.1.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from astrapy<2.0.0,>=1.5.2->langchain-astradb) (2.1.0)\n", - "Requirement already satisfied: pymongo>=3 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from astrapy<2.0.0,>=1.5.2->langchain-astradb) (4.10.1)\n", - "Requirement already satisfied: toml<0.11.0,>=0.10.2 in 
/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from astrapy<2.0.0,>=1.5.2->langchain-astradb) (0.10.2)\n", - "Requirement already satisfied: uuid6>=2024.1.12 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from astrapy<2.0.0,>=1.5.2->langchain-astradb) (2024.7.10)\n", - "Requirement already satisfied: anyio in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpx>=0.28.1->graph-rag-example-helpers) (4.8.0)\n", - "Requirement already satisfied: certifi in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpx>=0.28.1->graph-rag-example-helpers) (2024.12.14)\n", - "Requirement already satisfied: httpcore==1.* in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpx>=0.28.1->graph-rag-example-helpers) (1.0.7)\n", - "Requirement already satisfied: idna in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpx>=0.28.1->graph-rag-example-helpers) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.28.1->graph-rag-example-helpers) (0.14.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core) (3.0.0)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (2.0.36)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (3.11.11)\n", - "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in 
/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (0.6.7)\n", - "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (0.4.0)\n", - "Requirement already satisfied: langchain<0.4.0,>=0.3.14 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (0.3.14)\n", - "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (2.7.1)\n", - "Requirement already satisfied: requests<3,>=2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain-community>=0.3.1->langchain-astradb) (2.32.3)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core) (3.10.13)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langsmith<0.4,>=0.1.125->langchain-core) (1.0.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from openai<2.0.0,>=1.58.1->langchain-openai) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from openai<2.0.0,>=1.58.1->langchain-openai) (0.8.2)\n", - "Requirement already satisfied: sniffio in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from openai<2.0.0,>=1.58.1->langchain-openai) (1.3.1)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in 
/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core) (2.27.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain-openai) (2024.11.6)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (1.3.2)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (24.3.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain-community>=0.3.1->langchain-astradb) (1.18.3)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community>=0.3.1->langchain-astradb) (3.24.1)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community>=0.3.1->langchain-astradb) (0.9.0)\n", - "Requirement already satisfied: h2<5,>=3 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from httpx[http2]<1,>=0.25.2->astrapy<2.0.0,>=1.5.2->langchain-astradb) (4.1.0)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from langchain<0.4.0,>=0.3.14->langchain-community>=0.3.1->langchain-astradb) (0.3.5)\n", - "Requirement already satisfied: dnspython<3.0.0,>=1.16.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from pymongo>=3->astrapy<2.0.0,>=1.5.2->langchain-astradb) (2.7.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from requests<3,>=2->langchain-community>=0.3.1->langchain-astradb) (3.4.1)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from requests<3,>=2->langchain-community>=0.3.1->langchain-astradb) (2.3.0)\n", - "Requirement already satisfied: hyperframe<7,>=6.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]<1,>=0.25.2->astrapy<2.0.0,>=1.5.2->langchain-astradb) (6.0.1)\n", - "Requirement already satisfied: hpack<5,>=4.0 in 
/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]<1,>=0.25.2->astrapy<2.0.0,>=1.5.2->langchain-astradb) (4.0.0)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community>=0.3.1->langchain-astradb) (1.0.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ - "# @ Install modules.\n", + "# Install modules.\n", + "#\n", + "# On Apple hardware, \"spacy[apple]\" will improve performance.\n", "%pip install \\\n", " langchain-core \\\n", " langchain-astradb \\\n", " langchain-openai \\\n", " langchain-graph-retriever \\\n", + " spacy \\\n", " graph-rag-example-helpers" ] }, @@ -158,8 +73,26 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": null, + "metadata": { + "tags": [ + "hide_output" + ] + }, + "outputs": [], + "source": [ + "# Downloads the model used by Spacy for extracting entities.\n", + "!python -m spacy download en_core_web_sm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide_output" + ] + }, "outputs": [], "source": [ "# Configure import paths.\n", @@ -173,7 +106,12 @@ "\n", "initialize_environment(Environment.ASTRAPY)\n", "\n", - "os.environ[\"LANGCHAIN_PROJECT\"] = \"lazy-graph-rag\"" + "os.environ[\"LANGCHAIN_PROJECT\"] = \"lazy-graph-rag\"\n", + "\n", + "# The full dataset is ~6m documents, and takes hours to load.\n", + "# The short dataset is 1000 documents and loads quickly.\n", + "# Change this to `True` to use the larger dataset.\n", + "USE_SHORT_DATASET = True" ] }, { @@ -190,38 +128,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create Documents from Wikipedia Articles\n", + "### Create Documents from Wikipedia Articles\n", "The first thing we need to do 
is create the `LangChain` `Document`s we'll import.\n", "\n", "To do this, we write some code to convert lines from a JSON file downloaded from [2wikimultihop](https://github.com/Alab-NII/2wikimultihop?tab=readme-ov-file#new-update-april-7-2021) and create a `Document`.\n", "We populate the `id` and `metadata[\"mentions\"]` from information in this file.\n", "\n", - "Then, we run those documents through the `KeybertKeywordExtractor` to populate `metadata[\"keywords\"]` with the suggested keywords from each article." + "Then, we run those documents through the `SpacyNERTransformer` to populate `metadata[\"entities\"]` with entities named in the article." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "8u4lD-AqDMMs" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "import json\n", "from collections.abc import Iterator\n", "\n", "from langchain_core.documents import Document\n", - "from langchain_graph_retriever.document_transformers.keybert import (\n", - " KeybertKeywordExtractor,\n", + "from langchain_graph_retriever.transformers.spacy import (\n", + " SpacyNERTransformer,\n", ")\n", "\n", "\n", @@ -230,6 +159,7 @@ " para = json.loads(line)\n", "\n", " id = para[\"id\"]\n", + " title = para[\"title\"]\n", "\n", " # Use structured information (mentioned Wikipedia IDs) as metadata.\n", " mentioned_ids = [id for m in para[\"mentions\"] for m in m[\"ref_ids\"] or []]\n", @@ -239,11 +169,15 @@ " page_content=\" \".join(para[\"sentences\"]),\n", " metadata={\n", " \"mentions\": mentioned_ids,\n", + " \"title\": title,\n", " },\n", " )\n", "\n", "\n", - "KEYBERT_TRANSFORMER = KeybertKeywordExtractor()\n", + "NER_TRANSFORMER = SpacyNERTransformer(\n", + " limit=1000,\n", + " exclude_labels={\"CARDINAL\", \"MONEY\", \"QUANTITY\", \"TIME\", \"PERCENT\", \"ORDINAL\"},\n", + ")\n", "\n", "\n", "# Load data in batches, using GLiNER to extract entities.\n", @@ -251,7 +185,7 @@ " # Parse documents from the batch of lines.\n", " docs = [parse_document(line) for line in lines]\n", "\n", - " docs = KEYBERT_TRANSFORMER.transform_documents(docs)\n", + " docs = NER_TRANSFORMER.transform_documents(docs)\n", "\n", " return docs" ] @@ -260,26 +194,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create the AstraDBVectorStore\n", + "### Create the AstraDBVectorStore\n", "Next, we create the Vector Store we're going to load these documents into.\n", "In our case, we use DataStax Astra DB with Open AI embeddings." 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# @ Create the AstraDBVectorStore\n", - "\n", "from langchain_astradb import AstraDBVectorStore\n", "from langchain_openai import OpenAIEmbeddings\n", "\n", - "COLLECTION = \"lazy_graph_rag\"\n", + "COLLECTION = \"lazy_graph_rag_short\" if USE_SHORT_DATASET else \"lazy_graph_rag\"\n", "store = AstraDBVectorStore(\n", " embedding=OpenAIEmbeddings(),\n", " collection_name=COLLECTION,\n", + " pre_delete_collection=USE_SHORT_DATASET,\n", ")" ] }, @@ -287,7 +220,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Loading Data into the Store\n", + "### Loading Data into the Store\n", "Next, we perform the actual loading.\n", "This takes a while, so we use a helper utility to persist which batches have been written so we can resume if there are any failures.\n", "\n", @@ -296,75 +229,13 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Resuming loading with 74 completed, 5916 remaining\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 1%| | 32/5916 [35:45<109:34:36, 67.04s/it]\n", - " + Exception Group Traceback (most recent call last):\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3575, in run_code\n", - " | File \"/var/folders/c4/dcr0mh3d183d5kh9gf89wsc00000gn/T/ipykernel_60249/1917728629.py\", line 12, in <module>\n", - " | await aload_2wikimultihop(\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py\", line 86, in aload_2wikimultihop\n", - " | async with asyncio.TaskGroup() as tg:\n", - " | ^^^^^^^^^^^^^^^^^^^\n", - " | File 
\"/Users/benjamin.chambers/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/asyncio/taskgroups.py\", line 71, in __aexit__\n", - " | return await self._aexit(et, exc)\n", - " | ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", - " | File \"/Users/benjamin.chambers/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/asyncio/taskgroups.py\", line 164, in _aexit\n", - " | raise BaseExceptionGroup(\n", - " | ExceptionGroup: unhandled errors in a TaskGroup (1 sub-exception)\n", - " +-+---------------- 1 ----------------\n", - " | Traceback (most recent call last):\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_transports/default.py\", line 101, in map_httpcore_exceptions\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_transports/default.py\", line 394, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py\", line 256, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_async/connection_pool.py\", line 236, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py\", line 101, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py\", line 78, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_async/connection.py\", line 156, in _connect\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_backends/anyio.py\", line 67, in start_tls\n", - " | File 
\"/Users/benjamin.chambers/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/contextlib.py\", line 158, in __exit__\n", - " | self.gen.throw(value)\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpcore/_exceptions.py\", line 14, in map_exceptions\n", - " | httpcore.ConnectError\n", - " | \n", - " | The above exception was the direct cause of the following exception:\n", - " | \n", - " | Traceback (most recent call last):\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/backoff/_async.py\", line 151, in retry\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py\", line 95, in add_docs\n", - " | await store.aadd_documents(batch_docs)\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/langchain_core/vectorstores/base.py\", line 322, in aadd_documents\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/langchain_astradb/vectorstores.py\", line 1224, in aadd_texts\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/langchain_astradb/vectorstores.py\", line 1214, in _replace_document\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/astrapy/collection.py\", line 4482, in replace_one\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/astrapy/api_commander.py\", line 354, in async_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/astrapy/api_commander.py\", line 301, in async_raw_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_client.py\", line 1540, in request\n", - " | File 
\"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_client.py\", line 1629, in send\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_client.py\", line 1657, in _send_handling_auth\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_client.py\", line 1694, in _send_handling_redirects\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_client.py\", line 1730, in _send_single_request\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_transports/default.py\", line 393, in handle_async_request\n", - " | File \"/Users/benjamin.chambers/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/contextlib.py\", line 158, in __exit__\n", - " | self.gen.throw(value)\n", - " | File \"/Users/benjamin.chambers/code/graph-pancake/.venv/lib/python3.12/site-packages/httpx/_transports/default.py\", line 118, in map_httpcore_exceptions\n", - " | httpx.ConnectError\n", - " +------------------------------------\n" - ] - } - ], + "execution_count": null, + "metadata": { + "tags": [ + "hide_output" + ] + }, + "outputs": [], "source": [ "import os\n", "import os.path\n", @@ -376,7 +247,12 @@ "# [2wikimultihop](https://github.com/Alab-NII/2wikimultihop?tab=readme-ov-file#new-update-april-7-2021).\n", "PARA_WITH_HYPERLINK_ZIP = os.path.join(os.getcwd(), \"para_with_hyperlink.zip\")\n", "\n", - "await aload_2wikimultihop(PARA_WITH_HYPERLINK_ZIP, store, prepare_batch)" + "await aload_2wikimultihop(\n", + " limit=100 if USE_SHORT_DATASET else None,\n", + " full_para_with_hyperlink_zip_path=PARA_WITH_HYPERLINK_ZIP,\n", + " store=store,\n", + " batch_prepare=prepare_batch,\n", + ")" ] }, { @@ -384,12 +260,12 @@ "metadata": {}, "source": [ "At this point, we've created a `VectorStore` with the Wikipedia articles.\n", - "Each article is 
associated with metadata identifying other articles it mentions and keywords from the article.\n", - "This could be used for hybrid search -- performing a vector search for articles similar to a specific question *that also mention a specific term*.\n", + "Each article is associated with metadata identifying other articles it mentions and entities from the article.\n", "\n", - "The library `langchain-graph-retriever` makes this even more useful, allowing traversing between articles either explicitly mentioned or dealing with the same keywords.\n", + "As is, this is useful for performing a vector search filtered to articles mentioning a specific term or performing an entity search on the documents.\n", + "The library `langchain-graph-retriever` makes this even more useful by allowing articles to be traversed based on relationships such as articles mentioned in the current article (or mentioning the current article) or articles providing more information on the entities mentioned in the current article.\n", "\n", - "In the next section, we'll go a step further and perform Lazy GraphRAG to extract relevant claims from both the similar and related articles and use the most relevant claims to answer the question." + "In the next section we'll see not just how we can use the relationships in the metadata to retrieve more articles, but we'll go a step further and perform Lazy GraphRAG to extract relevant claims from both the similar and related articles and use the most relevant claims to answer the question." ] }, { @@ -414,12 +290,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### LangChain for Extracting Claims" + "### LangChain for Extracting Claims\n", + "\n", + "The first thing we do is create a chain that produces the claims.
Given an input containing the question and the retrieved communities, it applies an LLM in parallel extracting claims from each community.\n", + "\n", + "A claim is just a string representing the statement and the `source_id` of the document. We request structured output so we get a list of claims." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -518,6 +398,8 @@ "source": [ "### LangChain for Ranking Claims\n", "\n", + "The next chain is used for ranking the claims so we can select the most relevant to the question.\n", + "\n", "This is based on ideas from [RankRAG](https://arxiv.org/abs/2407.02485).\n", "Specifically, the prompt is constructed so that the next token should be `True` if the content is relevant and `False` if not.\n", "The probability of the token is used to determine the relevance -- `True` with a higher probability is more relevant than `True` with a lesser probability." @@ -525,7 +407,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -596,18 +478,37 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## LazyGraphRAG in LangChain" + "We could extend this by using an MMR-like strategy for selecting claims.\n", + "Specifically, we could combine the relevance of the claim to the question and the diversity compared to already selected claims to select the best variety of claims." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### LazyGraphRAG in LangChain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we produce a chain that puts everything together.\n", + "Given a `GraphRetriever` it retrieves documents, creates communities using edges amongst the retrieved documents, extracts claims from those communities, ranks and selects the best claims, and then answers the question using those claims." 
] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "from typing import Any\n", + "\n", + "from graph_retriever.edges import EdgeSpec, MetadataEdgeFunction\n", "from langchain_core.language_models import BaseLanguageModel\n", - "from langchain_core.retrievers import BaseRetriever\n", "from langchain_core.runnables import chain\n", + "from langchain_graph_retriever import GraphRetriever\n", "from langchain_graph_retriever.document_graph import create_graph, group_by_community\n", "\n", "\n", @@ -615,35 +516,27 @@ "async def lazy_graph_rag(\n", " question: str,\n", " *,\n", - " retriever: BaseRetriever,\n", - " edges: Iterable[str | tuple[str, str]] | None = None,\n", + " retriever: GraphRetriever,\n", " model: BaseLanguageModel,\n", + " edges: Iterable[EdgeSpec] | MetadataEdgeFunction | None = None,\n", " max_tokens: int = 1000,\n", + " **kwargs: Any,\n", ") -> str:\n", " \"\"\"Retrieve claims relating to the question using LazyGraphRAG.\n", "\n", " Returns the top claims up to the given `max_tokens` as a markdown list.\n", "\n", " \"\"\"\n", - "\n", + " edges = edges or retriever.edges\n", " if edges is None:\n", - " try:\n", - " edges = retriever.edges\n", - " except AttributeError as _:\n", - " raise ValueError(\n", - " \"Must specify 'edges' or provide a retriever with 'edges' field defined\"\n", - " )\n", + " raise ValueError(\"Must specify 'edges' in invocation or retriever\")\n", "\n", " # 1. Retrieve documents using the (traversing) retriever.\n", - " documents = await retriever.ainvoke(question)\n", + " documents = await retriever.ainvoke(question, edges=edges, **kwargs)\n", "\n", " # 2. 
Create a graph and extract communities.\n", - " documents_by_id, doc_graph = create_graph(\n", - " documents,\n", - " edges=edges,\n", - " directed=False,\n", - " )\n", - " communities = group_by_community(documents_by_id, doc_graph)\n", + " document_graph = create_graph(documents, edges=edges)\n", + " communities = group_by_community(document_graph)\n", "\n", " # 3. Extract claims from the communities.\n", " claims = await claims_chain.ainvoke(\n", @@ -669,42 +562,30 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Using LazyGraphRAG in LangChain" + "### Using Lazy GraphRAG in LangChain\n", + "\n", + "Finally, we use the Lazy GraphRAG chain we created on the store we populated earlier." ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "from langchain_community.retrievers.graph_traversal import (\n", - " AstraTraversalAdapter,\n", - " GraphTraversalRetriever,\n", - ")\n", "from langchain_core.prompts import PromptTemplate\n", "from langchain_core.runnables import RunnablePassthrough\n", - "\n", - "EDGES = [(\"mentions\", \"id\"), \"entities\"]\n", - "\n", - "RETRIEVER = GraphTraversalRetriever(\n", - " store=AstraTraversalAdapter(store),\n", - " edges=EDGES,\n", - " start_k=100,\n", - " depth=3,\n", + "from langchain_graph_retriever import GraphRetriever\n", + "\n", + "RETRIEVER = GraphRetriever(\n", + " store=store,\n", + " edges=[(\"mentions\", \"$id\"), (\"entities\", \"entities\")],\n", + " k=100,\n", + " start_k=30,\n", + " adjacent_k=20,\n", + " max_depth=3,\n", ")\n", "\n", - "# RETRIEVER = GraphMMRTraversalRetriever(\n", - "# store = AstraMMRTraversalAdapter(store),\n", - "# edges = EDGES,\n", - "# k = 100,\n", - "# depth = 5,\n", - "# fetch_k = 100,\n", - "# adjacent_k = 25,\n", - "# lambda_mult = 0.8,\n", - "# score_threshold = float(\"-inf\"),\n", - "# )\n", - "\n", "ANSWER_PROMPT = PromptTemplate.from_template(\"\"\"\n", "Answer the question based on the supporting
claims.\n", "\n", @@ -736,10 +617,87 @@ { "cell_type": "code", "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Bermudan sloop ships are widely prized for several reasons. Firstly, they feature the Bermuda rig, which is popular because it is easier to sail with a smaller crew or even single-handed, is cheaper due to having less hardware, and performs well when sailing into the wind (Source: 48520). Additionally, Bermuda sloops were constructed using Bermuda cedar, a material valued for its durability and resistance to rot, contributing to the ships' longevity and performance (Source: 17186373). These factors combined make Bermudan sloops highly valued compared to other ships.'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "QUESTION = \"Why are Bermudan sloop ships widely prized compared to other ships?\"\n", + "result = await LAZY_GRAPH_RAG_CHAIN.ainvoke(QUESTION)\n", + "result.content" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "await LAZY_GRAPH_RAG_CHAIN.ainvoke(\"Where is Azerbaijan?\")" + "For comparison, below are the results to the same question using a basic RAG pattern with just vector similarity." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "keep_output" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'The documents do not provide specific reasons why Bermudan sloop ships are widely prized compared to other ships. 
They describe the development and characteristics of the Bermuda sloop, such as its fore-and-aft rigged single-masted design and the use of the Bermuda rig with triangular sails, but do not explicitly state why these ships are particularly valued over others.'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_core.prompts import PromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "VECTOR_ANSWER_PROMPT = PromptTemplate.from_template(\"\"\"\n", + "Answer the question based on the provided documents.\n", + "\n", + "Only use information from the documents. Do not guess or make up any information.\n", + "\n", + "Question: {question}\n", + "\n", + "Documents:\n", + "{documents}\n", + "\"\"\")\n", + "\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "\n", + "VECTOR_CHAIN = (\n", + " {\n", + " \"question\": RunnablePassthrough(),\n", + " \"documents\": (store.as_retriever() | format_docs),\n", + " }\n", + " | VECTOR_ANSWER_PROMPT\n", + " | MODEL\n", + ")\n", + "\n", + "result = VECTOR_CHAIN.invoke(QUESTION)\n", + "result.content" ] }, { @@ -755,17 +713,19 @@ "source": [ "## Conclusion\n", "\n", - "This post introduced _traversing retrievers_ which allow any `VectorStore` to be traversed as a knowledge graph based on properties in the metadata.\n", - "This means you can focus on populating and using your `VectorStore` with useful metadata and add GraphRAG when you need it.\n", - "We also saw that these traversing retrievers mean that any `VectorStore` can be used with LazyGraphRAG, without needing to change the stored documents.\n", + "This post demonstrated how easy it is to implement Lazy GraphRAG on top of a document graph.\n", + "\n", + "It used `langchain-graph-retriever` from the [graph-rag project](datastax.github.io/graph-rag) to implement the document graph and graph-based 
retrieval on top of an existing LangChain `VectorStore`.\n", + "This means you can focus on populating and using your `VectorStore` with useful metadata and add graph-based retrieval and even Lazy GraphRAG when you need it.\n", "\n", + "**Any LangChain `VectorStore` can be used with Lazy GraphRAG without needing to change or re-ingest the stored documents.**\n", "Knowledge Graphs and GraphRAG shouldn't be hard or scary.\n", "Start simple and easily overlay edges when you need them.\n", "\n", - "These traversing retrievers and LazyGraphRAG summarization work well with agents.\n", - "You can create tools that use different retriever configurations, for instance, searching for articles \"near\" existing articles or distinguishing between questions that only need a few references and deeper questions which need to retrieve and summarize a larger amount of content.\n", - "We'll show how to combine these graph techniques with agents in future posts.\n", - "Until then, ..." + "Graph retrievers and LazyGraph RAG work well with agents.\n", + "You can allow the agent to retrieve differently depending on the question -- doing a vector only search for simple questions, traversing to mentioned articles for a deeper question or traversing to articles that cite this to see if there is newer information available.\n", + "We'll show how to combine these techniques with agents in a future post.\n", + "Until then, give `langchain-graph-retriever` a try and let us know how it goes!" ] } ], diff --git a/docs/faqs/index.md b/docs/faqs/index.md new file mode 100644 index 00000000..870af622 --- /dev/null +++ b/docs/faqs/index.md @@ -0,0 +1,35 @@ +# FAQs + +## Is Graph RAG a Knowledge Graph? + +Yes, for the recent usage of the term. +Graph RAG implements graph traversal of structured metadata during retrieval. +The structured metadata provides edges connecting unstructured content ("knowledge"). +Graph RAG traverses a graph of knowledge. 
+ +However, prior to the recent surge of Graph RAG, there was a more academic definition of knowledge graphs where nodes specifically represented entities and knowledge about the relationships appeared as edges. Graph RAG is *not* this version of a knowledge graph. + +We have found that adding edges to unstructured content is much easier and more efficient to use. +See [the Lazy Graph RAG example](../examples/lazy-graph-rag.ipynb) for more details. + +??? note "Links with more information." + + We previously wrote about this distinction as ["content-centric" (nodes are content) vs. "entity-centric" (nodes are entities)](https://www.datastax.com/blog/better-llm-integration-and-relevancy-with-content-centric-knowledge-graphs). + + We've also demonstrated that [building the content-centric knowledge graph is significantly cheaper](https://hackernoon.com/how-to-save-$70k-building-a-knowledge-graph-for-rag-on-6m-wikipedia-pages). + + In many ways this mirrors the difference between Microsoft's GraphRAG and LazyGraphRAG. + +## Does Graph RAG need a Graph DB? + +No. + +Graph databases are excellent for operating on academic knowledge graphs, where you may be looking for specific relationships between multiple nodes -- e.g., finding people who live in Seattle (have a "lives in" edge pointing at Seattle) and work at a company in Santa Clara (has a "works at" edge to a company node with a "headquartered in" edge pointing at Santa Clara). In this case, the graph *structure* encodes information, meaning the graph *query* needs to understand that structure. + +However, the best knowledge graph for Graph RAG is a vector store containing unstructured content with structured metadata first, and supporting traversal of those structured relationships second. This means that any vector store with metadata filtering capabilities (all or nearly all) can be used for traversal. + +!!!
important + + Traditional graph databases require materializing edges during ingestion, making them inflexible and costly to maintain as data evolves. Our approach operates on relationships present in the metadata without materializing them, eliminating the need to decide on the graph relationships during ingestion and enabling each query to operate on a different set of relationships. This makes it easy to add your structured metadata to the documents and traverse it for enhanced retrieval in RAG applications and adapts effortlessly to changing data. + +There are some things a vector store can support that make the kinds of metadata queries needed for traversal more efficient. See the support matrix in the [Adapters guide](../guide/adapters.md) for more information. \ No newline at end of file diff --git a/docs/getting-started.qmd b/docs/getting-started.qmd deleted file mode 100644 index 8ae72b70..00000000 --- a/docs/getting-started.qmd +++ /dev/null @@ -1,135 +0,0 @@ ---- -title: "Getting Started" -jupyter: python3 -lightbox: auto ---- - -This page is an executable notebook demonstrating how to combine Graph Traversal and Vector Search using `langchain-graph-retriever` with `langchain`. - -## Pre-requisites - -We assume you already have a working `langchain` installation, including an LLM and embedding model as well as a [supported vector store](/reference/Adapter.qmd). - -In that case, you only need to isntall `langchain-graph-retriever`: - -```{python} -#| eval: False -%pip install langchain langchain-graph-retriever -``` - -## Preparing Data - -Loading data is exactly the same as for whichever vector store you use. -The main thing to consider is what structured information you wish to include in the metadata to support traversal. - -For this guide, I have a JSON file with information about animals. Several example entries are shown below. The actual file has one entry per line, making it easy to load into `Document`s. 
- -```json -{ - "id": "alpaca", - "text": "alpacas are domesticated mammals valued for their soft wool and friendly demeanor.", - "metadata": { - "type": "mammal", - "number_of_legs": 4, - "keywords": ["wool", "domesticated", "friendly"], - "origin": "south america" - } -} -{ - "id": "caribou", - "text": "caribou, also known as reindeer, are migratory mammals found in arctic regions.", - "metadata": { - "type": "mammal", - "number_of_legs": 4, - "keywords": ["migratory", "arctic", "herbivore", "tundra"], - "diet": "herbivorous" - } -} -{ - "id": "cassowary", - "text": "cassowaries are flightless birds known for their colorful necks and powerful legs.", - "metadata": { - "type": "bird", - "number_of_legs": 2, - "keywords": ["flightless", "colorful", "powerful"], - "habitat": "rainforest" - } -} -``` - -```{python} -import json -from langchain_core.documents import Document -animals = [] -with open("../data/animals.jsonl", "r") as file: - for line in file: - data = json.loads(line.strip()) - animals.append(Document( - id=data["id"], - page_content=data["text"], - metadata=data["metadata"], - )) -``` - -## Populating the Vector Store - -```{python} -from langchain_astradb import AstraDBVectorStore -from langchain_openai import OpenAIEmbeddings - -vector_store = AstraDBVectorStore.from_documents( - collection_name="animals", - documents=animals, - embedding=OpenAIEmbeddings(), -) -``` - -## Simple Traversal - -For our first retrieval and graph traversal, we're going to start with a single animal best matching the query, and then traverse to other animals with the same `habitat` and/or `origin`. - -```{python} -from langchain_graph_retriever import GraphRetriever -from langchain_graph_retriever.strategies import Eager - -simple = GraphRetriever( - # Adapt AstraDBVectorStore for use with Graph Retrievers. 
- store = vector_store, - # Define the relationships to navigate: - edges = [("habitat", "habitat"), ("origin", "origin")], - strategy = Eager(k=10, start_k=1, depth=2), -) -``` - -The above creates a graph traversing retriever that starts with the nearest animal (`start_k=1`), retrieves 10 documents (`k=10`) and limits the search to docuemnts that are at most 2 steps away from the first animal (`depth=2`). - -The edges define how metadata values can be used for traversal. In this case, every animal is connected to other animals with the same habitat and/or same origin. - -```{python} -simple_results = simple.invoke("what mammals could be found near a capybara") - -for doc in simple_results: - print(f"{doc.id}: {doc.page_content}") -``` - -## Visualizing - -`langchain-graph-retrievers` includes code for converting the document graph into a `networkx` graph, for rendering and other analysis. -See @fig-document-graph - -```{python} -#| code-fold: True -#| label: fig-document-graph -#| fig-cap: "Graph of retrieved documents" -import networkx as nx -import matplotlib.pyplot as plt -from langchain_graph_retriever.document_graph import create_graph - -document_graph = create_graph( - documents=simple_results, - edges = simple.edges, -) - -nx.draw(document_graph, with_labels=True) -plt.show() -``` \ No newline at end of file diff --git a/docs/guide/adapters.md b/docs/guide/adapters.md new file mode 100644 index 00000000..77592e71 --- /dev/null +++ b/docs/guide/adapters.md @@ -0,0 +1,75 @@ +--- +i: + y: :material-check-circle:{.green} + n: :material-close-circle:{.red} + m: :material-alert-circle:{.yellow} +--- + +# Adapters + +Adapters allow `graph-retriever` to connect to specific vector stores. 
+ +| Vector Store | Supported | Collections | Dict-In-List | Nested Metadata | Optimized Adjacency | +| ------------------------------ | :-------: | :---------: | :----------: | :-------------: | :-----------------: | +| [DataStax Astra](#astra) | {{ i.y }} | {{ i.y }} | {{ i.y }} | {{ i.y }} | {{ i.y }} | +| [OpenSearch](#opensearch) | {{ i.y }} | {{ i.y }} | {{ i.n }} | {{ i.n }} | {{ i.n }} | +| [Apache Cassandra](#cassandra) | {{ i.y }} | {{ i.m }} | {{ i.y }} | {{ i.n }} | {{ i.n }} | +| [Chroma](#chroma) | {{ i.y }} | {{ i.m }} | {{ i.y }} | {{ i.n }} | {{ i.n }} | + +__Supported__ + +: Indicates whether a given store is completely supported (:material-check-circle:{.green}) or has limited support (:material-alert-circle:{.yellow}). + +__Collections__ + +: Indicates whether the store supports lists in metadata values or not. Stores which do not support it directly (:material-alert-circle:{.yellow}) can be used by applying the [ShreddingTransformer][langchain_graph_retriever.transformers.ShreddingTransformer] document transformer to documents before writing, which spreads the items of the collection into multiple metadata keys. + +__Dict-In-List__ + +: Indicates the store supports using a dict-value in a list for edges. For +example, when using named-entity recognition, you may have `entities = [{"type": +"PERSON", "entity": "Bell"}, ...]` and wish to link nodes with the same entity +using an edge defined as `("entities", "entities")`. + +__Nested Metadata__ + +: Whether edges can be defined using values of nested metadata. For example, +`page_structure.section` to access the section ID stored in metadata as +`metadata["page_structure"] = { "section": ... }`. + +__Optimized Adjacency__ + +: Whether the store supports an optimized query for nodes adjacent to multiple edges. Without this optimization each edge must be queried separately. 
Stores that support the combined adjacent query perform much better, especially when retrieving large numbers of nodes and/or dealing with high connectivity. + +!!! warning + + Graph Retriever can be used with any of these supported Vector Stores. However, stores + that operate directly on nested collections (without denormalization) and support optimized adjacency + are much more performant and better suited for production use. Stores like Chroma are best + employed for early experimentation, while it is generally recommended to use a store like DataStax AstraDB when scaling up. + +## Supported Stores + +### Astra + +[DataStax AstraDB](https://www.datastax.com/products/datastax-astra) is +supported by the +[`AstraAdapter`][langchain_graph_retriever.adapters.astra.AstraAdapter]. The adapter +supports operating on metadata containing both primitive and list values. +Additionally, it optimizes the request for nodes connected to multiple edges into a single query. + +### OpenSearch + +[OpenSearch](https://opensearch.org/) is supported by the [`OpenSearchAdapter`][langchain_graph_retriever.adapters.open_search.OpenSearchAdapter]. The adapter supports operating on metadata containing both primitive and list values. It does not perform an optimized adjacent query. + +### Apache Cassandra {: #cassandra} + +[Apache Cassandra](https://cassandra.apache.org/) is supported by the [`CassandraAdapter`][langchain_graph_retriever.adapters.cassandra.CassandraAdapter]. The adapter requires shredding metadata containing lists in order to use them as edges. It does not perform an optimized adjacent query. + +### Chroma + +[Chroma](https://www.trychroma.com/) is supported by the [`ChromaAdapter`][langchain_graph_retriever.adapters.chroma.ChromaAdapter]. The adapter requires shredding metadata containing lists in order to use them as edges. It does not perform an optimized adjacent query. 
+ +## Implementation + +The [Adapter][graph_retriever.adapters.Adapter] interface may be implemented directly. For LangChain [VectorStores][langchain_core.vectorstores.base.VectorStore], [LangchainAdapter][langchain_graph_retriever.adapters.langchain.LangchainAdapter] and [ShreddedLangchainAdapter][langchain_graph_retriever.adapters.langchain.ShreddedLangchainAdapter] provide much of the necessary functionality. diff --git a/docs/guide/components.svg b/docs/guide/components.svg new file mode 100644 index 00000000..5907cbb1 --- /dev/null +++ b/docs/guide/components.svg @@ -0,0 +1 @@ +<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:lucid="lucid" width="1022" height="375.89"><g transform="translate(141 -365.1119791666667)" lucid:page-tab-id="0_0"><path d="M570 620c16.57 0 30 13.43 30 30v60c0 16.57-13.43 30-30 30h-680c-16.57 0-30-13.43-30-30v-60c0-16.57 13.43-30 30-30z" stroke="#6db1ff" stroke-width="2" fill="#edf5ff"/><path d="M-140 571.1a6 6 0 0 1 6-6H29.66a6 6 0 0 1 6 6V602a6 6 0 0 1-6 6H-134a6 6 0 0 1-6-6z" fill="#6db1ff"/><use xlink:href="#a" transform="matrix(1,0,0,1,-132,573.1119791666666) translate(0 21.58376736111111)"/><path d="M850 420c16.57 0 30 13.43 30 30v60c0 16.57-13.43 30-30 30h-960c-16.57 0-30-13.43-30-30v-60c0-16.57 13.43-30 30-30z" stroke="#54c45e" stroke-width="2" fill="#c3f7c8"/><path d="M-140 371.1a6 6 0 0 1 6-6h275.32a6 6 0 0 1 6 6V402a6 6 0 0 1-6 6H-134a6 6 0 0 1-6-6z" fill="#54c45e"/><use xlink:href="#b" transform="matrix(1,0,0,1,-132,373.1119791666667) translate(0 21.58376736111111)"/><path d="M-120 646a6 6 0 0 1 6-6H34a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6h-148a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#c" transform="matrix(1,0,0,1,-115,645) translate(26.71440972222222 43.40277777777778)"/><path d="M60 646a6 6 0 0 1 6-6h148a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H66a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#d" 
transform="matrix(1,0,0,1,65,645) translate(21.96180555555555 43.40277777777778)"/><path d="M240 646a6 6 0 0 1 6-6h148a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H246a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#e" transform="matrix(1,0,0,1,245,645) translate(42.361111111111114 43.40277777777778)"/><path d="M420 646a6 6 0 0 1 6-6h148a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H426a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#f" transform="matrix(1,0,0,1,425,645) translate(27.43598090277778 43.40277777777778)"/><path d="M640 646a6 6 0 0 1 6-6h208a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H646a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#g" transform="matrix(1,0,0,1,645,645) translate(20.538194444444457 43.40277777777778)"/><use xlink:href="#h" transform="matrix(1,0,0,1,645,645) translate(138.95182291666669 43.40277777777778)"/><path d="M-120 446a6 6 0 0 1 6-6h288a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6h-288a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#i" transform="matrix(1,0,0,1,-115,445) translate(65.81705729166667 43.40277777777778)"/><path d="M220 446a6 6 0 0 1 6-6h288a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H226a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#j" transform="matrix(1,0,0,1,225,445) translate(74.541015625 43.40277777777778)"/><path d="M560 446a6 6 0 0 1 6-6h288a6 6 0 0 1 6 6v68a6 6 0 0 1-6 6H566a6 6 0 0 1-6-6z" stroke="#3a414a" stroke-width="2" fill="#fff"/><use xlink:href="#f" transform="matrix(1,0,0,1,565,445) translate(97.43598090277777 43.40277777777778)"/><defs><path d="M611 442c-248 0-391-105-460-228l146-94c47 65 117 165 314 165 178 0 307-82 307-266v-224h-17C863-141 792-18 576-18c-268 0-472-195-472-546 0-346 197-568 476-568 216 0 288 133 326 193h17v-179h175V29c0 289-215 413-487 413zm-5-620c203 0 314-146 314-390 0-237-108-403-314-403-213 0-319 180-319 403 0 230 109 390 319 390" id="k"/><path d="M158 0v-1118h174v172h12c41-113 
157-188 290-188 26 0 70 2 91 3v181c-11-2-60-10-108-10-161 0-279 109-279 260V0H158" id="l"/><path d="M471 26C259 26 90-98 90-318c0-256 228-303 435-329 202-27 287-16 287-108 0-139-79-219-234-219-161 0-248 86-283 164l-173-57c86-203 278-265 451-265 150 0 419 46 419 395V0H815v-152h-12C765-73 660 26 471 26zm31-159c199 0 310-134 310-271v-155c-30 35-226 55-295 64-131 17-246 59-246 186 0 116 97 176 231 176" id="m"/><path d="M158 418v-1536h174v179h20c37-59 106-193 324-193 279 0 474 222 474 576 0 356-194 580-473 580-213 0-288-135-325-197h-14v591H158zm492-555c209 0 317-186 317-421 0-232-105-413-317-413-206 0-314 166-314 413 0 249 111 421 314 421" id="n"/><path d="M338-670V0H158v-1490h180v566c73-149 190-208 336-208 226 0 379 139 379 422V0H872v-695c0-172-96-275-252-275-161 0-282 109-282 300" id="o"/><path d="M798-719v166H144v-166h654" id="p"/><path d="M628 24c-324 0-524-230-524-574 0-343 198-582 503-582 237 0 487 146 487 559v75H286c9 234 145 362 343 362 132 0 231-58 273-172l174 48C1024-91 857 24 628 24zM287-650h624c-17-190-120-322-304-322-192 0-309 151-320 322" id="q"/><path d="M598-1118v154H368v674c0 100 37 144 132 144 23 0 62-6 92-12L629-6c-37 13-88 20-134 20-193 0-307-107-307-290v-688H20v-154h168v-266h180v266h230" id="r"/><path d="M158 0v-1118h180V0H158zm91-1301c-68 0-125-53-125-119s57-119 125-119c69 0 126 53 126 119s-57 119-126 119" id="s"/><path d="M481 0L54-1118h197c107 314 236 620 325 951 87-331 217-637 324-951h197L670 0H481" id="t"/><g id="a"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#k"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,13.628472222222221,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,21.994357638888886,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,34.47265625,0)" xlink:href="#n"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,48.07942708333333,0)" xlink:href="#o"/><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,61.21961805555555,0)" xlink:href="#p"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,71.44097222222221,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,79.43793402777777,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,92.39366319444444,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,99.66362847222221,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,108.37673611111111,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,113.75868055555556,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,126.28038194444446,0)" xlink:href="#t"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,138.33550347222223,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,151.29123263888889,0)" xlink:href="#l"/></g><path d="M338-1490V0H158v-1490h180" id="u"/><path d="M338-670V0H158v-1118h173l1 207c72-158 192-221 342-221 226 0 378 139 378 422V0H872v-695c0-172-96-275-252-275-161 0-282 109-282 300" id="v"/><path d="M613 24c-304 0-509-231-509-576 0-350 205-580 509-580 216 0 392 114 453 309l-173 49c-33-115-133-197-280-197-223 0-326 196-326 419 0 220 103 415 326 415 150 0 252-85 285-206l172 49C1010-95 832 24 613 24" id="w"/><g id="b"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#u"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,5.381944444444445,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,17.860243055555557,0)" xlink:href="#v"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,30.989583333333336,0)" xlink:href="#k"/><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,44.61805555555556,0)" xlink:href="#w"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,57.31336805555555,0)" xlink:href="#o"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,70.45355902777777,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,82.93185763888889,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,88.31380208333333,0)" xlink:href="#v"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,101.44314236111111,0)" xlink:href="#p"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,111.66449652777777,0)" xlink:href="#k"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,125.29296875,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,133.65885416666666,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,146.13715277777777,0)" xlink:href="#n"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,159.74392361111111,0)" xlink:href="#o"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,172.88411458333334,0)" xlink:href="#p"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,183.10546875,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,191.10243055555554,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,204.05815972222223,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,211.32812499999994,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,220.04123263888883,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,225.42317708333331,0)" xlink:href="#q"/><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,237.9448784722222,0)" xlink:href="#t"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,250,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,262.95572916666663,0)" xlink:href="#l"/></g><path d="M98-1322v-168h1126v168H757V0H567v-1322H98" id="x"/><path d="M538 24C308 24 148-78 108-271l171-41c32 123 123 178 257 178 156 0 256-77 256-169 0-77-54-128-164-154l-186-44c-203-48-300-148-300-305 0-192 176-326 414-326 230 0 351 112 402 269l-163 42c-31-80-94-158-238-158-133 0-233 69-233 162 0 83 57 129 188 160l169 40c203 48 298 149 298 302 0 196-179 339-441 339" id="y"/><g id="c"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#x"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,13.0859375,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,21.451822916666664,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,33.60460069444444,0)" xlink:href="#t"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,45.65972222222222,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,58.615451388888886,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,66.98133680555556,0)" xlink:href="#y"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,78.7109375,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,91.18923611111111,0)" xlink:href="#u"/></g><path d="M657 26c-323 0-524-166-541-416h195c15 169 171 246 346 246 202 0 356-106 356-265 5-203-294-238-475-293-239-73-380-191-380-389 0-252 224-419 512-419 294 0 499 171 508 396H992c-17-145-151-228-328-228-193 0-321 102-321 242 0 156 175 211 284 241l149 41c160 44 422 134 422 412 0 244-197 432-541 432" id="z"/><g id="d"><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#z"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,14.2578125,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,21.52777777777778,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,29.893663194444443,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,42.37196180555556,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,49.42491319444445,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,62.38064236111112,0)" xlink:href="#k"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,76.00911458333334,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,81.39105902777779,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,94.34678819444446,0)" xlink:href="#y"/></g><path d="M180 0v-1490h908v168H370v486h669v168H370v500h727V0H180" id="A"/><path d="M577 24c-279 0-473-224-473-580 0-354 195-576 474-576 218 0 287 134 324 193h14v-551h180V0H922v-173h-20C865-111 790 24 577 24zm27-161c203 0 314-172 314-421 0-247-108-413-314-413-212 0-317 181-317 413 0 235 108 421 317 421" id="B"/><g id="e"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#A"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,13.35720486111111,0)" xlink:href="#B"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,26.963975694444443,0)" xlink:href="#k"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,40.592447916666664,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,53.54817708333333,0)" xlink:href="#y"/></g><path d="M52 0l541-1490h220L1361 0h-200l-149-416H398L254 
0H52zm404-582h497c-103-290-145-390-251-756-108 377-145 460-246 756" id="C"/><g id="f"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#C"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,15.33203125,0)" xlink:href="#B"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,28.938802083333332,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,41.41710069444444,0)" xlink:href="#n"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,55.02387152777778,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,62.076822916666664,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,75.03255208333333,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,83.3984375,0)" xlink:href="#y"/></g><path d="M180 0v-1490h190v1322h690V0H180" id="D"/><path d="M783 20c-382 0-661-292-661-764 0-473 279-766 661-766 302 0 548 182 601 489h-190c-42-204-217-313-411-313-268 0-476 208-476 590 0 381 209 588 476 588 195 0 369-110 411-313h190c-52 303-296 489-601 489" id="E"/><g id="g"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#D"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,12.565104166666666,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,25.04340277777778,0)" xlink:href="#v"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,38.17274305555556,0)" xlink:href="#k"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,51.80121527777778,0)" xlink:href="#E"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,68.03385416666667,0)" xlink:href="#o"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,81.17404513888889,0)" xlink:href="#m"/><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,93.65234375,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,99.03428819444444,0)" xlink:href="#v"/></g><path d="M613 24c-304 0-509-231-509-576 0-350 205-580 509-580 305 0 511 230 511 580 0 345-206 576-511 576zm0-161c226 0 329-195 329-415 0-222-103-419-329-419-223 0-326 196-326 419 0 220 103 415 326 415" id="F"/><g id="h"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#E"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,16.23263888888889,0)" xlink:href="#F"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,29.557291666666664,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,37.55425347222222,0)" xlink:href="#q"/></g><path d="M789 20c-398 0-667-293-667-764 0-473 273-766 655-766 315 0 558 198 612 487h-196c-61-191-204-311-415-311-262 0-471 207-471 590 0 380 208 588 482 588 249 0 420-165 425-427H828v-166h572v163c0 370-256 606-611 606" id="G"/><path d="M180 0v-1490h510c348 0 508 194 508 460 0 198-88 351-276 417L1256 0h-220L726-579c-117 2-238 0-356 1V0H180zm190-747h312c235 0 327-108 327-283 0-177-92-293-329-293H370v576" id="H"/><g id="i"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#G"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,16.57986111111111,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,24.94574652777778,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,37.424045138888886,0)" xlink:href="#n"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,51.03081597222221,0)" xlink:href="#o"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,64.17100694444443,0)" xlink:href="#H"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,78.14670138888887,0)" 
xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,91.10243055555556,0)" xlink:href="#r"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,98.37239583333331,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,107.08550347222221,0)" xlink:href="#s"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,112.46744791666666,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,124.98914930555556,0)" xlink:href="#t"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,137.04427083333334,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,150,0)" xlink:href="#l"/></g><path d="M678-1118v154H420V0H240v-964H20v-154h220v-149c0-194 155-293 318-293 85 0 141 18 168 30l-50 154c-19-6-47-17-97-17-111 0-159 58-159 166v109h258" id="I"/><path d="M158 0v-1118h175l1 205c55-151 181-225 313-225 147 0 245 90 285 228 53-141 190-228 352-228 194 0 352 125 352 384V0h-181v-749c0-161-105-225-225-225-151 0-243 103-243 244V0H807v-767c0-124-93-207-219-207-131 0-250 92-250 270V0H158" id="J"/><g id="j"><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,0,0)" xlink:href="#x"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,13.0859375,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,21.451822916666664,0)" xlink:href="#m"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,33.93012152777777,0)" xlink:href="#v"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,47.05946180555556,0)" xlink:href="#y"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,58.789062499999986,0)" xlink:href="#I"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,66.36284722222221,0)" xlink:href="#F"/><use 
transform="matrix(0.010850694444444444,0,0,0.010850694444444444,79.68749999999999,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,88.40060763888889,0)" xlink:href="#J"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,107.86675347222221,0)" xlink:href="#q"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,120.82248263888889,0)" xlink:href="#l"/><use transform="matrix(0.010850694444444444,0,0,0.010850694444444444,129.18836805555554,0)" xlink:href="#y"/></g></defs></g></svg> \ No newline at end of file diff --git a/docs/guide/edges.md b/docs/guide/edges.md new file mode 100644 index 00000000..814e6587 --- /dev/null +++ b/docs/guide/edges.md @@ -0,0 +1,51 @@ +# Edges + +Edges specify how content should be linked. +Often, content in existing vector stores has metadata based on structured information. +For example, a vector store containing articles may have information about the authors, keywords, and citations of those articles. +__Such content can be traversed along relationships already present in that metadata!__ +See [Specifying Edges](#specifying-edges) for more on how edges are specified. + +!!! tip "Edges can be dynamically specified each time" + + Since edges can be different each time the traversal is invoked, it is possible to tailor the relationships being used to the question. + +## Specifying Edges {: #specifying-edges} + +Edges are specified by passing the `edges` parameter to [`traverse`][graph_retriever.traverse] or [`atraverse`][graph_retriever.atraverse]. When used with LangChain, they may be provided when the [`GraphRetriever`][langchain_graph_retriever.GraphRetriever] is instantiated or when `invoke` or `ainvoke` is called. + +The following example shows how edges can be defined using metadata from an example article. + +!!! 
example "Specifying Edges" + + ```python title="Example content for an article" + Content( + id="article1", + content="...", + metadata={ + "keywords": ["GPT", "GenAI"], + "authors": ["Ben", "Eric"], + "primary_author": "Eric", + "cites": ["article2", "article3"], + } + ) + ``` + + 1. `("keywords", "keywords")` connects to other articles about GPT and GenAI. + 2. `("authors", "authors")` connects to other articles by any of the same authors. + 3. `("authors", "primary_author")` connects to other articles whose primary author was Ben or Eric. + 4. `("cites", "$id")` connects to the articles cited (by ID). + 5. `("$id", "cites")` connects to articles which cite this one. + 6. `("cites", "cites")` connects to other articles with citations in common. + +## Edge Functions + +While sometimes the information to traverse is missing and the vector store +needs to be re-populated, in other cases the information may exist but not quite be +in a suitable format for traversal. For instance, the `"authors"` field may +contain a list of authors and their institution, making it impossible to link to +other articles by the same author when they were at a different institution. + +In such cases, you can provide a custom +[`EdgeFunction`][graph_retriever.edges.EdgeFunction] to extract the edges for +traversal. \ No newline at end of file diff --git a/docs/guide/get-started.md b/docs/guide/get-started.md new file mode 100644 index 00000000..2cbfbe7b --- /dev/null +++ b/docs/guide/get-started.md @@ -0,0 +1,242 @@ +# Get Started + +This page demonstrates how to combine Graph Traversal and Vector Search using +`langchain-graph-retriever` with `langchain`. + +## Pre-requisites + +We assume you already have a working `langchain` installation, including an LLM and +embedding model as well as a [supported vector store](./adapters.md). 
+ +In that case, you only need to install `langchain-graph-retriever`: + +```bash +pip install langchain langchain-graph-retriever +``` + +## Preparing Data + +Loading data is exactly the same as for whichever vector store you use. The main thing +to consider is what structured information you wish to include in the metadata to +support traversal. + +For this guide, I have a JSON file with information about animals. Several example +entries are shown below. The actual file has one entry per line, making it easy to +load into `Document`s. + +```json +{ + "id": "alpaca", + "text": "alpacas are domesticated mammals valued for their soft wool and friendly demeanor.", + "metadata": { + "type": "mammal", + "number_of_legs": 4, + "keywords": ["wool", "domesticated", "friendly"], + "origin": "south america" + } +} +{ + "id": "caribou", + "text": "caribou, also known as reindeer, are migratory mammals found in arctic regions.", + "metadata": { + "type": "mammal", + "number_of_legs": 4, + "keywords": ["migratory", "arctic", "herbivore", "tundra"], + "diet": "herbivorous" + } +} +{ + "id": "cassowary", + "text": "cassowaries are flightless birds known for their colorful necks and powerful legs.", + "metadata": { + "type": "bird", + "number_of_legs": 2, + "keywords": ["flightless", "colorful", "powerful"], + "habitat": "rainforest" + } +} +``` + +```python title="Fetching Animal Data" +from graph_rag_example_helpers.datasets.animals import fetch_documents +animals = fetch_documents() +``` + +## Populating the Vector Store + +The following shows how to populate a variety of vector stores with the animal data. 
+ +=== "Astra" + + ```python + from dotenv import load_dotenv + from langchain_astradb import AstraDBVectorStore + from langchain_openai import OpenAIEmbeddings + + load_dotenv() + vector_store = AstraDBVectorStore.from_documents( + collection_name="animals", + documents=animals, + embedding=OpenAIEmbeddings(), + ) + ``` + +=== "Apache Cassandra" + + ```python + from langchain_community.vectorstores.cassandra import Cassandra + from langchain_openai import OpenAIEmbeddings + from langchain_graph_retriever.transformers import ShreddingTransformer + + shredder = ShreddingTransformer() # (1)! + vector_store = Cassandra.from_documents( + documents=list(shredder.transform_documents(animals)), + embedding=OpenAIEmbeddings(), + table_name="animals", + ) + ``` + + 1. Since Cassandra doesn't index items in lists for querying, it is necessary to + shred metadata containing list to be queried. By default, the + [`ShreddingTransformer`][langchain_graph_retriever.transformers.ShreddingTransformer] + shreds all keys. It may be configured to only shred those + metadata keys used as edge targets. + +=== "OpenSearch" + + ```python + from langchain_community.vectorstores import OpenSearchVectorSearch + from langchain_openai import OpenAIEmbeddings + + vector_store = OpenSearchVectorSearch.from_documents( + opensearch_url=OPEN_SEARCH_URL, + index_name="animals", + embedding=OpenAIEmbeddings(), + engine="faiss", + documents=animals, + bulk_size=500, # (1)! + ) + ``` + + 1. There is currently a bug in the OpenSearchVectorStore implementation that + requires this extra parameter. + +=== "Chroma" + + ```python + from langchain_chroma.vectorstores import Chroma + from langchain_openai import OpenAIEmbeddings + from langchain_graph_retriever.transformers import ShreddingTransformer + + shredder = ShreddingTransformer() # (1)! 
+ vector_store = Chroma.from_documents( + documents=list(shredder.transform_documents(animals)), + embedding=OpenAIEmbeddings(), + collection_name="animals", + ) + ``` + + 1. Since Chroma doesn't index items in lists for querying, it is necessary to + shred metadata containing list to be queried. By default, the + [`ShreddingTransformer`][langchain_graph_retriever.transformers.ShreddingTransformer] + shreds all keys. It may be configured to only shred those + metadata keys used as edge targets. + +## Simple Traversal + +For our first retrieval and graph traversal, we're going to start with a single animal best +matching the query, and then traverse to other animals with the same `habitat` and/or `origin`. + +=== "Astra" + + ```python + from graph_retriever.strategies import Eager + from langchain_graph_retriever import GraphRetriever + + simple = GraphRetriever( + store = vector_store, + edges = [("habitat", "habitat"), ("origin", "origin"), ("keywords", "keywords")], + strategy = Eager(k=10, start_k=1, max_depth=2), + ) + ``` + +=== "Apache Cassandra" + + ```python + from graph_retriever.strategies import Eager + from langchain_graph_retriever import GraphRetriever + from langchain_graph_retriever.adapters.cassandra import CassandraAdapter + + simple = GraphRetriever( + store = CassandraAdapter(vector_store, shredder, {"keywords"}), + edges = [("habitat", "habitat"), ("origin", "origin"), ("keywords", "keywords")], + strategy = Eager(k=10, start_k=1, max_depth=2), + ) + ``` + +=== "OpenSearch" + + ```python + from graph_retriever.strategies import Eager + from langchain_graph_retriever import GraphRetriever + + simple = GraphRetriever( + store = vector_store, + edges = [("habitat", "habitat"), ("origin", "origin"), ("keywords", "keywords")], + strategy = Eager(k=10, start_k=1, max_depth=2), + ) + ``` + + +=== "Chroma" + + ```python + from graph_retriever.strategies import Eager + from langchain_graph_retriever import GraphRetriever + from 
langchain_graph_retriever.adapters.chroma import ChromaAdapter + + simple = GraphRetriever( + store = ChromaAdapter(vector_store, shredder, {"keywords"}), + edges = [("habitat", "habitat"), ("origin", "origin"), ("keywords", "keywords")], + strategy = Eager(k=10, start_k=1, max_depth=2), + ) + ``` + +!!! note "Shredding" + + The above code is exactly the same for all stores, however adapters for shredded + stores (Chroma and Apache Cassandra) require configuration to specify which metadata + fields need to be rewritten when issuing queries. + +The above creates a graph traversing retriever that starts with the nearest animal +(`start_k=1`), retrieves 10 documents (`k=10`) and limits the search to documents that +are at most 2 steps away from the first animal (`max_depth=2`). + +The edges define how metadata values can be used for traversal. In this case, every +animal is connected to other animals with the same habitat and/or same origin. + +```python +simple_results = simple.invoke("what mammals could be found near a capybara") + +for doc in simple_results: + print(f"{doc.id}: {doc.page_content}") +``` + +## Visualizing + +`langchain-graph-retrievers` includes code for converting the document graph into a +`networkx` graph, for rendering and other analysis. See @fig-document-graph + +```python title="Graph retrieved documents" +import networkx as nx +import matplotlib.pyplot as plt +from langchain_graph_retriever.document_graph import create_graph + +document_graph = create_graph( + documents=simple_results, + edges = simple.edges, +) + +nx.draw(document_graph, with_labels=True) +plt.show() +``` diff --git a/docs/guide/index.md b/docs/guide/index.md new file mode 100644 index 00000000..1b7fd003 --- /dev/null +++ b/docs/guide/index.md @@ -0,0 +1,35 @@ +# Guide + +Graph RAG provides the ability to traverse content in a vector store based on relationships in the metadata. 
+The traversal may start from specific nodes or use vector search to find the best starting places (or both). +Each traversal may define a different way to use the metadata to relate information, allowing different calls to traverse to focus on different properties. + +A variety of traversal strategies are supported, allowing the specific nodes selected during traversal to be controlled depending on the goals. +In some cases, it is important to find deeper supporting content, while in others finding a broader set of relevant perspectives is more appropriate. + +This guide provides an overview of the key concepts and the components provided by the project. + +!!! tip "In a hurry to get started?" + + Go to the [Getting Started Guide](get-started.md) to jump in! + +## Packages and Components + + + +The Graph RAG project primarily consists of two Python packages: + +- [`graph_retriever`](../reference/graph_retriever/index.md) provides the core +traversal functions in a framework independent way. + - [Traversal](traversal.md): The primary methods `traverse` and `atraverse` for performing the graph traversal. + - [Strategies](strategies.md): Configurable and customizable strategies for selecting nodes to visit. + - [Edges](edges.md): Configurable and customizable specification of relationships between nodes. + - [Adapters](adapters.md): Interface used to interact with a Vector Store. + +- [`langchain_graph_retriever`](../reference/langchain_graph_retriever/index.md) +is built on it and integrates with LangChain to allow graph retrieval on LangChain +supported Vector Stores. + + - [GraphRetriever](traversal.md#graph-retriever): A LangChain Retriever for performing the traversal. Uses `traverse` and `atraverse` under the hood. + - [Transformers](transformers.md): A variety of LangChain document transformers adding metadata that may be useful for traversing. + - [Adapters](adapters.md): Adapter implementations for LangChain Vector Stores. 
\ No newline at end of file
diff --git a/docs/guide/migration.md b/docs/guide/migration.md
new file mode 100644
index 00000000..ea0f6bfb
--- /dev/null
+++ b/docs/guide/migration.md
@@ -0,0 +1,86 @@
+# Migration
+
+This page discusses migration from LangChain `GraphVectorStore` as well as between versions of `graph-retriever` and `langchain-graph-retriever`.
+
+## From LangChain GraphVectorStore
+
+LangChain `GraphVectorStore` relied on putting specially crafted `Link` instances into `metadata["links"]`. Many cases used link extractors to compute these links, but it was also often useful (and necessary) to create them manually.
+
+When converting from a `GraphVectorStore` to the new `langchain-graph-retriever` library, you need to do the following:
+
+1. Replace uses of the link extractors with document transformers.
+2. Replace manual link creation with metadata fields.
+3. Replace `GraphVectorStore` usage with the `GraphRetriever`.
+
+### Replace Link Extractors with Document Transformers
+
+`GLiNERLinkExtractor`
+
+: Replace with [GLiNERTransformer][langchain_graph_retriever.transformers.gliner.GLiNERTransformer], which will populate metadata fields for each label.
+
+`HierarchyLinkExtractor`
+
+: If you already have a parent ID in the metadata, you can remove this. Otherwise, replace with the [ParentTransformer][langchain_graph_retriever.transformers.ParentTransformer] which populates a `parent` field computed from a path. The parent field may be used with edges to achieve parent-to-child, child-to-parent, and sibling-to-sibling navigation.
+
+`HtmlLinkExtractor`
+
+: Replace with [HyperlinkTransformer][langchain_graph_retriever.transformers.html.HyperlinkTransformer] which extracts hyperlinks from each chunk and writes them to a metadata field.
+
+`KeybertLinkExtractor`
+
+: Replace with [KeyBERTTransformer][langchain_graph_retriever.transformers.keybert.KeyBERTTransformer], which will populate a metadata field with the keywords.
+
+### Replace Manual Link Creation with Metadata Fields
+
+With the old library, you had to choose the direction of the links when they were created -- either `in`, `out` or `bidir`. With the new library, you simply create the corresponding fields and choose the direction of edges when you issue a query (see [next section](#replace-graphvectorstore-with-the-graphretriever)).
+
+```py title="GraphVectorStore Links (Old)"
+# Document metadata for a page at `http://somesite` linking to some other URLs
+# and a few keyword links.
+doc = Document(
+    ...,
+    metadata = {
+        "links": [
+            Link.incoming("url", "http://somesite"),
+            Link.outgoing("url", "http://someothersite"),
+            Link.outgoing("url", "http://yetanothersite"),
+            Link.bidir("keyword", "sites"),
+            Link.bidir("keyword", "web"),
+        ]
+    }
+)
+
+```
+
+```py title="LangChain Graph Retriever (New)"
+doc = Document(
+    ...,
+    metadata = {
+        "url": "http://somesite",
+        "hrefs": ["http://someothersite", "http://yetanothersite"],
+        "keywords": ["sites", "web"],
+    }
+)
+```
+
+These metadata fields can be used to accomplish a variety of graph traversals. For example:
+
+* `edges = [("hrefs", "url"), ...]` navigates from a site to the pages it links to (from `hrefs` to `url`).
+* `edges = [("keywords", "keywords"), ...]` navigates from a site to other sites with the same keyword.
+* `edges = [("url", "hrefs"), ...]` navigates from a site to other sites that link to it.
+
+!!! tip "Per-Query Edges"
+
+    You can use different edges for each query, allowing you to navigate different directions depending on the needs. In the old library, you only ever navigated out from a site to the things it linked to, while with the new library the metadata captures the information (what URL is this document from, what URLs does it reference) and the edges determine which fields are traversed at retrieval time.
+ +### Replace GraphVectorStore with the GraphRetriever + +Finally, rather than creating the links and writing them to a `GraphVectorStore` you write the documents (with metadata) to a standard `VectorStore` and apply a [GraphRetriever][langchain_graph_retriever.GraphRetriever]: + +```py title="LangChain Graph Retriever (New)" +from langchain_graph_retriever import GraphRetriever +retriever = GraphRetriever( + store=vector_store, + edges=[("hrefs", "url"), ("keywords", "keywords")], +) +``` \ No newline at end of file diff --git a/docs/guide/strategies.md b/docs/guide/strategies.md new file mode 100644 index 00000000..d9aad706 --- /dev/null +++ b/docs/guide/strategies.md @@ -0,0 +1,29 @@ +# Strategies + +Strategies determine which nodes are selected during [traversal](./traversal.md). + +All strategies allow you to control how many nodes are retrieved (`k`) as well +as how many nodes are found during the initial vector search (`start_k`) and +each step of the traversal (`adjacent_k`) as well as bounding the nodes +retrieved based on depth (`max_depth`). + +## Eager + +The [`Eager`][graph_retriever.strategies.Eager] strategy selects all of the discovered nodes at each step of the traversal. + +It doesn't support configuration beyond the standard options. + +## MMR + +The [`MMR`][graph_retriever.strategies.Mmr] strategy selects nodes with the +highest maximum marginal relevance score at each iteration. + +It can be configured with a `lambda_mult` which controls the trade-off between relevance and diversity. + +## Scored + +The [`Scored`][graph_retriever.strategies.Scored] strategy applies a user-defined function to each node to assign a score, and selects a number of nodes with the highest scores. + +## User-Defined Strategies + +You can also implement your own [`Strategy`][graph_retriever.strategies.Strategy]. This allows you to control how discovered nodes are tracked and selected for traversal. 
\ No newline at end of file diff --git a/docs/guide/transformers.md b/docs/guide/transformers.md new file mode 100644 index 00000000..c50e4e6a --- /dev/null +++ b/docs/guide/transformers.md @@ -0,0 +1,295 @@ +# Transformers + +!!! note "Transformers are optional, not mandatory" + Graph traversal operates on the structured metadata. + Transformers provide tools for populating the metadata, but they are not necessary. + In many cases you may have existing structured information that is useful + in addition or instead of what the transformers would populate. + +We provide two types of document transformers that can be useful in setting up your +documents for graph traversal. + +* **Information Extractors:** These extract information out of document content + and add to the metadata. + +* **Metadata Utilities:** These add to or modify document metadata to enable certain + features + +## Information Extractors + +!!! note "Extras required" + Most of the Transformers in this section require extra packages to be installed. + Either look at the specifics in the reference documentation for each transformer, + or install all the extras via: + + ``` + pip install "langchain-graph-retriever[all]" + ``` + +### NLP-Model Based + +Several of our document transformers that extract information depend on pre-trained +Natural Language Processing (NLP) models. + +The following LangChain documents will be used for the code examples in this section: + +??? example "Test Documents" + ```python + from langchain_core.documents import Document + + model_docs = [ + Document( + id="red_fox", + page_content=""" + The Red Fox is an omnivore, feeding on small mammals, birds, fruits, and insects. It + thrives in a wide range of habitats, including forests, grasslands, and even urban areas + like New York City, where it has adapted to human presence. This agile creature moves + primarily by walking and running, but it can also leap and climb when necessary. 
Its
+    body is covered in thick fur, which helps it stay warm in colder climates. The National
+    Wildlife Federation has tracked their urban expansion, and their population was
+    highlighted in the Wildlife Conservation Summit 2023.""",
+        ),
+        Document(
+            id="sea_turtle",
+            page_content="""
+    The Green Sea Turtle is a herbivore, grazing on seagrass and algae in coastal waters and
+    shallow tropical seas, particularly around the Great Barrier Reef. It is a powerful
+    swimmer, using its large, flipper-like limbs to glide through the ocean. Unlike mammals,
+    its body is covered in a tough, scaly shell, providing protection from predators.
+    Conservation efforts by The World Wildlife Fund have played a significant role in
+    protecting this species, and it was a major focus of discussion at the Marine Life
+    Protection Conference 2024.""",
+        ),
+    ]
+    ```
+
+#### GLiNERTransformer
+
+The [`GLiNERTransformer`][langchain_graph_retriever.transformers.gliner.GLiNERTransformer]
+extracts structured entity labels from text, identifying key attributes and categories
+to enrich document metadata with semantic information.
+
+Example use:
+    ```python
+    from pprint import pprint
+    from langchain_graph_retriever.transformers.gliner import GLiNERTransformer
+    gliner = GLiNERTransformer(labels=["diet", "habitat", "locomotion", "body covering"])
+
+    gliner_docs = gliner.transform_documents(model_docs)
+    for doc in gliner_docs:
+        pprint({"id": doc.id, "metadata": doc.metadata}, width=100)
+    ```
+
+Example output:
+    ```text
+    {'id': 'red_fox',
+     'metadata': {'body covering': ['thick fur'],
+                  'diet': ['birds', 'omnivore', 'small mammals', 'insects', 'fruits'],
+                  'habitat': ['urban areas', 'new york city', 'forests', 'grasslands'],
+                  'locomotion': ['walking and running']}}
+    {'id': 'sea_turtle',
+     'metadata': {'body covering': ['scaly shell'],
+                  'diet': ['seagrass and algae'],
+                  'habitat': ['coastal waters', 'shallow tropical seas', 'great barrier reef']}}
+    ```
+
+#### KeyBERTTransformer
+
+The [`KeyBERTTransformer`][langchain_graph_retriever.transformers.keybert.KeyBERTTransformer]
+extracts key topics and concepts from text, generating metadata that highlights the most
+relevant terms to describe the content.
+
+Example use:
+    ```python
+    from langchain_graph_retriever.transformers.keybert import KeyBERTTransformer
+    keybert = KeyBERTTransformer()
+
+    keybert_docs = keybert.transform_documents(model_docs)
+    for doc in keybert_docs:
+        print(f"{doc.id}: {doc.metadata}")
+    ```
+
+Example output:
+    ```text
+    red_fox: {'keywords': ['wildlife', 'fox', 'mammals', 'habitats', 'omnivore']}
+    sea_turtle: {'keywords': ['turtle', 'reef', 'marine', 'seagrass', 'wildlife']}
+    ```
+
+#### SpacyNERTransformer
+
+The [`SpacyNERTransformer`][langchain_graph_retriever.transformers.spacy.SpacyNERTransformer]
+identifies and labels named entities in text, extracting structured metadata such as organizations, locations, dates, and other key entity types.
+
+Example use:
+    ```python
+    from pprint import pprint
+    from langchain_graph_retriever.transformers.spacy import SpacyNERTransformer
+    spacy = SpacyNERTransformer()
+
+    spacy_docs = spacy.transform_documents(model_docs)
+    for doc in spacy_docs:
+        pprint({"id": doc.id, "metadata": doc.metadata}, width=100)
+    ```
+
+Example output:
+    ```text
+    {'id': 'red_fox',
+     'metadata': {'entities': ['ORG: The National Wildlife Federation',
+                               'GPE: New York City',
+                               'ORG: the Wildlife Conservation Summit',
+                               'DATE: 2023']}}
+    {'id': 'sea_turtle',
+     'metadata': {'entities': ['ORG: The World Wildlife Fund',
+                               'FAC: the Great Barrier Reef',
+                               'ORG: the Marine Life Protection Conference',
+                               'LOC: The Green Sea Turtle',
+                               'DATE: 2024']}}
+    ```
+
+### Parser Based
+
+The following document transformer uses a parser to extract metadata.
+
+#### HyperlinkTransformer
+
+The [`HyperlinkTransformer`][langchain_graph_retriever.transformers.html.HyperlinkTransformer]
+extracts hyperlinks from HTML content and stores them in document metadata.
+
+??? example "Test Html Documents"
+    ```python
+    from langchain_core.documents import Document
+    animal_html = """
+    <!DOCTYPE html>
+    <html><head><title>Animals of the World</title></head>
+    <body>
+    <h2>Mammals</h2>
+    <p>The <a href="https://example.com/lion">lion</a> is the king of the jungle.</p>
+    <p>The <a href="https://example.com/elephant">elephant</a> is a large animal.</p>
+
+    <h2>Birds</h2>
+    <p>The <a href="https://example.com/eagle">eagle</a> soars high in the sky.</p>
+    <p>The <a href="https://example.com/penguin">penguin</a> thrives in icy areas.</p>
+    </body></html>
+    """
+
+    html_doc = Document(
+        page_content=animal_html,
+        metadata={"url": "https://example.com/animals"}
+    )
+    ```
+
+    Note that each document needs to have an existing `url` metadata field.
+
+Example use:
+    ```python
+    from pprint import pprint
+    from langchain_graph_retriever.transformers.html import HyperlinkTransformer
+    html_transformer = HyperlinkTransformer()
+
+    extracted_doc = html_transformer.transform_documents([html_doc])[0]
+
+    pprint(extracted_doc.metadata)
+    ```
+
+Example output:
+    ```text
+    {'hyperlink': ['https://example.com/eagle',
+                   'https://example.com/lion',
+                   'https://example.com/elephant',
+                   'https://example.com/penguin'],
+     'url': 'https://example.com/animals'}
+    ```
+
+## Metadata Utilities
+
+### ParentTransformer
+
+The [`ParentTransformer`][langchain_graph_retriever.transformers.ParentTransformer]
+adds the hierarchical `Parent` path to the document metadata.
+
+??? example "Test Documents"
+    ```python
+    from langchain_core.documents import Document
+
+    parent_docs = [
+        Document(id="root", page_content="test", metadata={"path": "root"}),
+        Document(id="h1", page_content="test", metadata={"path": "root.h1"}),
+        Document(id="h1a", page_content="test", metadata={"path": "root.h1.a"}),
+    ]
+    ```
+
+    Note that each document needs to have an existing `path` metadata field.
+
+Example use:
+    ```python
+    from langchain_graph_retriever.transformers import ParentTransformer
+    transformer = ParentTransformer(path_delimiter=".")
+
+    transformed_docs = transformer.transform_documents(parent_docs)
+    for doc in transformed_docs:
+        print(f"{doc.id}: {doc.metadata}")
+    ```
+
+Example output:
+    ```text
+    root: {'path': 'root'}
+    h1: {'path': 'root.h1', 'parent': 'root'}
+    h1a: {'path': 'root.h1.a', 'parent': 'root.h1'}
+    ```
+
+### ShreddingTransformer
+
+The [`ShreddingTransformer`][langchain_graph_retriever.transformers.ShreddingTransformer]
+is primarily designed as a helper utility for vector stores that do not have native
+support for collection-based metadata fields. It transforms these fields into multiple
+metadata key-value pairs before database insertion. It also provides a method to restore
+metadata back to its original format.
+ +#### Shredding + +??? example "Test Document" + ```python + from langchain_core.documents import Document + + collection_doc = Document(id="red_fox", page_content="test", metadata={ + "diet": ["birds", "omnivore", "small mammals", "insects", "fruits"], + "size": "small" + }) + ``` + +Example use: + ```python + from pprint import pprint + from langchain_graph_retriever.transformers import ShreddingTransformer + + shredder = ShreddingTransformer() + shredded_docs = shredder.transform_documents([collection_doc]) + pprint(shredded_docs[0].metadata) + ``` + +Example output: + ```text + {'__shredded_keys': '["diet"]', + 'diet→birds': '§', + 'diet→fruits': '§', + 'diet→insects': '§', + 'diet→omnivore': '§', + 'diet→small mammals': '§', + 'size': 'small'} + ``` + +#### Restoration + +This example uses the output from the Shredding Example above. + +Example use: + ```python + restored_docs = shredder.restore_documents(shredded_docs) + pprint(restored_docs[0].metadata) + ``` + +Example output: + ```text + {'diet': ['birds', 'omnivore', 'small mammals', 'insects', 'fruits'], + 'size': 'small'} + ``` \ No newline at end of file diff --git a/docs/guide/traversal.md b/docs/guide/traversal.md new file mode 100644 index 00000000..f40010a2 --- /dev/null +++ b/docs/guide/traversal.md @@ -0,0 +1,20 @@ +# Traversal + +At a high level, traversal performs the following steps: + +1. Retrieve `start_k` most similar to the `query` using vector search. +2. Find the nodes reachable from the `initial_root_ids`. +3. Discover the `start_k` nodes and the neighbors of the initial roots as "depth 0" candidates. +4. Ask the strategy which nodes to visit next. +5. If no more nodes to visit, exit and return the selected nodes. +6. Record those nodes as selected and retrieve the top `adjacent_k` nodes reachable from them. +7. Discover the newly reachable nodes (updating depths as needed). +8. Goto 4. 
+ +## Traversal Methods + +The [`graph_retriever`](../reference/graph_retriever/index.md) package provides [`traverse`][graph_retriever.traverse] and [`atraverse`][graph_retriever.atraverse] for performing traversals. + +## LangChain Graph Retriever {: #graph-retriever } + +The [`langchain_graph_retriever`](../reference/langchain_graph_retriever/index.md) package provides [`GraphRetriever`][langchain_graph_retriever.GraphRetriever], an implementation of LangChain's [`BaseRetriever`][langchain_core.retrievers.BaseRetriever] which performs traversals. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..d7d3f62f --- /dev/null +++ b/docs/index.md @@ -0,0 +1,67 @@ +--- +hide: + - navigation + - toc + - path +--- + +# Graph RAG + +Graph RAG provides retrievers that combine **unstructured** similarity-search on vectors and +**structured** traversal of metadata properties. +These retrievers are implemented using the metadata search functionality of existing vector stores, **allowing you to traverse your existing vector store**! + +<div class="grid cards two" markdown> + +- :material-transit-connection-variant:{ .lg .middle } __Link based on existing metadata__ + + --- + + Use existing metadata fields without additional processing. + Retrieve more from your existing vector store! + + [:octicons-arrow-right-24: Get started](./guide/get-started.md) + +- :material-clock-edit-outline:{ .lg .middle } __Change links on demand__ + + --- + + Edges can be specified on-the-fly, allowing different relationships to be traversed based on the question. + + [:octicons-arrow-right-24: Edges](./guide/edges.md) + + +- :material-connection:{ .lg .middle } __Pluggable Traversal Strategies__ + + --- + + Use built-in traversal strategies like Eager or MMR, or define your own logic to select which nodes to explore. 
+ + [:octicons-arrow-right-24: Strategies](./guide/strategies.md) + +- :material-multicast:{ .lg .middle } __Broad compatibility__ + + --- + + Adapters are available for a variety of vector stores with support for + additional stores easily added. + + [:octicons-arrow-right-24: Adapters](./guide/adapters.md) +</div> + +## Example: LangChain Retriever combining Vector and Graph traversal + +```python +from langchain_graph_retriever import GraphRetriever + +retriever = GraphRetriever( + store = store, + edges = [("mentions", "$id"), ("entities", "entities")], # (1)! +) + +retriever.invoke("where is Santa Clara?") +``` + +1. `edges` configures traversing from a node to other nodes listed in the `metadata["mentions"]` field (to the corresponding `id`) and to other nodes with overlapping `metadata["entities"]`. + +See [Examples](examples/index.md) for more complete examples. \ No newline at end of file diff --git a/docs/index.qmd b/docs/index.qmd deleted file mode 100644 index 571ae00a..00000000 --- a/docs/index.qmd +++ /dev/null @@ -1,31 +0,0 @@ -# Graph RAG - -Graph RAG provides retrievers combining **unstructured** similarity-search on vectors and -**structured** traversal of metadata properties. -These retrievers are implemented using the metadata search functionality of existing vector stores, **allowing you to traverse your existing vector store**! - -The core library (`graph-retriever`) can be used in generic Python applications, while `langchain-graph-retriever` provides [langchain](https://python.langchain.com/docs/introduction/)-specific functionality. - -## Getting Started with LangChain - -1. Install `langchain-graph-retriever` (or add to your Python dependencies). - - ```sh - pip install langchain-graph-retriever - ``` - -1. Wrap your existing vector store to enable graph retrieval: - - ```python - from langchain_graph_retriever import GraphRetriever - - retriever = GraphRetriever( - store = store, - # Define the relationships to navigate: - # 1. 
From nodes with a list of `mentions` to the nodes with the corresponding `ids`. - # 2. From noeds with a list of related `entities` to other nodes with the same entities. - edges = [("mentions", "id"), "entities"], - ) - - retriever.invoke("where is Santa Clara?") - ``` \ No newline at end of file diff --git a/docs/posts/index.qmd b/docs/posts/index.qmd deleted file mode 100644 index c6d08831..00000000 --- a/docs/posts/index.qmd +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: "Blog Posts" -listing: - sort: "date desc" - type: default - categories: true - feed: true ---- \ No newline at end of file diff --git a/docs/reference/graph_rag_example_helpers/index.md b/docs/reference/graph_rag_example_helpers/index.md new file mode 100644 index 00000000..1fea4d9b --- /dev/null +++ b/docs/reference/graph_rag_example_helpers/index.md @@ -0,0 +1,10 @@ +--- +toc: + title: graph_rag_example_helpers +--- + +# graph_rag_example_helpers + +::: graph_rag_example_helpers + options: + show_submodules: true \ No newline at end of file diff --git a/docs/reference/graph_retriever/adapters.md b/docs/reference/graph_retriever/adapters.md new file mode 100644 index 00000000..33ad51b3 --- /dev/null +++ b/docs/reference/graph_retriever/adapters.md @@ -0,0 +1,10 @@ +--- +title: adapters +icon: material/subdirectory-arrow-right +toc: + title: graph_retriever.adapters +--- + +# graph_retriever.adapters + +::: graph_retriever.adapters \ No newline at end of file diff --git a/docs/reference/graph_retriever/edges.md b/docs/reference/graph_retriever/edges.md new file mode 100644 index 00000000..b38bd537 --- /dev/null +++ b/docs/reference/graph_retriever/edges.md @@ -0,0 +1,10 @@ +--- +title: edges +icon: material/subdirectory-arrow-right +toc: + title: graph_retriever.edges +--- + +# graph_retriever.edges + +::: graph_retriever.edges \ No newline at end of file diff --git a/docs/reference/graph_retriever/index.md b/docs/reference/graph_retriever/index.md new file mode 100644 index 00000000..fb3c2a8b --- 
/dev/null +++ b/docs/reference/graph_retriever/index.md @@ -0,0 +1,14 @@ +--- +toc: + title: graph_retriever +--- + +# graph_retriever + +::: graph_retriever + options: + members: + - Content + - Node + - atraverse + - traverse \ No newline at end of file diff --git a/docs/reference/graph_retriever/strategies.md b/docs/reference/graph_retriever/strategies.md new file mode 100644 index 00000000..b0398341 --- /dev/null +++ b/docs/reference/graph_retriever/strategies.md @@ -0,0 +1,10 @@ +--- +title: strategies +icon: material/subdirectory-arrow-right +toc: + title: graph_retriever.strategies +--- + +# graph_retriever.strategies + +::: graph_retriever.strategies \ No newline at end of file diff --git a/docs/reference/graph_retriever/testing.md b/docs/reference/graph_retriever/testing.md new file mode 100644 index 00000000..d377233b --- /dev/null +++ b/docs/reference/graph_retriever/testing.md @@ -0,0 +1,12 @@ +--- +title: testing +icon: material/subdirectory-arrow-right +toc: + title: graph_retriever.testing +--- + +# graph_retriever.testing + +::: graph_retriever.testing + options: + show_submodules: true \ No newline at end of file diff --git a/docs/reference/graph_retriever/utils.md b/docs/reference/graph_retriever/utils.md new file mode 100644 index 00000000..439c6a62 --- /dev/null +++ b/docs/reference/graph_retriever/utils.md @@ -0,0 +1,12 @@ +--- +title: utils +icon: material/subdirectory-arrow-right +toc: + title: graph_retriever.utils +--- + +# graph_retriever.utils + +::: graph_retriever.utils + options: + show_submodules: true \ No newline at end of file diff --git a/docs/reference/index.md b/docs/reference/index.md new file mode 100644 index 00000000..31ffa141 --- /dev/null +++ b/docs/reference/index.md @@ -0,0 +1,5 @@ +# API Reference + +* [`graph-retriever`](./graph_retriever/index.md) contains the core graph traversal logic. 
+* [`langchain-graph-retriever`](./langchain_graph_retriever/index.md) contains a [`GraphRetriever`][langchain_graph_retriever.GraphRetriever] and store adapters for use with LangChain. +* [`graph-rag-example-helpers`](./graph_rag_example_helpers/index.md) contains utilities used in some examples, such as recoverably loading large datasets. \ No newline at end of file diff --git a/docs/reference/langchain_graph_retriever/adapters.md b/docs/reference/langchain_graph_retriever/adapters.md new file mode 100644 index 00000000..53770c99 --- /dev/null +++ b/docs/reference/langchain_graph_retriever/adapters.md @@ -0,0 +1,12 @@ +--- +title: adapters +icon: material/subdirectory-arrow-right +toc: + title: ..._retriever.adapters +--- + +# langchain_graph_retriever.adapters + +::: langchain_graph_retriever.adapters + options: + show_submodules: true \ No newline at end of file diff --git a/docs/reference/langchain_graph_retriever/index.md b/docs/reference/langchain_graph_retriever/index.md new file mode 100644 index 00000000..46fa69da --- /dev/null +++ b/docs/reference/langchain_graph_retriever/index.md @@ -0,0 +1,8 @@ +--- +toc: + title: langchain_graph_retriever +--- + +# langchain_graph_retriever + +::: langchain_graph_retriever \ No newline at end of file diff --git a/docs/reference/langchain_graph_retriever/transformers.md b/docs/reference/langchain_graph_retriever/transformers.md new file mode 100644 index 00000000..ed45e45f --- /dev/null +++ b/docs/reference/langchain_graph_retriever/transformers.md @@ -0,0 +1,13 @@ +--- +title: transformers +icon: material/subdirectory-arrow-right +toc: + title: ..._retriever.transformers +--- + +# langchain_graph_retriever.transformers + +::: langchain_graph_retriever.transformers + options: + show_submodules: true + inherited_members: true \ No newline at end of file diff --git a/docs/styles.css b/docs/styles.css new file mode 100644 index 00000000..834a5e9d --- /dev/null +++ b/docs/styles.css @@ -0,0 +1,20 @@ +.grid.two { + 
grid-template-columns: 1fr 1fr; +} +.yellow { + color: #EED202; +} +.green { + color: #228B22; +} +.red { + color: #c83131; +} +.colab { + color: #F9AB00; +} + +/* hide cell tags */ +.jp-Cell .celltoolbar { + display: none !important; +} diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..c090c75b --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,149 @@ +site_name: GraphRAG +site_url: https://datastax.github.io/graph-rag +repo_url: https://github.com/datastax/graph-rag +edit_uri: edit/main/docs/ +site_description: +extra_css: + - styles.css +extra: + analytics: + provider: google + property: "G-7DS0D7NZML" + +plugins: +# Default social cards (see below to override) +# https://squidfunk.github.io/mkdocs-material/setup/setting-up-social-cards/ +- social +- search +- blog +- macros: + on_undefined: strict +- mkdocs-jupyter: + include_source: True + remove_tag_config: + remove_input_tags: ["hide_code"] + remove_cell_tags: ["hide_cell"] + remove_all_outputs_tags: ["hide_output"] +- mkdocstrings: + default_handler: python + handlers: + python: + inventories: + - https://python.langchain.com/api_reference/objects.inv + - https://networkx.org/documentation/stable/objects.inv + - https://numpy.org/doc/stable/objects.inv + - https://docs.python.org/3/objects.inv + + options: + docstring_section_style: spacy + docstring_style: numpy + merge_init_into_class: true + show_symbol_type_toc: true + show_root_toc_entry: false + separate_signature: true + show_signature_annotations: true + signature_crossrefs: true + group_by_category: true + inherited_members: true + extensions: + - griffe_inherited_docstrings + - griffe_pydantic: + schema: true + filters: ["!^_[^_]"] + paths: + - packages/graph-retriever/src/ + - packages/langchain-graph-retriever/src/ + - packages/graph-rag-example-helpers/src/ + +markdown_extensions: + - admonition + - tables + - attr_list + - def_list + - toc: + title: On this page + permalink: true + - md_in_html + - pymdownx.details + - 
pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.tabbed: + alternate_style: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + +theme: + name: material + custom_dir: overrides + features: + - navigation.indexes + - navigation.instant + - navigation.sections + - navigation.tabs + - navigation.tabs.sticky + - navigation.tracking + - navigation.path + - toc.follow + - search.suggest + - search.highlight + - content.code.annotate + - content.code.copy + - content.tooltips + - content.tabs.link + palette: + - scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/weather-night + name: Switch to dark mode + - scheme: slate + primary: teal + accent: teal + toggle: + icon: material/weather-sunny + name: Switch to light mode + +nav: + - Home: index.md + - Guide: + - guide/index.md + - guide/get-started.md + - guide/traversal.md + - guide/edges.md + - guide/strategies.md + - guide/adapters.md + - guide/transformers.md + - guide/migration.md + - FAQs: + - faqs/index.md + - Examples: + - examples/index.md + - examples/lazy-graph-rag.ipynb + - examples/code-generation.ipynb + - Reference: + - reference/index.md + - graph-retriever: + - reference/graph_retriever/index.md + - reference/graph_retriever/edges.md + - reference/graph_retriever/strategies.md + - reference/graph_retriever/adapters.md + - reference/graph_retriever/testing.md + - reference/graph_retriever/utils.md + - langchain-graph-retriever: + - reference/langchain_graph_retriever/index.md + - reference/langchain_graph_retriever/adapters.md + - reference/langchain_graph_retriever/transformers.md + - graph-rag-example-helpers: + - reference/graph_rag_example_helpers/index.md + - Blog: + - 
blog/index.md \ No newline at end of file diff --git a/overrides/main.html b/overrides/main.html new file mode 100644 index 00000000..5f6f17cd --- /dev/null +++ b/overrides/main.html @@ -0,0 +1,20 @@ +<!-- See https://squidfunk.github.io/mkdocs-material/customization/#overriding-blocks --> + +{% extends "base.html" %} + +{% block content %} +{% if page.nb_url %} + <a href="{{ page.nb_url }}" title="Download Notebook" class="md-content__button md-icon"> + <span class="twemoji"> + {% include ".icons/material/download.svg" %} + </span> + </a> + <a href="https://colab.research.google.com/github/datastax/graph-rag/blob/main/docs/{{ page.file.src_uri}}" title="Open in Colab" class="md-content__button md-icon"> + <span class="twemoji colab"> + {% include ".icons/simple/googlecolab.svg" %} + </span> + </a> +{% endif %} + +{{ super() }} +{% endblock content %} \ No newline at end of file diff --git a/overrides/partials/toc.html b/overrides/partials/toc.html new file mode 100644 index 00000000..9f7450d2 --- /dev/null +++ b/overrides/partials/toc.html @@ -0,0 +1,59 @@ +<!-- + Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com> + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +--> + +<!-- Determine title --> +{% set title = lang.t("toc") %} +{% if config.mdx_configs.toc and config.mdx_configs.toc.title %} + {% set title = config.mdx_configs.toc.title %} +{% endif %} +{% if page.meta and page.meta.toc and page.meta.toc.title %} + {% set title = page.meta.toc.title %} +{% endif %} + +<!-- Table of contents --> +<nav class="md-nav md-nav--secondary" aria-label="{{ title }}"> + {% set toc = page.toc %} + + <!-- + Check whether the content starts with a level 1 headline. If it does, the + top-level anchor must be skipped, since it would be redundant to the link + to the current page that is located just above the anchor. Therefore we + directly continue with the children of the anchor. + --> + {% set first = toc | first %} + {% if first and first.level == 1 %} + {% set toc = first.children %} + {% endif %} + + <!-- Table of contents title and list --> + {% if toc %} + <label class="md-nav__title" for="__toc"> + <span class="md-nav__icon md-icon"></span> + {{ title }} + </label> + <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> + {% for toc_item in toc %} + {% include "partials/toc-item.html" %} + {% endfor %} + </ul> + {% endif %} +</nav> \ No newline at end of file diff --git a/packages/graph-rag-example-helpers/pyproject.toml b/packages/graph-rag-example-helpers/pyproject.toml index 72c595a9..12a33d34 100644 --- a/packages/graph-rag-example-helpers/pyproject.toml +++ b/packages/graph-rag-example-helpers/pyproject.toml @@ -41,24 +41,45 @@ classifiers = [ dependencies = [ "astrapy>=1.5.2", "backoff>=2.2.1", + "graph-retriever", + "griffe>=1.5.7", "httpx>=0.28.1", "langchain-core>=0.3.29", "python-dotenv>=1.0.1", + "requests>=2.32.3", "simsimd>=6.2.1", 
"tqdm>=4.67.1", ] [tool.deptry.package_module_name_map] +astrapy = "astrapy" +backoff = "backoff" +graph-retriever = "graph_retriever" +griffe = "griffe" +httpx = "httpx" +langchain-core = "langchain_core" +mypy = "mypy" +networkx-stubs = "networkx_stubs" +pytest = "pytest" +pytest-asyncio = "pytest_asyncio" +pytest-cov = "pytest_cov" python-dotenv = "dotenv" +requests = "requests" +simsimd = "simsimd" +tqdm = "tqdm" +types-requests = "types_requests" [tool.deptry.per_rule_ignores] DEP001 = ["google"] DEP002 = ["simsimd"] +[tool.uv.sources] +graph-retriever = { workspace = true } + [project.urls] -"Homepage" = "https://github.com/datastax/graph-rag" +"Homepage" = "https://datastax.github.io/graph-rag" +"GitHub" = "https://github.com/datastax/graph-rag" "Bug Reports" = "https://github.com/datastax/graph-rag/issues" -"Documentation" = "https://datastax.github.com/graph-rag" [build-system] requires = ["hatchling"] @@ -71,5 +92,6 @@ dev = [ "pytest>=8.3.4", "pytest-cov>=4.0.0", "pytest-asyncio>=0.25.2", + "types-requests>=2.32.0.20241016", ] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/__init__.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/__init__.py new file mode 100644 index 00000000..a7041172 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/__init__.py @@ -0,0 +1,5 @@ +from .fetch import fetch_documents + +__all__ = [ + "fetch_documents", +] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/fetch.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/fetch.py new file mode 100644 index 00000000..22fdf600 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/animals/fetch.py @@ -0,0 +1,31 @@ +import json + +import requests +from langchain_core.documents import Document + +ANIMALS_JSONL_URL = 
"https://raw.githubusercontent.com/datastax/graph-rag/refs/heads/main/data/animals.jsonl" + + +def fetch_documents() -> list[Document]: + """ + Download and parse a list of Documents for use with Graph Retriever. + + This is a small example dataset with useful links. + + This method downloads the dataset each time -- generally it is preferable + to invoke this only once and store the documents in memory or a vector + store. + + Returns + ------- + : + The fetched animal documents. + """ + response = requests.get(ANIMALS_JSONL_URL) + response.raise_for_status() # Ensure we got a valid response + + return [ + Document(id=data["id"], page_content=data["text"], metadata=data["metadata"]) + for line in response.text.splitlines() + if (data := json.loads(line)) + ] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/__init__.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/__init__.py new file mode 100644 index 00000000..a7041172 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/__init__.py @@ -0,0 +1,5 @@ +from .fetch import fetch_documents + +__all__ = [ + "fetch_documents", +] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/fetch.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/fetch.py new file mode 100644 index 00000000..183d8c03 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/astrapy/fetch.py @@ -0,0 +1,38 @@ +import json + +import requests +from langchain_core.documents import Document + +ASTRAPY_JSONL_URL = "https://raw.githubusercontent.com/datastax/graph-rag/refs/heads/main/data/astrapy.jsonl" + + +def fetch_documents() -> list[Document]: + """ + Download and parse a list of Documents for use with Graph Retriever. + + This dataset contains the documentation for the AstraPy project as of version 1.5.2. 
+ + This method downloads the dataset each time -- generally it is preferable + to invoke this only once and store the documents in memory or a vector + store. + + Returns + ------- + : + The fetched astra-py documentation Documents. + + Notes + ----- + - The dataset is setup in a way where the path of the item is the `id`, the pydoc + description is the `page_content`, and the items other attributes are stored in the + `metadata`. + - There are many documents that contain an id and metadata, but no page_content. + """ + response = requests.get(ASTRAPY_JSONL_URL) + response.raise_for_status() # Ensure we got a valid response + + return [ + Document(id=data["id"], page_content=data["text"], metadata=data["metadata"]) + for line in response.text.splitlines() + if (data := json.loads(line)) + ] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/__init__.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/__init__.py index 97c8b1d7..fbc6ab83 100644 --- a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/__init__.py +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/__init__.py @@ -1,5 +1,6 @@ -from .load import aload_2wikimultihop +from .load import BatchPreparer, aload_2wikimultihop __all__ = [ "aload_2wikimultihop", + "BatchPreparer", ] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py index 99d0ff1e..a1312f1f 100644 --- a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/datasets/wikimultihop/load.py @@ -1,4 +1,5 @@ import asyncio +import itertools import os.path import zipfile from collections.abc import Callable, Iterable, Iterator 
@@ -8,11 +9,12 @@ import astrapy.exceptions import backoff import httpx +import requests +from graph_retriever.utils.batched import batched from langchain_core.documents import Document -from langchain_core.vectorstores import VectorStore +from langchain_core.vectorstores.base import VectorStore from tqdm import tqdm # type: ignore[import-untyped] -from graph_rag_example_helpers.batched import batched from graph_rag_example_helpers.persistent_iteration import PersistentIteration LINES_IN_FILE = 5989847 @@ -24,7 +26,7 @@ def wikipedia_lines(para_with_hyperlink_zip_path: str) -> Iterable[bytes]: Parameters ---------- - para_with_hyperlink_zip_path : str + para_with_hyperlink_zip_path : Path to `para_with_hyperlink.zip` downloaded following the instructions in [2wikimultihop](https://github.com/Alab-NII/2wikimultihop?tab=readme-ov-file#new-update-april-7-2021). @@ -39,8 +41,8 @@ def wikipedia_lines(para_with_hyperlink_zip_path: str) -> Iterable[bytes]: yield from para_with_hyperlink -BATCH_SIZE = 1000 -MAX_IN_FLIGHT = 5 +BATCH_SIZE = 512 +MAX_IN_FLIGHT = 1 EXCEPTIONS_TO_RETRY = ( httpx.TransportError, @@ -50,48 +52,96 @@ def wikipedia_lines(para_with_hyperlink_zip_path: str) -> Iterable[bytes]: MAX_RETRIES = 8 BatchPreparer = Callable[[Iterator[bytes]], Iterator[Document]] +"""Function to apply to batches of lines to produce the document.""" + +SHORT_URL = "https://raw.githubusercontent.com/datastax/graph-rag/refs/heads/main/data/para_with_hyperlink_short.jsonl" async def aload_2wikimultihop( - para_with_hyperlink_zip_path: str, store: VectorStore, batch_prepare: BatchPreparer + limit: int | None, + *, + full_para_with_hyperlink_zip_path: str, + store: VectorStore, + batch_prepare: BatchPreparer, ) -> None: """ Load 2wikimultihop data into the given `VectorStore`. Parameters ---------- - para_with_hyperlink_zip_path : str + limit : + Maximum number of lines to load. + If a number less than one thousand, limits loading to the given number of lines. 
+ If `None`, loads all content. + full_para_with_hyperlink_zip_path : Path to `para_with_hyperlink.zip` downloaded following the instructions in [2wikimultihop](https://github.com/Alab-NII/2wikimultihop?tab=readme-ov-file#new-update-april-7-2021). - store : VectorStore + store : The VectorStore to populate. - batch_prepare : BatchPreparer + batch_prepare : Function to apply to batches of lines to produce the document. """ - assert os.path.isfile(para_with_hyperlink_zip_path) + if limit is None or limit > LINES_IN_FILE: + limit = LINES_IN_FILE + + if limit <= 1000: + local_path = "../../data/para_with_hyperlink_short.jsonl" + if os.path.isfile(local_path): + for batch in batched( + itertools.islice(open(local_path, "rb").readlines(), limit), BATCH_SIZE + ): + docs = batch_prepare(iter(batch)) + store.add_documents(list(docs)) + print(f"Loaded from {local_path}") # noqa: T201 + else: + print(f"{local_path} not found, fetching short dataset") # noqa: T201 + response = requests.get(SHORT_URL) + response.raise_for_status() # Ensure we get a valid response + + for batch in batched( + itertools.islice(response.content.splitlines(), limit), BATCH_SIZE + ): + docs = batch_prepare(iter(batch)) + store.add_documents(list(docs)) + print(f"Loaded from {SHORT_URL}") # noqa: T201 + return + + assert os.path.isfile(full_para_with_hyperlink_zip_path) persistence = PersistentIteration( journal_name="load_2wikimultihop.jrnl", - iterator=batched(wikipedia_lines(para_with_hyperlink_zip_path), BATCH_SIZE), + iterator=batched( + itertools.islice(wikipedia_lines(full_para_with_hyperlink_zip_path), limit), + BATCH_SIZE, + ), ) - total_batches = ceil(LINES_IN_FILE / BATCH_SIZE) - persistence.completed_count() + total_batches = ceil(limit / BATCH_SIZE) - persistence.completed_count() if persistence.completed_count() > 0: print( # noqa: T201 f"Resuming loading with {persistence.completed_count()}" f" completed, {total_batches} remaining" ) - # We can't use asyncio.TaskGroup in 3.10. 
This would be simpler with that. - tasks: list[asyncio.Task] = [] - @backoff.on_exception( backoff.expo, EXCEPTIONS_TO_RETRY, max_tries=MAX_RETRIES, ) async def add_docs(batch_docs, offset) -> None: - await store.aadd_documents(batch_docs) - persistence.ack(offset) + from astrapy.exceptions import CollectionInsertManyException + + try: + await store.aadd_documents(batch_docs) + persistence.ack(offset) + except CollectionInsertManyException as err: + for exp in err.exceptions: + exp_desc = str(exp) + if "DOCUMENT_ALREADY_EXISTS" not in exp_desc: + print(exp_desc) # noqa: T201 + raise + + # We can't use asyncio.TaskGroup in 3.10. This would be simpler with that. + tasks: list[asyncio.Task] = [] for offset, batch_lines in tqdm(persistence, total=total_batches): batch_docs = batch_prepare(batch_lines) @@ -103,11 +153,26 @@ async def add_docs(batch_docs, offset) -> None: tasks.append(task) while len(tasks) >= MAX_IN_FLIGHT: - _, pending = await asyncio.wait( + completed, pending = await asyncio.wait( tasks, return_when=asyncio.FIRST_COMPLETED ) + for complete in completed: + if (e := complete.exception()) is not None: + print(f"Exception in task: {e}") # noqa: T201 tasks = list(pending) else: persistence.ack(offset) + # Make sure all the tasks are done. + # This wouldn't be necessary if we used a taskgroup, but that is Python 3.11+. 
+ while len(tasks) > 0: + completed, pending = await asyncio.wait( + tasks, return_when=asyncio.FIRST_COMPLETED + ) + for complete in completed: + if (e := complete.exception()) is not None: + print(f"Exception in task: {e}") # noqa: T201 + tasks = list(pending) + + assert len(tasks) == 0 assert persistence.pending_count() == 0 diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/env.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/env.py index 86142afe..334c8591 100644 --- a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/env.py +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/env.py @@ -8,7 +8,10 @@ class Environment(Enum): """Enumeration of supported environments for examples.""" CASSIO = auto() + """Environment variables for connecting to AstraDB via CassIO""" + ASTRAPY = auto() + """Environment variables for connecting to AstraDB via AstraPy""" def required_envvars(self) -> list[str]: """ @@ -16,7 +19,7 @@ def required_envvars(self) -> list[str]: Returns ------- - list[str] + : The environment variables required in this environment. 
Raises @@ -71,37 +74,34 @@ def initialize_from_prompts(env: Environment = Environment.CASSIO): import getpass for required in env.required_envvars(): - if required in NON_SECRETS: + if required in os.environ: + continue + elif required in NON_SECRETS: os.environ[required] = input(required) else: os.environ[required] = getpass.getpass(required) - if (keyspace := input("ASTRA_DB_KEYSPACE (empty for default)")) is not None: - os.environ["ASTRA_DB_KEYSPACE"] = keyspace - else: - os.environ.pop("ASTRA_DB_KEYSPACE", None) - - if ( - lc_api_key := getpass.getpass("LANGCHAIN_API_KEY (empty for no tracing)") - ) is not None: - os.environ["LANGCHAIN_API_KEY"] = lc_api_key - os.environ["LANGCHAIN_TRACING_V2"] = "True" - else: - os.environ.pop("LANGCHAIN_API_KEY") - os.environ.pop("LANGCHAIN_TRACING_V2") - def initialize_environment(env: Environment = Environment.CASSIO): """ Initialize the environment variables. - This uses the following: - 1. If a `.env` file is found, load environment variables from that. - 2. If not, and running in colab, set necessary environment variables from secrets. - 3. If necessary variables aren't set by the above, then prompts the user. + Parameters + ---------- + env : + The environment to initialize + + Notes + ----- + This uses the following: + + 1. If a `.env` file is found, load environment variables from that. + 2. If not, and running in colab, set necessary environment variables from + secrets. + 3. If necessary variables aren't set by the above, then prompts the user. """ # 1. If a `.env` file is found, load environment variables from that. 
- if (dotenv_path := find_dotenv()) is not None: + if dotenv_path := find_dotenv(): load_dotenv(dotenv_path) verify_environment(env) return diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/edges/__init__.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/__init__.py similarity index 100% rename from packages/langchain-graph-retriever/src/langchain_graph_retriever/edges/__init__.py rename to packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/__init__.py diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/__init__.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/__init__.py new file mode 100644 index 00000000..b1ac8db7 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/__init__.py @@ -0,0 +1,6 @@ +from .format import format_docs, format_document + +__all__ = [ + "format_document", + "format_docs", +] diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/converter.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/converter.py new file mode 100644 index 00000000..b2c4b02b --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/converter.py @@ -0,0 +1,449 @@ +import ast +import json +import os +import typing +from textwrap import indent +from typing import Any + +import griffe + + +def convert( + package_name: str, + search_paths: list[str], + docstring_parser: griffe.DocstringStyle, + output_path: str, +) -> None: + """ + Load and convert a package's objects and documentation into a JSONL file. + + This method converts the internal documentation of modules, classes, functions, and + attributes of a package into a format that is better suited for RAG (and GraphRAG + in particular). 
+ + The code uses the `griffe` library, which is a Python code analysis tool that + extracts information from Python code and docstrings. + + The JSONL file contains one JSON object per line, with the following structure: + id: the path to the object in the package + text: the description of the object (if any, can be empty) + metadata: Always includes `name`, `path`, `kind` keys. + The remaining keys below are included when available. + name: the name of the object + path: the path to the object in the package + kind: either `module`, `class`, `function`, or `attribute` + parameters: the parameters for a class or function. Includes type + information, default values, and descriptions + attributes: the attributes on a class or module. Includes type + information and descriptions + gathered_types: list of non-standard types in the parameters and attributes + imports: list of non-standard types imported by the class or module + exports: list of non-standard types exported by the module + properties: list of boolean properties about the module + example: any code examples for the class, function, or module + references: list of any non-standard types used in the example code + returns: the return type and description + yields: the yield type and description + bases: list of base types inherited by the class + implemented_by: list of types that implement the a base class + + + Parameters + ---------- + package_name : + The name of the package to convert. + search_paths : + The paths to search for the package. + docstring_parser : + The docstring parser to use. + output_path : + The path to save the JSONL file. + + + Examples + -------- + from graph_rag_example_helpers.examples.code_generation.converter import convert + convert("astrapy", [".venv/lib/python3.12/site-packages"], "google", "data") + + + Notes + ----- + - This code was written the `code-generation` example and `astrapy==1.5.2`. It will + probably need tweaking for use with other python packages. 
Use at your own risk. + """ + my_package = griffe.load( + package_name, search_paths=search_paths, docstring_parser=docstring_parser + ) + + converter = _Converter() + items = converter._convert(package_name, my_package) + + with open(os.path.join(output_path, f"{package_name}.jsonl"), "w") as f: + for item in items: + text = item.pop("text", "") + id = item.get("path") + metadata = item + for key, value in metadata.items(): + if isinstance(value, set): + metadata[key] = list(value) + f.write(json.dumps({"id": id, "text": text, "metadata": metadata})) + f.write("\n") + + +class _Converter: + """Converts griffe objects into .""" + + def __init__(self) -> None: + self._alias_lookup: dict[str, str] = {} + self._items: dict[str, dict[str, Any]] = {} + self._bases: dict[str, set[str]] = {} + self._used: bool = False + + self._typing_type_names: set[str] = { + name for name in dir(typing) if not name.startswith("_") + } + """All standard-type names from the typing module.""" + + def _check_first_use(self): + assert not self._used, "Converters cannot be re-used." 
+ self._used = True + + def _extract_alias(self, name, alias: griffe.Alias): + try: + self._alias_lookup[name] = alias.final_target.path + except Exception: + pass + + def _is_first_letter_lowercase(self, s: str) -> bool: + return s[:1].islower() + + def _extract_expr( + self, annotation: str | griffe.Expr | None, gathered_types: set[str] + ) -> str | None: + if isinstance(annotation, griffe.Expr): + annotation = annotation.modernize() + for expr in annotation.iterate(flat=True): + if isinstance(expr, griffe.ExprName): + if ( + self._is_first_letter_lowercase(expr.name) + or expr.name in self._typing_type_names + ): + continue + gathered_types.add(expr.name) + return annotation.__str__() + else: + return annotation + + def _extract_named_element( + self, + el: griffe.DocstringNamedElement, + gathered_types: set[str], + annotation_key: str = "type", + ): + stuff = {} + if el.name != "": + stuff["name"] = el.name + anno = self._extract_expr(el.annotation, gathered_types=gathered_types) + if anno is not None: + stuff[annotation_key] = anno + if el.description != "": + stuff["description"] = el.description + if el.value is not None: + value = self._extract_expr(el.value, gathered_types=gathered_types) + if value is not None: + stuff["value"] = value + return stuff + + def _format_parameter(self, el: dict[str, str]) -> str: + text = el["name"] + if "value" in el and "default" in el: + assert el["value"] == el["default"] + + if "type" in el: + text += f": {el['type']}" + if "default" in el: + text += f" = {el['default']}" + if "description" in el: + desc = indent(el["description"], "\t") + text += f"\n\t{desc}" + return text + + def _format_return(self, el: dict[str, str]) -> str: + items = [] + if "type" in el: + items.append(el["type"]) + if "description" in el: + items.append(indent(el["description"], "\t")) + return "\n\t".join(items) + + def _extract_common(self, obj: griffe.Object) -> dict[str, Any]: + common: dict[str, Any] = { + "kind": obj.kind.value, + "name": 
obj.name, + "path": obj.path, + } + if len(obj.imports) > 0: + common["imports"] = obj.imports + + exports = obj.exports + if isinstance(exports, set): + common["exports"] = list(exports) + elif isinstance(exports, list): + common["exports"] = [] + for export in exports: + if isinstance(export, str): + common["exports"].append(export) + elif isinstance(export, griffe.ExprName): + common["exports"].append(export.name) + + return common + + def _extract_module( + self, obj: griffe.Module, gathered_types: set[str] + ) -> dict[str, Any]: + item = self._extract_common(obj) + item["properties"] = { + "is_init_module": obj.is_init_module, + "is_package": obj.is_package, + "is_subpackage": obj.is_subpackage, + "is_namespace_package": obj.is_namespace_package, + "is_namespace_subpackage": obj.is_namespace_subpackage, + } + if obj.is_init_module: + for export in item.get("exports", []): + if export in item["imports"]: + # add exported items to alias lookup so references can be found + self._alias_lookup[f"{item['path']}.{export}"] = item["imports"][ + export + ] + return item + + def _extract_class( + self, obj: griffe.Class, gathered_types: set[str] + ) -> dict[str, Any]: + item = self._extract_common(obj) + params = [] + for param in obj.parameters: + if param.name == "self": + continue + el = {"name": param.name} + default = self._extract_expr(param.default, gathered_types=gathered_types) + if default is not None: + el["default"] = default + annotation = self._extract_expr( + param.annotation, gathered_types=gathered_types + ) + if annotation is not None: + el["type"] = annotation + params.append(el) + if len(params) > 0: + item["parameters"] = params + + bases = [ + self._extract_expr(b, gathered_types=gathered_types) for b in obj.bases + ] + if len(bases) > 0: + item["bases"] = bases + return item + + def _extract_function( + self, obj: griffe.Function, gathered_types: set[str] + ) -> dict[str, Any]: + item = self._extract_common(obj) + params = [] + for param in 
obj.parameters: + if param.name == "self": + continue + el = {"name": param.name} + default = self._extract_expr(param.default, gathered_types=gathered_types) + if default is not None: + el["default"] = default + annotation = self._extract_expr( + param.annotation, gathered_types=gathered_types + ) + if annotation is not None: + el["type"] = annotation + params.append(el) + if len(params) > 0: + item["parameters"] = params + + item["returns"] = [ + {"type": self._extract_expr(obj.returns, gathered_types=gathered_types)} + ] + return item + + def _extract_attribute( + self, obj: griffe.Attribute, gathered_types: set[str] + ) -> dict[str, Any]: + item = self._extract_common(obj) + el = {"name": obj.name} + value = self._extract_expr(obj.value, gathered_types=gathered_types) + if value is not None: + el["default"] = value + annotation = self._extract_expr(obj.annotation, gathered_types=gathered_types) + if annotation is not None: + el["type"] = annotation + item["value"] = self._format_parameter(el) + return item + + def _extract_object(self, name, obj: griffe.Object): + assert name == obj.name + + if not obj.name.startswith("_"): + gathered_types: set[str] = set() + references: set[str] = set() + if isinstance(obj, griffe.Attribute): + item = self._extract_attribute(obj, gathered_types=gathered_types) + elif isinstance(obj, griffe.Function): + item = self._extract_function(obj, gathered_types=gathered_types) + elif isinstance(obj, griffe.Class): + item = self._extract_class(obj, gathered_types=gathered_types) + elif isinstance(obj, griffe.Module): + item = self._extract_module(obj, gathered_types=gathered_types) + else: + raise TypeError(f"Unknown obj type: {obj}") + + if obj.docstring is not None: + for section in obj.docstring.parsed: + # TODO: merge this stuff with those from above if already existing. 
+ if isinstance(section, griffe.DocstringSectionText): + item["text"] = section.value + elif isinstance(section, griffe.DocstringSectionAdmonition): + admonition_label = self._extract_expr( + section.value.annotation, gathered_types=gathered_types + ) + if admonition_label is not None: + item[admonition_label] = section.value.description + if admonition_label == "example": + references.update( + self._extract_imported_objects( + section.value.description + ) + ) + elif isinstance(section, griffe.DocstringSectionParameters): + params = [] + for param in section.value: + named_element = self._extract_named_element( + param, gathered_types=gathered_types + ) + named_element["default"] = self._extract_expr( + param.default, gathered_types=gathered_types + ) + params.append(named_element) + item["parameters"] = params + elif isinstance(section, griffe.DocstringSectionAttributes): + item["attributes"] = [ + self._extract_named_element( + e, gathered_types=gathered_types + ) + for e in section.value + ] + elif isinstance(section, griffe.DocstringSectionYields): + item["yields"] = [ + self._extract_named_element( + e, gathered_types=gathered_types + ) + for e in section.value + ] + elif isinstance(section, griffe.DocstringSectionReturns): + item["returns"] = [ + self._extract_named_element( + e, gathered_types=gathered_types + ) + for e in section.value + ] + elif isinstance(section, griffe.DocstringSectionExamples): + for example in section.value: + references.update( + self._extract_imported_objects(example[1]) + ) + item["example"] = "/n/n/n".join( + [example[1] for example in section.value] + ) + else: + raise TypeError( + f"Unknown section type: {section} of kind: {section.kind}" + ) + + if item["path"] in references: + references.remove(item["path"]) + if len(references) > 0: + item["references"] = list(references) + + if len(gathered_types) > 0: + item["gathered_types"] = list(gathered_types) + + if obj.path in self._items: + raise Exception(f"{obj.path} was already 
found") + self._items[obj.path] = item + + for _name, _obj in obj.members.items(): + self._extract_object_or_alias(_name, _obj) + + def _extract_object_or_alias(self, name: str, obj: griffe.Object | griffe.Alias): + if isinstance(obj, griffe.Object): + self._extract_object(name, obj) + elif isinstance(obj, griffe.Alias): + self._extract_alias(name, obj) + + def _extract_imported_objects(self, code: str) -> set[str]: + """ + Extract the fully qualified names of imported objects from a given code snippet. + + If an error occurs, it removes the code from the error and beyond, and retries. + + Returns + ------- + The set of imported types + """ + code = ( + code.replace("\n>>>", "\n") + .replace("\n...", "\n") + .replace(">>> ", "") + .replace("\n ", "\n") + .replace("imort", "import") + ) + imported_objects = set() + + while code: + try: + tree = ast.parse(code) + for node in ast.walk(tree): + if isinstance(node, ast.ImportFrom) and node.module: + for alias in node.names: + imported_objects.add(f"{node.module}.{alias.name}") + break # Stop retrying if parsing succeeds + except SyntaxError as e: + # Trim code before the error line and retry + error_line = e.lineno + if error_line is None or error_line <= 1: + break # If error is at the first line, there's nothing to salvage + code = "\n".join(code.splitlines()[: error_line - 1]) + + return imported_objects + + def _update_item_paths(self, item: dict[str, Any]): + """Update types to full paths for item attributes.""" + for key in ["gathered_types", "bases", "references"]: + if key in item: + updated = [self._alias_lookup.get(k, k) for k in item[key]] + item[key] = updated + + if "bases" in item: + for base in item["bases"]: + self._bases.setdefault(base, set()).add(item["path"]) + + def _convert( + self, package_name: str, package: griffe.Object | griffe.Alias + ) -> list[dict[str, Any]]: + self._check_first_use() + self._extract_object_or_alias(package_name, package) + + for item in self._items.values(): + 
self._update_item_paths(item) + + for base, implemented_by in self._bases.items(): + if base in self._items: + self._items[base]["implemented_by"] = implemented_by + + return list(self._items.values()) diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/format.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/format.py new file mode 100644 index 00000000..5450f744 --- /dev/null +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/examples/code_generation/format.py @@ -0,0 +1,94 @@ +from textwrap import indent + +from langchain_core.documents import Document + + +def _add_tabs(text: str) -> str: + return indent(text, "\t") + + +def _format_parameter(el: dict[str, str]) -> str: + text = el["name"] + if "value" in el and "default" in el: + assert el["value"] == el["default"] + + if "type" in el: + text += f": {el['type']}" + if "default" in el: + text += f" = {el['default']}" + if "description" in el: + desc = _add_tabs(el["description"]) + text += f"\n\t{desc}" + return text + + +def _format_return(el: dict[str, str]) -> str: + items = [] + if "type" in el: + items.append(el["type"]) + if "description" in el: + items.append(_add_tabs(el["description"])) + return "\n\t".join(items) + + +def format_document(doc: Document, debug: bool = False) -> str: + """Format a document as documentation for including as context in a LLM query.""" + metadata = doc.metadata + text = f"{metadata['name']} ({metadata['kind']})\n\n" + + text += f"path: \n\t{metadata['path']}\n\n" + + for key in ["bases", "exports", "implemented_by"]: + if key in metadata: + values = "\n".join(metadata[key]) + text += f"{key}: \n\t{_add_tabs(values)}\n\n" + + if "properties" in metadata: + props = [f"{k}: {v}" for k, v in metadata["properties"].items()] + values = "\n".join(props) + text += f"properties: \n\t{_add_tabs(values)}\n\n" + + if doc.page_content != "": + text += f"description: 
\n\t{_add_tabs(doc.page_content)}\n\n" + elif "value" in metadata: + text += f"{metadata['value']}\n\n" + + for key in ["attributes", "parameters"]: + if key in metadata: + values = "\n\n".join([_format_parameter(v) for v in metadata[key]]) + text += f"{key}: \n\t{_add_tabs(values)}\n\n" + + for key in ["returns", "yields"]: + if key in metadata: + values = "\n\n".join([_format_return(v) for v in metadata[key]]) + text += f"{key}: \n\t{_add_tabs(values)}\n\n" + + for key in ["note", "example"]: + if key in metadata: + text += f"{key}: \n\t{_add_tabs(metadata[key])}\n\n" + + if debug: + if "imports" in metadata: + imports = [] + for as_name, real_name in metadata["imports"].items(): + if real_name == as_name: + imports.append(real_name) + else: + imports.append(f"{real_name} as {as_name}") + values = "\n".join(imports) + text += f"imports: \n\t{_add_tabs(values)}\n\n" + + for key in ["references", "gathered_types"]: + if key in metadata: + values = "\n".join(metadata[key]) + text += f"{key}: \n\t{_add_tabs(values)}\n\n" + + if "parent" in metadata: + text += f"parent: {metadata['parent']}\n\n" + + return text + + +def format_docs(docs: list[Document]) -> str: + """Format documents as documentation for including as context in a LLM query.""" + return "\n---\n".join(format_document(doc) for doc in docs) diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/persistent_iteration.py b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/persistent_iteration.py index ed455ff9..b488de49 100644 --- a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/persistent_iteration.py +++ b/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/persistent_iteration.py @@ -22,11 +22,11 @@ class PersistentIteration(Generic[T]): Parameters ---------- - journal_name : str + journal_name : Name of the journal file to use. If it doesn't exist it will be created. The indices of completed items will be written to the journal. 
- iterator : Iterator[T] + iterator : The iterator to process persistently. It must be deterministic -- elements should always be returned in the same order on restarts. """ @@ -51,10 +51,10 @@ def __next__(self) -> tuple[Offset, T]: Returns ------- - offset : Offset + offset : The offset of the next item. Should be acknowledge after the item is finished processing. - item : T + item : The next item. """ index, item = next(self.iterator) @@ -73,7 +73,7 @@ def __iter__(self) -> Iterator[tuple[Offset, T]]: Returns ------- - Iterator[T] + : """ return self @@ -86,12 +86,12 @@ def ack(self, offset: Offset) -> int: Parameters ---------- - offset : int + offset : The offset to acknowledge. Returns ------- - int + : The numebr of pending elements. """ self._write_journal.write(f"{offset.index}\n") @@ -107,7 +107,7 @@ def pending_count(self) -> int: Returns ------- - int + : The number of pending elements. """ return len(self.pending) @@ -118,7 +118,7 @@ def completed_count(self) -> int: Returns ------- - int + : The number of completed elements. 
""" return len(self._completed) diff --git a/packages/graph-retriever/pyproject.toml b/packages/graph-retriever/pyproject.toml index 1990b3ba..f27b512c 100644 --- a/packages/graph-retriever/pyproject.toml +++ b/packages/graph-retriever/pyproject.toml @@ -38,15 +38,32 @@ classifiers = [ "Operating System :: OS Independent", ] -dependencies = [] +dependencies = [ + "numpy>=1.26.4", + "typing-extensions>=4.12.2", + "pytest>=8.3.4", + "immutabledict>=4.2.1", +] [project.urls] -"Homepage" = "https://github.com/datastax/graph-rag" +"Homepage" = "https://datastax.github.io/graph-rag" +"GitHub" = "https://github.com/datastax/graph-rag" "Bug Reports" = "https://github.com/datastax/graph-rag/issues" -"Documentation" = "https://datastax.github.com/graph-rag" [project.optional-dependencies] +simsimd = [ + "simsimd>=6.2.1", +] +testing = [ + "pytest>=8.3.4", +] +[tool.deptry.package_module_name_map] +immutabledict = "immutabledict" +numpy = "numpy" +pytest = "pytest" +simsimd = "simsimd" +typing-extensions = "typing_extensions" [tool.pytest.ini_options] asyncio_mode = "auto" @@ -62,5 +79,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [dependency-groups] -dev = [] +dev = [ + "pytest>=8.3.4", +] diff --git a/packages/graph-retriever/src/graph_retriever/__init__.py b/packages/graph-retriever/src/graph_retriever/__init__.py index e69de29b..46e1e768 100644 --- a/packages/graph-retriever/src/graph_retriever/__init__.py +++ b/packages/graph-retriever/src/graph_retriever/__init__.py @@ -0,0 +1,18 @@ +""" +Provides retrieval functions combining vector and graph traversal. + +The main methods are [`traverse`][graph_retriever.traverse] and +[`atraverse`][graph_retriever.atraverse] which provide synchronous and +asynchronous traversals. 
+""" + +from .content import Content +from .traversal import atraverse, traverse +from .types import Node + +__all__ = [ + "Content", + "Node", + "traverse", + "atraverse", +] diff --git a/packages/graph-retriever/src/graph_retriever/adapters/__init__.py b/packages/graph-retriever/src/graph_retriever/adapters/__init__.py new file mode 100644 index 00000000..7143a0e0 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/adapters/__init__.py @@ -0,0 +1,5 @@ +from .base import Adapter + +__all__ = [ + "Adapter", +] diff --git a/packages/graph-retriever/src/graph_retriever/adapters/base.py b/packages/graph-retriever/src/graph_retriever/adapters/base.py new file mode 100644 index 00000000..2a748ed2 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/adapters/base.py @@ -0,0 +1,385 @@ +"""Defines the base class for vector store adapters.""" + +import abc +import asyncio +from collections.abc import Iterable, Sequence +from typing import Any + +from immutabledict import immutabledict + +from graph_retriever.content import Content +from graph_retriever.edges import Edge, IdEdge, MetadataEdge +from graph_retriever.utils.run_in_executor import run_in_executor +from graph_retriever.utils.top_k import top_k + + +class Adapter(abc.ABC): + """ + Base adapter for integrating vector stores with the graph retriever system. + + This class provides a foundation for custom adapters, enabling consistent + interaction with various vector store implementations. + """ + + def __init__(self) -> None: + pass + + @abc.abstractmethod + def search_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + """ + Return content items most similar to the query. + + Also returns the embedded query vector. + + Parameters + ---------- + query : + Input text. + k : + Number of content items to return. + filter : + Filter on the metadata to apply. 
+ kwargs : + Additional keyword arguments. + + Returns + ------- + query_embedding : + The query embedding used for selecting the most relevant content. + contents : + List of up to `k` content items most similar to the query vector. + """ + ... + + async def asearch_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + """ + Asynchronously return content items most similar to the query. + + Also returns the embedded query vector. + + Parameters + ---------- + query : + Input text. + k : + Number of content items to return. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + query_embedding : + The query embedding used for selecting the most relevant content. + contents : + List of up to `k` content items most similar to the query + vector. + """ + return await run_in_executor( + None, self.search_with_embedding, query, k, filter, **kwargs + ) + + @abc.abstractmethod + def search( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + """ + Return content items most similar to the query vector. + + Parameters + ---------- + embedding : + The query embedding used for selecting the most relevant content. + k : + Number of content items to return. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + : + List of content items most similar to the query vector. + """ + ... + + async def asearch( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + """ + Asynchronously return content items most similar to the query vector. + + Parameters + ---------- + embedding : + The query embedding used for selecting the most relevant content. + k : + Number of content items to return. 
+ filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + : + List of content items most similar to the query vector. + """ + return await run_in_executor( + None, + self.search, + embedding, + k, + filter, + **kwargs, + ) + + @abc.abstractmethod + def get( + self, + ids: Sequence[str], + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + """ + Get content items by ID. + + Fewer content items may be returned than requested if some IDs are + not found or if there are duplicated IDs. This method should **NOT** + raise exceptions if no content items are found for some IDs. + + Users should not assume that the order of the returned content items + matches the order of the input IDs. Instead, users should rely on + the ID field of the returned content items. + + Parameters + ---------- + ids : + List of IDs to get. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. These are up to the implementation. + + Returns + ------- + : + List of content items that were found. + """ + ... + + async def aget( + self, + ids: Sequence[str], + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + """ + Asynchronously get content items by ID. + + Fewer content items may be returned than requested if some IDs are + not found or if there are duplicated IDs. This method should **NOT** + raise exceptions if no content items are found for some IDs. + + Users should not assume that the order of the returned content items + matches the order of the input IDs. Instead, users should rely on + the ID field of the returned content items. + + Parameters + ---------- + ids : + List of IDs to get. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. These are up to the implementation. + + Returns + ------- + : + List of content items that were found. 
+ """ + return await run_in_executor( + None, + self.get, + ids, + filter, + **kwargs, + ) + + def adjacent( + self, + edges: set[Edge], + query_embedding: list[float], + k: int, + filter: dict[str, Any] | None, + **kwargs: Any, + ) -> Iterable[Content]: + """ + Return the content items with at least one matching incoming edge. + + Parameters + ---------- + edges : + The edges to look for. + query_embedding : + The query embedding used for selecting the most relevant content. + k : + The number of relevant content items to select. + filter : + Optional metadata to filter the results. + kwargs : + Keyword arguments to pass to the similarity search. + + Returns + ------- + : + Iterable of adjacent content items. + + Raises + ------ + ValueError + If unsupported edge types are encountered. + """ + results: list[Content] = [] + + ids = [] + for edge in edges: + if isinstance(edge, MetadataEdge): + docs = self.search( + embedding=query_embedding, + k=k, + filter=self._metadata_filter(base_filter=filter, edge=edge), + **kwargs, + ) + results.extend(docs) + elif isinstance(edge, IdEdge): + ids.append(edge.id) + else: + raise ValueError(f"Unsupported edge: {edge}") + + if ids: + results.extend(self.get(ids, filter=filter)) + + return top_k( + results, + embedding=query_embedding, + k=k, + ) + + async def aadjacent( + self, + edges: set[Edge], + query_embedding: list[float], + k: int, + filter: dict[str, Any] | None, + **kwargs: Any, + ) -> Iterable[Content]: + """ + Asynchronously return the content items with at least one matching edge. + + Parameters + ---------- + edges : + The edges to look for. + query_embedding : + The query embedding used for selecting the most relevant content. + k : + The number of relevant content items to select for the edges. + filter : + Optional metadata to filter the results. + kwargs : + Keyword arguments to pass to the similarity search. + + Returns + ------- + : + Iterable of adjacent content items. 
+ + Raises + ------ + ValueError + If unsupported edge types are encountered. + """ + tasks = [] + ids = [] + for edge in edges: + if isinstance(edge, MetadataEdge): + tasks.append( + self.asearch( + embedding=query_embedding, + k=k, + filter=self._metadata_filter(base_filter=filter, edge=edge), + **kwargs, + ) + ) + elif isinstance(edge, IdEdge): + ids.append(edge.id) + else: + raise ValueError(f"Unsupported edge: {edge}") + + if ids: + tasks.append(self.aget(ids, filter)) + + results: list[Content] = [ + c + for completed_task in asyncio.as_completed(tasks) + for c in await completed_task + ] + + return top_k( + results, + embedding=query_embedding, + k=k, + ) + + def _metadata_filter( + self, + edge: Edge, + base_filter: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Return a filter for the `base_filter` and incoming edges from `edge`. + + Parameters + ---------- + base_filter : + Any base metadata filter that should be used for search. + Generally corresponds to the user specified filters for the entire + traversal. Should be combined with the filters necessary to support + nodes with an *incoming* edge matching `edge`. + edge : + An optional edge which should be added to the filter. + + Returns + ------- + : + The metadata dictionary to use for the given filter. 
+ """ + assert isinstance(edge, MetadataEdge) + value = edge.value + if isinstance(value, immutabledict): + value = dict(value) + return {edge.incoming_field: value, **(base_filter or {})} diff --git a/packages/graph-retriever/src/graph_retriever/adapters/in_memory.py b/packages/graph-retriever/src/graph_retriever/adapters/in_memory.py new file mode 100644 index 00000000..de8cb858 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/adapters/in_memory.py @@ -0,0 +1,144 @@ +import abc +from collections.abc import Callable, Iterable, Sequence +from typing import Any, TypeAlias + +from typing_extensions import override + +from graph_retriever.adapters.base import Adapter +from graph_retriever.content import Content +from graph_retriever.utils.math import cosine_similarity + +SENTINEL = object() + +Embedding: TypeAlias = Callable[[str], list[float]] + + +class InMemoryBase(Adapter, abc.ABC): + """ + The base class for in-memory adapters that use dict-based metadata filters. + + These are intended (mostly) for demonstration purposes and testing. + """ + + def __init__(self, embedding: Embedding, content: list[Content]) -> None: + """ + Initialize with the given embedding function. + + Parameters + ---------- + embedding : + embedding function to use. 
+ """ + self.store: dict[str, Content] = {c.id: c for c in content} + self.embedding = embedding + + @override + def search_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + query_embedding = self.embedding(query) + docs = self.search( + embedding=query_embedding, + k=k, + filter=filter, + **kwargs, + ) + return query_embedding, docs + + @override + async def asearch_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + query_embedding = self.embedding(query) + docs = await self.asearch( + embedding=query_embedding, + k=k, + filter=filter, + **kwargs, + ) + return query_embedding, docs + + @override + def search( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + # get all docs with fixed order in list + candidates = self._matching_content(filter) + + if not candidates: + return [] + + similarity = cosine_similarity([embedding], [c.embedding for c in candidates])[ + 0 + ] + + # get the indices ordered by similarity score + top_k_idx = similarity.argsort()[::-1][:k] + + return [candidates[idx] for idx in top_k_idx] + + @override + def get( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Content]: + return [ + c + for id in ids + if (c := self.store.get(id, None)) + if self._matches(filter, c) + ] + + def _matching_content(self, filter: dict[str, Any] | None = None) -> list[Content]: + """Return a list of content matching the given filters.""" + if filter: + return [c for c in self.store.values() if self._matches(filter, c)] + else: + return list(self.store.values()) + + def _matches(self, filter: dict[str, Any] | None, content: Content) -> bool: + """Return whether `content` matches the given `filter`.""" + if not filter: + return True + + for key, 
filter_value in filter.items(): + content_value = content.metadata + for key_part in key.split("."): + content_value = content_value.get(key_part, SENTINEL) + if content_value is SENTINEL: + break + if not self._value_matches(filter_value, content_value): + return False + return True + + @abc.abstractmethod + def _value_matches(self, filter_value: str, content_value: Any) -> bool: + """Return whether the `content_value` matches the `filter_value`.""" + ... + + +class InMemory(InMemoryBase): + """ + In-Memory VectorStore that supports list-based metadata. + + This In-Memory store simulates VectorStores like AstraDB and OpenSearch + """ + + @override + def _value_matches(self, filter_value: str, content_value: Any) -> bool: + return (filter_value == content_value) or ( + isinstance(content_value, Iterable) + and not isinstance(content_value, str | bytes) + and filter_value in content_value + ) diff --git a/packages/graph-retriever/src/graph_retriever/content.py b/packages/graph-retriever/src/graph_retriever/content.py new file mode 100644 index 00000000..7480c7b3 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/content.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import dataclasses +from collections.abc import Callable +from typing import Any + + +@dataclasses.dataclass +class Content: + """ + Model representing retrieved content. + + Parameters + ---------- + id : + The ID of the content. + content : + The content. + embedding : + The embedding of the content. + metadata : + The metadata associated with the content. + mime_type : + The MIME type of the content. 
+ """ + + id: str + content: str + embedding: list[float] + metadata: dict[str, Any] = dataclasses.field(default_factory=dict) + mime_type: str = "text/plain" + + @staticmethod + def new( + id: str, + content: str, + embedding: list[float] | Callable[[str], list[float]], + *, + metadata: dict[str, Any] | None = None, + mime_type: str = "text/plain", + ) -> Content: + """ + Create a new content. + + Parameters + ---------- + id : + The ID of the content. + content : + The content. + embedding : + The embedding, or a function to apply to the content to compute the + embedding. + metadata : + The metadata associated with the content. + mime_type : + The MIME type of the content. + + Returns + ------- + : + The created content. + """ + return Content( + id=id, + content=content, + embedding=embedding(content) if callable(embedding) else embedding, + metadata=metadata or {}, + mime_type=mime_type, + ) diff --git a/packages/graph-retriever/src/graph_retriever/edges/__init__.py b/packages/graph-retriever/src/graph_retriever/edges/__init__.py new file mode 100644 index 00000000..923c829c --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/edges/__init__.py @@ -0,0 +1,20 @@ +""" +Specification and implementation of edges functions. + +These are responsible for extracting edges from nodes and expressing them in way +that the adapters can implement. 
+""" + +from ._base import Edge, EdgeFunction, Edges, IdEdge, MetadataEdge +from .metadata import EdgeSpec, Id, MetadataEdgeFunction + +__all__ = [ + "Edge", + "MetadataEdge", + "IdEdge", + "Edges", + "EdgeFunction", + "EdgeSpec", + "Id", + "MetadataEdgeFunction", +] diff --git a/packages/graph-retriever/src/graph_retriever/edges/_base.py b/packages/graph-retriever/src/graph_retriever/edges/_base.py new file mode 100644 index 00000000..e2d9e0ba --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/edges/_base.py @@ -0,0 +1,91 @@ +import abc +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any, TypeAlias + +from immutabledict import immutabledict + +from graph_retriever import Content + + +class Edge(abc.ABC): + """ + An edge identifies properties necessary for finding matching nodes. + + Sub-classes should be hashable. + """ + + pass + + +@dataclass(frozen=True) +class MetadataEdge(Edge): + """ + Link to nodes with specific metadata. + + A `MetadataEdge` connects to nodes with either: + + - `node.metadata[field] == value` + - `node.metadata[field] CONTAINS value` (if the metadata is a collection). + + Parameters + ---------- + incoming_field : + The name of the metadata field storing incoming edges. + value : + The value associated with the key for this edge + """ + + def __init__(self, incoming_field: str, value: Any) -> None: + # `self.field = value` and `setattr(self, "field", value)` -- don't work + # because of frozen. we need to call `__setattr__` directly (as the + # default `__init__` would do) to initialize the fields of the frozen + # dataclass. + object.__setattr__(self, "incoming_field", incoming_field) + + if isinstance(value, dict): + value = immutabledict(value) + object.__setattr__(self, "value", value) + + incoming_field: str + value: Any + + +@dataclass(frozen=True) +class IdEdge(Edge): + """ + An `IdEdge` connects to nodes with `node.id == id`. 
+ + Parameters + ---------- + id : + The ID of the node to link to. + """ + + id: str + + +@dataclass +class Edges: + """ + Information about the incoming and outgoing edges. + + Parameters + ---------- + incoming : + Incoming edges that link to this node. + outgoing : + Edges that this node link to. These edges should be defined in terms of + the *incoming* `Edge` they match. For instance, a link from "mentions" + to "id" would link to `IdEdge(...)`. + """ + + incoming: set[Edge] + outgoing: set[Edge] + + +EdgeFunction: TypeAlias = Callable[[Content], Edges] +"""A function for extracting edges from nodes. + +Implementations should be deterministic. +""" diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/edges/metadata.py b/packages/graph-retriever/src/graph_retriever/edges/metadata.py similarity index 62% rename from packages/langchain-graph-retriever/src/langchain_graph_retriever/edges/metadata.py rename to packages/graph-retriever/src/graph_retriever/edges/metadata.py index 81bafb03..ae860279 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/edges/metadata.py +++ b/packages/graph-retriever/src/graph_retriever/edges/metadata.py @@ -4,7 +4,9 @@ from collections.abc import Iterable from typing import Any, TypeAlias -from langchain_graph_retriever.types import Edge, Edges, IdEdge, MetadataEdge, Node +from graph_retriever.content import Content + +from ._base import Edge, Edges, IdEdge, MetadataEdge BASIC_TYPES = (str, bool, int, float, complex, bytes) @@ -13,38 +15,60 @@ # elements. SENTINEL = object() +ID_MAGIC_STRING = "$id" + class Id: - """Place-holder type indicating that the ID should be used.""" + """ + Place-holder type indicating that the ID should be used. + + Deprecated: Use "$id" instead. + """ pass EdgeSpec: TypeAlias = tuple[str | Id, str | Id] +""" +The definition of an edge for traversal, represented as a pair of fields +representing the source and target of the edge. 
Each may be: + +- A string, `key`, indicating `doc.metadata[key]` as the value. +- The magic string `"$id"`, indicating `doc.id` as the value. + +Examples +-------- +``` +url_to_href_edge = ("url", "href") +keywords_to_keywords_edge = ("keywords", "keywords") +mentions_to_id_edge = ("mentions", "$id") +id_to_mentions_edge = ("$id", "mentions") +``` +""" + + +def _nested_get(metadata: dict[str, Any], key: str) -> Any: + value = metadata + for key_part in key.split("."): + value = value.get(key_part, SENTINEL) + if value is SENTINEL: + break + return value class MetadataEdgeFunction: """ Helper for extracting and encoding edges in metadata. - This class provides tools to extract incoming and outgoing edges from document - metadata and normalize metadata where needed. Both incoming and outgoing edges - use the same target name, enabling equality matching for keys. + This class provides tools to extract incoming and outgoing edges from + document metadata. Both incoming and outgoing edges use the same target + name, enabling equality matching for keys. Parameters ---------- - edges : list[EdgeSpec] - Definitions of edges for traversal, represented as a pair of fields - representing the source and target of the edges. Each may be: - - - A string, `key`, indicating `doc.metadata[key]` as the value. - - The placeholder `Id()`, indicating `doc.id` as the value. - - Attributes - ---------- - edges : list[EdgeSpec] - Definitions of edges for traversal, represented as pairs of incoming - and outgoing keys. + edges : + Definitions of edges for traversal, represented as a pair of fields + representing the source and target of the edges. Raises ------ @@ -77,12 +101,12 @@ def _edges_from_dict( ---------- metadata :dict[str, Any] The metadata dictionary to process. - incoming : bool, default False + incoming : If True, extracts edges for incoming relationships. Returns ------- - set[Edge] + : A set of edges extracted from the metadata. 
Notes @@ -95,7 +119,7 @@ def _edges_from_dict( if incoming: source_key = target_key - if isinstance(target_key, Id): + if target_key == ID_MAGIC_STRING or isinstance(target_key, Id): def mk_edge(v) -> Edge: return IdEdge(id=str(v)) @@ -104,10 +128,10 @@ def mk_edge(v) -> Edge: def mk_edge(v) -> Edge: return MetadataEdge(incoming_field=target_key, value=v) - if isinstance(source_key, Id): + if source_key == ID_MAGIC_STRING or isinstance(source_key, Id): edges.add(mk_edge(id)) else: - value = metadata.get(source_key, SENTINEL) + value = _nested_get(metadata, source_key) if isinstance(value, BASIC_TYPES): edges.add(mk_edge(value)) elif isinstance(value, Iterable): @@ -122,24 +146,26 @@ def mk_edge(v) -> Edge: warnings.warn(f"Unsupported value {value} in '{source_key}'") return edges - def __call__(self, node: Node) -> Edges: + def __call__(self, content: Content) -> Edges: """ - Extract incoming and outgoing edges from metadata. + Extract incoming and outgoing edges for a piece of content. This method retrieves edges based on the declared edge definitions, taking into account whether nested metadata is used. Parameters ---------- - metadata : dict[str, Any] - The metadata dictionary to extract edges from. + content : + The content to extract edges from. 
Returns ------- - Edges - specyfing the incoming and outgoing edges of the node + : + the incoming and outgoing edges of the node """ - outgoing_edges = self._edges_from_dict(node.id, node.metadata) - incoming_edges = self._edges_from_dict(node.id, node.metadata, incoming=True) + outgoing_edges = self._edges_from_dict(content.id, content.metadata) + incoming_edges = self._edges_from_dict( + content.id, content.metadata, incoming=True + ) return Edges(incoming=incoming_edges, outgoing=outgoing_edges) diff --git a/packages/langchain-graph-retriever/tests/integration_tests/__init__.py b/packages/graph-retriever/src/graph_retriever/py.typed similarity index 100% rename from packages/langchain-graph-retriever/tests/integration_tests/__init__.py rename to packages/graph-retriever/src/graph_retriever/py.typed diff --git a/packages/graph-retriever/src/graph_retriever/strategies/__init__.py b/packages/graph-retriever/src/graph_retriever/strategies/__init__.py new file mode 100644 index 00000000..b3f44021 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/strategies/__init__.py @@ -0,0 +1,14 @@ +"""Strategies determine which nodes are selected during traversal.""" + +from .base import NodeTracker, Strategy +from .eager import Eager +from .mmr import Mmr +from .scored import Scored + +__all__ = [ + "Eager", + "Mmr", + "NodeTracker", + "Scored", + "Strategy", +] diff --git a/packages/graph-retriever/src/graph_retriever/strategies/base.py b/packages/graph-retriever/src/graph_retriever/strategies/base.py new file mode 100644 index 00000000..0524b7d8 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/strategies/base.py @@ -0,0 +1,246 @@ +"""Define the base traversal strategy.""" + +from __future__ import annotations + +import abc +import dataclasses +from collections.abc import Iterable +from typing import Any + +from graph_retriever.types import Node + +DEFAULT_SELECT_K = 5 + + +class NodeTracker: + """ + Helper class initiating node selection and 
traversal. + + Call .select(nodes) to add nodes to the result set. + Call .traverse(nodes) to add nodes to the next traversal. + Call .select_and_traverse(nodes) to add nodes to the result set and the next + traversal. + """ + + def __init__(self, select_k: int, max_depth: int | None) -> None: + self._select_k: int = select_k + self._max_depth: int | None = max_depth + self._visited_node_ids: set[str] = set() + # use a dict to preserve order + self.to_traverse: dict[str, Node] = dict() + self.selected: list[Node] = [] + + @property + def num_remaining(self): + """The remaining number of nodes to be selected.""" + return max(self._select_k - len(self.selected), 0) + + def select(self, nodes: Iterable[Node]) -> None: + """Select nodes to be included in the result set.""" + for node in nodes: + node.extra_metadata["_depth"] = node.depth + node.extra_metadata["_similarity_score"] = node.similarity_score + self.selected.extend(nodes) + + def traverse(self, nodes: Iterable[Node]) -> int: + """ + Select nodes to be included in the next traversal. + + Returns + ------- + Number of nodes added for traversal. + + Notes + ----- + - Nodes are only added if they have not been visited before. + - Nodes are only added if they do not exceed the maximum depth. + - If no new nodes are chosen for traversal, or selected for output, then + the traversal will stop. + - Traversal will also stop if the number of selected nodes reaches the select_k + limit. + """ + new_nodes = { + n.id: n + for n in nodes + if self._not_visited(n.id) + if self._max_depth is None or n.depth < self._max_depth + } + self.to_traverse.update(new_nodes) + self._visited_node_ids.update(new_nodes.keys()) + return len(new_nodes) + + def select_and_traverse(self, nodes: Iterable[Node]) -> int: + """ + Select nodes to be included in the result set and the next traversal. + + Returns + ------- + Number of nodes added for traversal. 
+ + Notes + ----- + - Nodes are only added for traversal if they have not been visited before. + - Nodes are only added for traversal if they do not exceed the maximum depth. + - If no new nodes are chosen for traversal, or selected for output, then + the traversal will stop. + - Traversal will also stop if the number of selected nodes reaches the select_k + limit. + """ + self.select(nodes) + return self.traverse(nodes) + + def _not_visited(self, id: str): + """Return true if the node has not been visited.""" + return id not in self._visited_node_ids + + def _should_stop_traversal(self): + """Return true if traversal should be stopped.""" + return self.num_remaining == 0 or len(self.to_traverse) == 0 + + +@dataclasses.dataclass(kw_only=True) +class Strategy(abc.ABC): + """ + Interface for configuring node selection and traversal strategies. + + This base class defines how nodes are selected, traversed, and finalized during + a graph traversal. Implementations can customize behaviors like limiting the depth + of traversal, scoring nodes, or selecting the next set of nodes for exploration. + + Parameters + ---------- + select_k : + Maximum number of nodes to select and return during traversal. + start_k : + Number of nodes to fetch via similarity for starting the traversal. + Added to any initial roots provided to the traversal. + adjacent_k : + Number of nodes to fetch for each outgoing edge. + max_traverse : + Maximum number of nodes to traverse outgoing edges from before returning. + If `None`, there is no limit. + max_depth : + Maximum traversal depth. If `None`, there is no limit. + k: + Deprecated: Use `select_k` instead. + Maximum number of nodes to select and return during traversal. 
+ """ + + select_k: int = dataclasses.field(default=DEFAULT_SELECT_K) + start_k: int = 4 + adjacent_k: int = 10 + max_traverse: int | None = None + max_depth: int | None = None + k: int = dataclasses.field(default=DEFAULT_SELECT_K, repr=False) + + _query_embedding: list[float] = dataclasses.field(default_factory=list) + + def __post_init__(self): + """Allow passing the deprecated 'k' value instead of 'select_k'.""" + if self.select_k == DEFAULT_SELECT_K and self.k != DEFAULT_SELECT_K: + self.select_k = self.k + else: + self.k = self.select_k + + @abc.abstractmethod + def iteration(self, *, nodes: Iterable[Node], tracker: NodeTracker) -> None: + """ + Process the newly discovered nodes on each iteration. + + This method should call `tracker.traverse()` and/or `tracker.select()` + as appropriate to update the nodes that need to be traversed in this iteration + or selected at the end of the retrieval, respectively. + + Parameters + ---------- + nodes : + The newly discovered nodes found from either: + - the initial vector store retrieval + - incoming edges from nodes chosen for traversal in the previous iteration + tracker : + The tracker object to manage the traversal and selection of nodes. + + Notes + ----- + - This method is called once for each iteration of the traversal. + - In order to stop iterating either choose to not traverse any additional nodes + or don't select any additional nodes for output. + """ + ... + + def finalize_nodes(self, selected: Iterable[Node]) -> Iterable[Node]: + """ + Finalize the selected nodes. + + This method is called before returning the final set of nodes. It allows + the strategy to perform any final processing or re-ranking of the selected + nodes. + + Parameters + ---------- + selected : + The selected nodes to be finalized + + Returns + ------- + : + Finalized nodes. + + Notes + ----- + - The default implementation returns the first `self.select_k` selected nodes + without any additional processing. 
+ """ + return list(selected)[: self.select_k] + + @staticmethod + def build( + base_strategy: Strategy, + **kwargs: Any, + ) -> Strategy: + """ + Build a strategy for a retrieval operation. + + Combines a base strategy with any provided keyword arguments to + create a customized traversal strategy. + + Parameters + ---------- + base_strategy : + The base strategy to start with. + kwargs : + Additional configuration options for the strategy. + + Returns + ------- + : + A configured strategy instance. + + Raises + ------ + ValueError + If 'strategy' is set incorrectly or extra arguments are invalid. + """ + # Check if there is a new strategy to use. Otherwise, use the base. + strategy: Strategy + if "strategy" in kwargs: + if next(iter(kwargs.keys())) != "strategy": + raise ValueError("Error: 'strategy' must be set before other args.") + strategy = kwargs.pop("strategy") + if not isinstance(strategy, Strategy): + raise ValueError( + f"Unsupported 'strategy' type {type(strategy).__name__}." + " Must be a sub-class of Strategy" + ) + elif base_strategy is not None: + strategy = base_strategy + else: + raise ValueError("'strategy' must be set in `__init__` or invocation") + + # Apply the kwargs to update the strategy. 
+ assert strategy is not None + if "k" in kwargs: + kwargs["select_k"] = kwargs.pop("k") + strategy = dataclasses.replace(strategy, **kwargs) + + return strategy diff --git a/packages/graph-retriever/src/graph_retriever/strategies/eager.py b/packages/graph-retriever/src/graph_retriever/strategies/eager.py new file mode 100644 index 00000000..38b6db63 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/strategies/eager.py @@ -0,0 +1,40 @@ +"""Provide eager (breadth-first) traversal strategy.""" + +import dataclasses +from collections.abc import Iterable + +from typing_extensions import override + +from graph_retriever.strategies.base import NodeTracker, Strategy +from graph_retriever.types import Node + + +@dataclasses.dataclass +class Eager(Strategy): + """ + Eager traversal strategy (breadth-first). + + This strategy selects all discovered nodes at each traversal step. It ensures + breadth-first traversal by processing nodes layer by layer, which is useful for + scenarios where all nodes at the current depth should be explored before proceeding + to the next depth. + + Parameters + ---------- + select_k : + Maximum number of nodes to retrieve during traversal. + start_k : + Number of documents to fetch via similarity for starting the traversal. + Added to any initial roots provided to the traversal. + adjacent_k : + Number of documents to fetch for each outgoing edge. + max_depth : + Maximum traversal depth. If `None`, there is no limit. + k: + Deprecated: Use `select_k` instead. + Maximum number of nodes to select and return during traversal. 
+ """ + + @override + def iteration(self, nodes: Iterable[Node], tracker: NodeTracker) -> None: + tracker.select_and_traverse(nodes) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/mmr.py b/packages/graph-retriever/src/graph_retriever/strategies/mmr.py similarity index 62% rename from packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/mmr.py rename to packages/graph-retriever/src/graph_retriever/strategies/mmr.py index 2481f289..6ed5db35 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/mmr.py +++ b/packages/graph-retriever/src/graph_retriever/strategies/mmr.py @@ -6,12 +6,11 @@ import numpy as np from numpy.typing import NDArray -from pydantic import Field from typing_extensions import override -from langchain_graph_retriever.strategies.base import Strategy -from langchain_graph_retriever.types import Node -from langchain_graph_retriever.utils.math import cosine_similarity +from graph_retriever.strategies.base import NodeTracker, Strategy +from graph_retriever.types import Node +from graph_retriever.utils.math import cosine_similarity NEG_INF = float("-inf") @@ -40,6 +39,7 @@ def update_redundancy(self, new_weighted_redundancy: float) -> None: self.score = self.weighted_similarity - self.weighted_redundancy +@dataclasses.dataclass class Mmr(Strategy): """ Maximal Marginal Relevance (MMR) traversal strategy. @@ -51,53 +51,37 @@ class Mmr(Strategy): Parameters ---------- - k : int, default 5 - Maximum number of nodes to retrieve during traversal. - start_k : int, default 4 - Number of documents to fetch via similarity for starting the traversal. - Added to any initial roots provided to the traversal. - adjacent_k : int, default 10 - Number of documents to fetch for each outgoing edge. - max_depth : int, optional - Maximum traversal depth. If `None`, there is no limit. - lambda_mult : float, default 0.5 - Controls the trade-off between relevance and diversity. 
A value closer - to 1 prioritizes relevance, while a value closer to 0 prioritizes - diversity. Must be between 0 and 1 (inclusive). - score_threshold : float, default -infinity - Only nodes with a score greater than or equal to this value will be - selected. - - Attributes - ---------- - k : int + select_k : Maximum number of nodes to retrieve during traversal. - start_k : int + start_k : Number of documents to fetch via similarity for starting the traversal. Added to any initial roots provided to the traversal. - adjacent_k : int + adjacent_k : Number of documents to fetch for each outgoing edge. - max_depth : int + max_depth : Maximum traversal depth. If `None`, there is no limit. - lambda_mult : float + lambda_mult : Controls the trade-off between relevance and diversity. A value closer to 1 prioritizes relevance, while a value closer to 0 prioritizes diversity. Must be between 0 and 1 (inclusive). - score_threshold : float + min_mmr_score : Only nodes with a score greater than or equal to this value will be selected. + k: + Deprecated: Use `select_k` instead. + Maximum number of nodes to select and return during traversal. """ - lambda_mult: float = Field(default=0.5, ge=0.0, le=1.0) - score_threshold: float = NEG_INF + lambda_mult: float = 0.5 + min_mmr_score: float = NEG_INF - _selected_ids: list[str] = [] + _selected_ids: list[str] = dataclasses.field(default_factory=list) """List of selected IDs (in selection order).""" - _candidate_id_to_index: dict[str, int] = {} + _candidate_id_to_index: dict[str, int] = dataclasses.field(default_factory=dict) """Dictionary of candidate IDs to indices in candidates and candidate_embeddings.""" - _candidates: list[_MmrCandidate] = [] + _candidates: list[_MmrCandidate] = dataclasses.field(default_factory=list) """List containing information about candidates. Same order as rows in `candidate_embeddings`. 
""" @@ -107,9 +91,9 @@ class Mmr(Strategy): @cached_property def _nd_query_embedding(self) -> NDArray[np.float32]: - assert ( - self._query_embedding - ), "shouldn't access embedding / dimensions until initialized" + assert self._query_embedding, ( + "shouldn't access embedding / dimensions until initialized" + ) return _emb_to_ndarray(self._query_embedding) @property @@ -130,7 +114,7 @@ def _selected_embeddings(self) -> NDArray[np.float32]: NDArray[np.float32] (N, dim) ndarray with a row for each selected node. """ - return np.ndarray((self.k, self._dimensions), dtype=np.float32) + return np.ndarray((self.select_k, self._dimensions), dtype=np.float32) @cached_property def _candidate_embeddings(self) -> NDArray[np.float32]: @@ -175,14 +159,14 @@ def _pop_candidate( Parameters ---------- - candidate_id : str + candidate_id : The ID of the candidate to pop. Returns ------- - candidate : _MmrCandidate + candidate : The candidate with the given ID. - embedding : NDArray[np.float32] + embedding : The `NDArray` embedding of the candidate. Raises @@ -222,8 +206,7 @@ def _pop_candidate( return candidate, embedding - @override - def select_nodes(self, *, limit: int) -> Iterable[Node]: + def _next(self) -> Node | None: """ Select and pop the best item being considered. @@ -231,12 +214,10 @@ def select_nodes(self, *, limit: int) -> Iterable[Node]: Returns ------- - A tuple containing the ID of the best item. + The best node available or None if none are available. """ - if limit == 0: - return [] - if self._best_id is None or self._best_score < self.score_threshold: - return [] + if self._best_id is None or self._best_score < self.min_mmr_score: + return None # Get the selection and remove from candidates. selected_id = self._best_id @@ -249,10 +230,10 @@ def select_nodes(self, *, limit: int) -> Iterable[Node]: # Create the selected result node. 
selected_node = selected.node - selected_node.extra_metadata = { - "_similarity_score": selected.similarity, - "_mmr_score": self._best_score, - } + selected_node.extra_metadata["_mmr_score"] = selected.score + selected_node.extra_metadata["_redundancy"] = ( + selected.weighted_redundancy / self._lambda_mult_complement + ) # Reset the best score / best ID. self._best_score = NEG_INF @@ -264,66 +245,72 @@ def select_nodes(self, *, limit: int) -> Iterable[Node]: self._candidate_embeddings, np.expand_dims(selected_embedding, axis=0) ) for index, candidate in enumerate(self._candidates): - candidate.update_redundancy(similarity[index][0]) + candidate.update_redundancy( + self._lambda_mult_complement * similarity[index][0] + ) if candidate.score > self._best_score: self._best_score = candidate.score self._best_id = candidate.node.id - return [selected_node] + return selected_node @override - def discover_nodes(self, nodes: dict[str, Node]) -> None: + def iteration(self, nodes: Iterable[Node], tracker: NodeTracker) -> None: """Add candidates to the consideration set.""" - # Determine the keys to actually include. - # These are the candidates that aren't already selected - # or under consideration. - - include_ids_set = set(nodes.keys()) - include_ids_set.difference_update(self._selected_ids) - include_ids_set.difference_update(self._candidate_id_to_index.keys()) - include_ids = list(include_ids_set) - - # Now, build up a matrix of the remaining candidate embeddings. - # And add them to the - new_embeddings: NDArray[np.float32] = np.ndarray( - ( - len(include_ids), - self._dimensions, + nodes = list(nodes) + node_count = len(nodes) + if node_count > 0: + # Build up a matrix of the remaining candidate embeddings. 
+ # And add them to the candidate set + new_embeddings: NDArray[np.float32] = np.ndarray( + ( + node_count, + self._dimensions, + ) ) - ) - offset = self._candidate_embeddings.shape[0] - for index, candidate_id in enumerate(include_ids): - self._candidate_id_to_index[candidate_id] = offset + index - new_embeddings[index] = nodes[candidate_id].embedding - - # Compute the similarity to the query. - similarity = cosine_similarity(new_embeddings, self._nd_query_embedding) - - # Compute the distance metrics of all of pairs in the selected set with - # the new candidates. - redundancy = cosine_similarity( - new_embeddings, self._already_selected_embeddings() - ) - for index, candidate_id in enumerate(include_ids): - max_redundancy = 0.0 - if redundancy.shape[0] > 0: - max_redundancy = redundancy[index].max() - candidate = _MmrCandidate( - node=nodes[candidate_id], - similarity=similarity[index][0], - weighted_similarity=self.lambda_mult * similarity[index][0], - weighted_redundancy=self._lambda_mult_complement * max_redundancy, + offset = self._candidate_embeddings.shape[0] + for index, candidate_node in enumerate(nodes): + self._candidate_id_to_index[candidate_node.id] = offset + index + new_embeddings[index] = candidate_node.embedding + + # Compute the similarity to the query. + similarity = cosine_similarity(new_embeddings, self._nd_query_embedding) + + # Compute the distance metrics of all of pairs in the selected set with + # the new candidates. 
+ redundancy = cosine_similarity( + new_embeddings, self._already_selected_embeddings() ) - self._candidates.append(candidate) - - if candidate.score >= self._best_score: - self._best_score = candidate.score - self._best_id = candidate.node.id + for index, candidate_node in enumerate(nodes): + max_redundancy = 0.0 + if redundancy.shape[0] > 0: + max_redundancy = redundancy[index].max() + candidate = _MmrCandidate( + node=candidate_node, + similarity=similarity[index][0], + weighted_similarity=self.lambda_mult * similarity[index][0], + weighted_redundancy=self._lambda_mult_complement * max_redundancy, + ) + self._candidates.append(candidate) + + if candidate.score >= self._best_score: + self._best_score = candidate.score + self._best_id = candidate.node.id - # Add the new embeddings to the candidate set. - self._candidate_embeddings = np.vstack( - ( - self._candidate_embeddings, - new_embeddings, + # Add the new embeddings to the candidate set. + self._candidate_embeddings = np.vstack( + ( + self._candidate_embeddings, + new_embeddings, + ) ) - ) + + while tracker.num_remaining > 0: + next = self._next() + + if next is None: + break + + num_traversing = tracker.select_and_traverse([next]) + if num_traversing == 1: + break diff --git a/packages/graph-retriever/src/graph_retriever/strategies/scored.py b/packages/graph-retriever/src/graph_retriever/strategies/scored.py new file mode 100644 index 00000000..74f19291 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/strategies/scored.py @@ -0,0 +1,75 @@ +import dataclasses +import heapq +from collections.abc import Callable, Iterable + +from typing_extensions import override + +from graph_retriever.strategies.base import NodeTracker, Strategy +from graph_retriever.types import Node + + +class _ScoredNode: + def __init__(self, score: float, node: Node) -> None: + self.score = score + self.node = node + + def __lt__(self, other: "_ScoredNode") -> bool: + return other.score < self.score + + 
+@dataclasses.dataclass +class Scored(Strategy): + """ + Scored traversal strategy. + + This strategy uses a scoring function to select nodes using a local maximum + approach. In each iteration, it chooses the top scoring nodes available and + then traverses the connected nodes. + + Parameters + ---------- + scorer: + A callable function that returns the score of a node. + select_k : + Maximum number of nodes to retrieve during traversal. + start_k : + Number of documents to fetch via similarity for starting the traversal. + Added to any initial roots provided to the traversal. + adjacent_k : + Number of documents to fetch for each outgoing edge. + max_depth : + Maximum traversal depth. If `None`, there is no limit. + per_iteration_limit: + Maximum number of nodes to select and traverse during a single + iteration. + k: + Deprecated: Use `select_k` instead. + Maximum number of nodes to select and return during traversal. + """ + + scorer: Callable[[Node], float] + _nodes: list[_ScoredNode] = dataclasses.field(default_factory=list) + + per_iteration_limit: int | None = None + + @override + def iteration(self, nodes: Iterable[Node], tracker: NodeTracker) -> None: + for node in nodes: + heapq.heappush(self._nodes, _ScoredNode(self.scorer(node), node)) + + limit = tracker.num_remaining + if self.per_iteration_limit: + limit = min(limit, self.per_iteration_limit) + + while limit > 0 and self._nodes: + highest = heapq.heappop(self._nodes) + node = highest.node + node.extra_metadata["_score"] = highest.score + limit -= tracker.select_and_traverse([node]) + + @override + def finalize_nodes(self, selected): + selected = sorted( + selected, key=lambda node: node.extra_metadata["_score"], reverse=True + ) + return super().finalize_nodes(selected) diff --git a/packages/graph-retriever/src/graph_retriever/testing/__init__.py b/packages/graph-retriever/src/graph_retriever/testing/__init__.py new file mode 100644 index 00000000..4f0d60e2 --- /dev/null +++ 
b/packages/graph-retriever/src/graph_retriever/testing/__init__.py @@ -0,0 +1 @@ +"""Helpers for testing Graph Retriever implementations.""" diff --git a/packages/graph-retriever/src/graph_retriever/testing/adapter_tests.py b/packages/graph-retriever/src/graph_retriever/testing/adapter_tests.py new file mode 100644 index 00000000..ee9f958f --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/testing/adapter_tests.py @@ -0,0 +1,598 @@ +import abc +from collections.abc import Iterable +from dataclasses import dataclass +from typing import Any + +import pytest + +from graph_retriever import Content +from graph_retriever.adapters import Adapter +from graph_retriever.edges import Edge, IdEdge, MetadataEdge +from graph_retriever.utils.math import cosine_similarity + + +def assert_valid_result(content: Content): + """Assert the content is valid.""" + assert isinstance(content.id, str) + assert_is_embedding(content.embedding) + + +def assert_is_embedding(value: Any): + """Assert the value is an embedding.""" + assert isinstance(value, list) + for item in value: + assert isinstance(item, float) + + +def assert_valid_results(docs: Iterable[Content]): + """Assert all of the contents are valid results.""" + for doc in docs: + assert_valid_result(doc) + + +def assert_ids_any_order( + results: Iterable[Content], + expected: list[str], +) -> None: + """Assert the results are valid and match the IDs.""" + assert_valid_results(results) + + result_ids = [r.id for r in results] + assert set(result_ids) == set(expected), "should contain exactly expected IDs" + + +def cosine_similarity_scores( + adapter: Adapter, query_or_embedding: str | list[float], ids: list[str] +) -> dict[str, float]: + """Return the cosine similarity scores for the given IDs and query embedding.""" + if len(ids) == 0: + return {} + + docs = adapter.get(ids) + found_ids = (d.id for d in docs) + assert set(ids) == set(found_ids), "can't find all IDs" + + if isinstance(query_or_embedding, str): + 
query_embedding = adapter.search_with_embedding(query_or_embedding, k=0)[0] + else: + query_embedding = query_or_embedding + + scores: list[float] = cosine_similarity( + [query_embedding], + [d.embedding for d in docs], + )[0] + + return {doc.id: score for doc, score in zip(docs, scores)} + + +def assert_ids_in_cosine_similarity_order( + results: Iterable[Content], + expected: list[str], + query_embedding: list[float], + adapter: Adapter, +) -> None: + """Assert the results are valid and in cosine similarity order.""" + assert_valid_results(results) + result_ids = [r.id for r in results] + + similarity_scores = cosine_similarity_scores(adapter, query_embedding, expected) + expected = sorted(expected, key=lambda id: similarity_scores[id], reverse=True) + + assert result_ids == expected, ( + "should contain expected IDs in cosine similarity order" + ) + + +@dataclass(kw_only=True) +class AdapterComplianceCase(abc.ABC): + """ + Base dataclass for test cases. + + Attributes + ---------- + id : + The ID of the test case. + + expected : + The expected results of the case. + """ + + id: str + expected: list[str] + + requires_nested: bool = False + requires_dict_in_list: bool = False + + +@dataclass +class GetCase(AdapterComplianceCase): + """A test case for `get` and `aget`.""" + + request: list[str] + filter: dict[str, Any] | None = None + + +GET_CASES: list[GetCase] = [ + # Currently, this is not required for `get` implementations since the + # traversal skips making `get` calls with no IDs. Some stores (such as chroma) + # fail in this case. 
+ # GetCase("none", [], []), + GetCase(id="one", request=["boar"], expected=["boar"]), + GetCase( + id="many", + request=[ + "alligator", + "barracuda", + "chameleon", + "cobra", + "crocodile", + "dolphin", + "eel", + "fish", + "gecko", + "iguana", + "jellyfish", + "komodo dragon", + "lizard", + "manatee", + "narwhal", + ], + expected=[ + "alligator", + "barracuda", + "chameleon", + "cobra", + "crocodile", + "dolphin", + "eel", + "fish", + "gecko", + "iguana", + "jellyfish", + "komodo dragon", + "lizard", + "manatee", + "narwhal", + ], + ), + GetCase( + id="missing", + request=["boar", "chinchilla", "unicorn", "cobra"], + expected=["boar", "chinchilla", "cobra"], + ), + GetCase( + id="duplicate", + request=["boar", "chinchilla", "boar", "cobra"], + expected=["boar", "chinchilla", "cobra"], + ), + GetCase( + id="filtered", + request=["boar", "chinchilla", "boar", "cobra"], + expected=["chinchilla"], + filter={"keywords": "andes"}, + ), +] + + +@dataclass +class SearchCase(AdapterComplianceCase): + """A test case for `similarity_search_*` and `asimilarity_search_*` methods.""" + + query: str + k: int | None = None + filter: dict[str, str] | None = None + + @property + def kwargs(self): + """Return keyword arguments for the test invocation.""" + kwargs = {} + if self.k is not None: + kwargs["k"] = self.k + if self.filter is not None: + kwargs["filter"] = self.filter + return kwargs + + +SEARCH_CASES: list[SearchCase] = [ + SearchCase( + id="basic", + query="domesticated hunters", + expected=["cat", "horse", "chicken", "dog"], + ), + SearchCase(id="k2", query="domesticated hunters", k=2, expected=["cat", "horse"]), + SearchCase( + id="k0", + query="domesticated hunters", + k=0, + expected=[], + ), + SearchCase( + id="value_filter", + query="domesticated hunters", + filter={"type": "mammal"}, + expected=["cat", "dog", "horse", "alpaca"], + ), + SearchCase( + id="list_filter", + query="domesticated hunters", + filter={"keywords": "hunting"}, + expected=["cat"], + ), + 
SearchCase( + id="two_filters", + query="domesticated hunters", + filter={"type": "mammal", "diet": "carnivorous"}, + expected=["cat", "dingo", "ferret"], + ), + # OpenSearch supports filtering on multiple values, but it is not currently + # relied on. Since no other adapters support it, we don't test it nor should + # traversal depend on it. + # SimilaritySearchCase( + # id="multi_list_filter", + # query="domesticated hunters", + # filter={"keywords": ["hunting", "agile"]}, + # expected=["cat", "fox", "gazelle", "mongoose"] + # ), +] + + +@dataclass +class AdjacentCase(AdapterComplianceCase): + """A test case for `get_adjacent` and `aget_adjacent`.""" + + query: str + edges: set[Edge] + + k: int = 4 + filter: dict[str, Any] | None = None + + +ADJACENT_CASES: list[AdjacentCase] = [ + AdjacentCase( + id="one_edge", + query="domesticated hunters", + edges={MetadataEdge("type", "mammal")}, + expected=["horse", "alpaca", "dog", "cat"], + ), + AdjacentCase( + id="two_edges_same_field", + query="domesticated hunters", + edges={ + MetadataEdge("type", "mammal"), + MetadataEdge("type", "crustacean"), + }, + expected=[ + "alpaca", + "cat", + "dog", + "horse", + ], + ), + AdjacentCase( + id="numeric", + query="domesticated hunters", + edges={ + MetadataEdge("number_of_legs", 0), + }, + k=20, # more than match the filter so we get all + expected=[ + "barracuda", + "cobra", + "dolphin", + "eel", + "fish", + "jellyfish", + "manatee", + "narwhal", + ], + ), + AdjacentCase( + id="two_edges_diff_field", + query="domesticated hunters", + edges={ + MetadataEdge("type", "reptile"), + MetadataEdge("number_of_legs", 0), + }, + k=20, # more than match the filter so we get all + expected=[ + "alligator", + "barracuda", + "chameleon", + "cobra", + "crocodile", + "dolphin", + "eel", + "fish", + "gecko", + "iguana", + "jellyfish", + "komodo dragon", + "lizard", + "manatee", + "narwhal", + ], + ), + AdjacentCase( + id="one_ids", + query="domesticated hunters", + edges={ + IdEdge("cat"), + }, 
+ expected=[ + "cat", + ], + ), + AdjacentCase( + id="many_ids", + query="domesticated hunters", + edges={ + IdEdge("cat"), + IdEdge("dog"), + IdEdge("unicorn"), + IdEdge("crab"), + }, + expected=[ + "cat", + "dog", + "crab", + ], + ), + AdjacentCase( + id="ids_limit_k", + query="domesticated hunters", + edges={ + IdEdge("cat"), + IdEdge("dog"), + IdEdge("unicorn"), + IdEdge("antelope"), + }, + k=2, + expected=[ + "cat", + "dog", + ], + ), + AdjacentCase( + id="filtered_ids", + query="domesticated hunters", + edges={ + IdEdge("boar"), + IdEdge("chinchilla"), + IdEdge("unicorn"), + IdEdge("griaffe"), + }, + filter={"keywords": "andes"}, + expected=[ + "chinchilla", + ], + ), + AdjacentCase( + id="metadata_and_id", + query="domesticated hunters", + edges={ + IdEdge("cat"), + MetadataEdge("type", "reptile"), + }, + k=6, + expected=[ + "alligator", # reptile + "crocodile", # reptile + "cat", # by ID + "chameleon", # reptile + "gecko", # reptile + "komodo dragon", # reptile + ], + ), + AdjacentCase( + id="dict_in_list", + query="domesticated hunters", + edges={ + MetadataEdge("tags", {"a": 5, "b": 7}), + }, + expected=[ + "aardvark", + ], + requires_dict_in_list=True, + ), + AdjacentCase( + id="dict_in_list_multiple", + query="domesticated hunters", + edges={ + MetadataEdge("tags", {"a": 5, "b": 7}), + MetadataEdge("tags", {"a": 5, "b": 8}), + }, + expected=[ + "aardvark", + "albatross", + ], + requires_dict_in_list=True, + ), + AdjacentCase( + id="absent_dict", + query="domesticated hunters", + edges={ + MetadataEdge("tags", {"a": 5, "b": 10}), + }, + expected=[], + requires_dict_in_list=True, + ), + AdjacentCase( + id="nested", + query="domesticated hunters", + edges={ + MetadataEdge("nested.a", 5), + }, + expected=[ + "alligator", + "alpaca", + ], + requires_nested=True, + ), + AdjacentCase( + id="nested_same_field", + query="domesticated hunters", + edges={ + MetadataEdge("nested.a", 5), + MetadataEdge("nested.a", 6), + }, + expected=[ + "alligator", + "alpaca", + 
"ant", + ], + requires_nested=True, + ), + AdjacentCase( + id="nested_diff_field", + query="domesticated hunters", + edges={ + MetadataEdge("nested.a", 5), + MetadataEdge("nested.b", 5), + }, + expected=[ + "alligator", + "alpaca", + "anteater", + ], + requires_nested=True, + ), +] + + +class AdapterComplianceSuite(abc.ABC): + """ + Test suite for adapter compliance. + + To use this, create a sub-class containing a `@pytest.fixture` named + `adapter` which returns an `Adapter` with the documents from `animals.jsonl` + loaded. + """ + + def supports_nested_metadata(self) -> bool: + """Return whether nested metadata is expected to work.""" + return True + + def supports_dict_in_list(self) -> bool: + """Return whether dicts can appear in list fields in metadata.""" + return True + + def expected(self, method: str, case: AdapterComplianceCase) -> list[str]: + """ + Override to change the expected behavior of a case. + + If the test is expected to fail, call `pytest.xfail(reason)`, or + `pytest.skip(reason)` if it can't be executed. + + Generally, this should *not* change the expected results, unless the the + adapter being tested uses wildly different distance metrics or a + different embedding. The `AnimalsEmbedding` is deterministic and the + results across vector stores should generally be deterministic and + consistent. + + Parameters + ---------- + method : + The method being tested. For instance, `get`, `aget`, or + `similarity_search_with_embedding`, etc. + case : + The case being tested. + + Returns + ------- + : + The expected animals. 
+ """ + if not self.supports_nested_metadata() and case.requires_nested: + pytest.xfail("nested metadata not supported") + if not self.supports_dict_in_list() and case.requires_dict_in_list: + pytest.xfail("dict-in-list fields is not supported") + return case.expected + + @pytest.fixture(params=GET_CASES, ids=lambda c: c.id) + def get_case(self, request) -> GetCase: + """Fixture providing the `get` and `aget` test cases.""" + return request.param + + @pytest.fixture(params=ADJACENT_CASES, ids=lambda c: c.id) + def adjacent_case(self, request) -> AdjacentCase: + """Fixture providing the `get_adjacent` and `aget_adjacent` test cases.""" + return request.param + + @pytest.fixture(params=SEARCH_CASES, ids=lambda c: c.id) + def search_case(self, request) -> SearchCase: + """Fixture providing the `(a)?similarity_search_*` test cases.""" + return request.param + + def test_get(self, adapter: Adapter, get_case: GetCase) -> None: + """Run tests for `get`.""" + expected = self.expected("get", get_case) + results = adapter.get(get_case.request, filter=get_case.filter) + assert_ids_any_order(results, expected) + + async def test_aget(self, adapter: Adapter, get_case: GetCase) -> None: + """Run tests for `aget`.""" + expected = self.expected("aget", get_case) + results = await adapter.aget(get_case.request, filter=get_case.filter) + assert_ids_any_order(results, expected) + + def test_search_with_embedding( + self, adapter: Adapter, search_case: SearchCase + ) -> None: + """Run tests for `search_with_embedding`.""" + expected = self.expected("search_with_embedding", search_case) + embedding, results = adapter.search_with_embedding( + search_case.query, **search_case.kwargs + ) + assert_is_embedding(embedding) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) + + async def test_asearch_with_embedding( + self, adapter: Adapter, search_case: SearchCase + ) -> None: + """Run tests for `asearch_with_embedding`.""" + expected = 
self.expected("asearch_with_embedding", search_case) + embedding, results = await adapter.asearch_with_embedding( + search_case.query, **search_case.kwargs + ) + assert_is_embedding(embedding) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) + + def test_search(self, adapter: Adapter, search_case: SearchCase) -> None: + """Run tests for `search`.""" + expected = self.expected("search", search_case) + embedding, _ = adapter.search_with_embedding(search_case.query, k=0) + results = adapter.search(embedding, **search_case.kwargs) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) + + async def test_asearch(self, adapter: Adapter, search_case: SearchCase) -> None: + """Run tests for `asearch`.""" + expected = self.expected("asearch", search_case) + embedding, _ = await adapter.asearch_with_embedding(search_case.query, k=0) + results = await adapter.asearch(embedding, **search_case.kwargs) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) + + def test_adjacent(self, adapter: Adapter, adjacent_case: AdjacentCase) -> None: + """Run tests for `adjacent`.""" + expected = self.expected("adjacent", adjacent_case) + embedding, _ = adapter.search_with_embedding(adjacent_case.query, k=0) + results = adapter.adjacent( + edges=adjacent_case.edges, + query_embedding=embedding, + k=adjacent_case.k, + filter=adjacent_case.filter, + ) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) + + async def test_aadjacent( + self, adapter: Adapter, adjacent_case: AdjacentCase + ) -> None: + """Run tests for `aadjacent`.""" + expected = self.expected("aadjacent", adjacent_case) + embedding, _ = await adapter.asearch_with_embedding(adjacent_case.query, k=0) + results = await adapter.aadjacent( + edges=adjacent_case.edges, + query_embedding=embedding, + k=adjacent_case.k, + filter=adjacent_case.filter, + ) + assert_ids_in_cosine_similarity_order(results, expected, embedding, adapter) 
diff --git a/packages/langchain-graph-retriever/tests/embeddings/simple_embeddings.py b/packages/graph-retriever/src/graph_retriever/testing/embeddings.py similarity index 57% rename from packages/langchain-graph-retriever/tests/embeddings/simple_embeddings.py rename to packages/graph-retriever/src/graph_retriever/testing/embeddings.py index 872365f0..7d8d114b 100644 --- a/packages/langchain-graph-retriever/tests/embeddings/simple_embeddings.py +++ b/packages/graph-retriever/src/graph_retriever/testing/embeddings.py @@ -1,97 +1,81 @@ import json import math import random -from abc import abstractmethod -from langchain_core.embeddings import Embeddings - -class BaseEmbeddings(Embeddings): - @abstractmethod - def embed_query(self, text: str) -> list[float]: - pass - - def embed_documents(self, texts: list[str]) -> list[list[float]]: - return [self.embed_query(txt) for txt in texts] - - -class Angular2DEmbeddings(BaseEmbeddings): - """ - From angles (as strings in units of pi) to unit embedding vectors on a circle. +def angular_2d_embedding(text: str) -> list[float]: """ + Convert input text to a 'vector' (list of floats). - def embed_query(self, text: str) -> list[float]: - """ - Convert input text to a 'vector' (list of floats). + Parameters + ---------- + text: str + The text to embed. - Parameters - ---------- - text: str - The text to embed. + Returns + ------- + : + If the text is a number, use it as the angle for the unit vector in + units of pi. - Returns - ------- - list[float] - If the text is a number, use it as the angle for the unit vector in - units of pi. + Any other input text becomes the singular result `[0, 0]`. + """ + try: + angle = float(text) + return [math.cos(angle * math.pi), math.sin(angle * math.pi)] + except ValueError: + # Assume: just test string, no attention is paid to values. + return [0.0, 0.0] - Any other input text becomes the singular result `[0, 0]`. 
- """ - try: - angle = float(text) - return [math.cos(angle * math.pi), math.sin(angle * math.pi)] - except ValueError: - # Assume: just test string, no attention is paid to values. - return [0.0, 0.0] +def earth_embeddings(text: str) -> list[float]: + """Split words and return a vector based on that.""" -class EarthEmbeddings(BaseEmbeddings): - def get_vector_near(self, value: float) -> list[float]: + def vector_near(value: float) -> list[float]: base_point = [value, (1 - value**2) ** 0.5] fluctuation = random.random() / 100.0 return [base_point[0] + fluctuation, base_point[1] - fluctuation] - def embed_query(self, text: str) -> list[float]: - words = set(text.lower().split()) - if "earth" in words: - vector = self.get_vector_near(0.9) - elif {"planet", "world", "globe", "sphere"}.intersection(words): - vector = self.get_vector_near(0.8) - else: - vector = self.get_vector_near(0.1) - return vector + words = set(text.lower().split()) + if "earth" in words: + return vector_near(0.9) + elif {"planet", "world", "globe", "sphere"}.intersection(words): + return vector_near(0.8) + else: + return vector_near(0.1) -class ParserEmbeddings(BaseEmbeddings): - """Parse input texts: if they are json for a List[float], fine. - Otherwise, return all zeros and call it a day. 
- """ +class ParserEmbeddings: +    """Parse the text as a list of floats, otherwise return zeros.""" - def __init__(self, dimension: int) -> None: + def __init__(self, dimension: int = 10) -> None: self.dimension = dimension - def embed_query(self, text: str) -> list[float]: + def __call__(self, text: str) -> list[float]: + """Return the embedding.""" try: vals = json.loads(text) - except json.JSONDecodeError: - return [0.0] * self.dimension - else: assert len(vals) == self.dimension return vals + except json.JSONDecodeError: + return [0.0] * self.dimension -def string_to_number(word: str) -> int: +def _string_to_number(word: str) -> int: return sum(ord(char) for char in word) -class WordEmbeddings(BaseEmbeddings): +class WordEmbeddings: + """Embeddings based on a word list.""" + def __init__(self, words: list[str]): self._words = words self._offsets = [ - string_to_number(word=word) * ((-1) ** i) for i, word in enumerate(words) + _string_to_number(w) * ((-1) ** i) for i, w in enumerate(words) ] - def embed_query(self, text: str) -> list[float]: + def __call__(self, text: str) -> list[float]: + """Return the embedding.""" return [ 1.0 + (100 / self._offsets[i]) if word in text else 0.2 / (i + 1) for i, word in enumerate(self._words) @@ -99,6 +83,8 @@ def embed_query(self, text: str) -> list[float]: class AnimalEmbeddings(WordEmbeddings): + """Embeddings for animal test-case.""" + def __init__(self): super().__init__( words=""" diff --git a/packages/graph-retriever/src/graph_retriever/traversal.py b/packages/graph-retriever/src/graph_retriever/traversal.py new file mode 100644 index 00000000..6d2cdee2 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/traversal.py @@ -0,0 +1,426 @@ +"""Implements the traversal logic for graph-based document retrieval.""" + +import copy +from collections.abc import Iterable, Sequence +from typing import Any + +from graph_retriever.adapters import Adapter +from graph_retriever.content import Content +from 
graph_retriever.edges import Edge, EdgeFunction, EdgeSpec, MetadataEdgeFunction +from graph_retriever.strategies import NodeTracker, Strategy +from graph_retriever.types import Node +from graph_retriever.utils.math import cosine_similarity + + +def traverse( + query: str, + *, + edges: list[EdgeSpec] | EdgeFunction, + strategy: Strategy, + store: Adapter, + metadata_filter: dict[str, Any] | None = None, + initial_root_ids: Sequence[str] = (), + store_kwargs: dict[str, Any] = {}, +) -> list[Node]: + """ + Perform a graph traversal to retrieve nodes for a specific query. + + Parameters + ---------- + query : + The query string for the traversal. + edges : + A list of [EdgeSpec][graph_retriever.edges.EdgeSpec] for use in creating a + [MetadataEdgeFunction][graph_retriever.edges.MetadataEdgeFunction], + or an [EdgeFunction][graph_retriever.edges.EdgeFunction]. + strategy : + The traversal strategy that defines how nodes are discovered, selected, + and finalized. + store : + The vector store adapter used for similarity searches and document + retrieval. + metadata_filter : + Optional filter for metadata during traversal. + initial_root_ids : + IDs of the initial root nodes for the traversal. + store_kwargs : + Additional arguments passed to the store adapter. + + Returns + ------- + : + Nodes returned by the traversal. + """ + traversal = _Traversal( + query=query, + edges=edges, + strategy=copy.deepcopy(strategy), + store=store, + metadata_filter=metadata_filter, + initial_root_ids=initial_root_ids, + store_kwargs=store_kwargs, + ) + return traversal.traverse() + + +async def atraverse( + query: str, + *, + edges: list[EdgeSpec] | EdgeFunction, + strategy: Strategy, + store: Adapter, + metadata_filter: dict[str, Any] | None = None, + initial_root_ids: Sequence[str] = (), + store_kwargs: dict[str, Any] = {}, +) -> list[Node]: + """ + Asynchronously perform a graph traversal to retrieve nodes for a specific query. 
+ + Parameters + ---------- + query : + The query string for the traversal. + edges : + A list of [EdgeSpec][graph_retriever.edges.EdgeSpec] for use in creating a + [MetadataEdgeFunction][graph_retriever.edges.MetadataEdgeFunction], + or an [EdgeFunction][graph_retriever.edges.EdgeFunction]. + strategy : + The traversal strategy that defines how nodes are discovered, selected, + and finalized. + store : + The vector store adapter used for similarity searches and document + retrieval. + metadata_filter : + Optional filter for metadata during traversal. + initial_root_ids : + IDs of the initial root nodes for the traversal. + store_kwargs : + Additional arguments passed to the store adapter. + + Returns + ------- + : + Nodes returned by the traversal. + """ + traversal = _Traversal( + query=query, + edges=edges, + strategy=copy.deepcopy(strategy), + store=store, + metadata_filter=metadata_filter, + initial_root_ids=initial_root_ids, + store_kwargs=store_kwargs, + ) + return await traversal.atraverse() + + +class _Traversal: + """ + Handles a single traversal operation for a graph-based retrieval system. + + The `_Traversal` class manages the process of discovering, visiting, and selecting + nodes within a graph, based on a query and a traversal strategy. It supports + synchronous and asynchronous traversal, enabling retrieval of documents in a + controlled, iterative manner. + + This class should not be reused between traversals. 
+ """ + + def __init__( + self, + query: str, + *, + edges: list[EdgeSpec] | EdgeFunction, + strategy: Strategy, + store: Adapter, + metadata_filter: dict[str, Any] | None = None, + initial_root_ids: Sequence[str] = (), + store_kwargs: dict[str, Any] = {}, + ) -> None: + self.query = query + + self.edge_function: EdgeFunction + if isinstance(edges, list): + self.edge_function = MetadataEdgeFunction(edges) + elif callable(edges): + self.edge_function = edges + else: + raise ValueError(f"Invalid edges: {edges}") + + self.strategy = strategy + self.store = store + self.metadata_filter = metadata_filter + self.initial_root_ids = initial_root_ids + self.store_kwargs = store_kwargs + + self._used = False + self._visited_edges: set[Edge] = set() + self._edge_depths: dict[Edge, int] = {} + self._discovered_node_ids: set[str] = set() + self._node_tracker: NodeTracker = NodeTracker( + select_k=strategy.select_k, max_depth=strategy.max_depth + ) + + def _check_first_use(self): + assert not self._used, "Traversals cannot be re-used." + self._used = True + + def traverse(self) -> list[Node]: + """ + Execute the traversal synchronously. + + This method retrieves initial candidates, discovers and visits nodes, + and explores edges iteratively until the traversal is complete. + + Returns + ------- + : + The final set of nodes resulting from the traversal. + """ + self._check_first_use() + + # Retrieve initial candidates. 
+ initial_content = self._fetch_initial_candidates() + if self.initial_root_ids: + initial_content.extend(self.store.get(self.initial_root_ids)) + nodes = self._contents_to_new_nodes(initial_content, depth=0) + + while True: + self.strategy.iteration(nodes=nodes, tracker=self._node_tracker) + + if self._node_tracker._should_stop_traversal(): + break + + next_outgoing_edges = self.select_next_edges(self._node_tracker.to_traverse) + new_content = self._fetch_adjacent(next_outgoing_edges) + nodes = self._contents_to_new_nodes(new_content) + + self._node_tracker.to_traverse.clear() + + return list(self.strategy.finalize_nodes(self._node_tracker.selected)) + + async def atraverse(self) -> list[Node]: + """ + Execute the traversal asynchronously. + + This method retrieves initial candidates, discovers and visits nodes, + and explores edges iteratively until the traversal is complete. + + Returns + ------- + : + The final set of nodes resulting from the traversal. + """ + self._check_first_use() + + # Retrieve initial candidates. + initial_content = await self._afetch_initial_candidates() + if self.initial_root_ids: + initial_content.extend(await self.store.aget(self.initial_root_ids)) + new_nodes = self._contents_to_new_nodes(initial_content, depth=0) + + while True: + self.strategy.iteration(nodes=new_nodes, tracker=self._node_tracker) + + if self._node_tracker._should_stop_traversal(): + break + + next_outgoing_edges = self.select_next_edges(self._node_tracker.to_traverse) + next_content = await self._afetch_adjacent(next_outgoing_edges) + new_nodes = self._contents_to_new_nodes(next_content) + + self._node_tracker.to_traverse.clear() + + return list(self.strategy.finalize_nodes(self._node_tracker.selected)) + + def _fetch_initial_candidates(self) -> list[Content]: + """ + Retrieve initial candidates based on the query and strategy. + + Returns + ------- + : + The initial content retrieved via similarity search. 
+ """ + query_embedding, docs = self.store.search_with_embedding( + query=self.query, + k=self.strategy.start_k, + filter=self.metadata_filter, + **self.store_kwargs, + ) + self.strategy._query_embedding = query_embedding + return docs + + async def _afetch_initial_candidates(self) -> list[Content]: + query_embedding, docs = await self.store.asearch_with_embedding( + query=self.query, + k=self.strategy.start_k, + filter=self.metadata_filter, + **self.store_kwargs, + ) + self.strategy._query_embedding = query_embedding + return docs + + def _fetch_adjacent(self, edges: set[Edge]) -> Iterable[Content]: + """ + Retrieve documents adjacent to the specified outgoing edges. + + This method uses the vector store adapter to fetch documents connected to + the provided edges. + + Parameters + ---------- + edges : + The edges whose adjacent documents need to be fetched. + + Returns + ------- + : + The set of content adjacent to the specified edges. + """ + return self.store.adjacent( + edges=edges, + query_embedding=self.strategy._query_embedding, + k=self.strategy.adjacent_k, + filter=self.metadata_filter, + **self.store_kwargs, + ) + + async def _afetch_adjacent(self, edges: set[Edge]) -> Iterable[Content]: + """ + Asynchronously retrieve documents adjacent to the specified outgoing edges. + + This method uses the vector store adapter to fetch documents connected to + the provided edges. + + Parameters + ---------- + edges : + The edges whose adjacent documents need to be fetched. + + Returns + ------- + : + The set of content adjacent to the specified edges. + """ + return await self.store.aadjacent( + edges=edges, + query_embedding=self.strategy._query_embedding, + k=self.strategy.adjacent_k, + filter=self.metadata_filter, + **self.store_kwargs, + ) + + def _contents_to_new_nodes( + self, contents: Iterable[Content], *, depth: int | None = None + ) -> Iterable[Node]: + """ + Convert a content object into a node for traversal. 
+ + This method creates a new `Node` instance, associates it with the document's + metadata, and calculates its depth based on the incoming edges. + + Parameters + ---------- + contents : + The contents to convert into a node. + depth : + The depth of the node. If None, the depth is calculated based on the + incoming edges. + + Returns + ------- + : + The newly discovered nodes. + """ + # Determine which contents to include. + + # TODO: We could push this filtering down into the `adjacent` calls, + # which could allow some stores to avoid retrieving/decoding already + # discovered contents. This would complicate the implementation, so we + # should only do so if the cost of retrieving/decoding is significant. + content_dict = { + c.id: c for c in contents if c.id not in self._discovered_node_ids + } + + if len(content_dict) == 0: + return [] + + # Compute scores. + scores: list[float] = cosine_similarity( + [self.strategy._query_embedding], + [c.embedding for c in content_dict.values()], + )[0] + + # Create the nodes + nodes = [] + for content, score in zip(content_dict.values(), scores): + # Determine incoming/outgoing edges. + edges = self.edge_function(content) + + # Compute the depth + if depth is None: + depth = min( + [ + d + for e in edges.incoming + if (d := self._edge_depths.get(e, None)) is not None + ], + default=0, + ) + + nodes.append( + Node( + id=content.id, + content=content.content, + depth=depth, + embedding=content.embedding, + similarity_score=score, + metadata=content.metadata, + incoming_edges=edges.incoming, + outgoing_edges=edges.outgoing, + ) + ) + self._discovered_node_ids.update(content_dict.keys()) + return nodes + + def select_next_edges(self, nodes: dict[str, Node]) -> set[Edge]: + """ + Find the unvisited outgoing edges from the set of new nodes to traverse. + + This method updates the traversal state by recording the outgoing edges of the + provided nodes. 
Outgoing edges that have not been visited before are identified + and added to the set of edges to explore in subsequent traversal steps. + + Parameters + ---------- + nodes : + The new nodes to traverse + + Returns + ------- + : + The set of new outgoing edges that need to be explored. + + Notes + ----- + - The `new_outgoing_edges` dictionary tracks the depth of each outgoing + edge. + - If a node's outgoing edge leads to a lower depth, the edge's depth is + updated to reflect the shortest path. + - The `_visited_edges` set is updated to include all outgoing edges + from the provided nodes. + """ + new_outgoing_edges: dict[Edge, int] = {} + for node in nodes.values(): + node_new_outgoing_edges = node.outgoing_edges - self._visited_edges + for edge in node_new_outgoing_edges: + depth = new_outgoing_edges.setdefault(edge, node.depth + 1) + if node.depth + 1 < depth: + new_outgoing_edges[edge] = node.depth + 1 + + self._edge_depths.update(new_outgoing_edges) + + new_outgoing_edge_set = set(new_outgoing_edges.keys()) + self._visited_edges.update(new_outgoing_edge_set) + return new_outgoing_edge_set diff --git a/packages/graph-retriever/src/graph_retriever/types.py b/packages/graph-retriever/src/graph_retriever/types.py new file mode 100644 index 00000000..b6401064 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/types.py @@ -0,0 +1,50 @@ +"""Defines the `Node` class used during graph traversal.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any + +from graph_retriever.edges import Edge + + +@dataclass +class Node: + """ + Represents a node in the traversal graph. + + The [Node][graph_retriever.Node] class contains information about a document + during graph traversal, including its depth, embedding, edges, and metadata. + + Parameters + ---------- + id : + The unique identifier of the document represented by this node. + content : + The content. 
+ depth : + The depth (number of edges) through which this node was discovered. This + depth may not reflect the true depth in the full graph if only a subset + of edges is retrieved. + embedding : + The embedding vector of the document, used for similarity calculations. + metadata : + Metadata from the original document. This is a reference to the original + document metadata and should not be modified directly. Any updates to + metadata should be made to `extra_metadata`. + extra_metadata : + Additional metadata to override or augment the original document + metadata during traversal. + """ + + id: str + content: str + depth: int + similarity_score: float + embedding: list[float] + metadata: dict[str, Any] = field(default_factory=dict) + + incoming_edges: set[Edge] = field(default_factory=set) + outgoing_edges: set[Edge] = field(default_factory=set) + + extra_metadata: dict[str, Any] = field(default_factory=dict) diff --git a/packages/graph-retriever/src/graph_retriever/utils/__init__.py b/packages/graph-retriever/src/graph_retriever/utils/__init__.py new file mode 100644 index 00000000..7c376493 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/utils/__init__.py @@ -0,0 +1 @@ +"""Utilities used in graph_retriever and related packages.""" diff --git a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/batched.py b/packages/graph-retriever/src/graph_retriever/utils/batched.py similarity index 72% rename from packages/graph-rag-example-helpers/src/graph_rag_example_helpers/batched.py rename to packages/graph-retriever/src/graph_retriever/utils/batched.py index 9ff79103..8b46f2dd 100644 --- a/packages/graph-rag-example-helpers/src/graph_rag_example_helpers/batched.py +++ b/packages/graph-retriever/src/graph_retriever/utils/batched.py @@ -1,7 +1,9 @@ +from collections.abc import Iterable + try: # Try importing the function from itertools (Python 3.12+) from itertools import batched # type: ignore[attr-defined] -except ImportError: +except 
ImportError: # pragma: no cover from collections.abc import Iterator from itertools import islice from typing import TypeVar @@ -11,15 +13,15 @@ T = TypeVar("T") # This is equivalent to `itertools.batched`, but that is only available in 3.12 - def batched(iterable: Iterator[T], n: int) -> Iterator[Iterator[T]]: # type: ignore[no-redef] + def batched(iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]: # type: ignore[no-redef] """ Equivalent of itertools.batched for pre 3.12. Parameters ---------- - iterable : Iterator[T] + iterable : Iterator over elements. - n : int + n : Size of batches. Yields @@ -34,6 +36,7 @@ def batched(iterable: Iterator[T], n: int) -> Iterator[Iterator[T]]: # type: ig """ if n < 1: raise ValueError("n must be at least one") - it = iterable - while batch := tuple(islice(it, n)): - yield iter(batch) + + iterator = iter(iterable) + while batch := tuple(islice(iterator, n)): + yield batch diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/utils/math.py b/packages/graph-retriever/src/graph_retriever/utils/math.py similarity index 75% rename from packages/langchain-graph-retriever/src/langchain_graph_retriever/utils/math.py rename to packages/graph-retriever/src/graph_retriever/utils/math.py index 39e54b68..8af386e5 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/utils/math.py +++ b/packages/graph-retriever/src/graph_retriever/utils/math.py @@ -15,16 +15,16 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: Parameters ---------- - X : Matrix + X : A matrix of shape (m, n), where `m` is the number of rows and `n` is the number of columns (features). - Y : Matrix + Y : A matrix of shape (p, n), where `p` is the number of rows and `n` is the number of columns (features). Returns ------- - np.ndarray + : A matrix of shape (m, p) containing the cosine similarity scores between each row of `X` and each row of `Y`. 
@@ -74,43 +74,32 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: def cosine_similarity_top_k( X: Matrix, Y: Matrix, - top_k: int | None = 5, + top_k: int | None, score_threshold: float | None = None, ) -> tuple[list[tuple[int, int]], list[float]]: """ - Compute cosine similarity with filtering for top-k results and a score threshold. - - This function calculates the row-wise cosine similarity between two matrices and - returns the indices and scores of the top results based on the provided - parameters. + Row-wise cosine similarity with optional top-k and score threshold filtering. Parameters ---------- - X : Matrix + X : A matrix of shape (m, n), where `m` is the number of rows and `n` is the number of columns (features). - Y : Matrix + Y : A matrix of shape (p, n), where `p` is the number of rows and `n` is the number of columns (features). - top_k : int, default 5 - The maximum number of results to return. If `None`, returns all results - above the score threshold. - score_threshold: float, default -1.0 - Minimum cosine similarity score for results to be included. + top_k : + Max number of results to return. + score_threshold: + Minimum score to return. Returns ------- - indices : list[tuple[int, int]] - A list of index pairs (X_idx, Y_idx) corresponding to the highest - similarity scores. - scores : list[float] - A list of the corresponding similarity scores. - - Notes - ----- - - If `top_k` is None or greater than the number of valid scores, all scores - above the threshold will be returned. - - The results are sorted in descending order of similarity scores. + list[tuple[int, int]] + Two-tuples of indices `(X_idx, Y_idx)` indicating the respective rows in + `X` and `Y`. + list[float] + The corresponding cosine similarities. 
""" if len(X) == 0 or len(Y) == 0: return [], [] diff --git a/packages/graph-retriever/src/graph_retriever/utils/merge.py b/packages/graph-retriever/src/graph_retriever/utils/merge.py new file mode 100644 index 00000000..d39880b7 --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/utils/merge.py @@ -0,0 +1,65 @@ +import asyncio +import dataclasses +from collections.abc import AsyncIterator +from typing import TypeVar + +T = TypeVar("T") + + +@dataclasses.dataclass +class _Done: + exception: bool = False + + +async def amerge( + *async_iterables: AsyncIterator[T], + queue_size: int = 10, +) -> AsyncIterator[T]: + """ + Merge async iterables into a single async iterator. + + Elements are yielded in the order they become available. + + Parameters + ---------- + async_iterables : + The async iterators to merge. + queue_size : + Number of elements to buffer in the queue. + + Yields + ------ + : + The elements of the iterators as they become available. + """ + queue: asyncio.Queue[T | _Done] = asyncio.Queue(queue_size) + + async def pump(aiter: AsyncIterator[T]) -> None: + try: + async for item in aiter: + await queue.put(item) + await queue.put(_Done(exception=False)) + except: + await queue.put(_Done(exception=True)) + raise + + tasks = [asyncio.create_task(pump(aiter)) for aiter in async_iterables] + + try: + pending_count = len(async_iterables) + while pending_count > 0: + item = await queue.get() + if isinstance(item, _Done): + if item.exception: + # If there has been an exception, end early. 
+ break + else: + pending_count -= 1 + else: + yield item + queue.task_done() + finally: + for task in tasks: + if not task.done(): + task.cancel() + await asyncio.gather(*tasks) diff --git a/packages/graph-retriever/src/graph_retriever/utils/run_in_executor.py b/packages/graph-retriever/src/graph_retriever/utils/run_in_executor.py new file mode 100644 index 00000000..f844ab7f --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/utils/run_in_executor.py @@ -0,0 +1,59 @@ +import asyncio +from collections.abc import Callable +from concurrent.futures import Executor +from contextvars import copy_context +from functools import partial +from typing import ParamSpec, TypeVar, cast + +P = ParamSpec("P") +T = TypeVar("T") + + +async def run_in_executor( + executor: Executor | None, + func: Callable[P, T], + *args: P.args, + **kwargs: P.kwargs, +) -> T: # noqa: DOC502 + """ + Run a function in an executor. + + Parameters + ---------- + executor : + The executor to run in. + func : + The function. + *args : + The positional arguments to the function. + kwargs : + The keyword arguments to the function. + + Returns + ------- + : + The output of the function. + + Raises + ------ + RuntimeError + If the function raises a StopIteration. 
+ """ # noqa: DOC502 + + def wrapper() -> T: + try: + return func(*args, **kwargs) + except StopIteration as exc: + # StopIteration can't be set on an asyncio.Future + # it raises a TypeError and leaves the Future pending forever + # so we need to convert it to a RuntimeError + raise RuntimeError from exc + + if executor is None or isinstance(executor, dict): + # Use default executor with context copied from current context + return await asyncio.get_running_loop().run_in_executor( + None, + cast(Callable[..., T], partial(copy_context().run, wrapper)), + ) + + return await asyncio.get_running_loop().run_in_executor(executor, wrapper) diff --git a/packages/graph-retriever/src/graph_retriever/utils/top_k.py b/packages/graph-retriever/src/graph_retriever/utils/top_k.py new file mode 100644 index 00000000..1414d55b --- /dev/null +++ b/packages/graph-retriever/src/graph_retriever/utils/top_k.py @@ -0,0 +1,65 @@ +from collections.abc import Iterable + +from graph_retriever.content import Content +from graph_retriever.utils.math import cosine_similarity_top_k + + +def top_k( + contents: Iterable[Content], + *, + embedding: list[float], + k: int, +) -> list[Content]: + """ + Select the top-k contents from the given content. + + Parameters + ---------- + contents : + The content from which to select the top-K. + embedding: list[float] + The embedding we're looking for. + k : + The number of items to select. + + Returns + ------- + list[Content] + Top-K by similarity. + """ + # TODO: Consider handling specially cases of already-sorted batches (merge). + # TODO: Consider passing threshold here to limit results. + + # Use dicts to de-duplicate by ID. This ensures we choose the top K distinct + # content (rather than K copies of the same content). 
+ unscored = {c.id: c for c in contents} + + top_scored = _similarity_sort_top_k( + list(unscored.values()), embedding=embedding, k=k + ) + + sorted = list(top_scored.values()) + sorted.sort(key=_score, reverse=True) + + return [c[0] for c in sorted] + + +def _score(content_with_score: tuple[Content, float]) -> float: + return content_with_score[1] + + +def _similarity_sort_top_k( + contents: list[Content], *, embedding: list[float], k: int +) -> dict[str, tuple[Content, float]]: + # Flatten the content and use a dict to deduplicate. + # We need to do this *before* selecting the top_k to ensure we don't + # get duplicates (and fail to produce `k`). + top_k, scores = cosine_similarity_top_k( + [embedding], [c.embedding for c in contents], top_k=k + ) + + results = {} + for (_x, y), score in zip(top_k, scores): + c = contents[y] + results[c.id] = (c, score) + return results diff --git a/packages/graph-retriever/tests/adapters/test_in_memory.py b/packages/graph-retriever/tests/adapters/test_in_memory.py new file mode 100644 index 00000000..1f7a2b32 --- /dev/null +++ b/packages/graph-retriever/tests/adapters/test_in_memory.py @@ -0,0 +1,11 @@ +import pytest +from graph_retriever.adapters import Adapter +from graph_retriever.adapters.in_memory import InMemory +from graph_retriever.testing.adapter_tests import AdapterComplianceSuite + + +class TestInMemory(AdapterComplianceSuite): + @pytest.fixture(scope="class") + def adapter(self, animals: Adapter) -> Adapter: + assert isinstance(animals, InMemory) + return animals diff --git a/packages/graph-retriever/tests/conftest.py b/packages/graph-retriever/tests/conftest.py new file mode 100644 index 00000000..45f85bbf --- /dev/null +++ b/packages/graph-retriever/tests/conftest.py @@ -0,0 +1,26 @@ +import asyncio + +import pytest + +pytest.register_assert_rewrite("graph_retriever.testing") + +from tests.testing.adapters import animals # noqa: E402 +from tests.testing.invoker import sync_or_async # noqa: E402 + +# Mark these 
imports as used so they don't get removed. +# They need to be imported in `conftest.py` so the fixtures are registered. +_ = ( + animals, + sync_or_async, +) + + +# Event Loop for async tests. +@pytest.fixture(scope="session") +def event_loop(): + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + yield loop + loop.close() diff --git a/packages/langchain-graph-retriever/tests/integration_tests/adapters/__init__.py b/packages/graph-retriever/tests/edges/__init__.py similarity index 100% rename from packages/langchain-graph-retriever/tests/integration_tests/adapters/__init__.py rename to packages/graph-retriever/tests/edges/__init__.py diff --git a/packages/langchain-graph-retriever/tests/unit_tests/edges/test_metadata.py b/packages/graph-retriever/tests/edges/test_metadata.py similarity index 77% rename from packages/langchain-graph-retriever/tests/unit_tests/edges/test_metadata.py rename to packages/graph-retriever/tests/edges/test_metadata.py index 5f86379b..49ce49df 100644 --- a/packages/langchain-graph-retriever/tests/unit_tests/edges/test_metadata.py +++ b/packages/graph-retriever/tests/edges/test_metadata.py @@ -1,16 +1,18 @@ from typing import Any import pytest -from langchain_graph_retriever.edges.metadata import Id, MetadataEdgeFunction -from langchain_graph_retriever.types import Edges, IdEdge, MetadataEdge, Node +from graph_retriever import Node +from graph_retriever.edges import Edges, IdEdge, MetadataEdge, MetadataEdgeFunction def mk_node(metadata: dict[str, Any]) -> Node: return Node( id="id", + content="testing", metadata=metadata, depth=0, embedding=[], + similarity_score=0.2, ) @@ -37,8 +39,16 @@ def test_edge_function(): ) +def test_nested_edge(): + edge_function = MetadataEdgeFunction([("a.b", "b.c")]) + assert edge_function(mk_node({"a": {"b": 5}, "b": {"c": 7}})) == Edges( + {MetadataEdge("b.c", 7)}, + {MetadataEdge("b.c", 5)}, + ) + + def test_link_to_id(): - edge_function = 
MetadataEdgeFunction([("mentions", Id())]) + edge_function = MetadataEdgeFunction([("mentions", "$id")]) result = edge_function(mk_node({"mentions": ["a", "c"]})) assert result.incoming == {IdEdge("id")} @@ -46,7 +56,7 @@ def test_link_to_id(): def test_link_from_id(): - edge_function = MetadataEdgeFunction([(Id(), "mentions")]) + edge_function = MetadataEdgeFunction([("$id", "mentions")]) result = edge_function(mk_node({"mentions": ["a", "c"]})) assert result.incoming == { diff --git a/packages/langchain-graph-retriever/tests/unit_tests/__init__.py b/packages/graph-retriever/tests/strategies/__init__.py similarity index 100% rename from packages/langchain-graph-retriever/tests/unit_tests/__init__.py rename to packages/graph-retriever/tests/strategies/__init__.py diff --git a/packages/graph-retriever/tests/strategies/test_base.py b/packages/graph-retriever/tests/strategies/test_base.py new file mode 100644 index 00000000..58cf708e --- /dev/null +++ b/packages/graph-retriever/tests/strategies/test_base.py @@ -0,0 +1,109 @@ +import dataclasses + +import pytest +from graph_retriever.strategies import ( + Eager, + Mmr, + Strategy, +) + + +def test_build_strategy_base(): + base_strategy = Eager(select_k=6, start_k=5, adjacent_k=9, max_depth=2) + + # base strategy with no changes + strategy = Strategy.build(base_strategy=base_strategy) + assert strategy == base_strategy + + # base strategy with changed k + strategy = Strategy.build(base_strategy=base_strategy, select_k=7) + assert strategy == Eager(select_k=7, start_k=5, adjacent_k=9, max_depth=2) + + # base strategy with invalid kwarg + with pytest.raises( + TypeError, match=r"got an unexpected keyword argument 'invalid_kwarg'" + ): + strategy = Strategy.build(base_strategy=base_strategy, invalid_kwarg=4) + assert strategy == base_strategy + + +def test_build_strategy_base_override(): + base_strategy = Eager(select_k=6, start_k=5, adjacent_k=9, max_depth=2) + override_strategy = Eager(select_k=7, start_k=4, 
adjacent_k=8, max_depth=3) + + # override base strategy + strategy = Strategy.build( + base_strategy=base_strategy, strategy=override_strategy, select_k=4 + ) + assert strategy == dataclasses.replace(override_strategy, select_k=4) + + # override base strategy and change params + strategy = Strategy.build( + base_strategy=base_strategy, + strategy=override_strategy, + select_k=3, + adjacent_k=7, + ) + assert strategy == Eager(select_k=3, start_k=4, adjacent_k=7, max_depth=3) + + # override base strategy and invalid kwarg + with pytest.raises( + TypeError, match=r"got an unexpected keyword argument 'invalid_kwarg'" + ): + strategy = Strategy.build( + base_strategy=base_strategy, + strategy=override_strategy, + select_k=4, + invalid_kwarg=4, + ) + + # attempt override base strategy with dict + with pytest.raises(ValueError, match="Unsupported 'strategy'"): + strategy = Strategy.build( + base_strategy=base_strategy, + strategy={"k": 9, "start_k": 7, "adjacent_k": 11}, + ) + + +def test_build_strategy_base_override_mmr(): + base_strategy = Eager(select_k=6, start_k=5, adjacent_k=9, max_depth=2) + override_strategy = Mmr( + select_k=7, start_k=4, adjacent_k=8, max_depth=3, lambda_mult=0.3 + ) + + # override base strategy with mmr kwarg + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'lambda_mult'" + ): + strategy = Strategy.build(base_strategy=base_strategy, lambda_mult=0.2) + assert strategy == base_strategy + + # override base strategy with mmr strategy + strategy = Strategy.build( + base_strategy=base_strategy, strategy=override_strategy, select_k=4 + ) + assert strategy == dataclasses.replace(override_strategy, select_k=4) + + # override base strategy with mmr strategy and mmr arg + strategy = Strategy.build( + base_strategy=base_strategy, + strategy=override_strategy, + select_k=4, + lambda_mult=0.2, + ) + assert strategy == Mmr( + select_k=4, start_k=4, adjacent_k=8, max_depth=3, lambda_mult=0.2 + ) + + # start with override strategy, 
change to base, try to set mmr arg + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'lambda_mult'" + ): + Strategy.build( + base_strategy=override_strategy, strategy=base_strategy, lambda_mult=0.2 + ) + + +def test_setting_k_sets_select_k(): + assert Eager(select_k=4) == Eager(k=4) + assert Mmr(select_k=3) == Mmr(k=3) diff --git a/packages/graph-retriever/tests/strategies/test_eager.py b/packages/graph-retriever/tests/strategies/test_eager.py new file mode 100644 index 00000000..d7079dfb --- /dev/null +++ b/packages/graph-retriever/tests/strategies/test_eager.py @@ -0,0 +1,375 @@ +import pytest +from graph_retriever import Content +from graph_retriever.adapters import Adapter +from graph_retriever.adapters.in_memory import InMemory +from graph_retriever.edges import Edges, MetadataEdge +from graph_retriever.strategies import ( + Eager, +) +from graph_retriever.testing.adapter_tests import cosine_similarity_scores +from graph_retriever.testing.embeddings import ( + ParserEmbeddings, + angular_2d_embedding, + earth_embeddings, +) + +from tests.testing.adapters import ( + ANIMALS_DEPTH_0_EXPECTED, + ANIMALS_QUERY, +) +from tests.testing.invoker import SyncOrAsync + + +async def test_earth(sync_or_async: SyncOrAsync): + embedding = earth_embeddings + greetings = Content( + id="greetings", + content="Typical Greetings", + embedding=embedding("Typical Greetings"), + metadata={ + "incoming": "parent", + }, + ) + + doc1 = Content( + id="doc1", + content="Hello World", + embedding=embedding("Hello World"), + metadata={"outgoing": "parent", "keywords": ["greeting", "world"]}, + ) + + doc2 = Content( + id="doc2", + content="Hello Earth", + embedding=embedding("Hello Earth"), + metadata={"outgoing": "parent", "keywords": ["greeting", "earth"]}, + ) + + store = InMemory(embedding, [greetings, doc1, doc2]) + + traversal = sync_or_async.traverse_sorted_ids( + store=store, + query="Earth", + edges=[("outgoing", "incoming"), ("keywords", "keywords")], + 
strategy=Eager(select_k=10), + ) + + assert await traversal(start_k=1, max_depth=0) == ["doc2"] + assert await traversal(start_k=2, max_depth=0) == ["doc1", "doc2"] + assert await traversal(start_k=1, max_depth=1) == ["doc1", "doc2", "greetings"] + + +async def test_animals_select_k(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with lists.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("keywords", "keywords")], + strategy=Eager(), + ) + assert len(await traversal(select_k=5)) == 5 + assert len(await traversal(select_k=3)) == 3 + + +async def test_animals_keywords(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with lists.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("keywords", "keywords")], + strategy=Eager(select_k=100, start_k=2), + ) + + assert await traversal(max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(max_depth=1) == [ + "cat", + "coyote", + "fox", + "gazelle", + "hyena", + "jackal", + "mongoose", + ] + assert await traversal(max_depth=2) == [ + "alpaca", + "bison", + "cat", + "coyote", + "crow", + "dog", + "fox", + "gazelle", + "horse", + "hyena", + "jackal", + "mongoose", + ] + + +async def test_animals_habitat(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with singular values.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "habitat")], + strategy=Eager(select_k=100, start_k=2), + ) + + assert await traversal(max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(max_depth=1) == [ + "bobcat", + "cobra", + "deer", + "elk", + "fox", + "mongoose", + ] + assert await traversal(max_depth=2) == [ + "bobcat", + "cobra", + "deer", + "elk", + "fox", + "mongoose", + ] + + +async def test_animals_populates_metrics_and_order( + 
animals: Adapter, sync_or_async: SyncOrAsync +): + """Test that score and depth are populated and results are returned in order.""" + results = await sync_or_async.traverse( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "habitat")], + strategy=Eager(select_k=100, start_k=2, max_depth=2), + )() + + expected_depths = { + "mongoose": 0, + "bobcat": 1, + "cobra": 1, + "deer": 1, + "elk": 1, + "fox": 0, + } + + expected_similarity_scores = cosine_similarity_scores( + animals, ANIMALS_QUERY, list(expected_depths.keys()) + ) + + for n in results: + assert n.extra_metadata["_similarity_score"] == pytest.approx( + expected_similarity_scores[n.id] + ), f"incorrect similarity score for {n.id}" + assert n.extra_metadata["_depth"] == expected_depths[n.id], ( + f"incorrect depth for {n.id}" + ) + + expected_ids_in_order = sorted( + expected_similarity_scores.keys(), + key=lambda id: expected_similarity_scores[id], + reverse=True, + ) + assert [n.id for n in results] == expected_ids_in_order, ( + "incorrect order of results" + ) + + +async def test_animals_habitat_to_keywords( + animals: Adapter, sync_or_async: SyncOrAsync +): + """Test traversing a from a singular field (habitat) to collection (keywords).""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "keywords")], + strategy=Eager(select_k=10, start_k=2), + ) + + assert await traversal(max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(max_depth=1) == ["bear", "bobcat", "fox", "mongoose"] + assert await traversal(max_depth=2) == [ + "bear", + "bobcat", + "caribou", + "fox", + "mongoose", + ] + + +async def test_animals_initial_roots(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing with initial root IDs.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("keywords", "keywords")], + strategy=Eager(select_k=10, start_k=0), + ) + + assert await 
traversal(initial_root_ids=["bobcat"], max_depth=0) == [ + "bobcat", + ] + assert await traversal(initial_root_ids=["bobcat"], max_depth=1) == [ + "bear", + "bobcat", + ] + assert await traversal(initial_root_ids=["bobcat"], max_depth=2) == [ + "bear", + "bobcat", + "moose", + "ostrich", + ] + assert await traversal( + initial_root_ids=["bobcat", "cheetah"], select_k=20, max_depth=2 + ) == [ + "bear", + "bobcat", + "cassowary", + "cheetah", + "dingo", + "eagle", + "emu", + "falcon", + "hawk", + "jaguar", + "kangaroo", + "leopard", + "moose", + ] + + +async def test_parsed(sync_or_async: SyncOrAsync): + """ + This is a test of set of Documents to pre-populate, + a graph vector store with entries placed in a certain way. + + Space of the entries (under Euclidean similarity): + + A0 (*) + .... AL AR <.... + : + : + v | . v + | : + TR | : + T0 --------------x-------------- B0 + TL | : + | : + | . + | . + | + FL FR + F0 + + the query point is meant to be at (*). + the A are bidirectionally with B + the A are outgoing to T + the A are incoming from F + The links are like: L with L, 0 with 0 and R with R. 
+ """ + embedding = ParserEmbeddings(2) + docs_a = [ + Content.new("AL", "[-1, 9]", embedding), + Content.new("A0", "[0, 10]", embedding), + Content.new("AR", "[1, 9]", embedding), + ] + docs_b = [ + Content.new("BL", "[9, 1]", [9.0, 1.0]), + Content.new("B0", "[10, 0]", [10.0, 0.0]), + Content.new("BR", "[9, -1]", [9.0, -1.0]), + ] + docs_f = [ + Content.new("FL", "[1, -9]", [1.0, -9.0]), + Content.new("F0", "[0, -10]", [0.0, -10.0]), + Content.new("FR", "[-1, -9]", [-1.0, -9.0]), + ] + docs_t = [ + Content.new("TL", "[-9, -1]", [-9.0, -1.0]), + Content.new("T0", "[-10, 0]", [-10.0, 0.0]), + Content.new("TR", "[-9, 1]", [-9.0, 1.0]), + ] + for doc_a, suffix in zip(docs_a, ["l", "0", "r"]): + doc_a.metadata["tag"] = f"ab_{suffix}" + doc_a.metadata["out"] = f"at_{suffix}" + doc_a.metadata["in"] = f"af_{suffix}" + for doc_b, suffix in zip(docs_b, ["l", "0", "r"]): + doc_b.metadata["tag"] = f"ab_{suffix}" + for doc_t, suffix in zip(docs_t, ["l", "0", "r"]): + doc_t.metadata["in"] = f"at_{suffix}" + for doc_f, suffix in zip(docs_f, ["l", "0", "r"]): + doc_f.metadata["out"] = f"af_{suffix}" + documents = docs_a + docs_b + docs_f + docs_t + + traversal = sync_or_async.traverse_sorted_ids( + store=InMemory(embedding, documents), + edges=[("out", "in"), ("tag", "tag")], + query="[2, 10]", + strategy=Eager(select_k=10, start_k=2), + ) + + assert await traversal(max_depth=0) == ["A0", "AR"] + assert await traversal(max_depth=2) == ["A0", "AR", "B0", "BR", "T0", "TR"] + + +async def test_ids(sync_or_async: SyncOrAsync): + embedding = angular_2d_embedding + v0 = Content.new("v0", "-0.124", embedding) + v1 = Content.new("v1", "+0.127", embedding, metadata={"mentions": ["v0"]}) + v2 = Content.new("v2", "+0.250", embedding, metadata={"mentions": ["v1", "v3"]}) + v3 = Content.new("v3", "+1.000", embedding) + store = InMemory(embedding, [v0, v1, v2, v3]) + + mentions_to_id = sync_or_async.traverse_sorted_ids( + store=store, + query="+0.249", + strategy=Eager(start_k=1), + 
edges=[("mentions", "$id")], + ) + assert await mentions_to_id(max_depth=0) == ["v2"] + assert await mentions_to_id(max_depth=1) == ["v1", "v2", "v3"] + assert await mentions_to_id(max_depth=2) == ["v0", "v1", "v2", "v3"] + + id_to_mentions = sync_or_async.traverse_sorted_ids( + store=store, + query="-0.125", + strategy=Eager(start_k=1), + edges=[("$id", "mentions")], + ) + assert await id_to_mentions(max_depth=0) == ["v0"] + assert await id_to_mentions(max_depth=1) == ["v0", "v1"] + assert await id_to_mentions(max_depth=2) == ["v0", "v1", "v2"] + + +async def test_edge_functions(sync_or_async: SyncOrAsync): + embedding = angular_2d_embedding + v0 = Content.new( + "v0", + "-0.124", + embedding, + metadata={"links": [("a", 5.0)], "incoming": ["a"]}, + ) + v1 = Content.new( + "v1", + "+1.000", + embedding, + metadata={"links": [("a", 6.0)], "incoming": ["a"]}, + ) + store = InMemory(embedding, [v0, v1]) + + def link_function(node: Content) -> Edges: + links = node.metadata.get("links", []) + incoming = node.metadata.get("incoming", []) + return Edges( + incoming={MetadataEdge("incoming", v) for v in incoming}, + outgoing={MetadataEdge("incoming", v) for v, _weight in links}, + ) + + traversal = sync_or_async.traverse_sorted_ids( + store=store, + query="-0.125", + edges=link_function, + strategy=Eager(start_k=1), + ) + assert await traversal(max_depth=0) == ["v0"] + assert await traversal(max_depth=1) == ["v0", "v1"] diff --git a/packages/graph-retriever/tests/strategies/test_mmr.py b/packages/graph-retriever/tests/strategies/test_mmr.py new file mode 100644 index 00000000..6028f682 --- /dev/null +++ b/packages/graph-retriever/tests/strategies/test_mmr.py @@ -0,0 +1,219 @@ +import pytest +from graph_retriever.adapters.base import Adapter +from graph_retriever.adapters.in_memory import InMemory +from graph_retriever.content import Content +from graph_retriever.strategies.mmr import Mmr +from graph_retriever.testing.adapter_tests import cosine_similarity_scores +from 
graph_retriever.testing.embeddings import angular_2d_embedding + +from tests.testing.adapters import ANIMALS_DEPTH_0_EXPECTED, ANIMALS_QUERY +from tests.testing.invoker import SyncOrAsync + + +async def test_animals_keywords(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with lists.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("keywords", "keywords")], + strategy=Mmr(start_k=2), + ) + + assert await traversal(select_k=4, max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(select_k=4, max_depth=1) == [ + "cat", + "fox", + "gazelle", + "mongoose", + ] + assert await traversal(select_k=6, max_depth=2) == [ + "cat", + "fox", + "gazelle", + "hyena", + "jackal", + "mongoose", + ] + + +async def test_rediscovering(animals: Adapter, sync_or_async: SyncOrAsync): + """Test for https://github.com/datastax/graph-rag/issues/167. + + The issue was nodes were being "rediscovered" and readded to the candidates + list in MMR. This violates the contract of the traversal. This test runs MMR + with a high number of iterations (select 97 nodes, 1 at a time) and a high + adjacent K (100) nodes at each iteration. This ensures that some nodes will + be rediscovered. 
+ """ + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + edges=[("habitat", "habitat")], + ) + result = await traversal( + query="cat", + strategy=Mmr(select_k=97, adjacent_k=100, start_k=100, lambda_mult=0.9), + ) + assert len(result) == 97 + + +async def test_animals_habitat(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with singular values.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "habitat")], + strategy=Mmr(select_k=10, start_k=2), + ) + + assert await traversal(max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(max_depth=1) == [ + "bobcat", + "cobra", + "deer", + "elk", + "fox", + "mongoose", + ] + assert await traversal(max_depth=2) == [ + "bobcat", + "cobra", + "deer", + "elk", + "fox", + "mongoose", + ] + + +async def test_animals_populates_metrics_and_order( + animals: Adapter, sync_or_async: SyncOrAsync +): + """Test that score and depth are populated and results are returned in order.""" + results = await sync_or_async.traverse( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "habitat")], + strategy=Mmr(start_k=2), + )(select_k=10, max_depth=2) + + expected_mmr_scores = { + "mongoose": 0.28934083735912275, + "fox": 0.11235363166682244, + "deer": 0.03904356616509902, + "bobcat": 0.0031420490138288626, + "cobra": -0.11165876337613051, + "elk": -0.22759302101291784, + } + expected_redundancy = { + "mongoose": 0.0, + "fox": 0.30860872491035307, + "deer": 0.10890754955985982, + "bobcat": 0.016695295174737335, + "cobra": 0.24437328139277636, + "elk": 0.4839543733764524, + } + expected_depths = { + "mongoose": 0, + "bobcat": 1, + "cobra": 1, + "deer": 1, + "elk": 1, + "fox": 0, + } + + expected_similarity_scores = cosine_similarity_scores( + animals, ANIMALS_QUERY, list(expected_depths.keys()) + ) + + for n in results: + assert n.extra_metadata["_similarity_score"] == pytest.approx( + 
expected_similarity_scores[n.id] + ), f"incorrect similarity score for {n.id}" + assert n.extra_metadata["_mmr_score"] == pytest.approx( + expected_mmr_scores[n.id] + ), f"incorrect score for {n.id}" + assert n.extra_metadata["_redundancy"] == pytest.approx( + expected_redundancy[n.id] + ), f"incorrect redundancy for {n.id}" + assert n.extra_metadata["_depth"] == expected_depths[n.id], ( + f"incorrect depth for {n.id}" + ) + + expected_ids_in_order = sorted( + expected_mmr_scores.keys(), key=lambda id: expected_mmr_scores[id], reverse=True + ) + assert [n.id for n in results] == expected_ids_in_order, ( + "incorrect order of results" + ) + + +async def test_animals_habitat_to_keywords( + animals: Adapter, sync_or_async: SyncOrAsync +): + """Test traversing a from a singular field (habitat) to collection (keywords).""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "keywords")], + strategy=Mmr(select_k=10, start_k=2), + ) + + assert await traversal(max_depth=0) == ANIMALS_DEPTH_0_EXPECTED + assert await traversal(max_depth=1) == ["bear", "bobcat", "fox", "mongoose"] + assert await traversal(max_depth=2) == [ + "bear", + "bobcat", + "caribou", + "fox", + "mongoose", + ] + + +async def test_angular(sync_or_async: SyncOrAsync): + """ + Test end to end construction and MMR search. + + The embedding function used here ensures `texts` become + the following vectors on a circle (numbered v0 through v3): + + ______ v2 + // \\ + // \\ v1 + v3 || . || query + \\ // v0 + \\______// (N.B. very crude drawing) + + With start_k==2 and select_k==2, when query is at (1, ), + one expects that v2 and v0 are returned (in some order) + because v1 is "too close" to v0 (and v0 is closer than v1)). + + Both v2 and v3 are reachable via edges from v0, so once it is + selected, those are both considered. 
+ """ + embedding = angular_2d_embedding + v0 = Content.new("v0", "-0.124", embedding, metadata={"outgoing": "link"}) + v1 = Content.new("v1", "+0.127", embedding) + v2 = Content.new("v2", "+0.25", embedding, metadata={"incoming": "link"}) + v3 = Content.new("v3", "+1.0", embedding, metadata={"incoming": "link"}) + + traversal = sync_or_async.traverse_sorted_ids( + query="0.0", + store=InMemory(embedding, [v0, v1, v2, v3]), + edges=[("outgoing", "incoming")], + strategy=Mmr(select_k=2, start_k=2, max_depth=2), + ) + + assert await traversal() == ["v0", "v2"] + assert await traversal() == ["v0", "v2"] + + # With max depth 0, no edges are traversed, so this doesn't reach v2 or v3. + # So it ends up picking "v1" even though it's similar to "v0". + assert await traversal(max_depth=0) == ["v0", "v1"] + + # With max depth 0 but higher `start_k`, we encounter v2 + assert await traversal(start_k=3, max_depth=0) == ["v0", "v2"] + + # v0 score is .46, v2 score is 0.16 so it won't be chosen. + assert await traversal(min_mmr_score=0.2) == ["v0"] + + # with select_k=4 we should get all of the documents. 
+ assert await traversal(select_k=4) == ["v0", "v1", "v2", "v3"] diff --git a/packages/graph-retriever/tests/strategies/test_scored.py b/packages/graph-retriever/tests/strategies/test_scored.py new file mode 100644 index 00000000..61658035 --- /dev/null +++ b/packages/graph-retriever/tests/strategies/test_scored.py @@ -0,0 +1,97 @@ +import pytest +from graph_retriever.adapters.base import Adapter +from graph_retriever.strategies.scored import Scored +from graph_retriever.testing.adapter_tests import cosine_similarity_scores +from graph_retriever.types import Node + +from tests.testing.adapters import ANIMALS_QUERY +from tests.testing.invoker import SyncOrAsync + + +def score_animals(node: Node) -> float: + return (20 - len(node.id)) + ((ord(node.id[0]) - ord("a")) / 100) + + +async def test_animals_keywords(animals: Adapter, sync_or_async: SyncOrAsync): + """Test traversing a bi-directional field with lists.""" + traversal = sync_or_async.traverse_sorted_ids( + store=animals, + query=ANIMALS_QUERY, + edges=[("keywords", "keywords")], + strategy=Scored(scorer=score_animals, start_k=2), + ) + + # # start_k=2 => 2 closest matches to the query + assert await traversal(max_depth=0) == [ + "fox", + "mongoose", + ] + # # select_k=8, we start with 2 closest and choose 6 more with shortest names + assert await traversal(select_k=8, max_depth=1) == [ + "cat", + "coyote", + "fox", + "gazelle", + "hyena", + "jackal", + "mongoose", + ] + # select_k=4, we start with 2 closest and choose 2 more with shortest names + # (from "cat", "coyote", "gazelle", "hyena", "jackal") + assert await traversal(select_k=4, max_depth=1) == [ + "cat", + "fox", + "hyena", + "jackal", + ] + + +async def test_animals_populates_metrics_and_order( + animals: Adapter, sync_or_async: SyncOrAsync +): + """Test that score and depth are populated and results are returned in order.""" + results = await sync_or_async.traverse( + store=animals, + query=ANIMALS_QUERY, + edges=[("habitat", "habitat")], + 
strategy=Scored(scorer=score_animals, start_k=2), + )(select_k=8, max_depth=2) + + expected_scores = { + "mongoose": 12.12, + "bobcat": 14.01, + "cobra": 15.02, + "deer": 16.03, + "elk": 17.04, + "fox": 17.05, + } + expected_depths = { + "mongoose": 0, + "bobcat": 1, + "cobra": 1, + "deer": 1, + "elk": 1, + "fox": 0, + } + + expected_similarity_scores = cosine_similarity_scores( + animals, ANIMALS_QUERY, list(expected_depths.keys()) + ) + + for n in results: + assert n.extra_metadata["_similarity_score"] == pytest.approx( + expected_similarity_scores[n.id] + ), f"incorrect similarity score for {n.id}" + assert n.extra_metadata["_score"] == expected_scores[n.id], ( + f"incorrect score for {n.id}" + ) + assert n.extra_metadata["_depth"] == expected_depths[n.id], ( + f"incorrect depth for {n.id}" + ) + + expected_ids_in_order = sorted( + expected_scores.keys(), key=lambda id: expected_scores[id], reverse=True + ) + assert [n.id for n in results] == expected_ids_in_order, ( + "incorrect order of results" + ) diff --git a/packages/langchain-graph-retriever/tests/unit_tests/edges/__init__.py b/packages/graph-retriever/tests/testing/__init__.py similarity index 100% rename from packages/langchain-graph-retriever/tests/unit_tests/edges/__init__.py rename to packages/graph-retriever/tests/testing/__init__.py diff --git a/packages/graph-retriever/tests/testing/adapters.py b/packages/graph-retriever/tests/testing/adapters.py new file mode 100644 index 00000000..563535af --- /dev/null +++ b/packages/graph-retriever/tests/testing/adapters.py @@ -0,0 +1,38 @@ +import json +import os + +import pytest +from graph_retriever import Content +from graph_retriever.adapters import Adapter +from graph_retriever.adapters.in_memory import InMemory +from graph_retriever.testing import embeddings + + +def load_animal_content(embedding: embeddings.AnimalEmbeddings) -> list[Content]: + documents = [] + + path = os.path.abspath( + os.path.join(os.path.dirname(__file__), 
"../../../../data/animals.jsonl") + ) + with open(path) as file: + for line in file: + data = json.loads(line.strip()) + documents.append( + Content( + id=data["id"], + content=data["text"], + embedding=embedding(data["text"]), + metadata=data["metadata"], + ) + ) + return documents + + +@pytest.fixture(scope="session") +def animals() -> Adapter: + embedding = embeddings.AnimalEmbeddings() + return InMemory(embedding, load_animal_content(embedding)) + + +ANIMALS_QUERY: str = "small agile mammal" +ANIMALS_DEPTH_0_EXPECTED: list[str] = ["fox", "mongoose"] diff --git a/packages/graph-retriever/tests/testing/invoker.py b/packages/graph-retriever/tests/testing/invoker.py new file mode 100644 index 00000000..ec62f20f --- /dev/null +++ b/packages/graph-retriever/tests/testing/invoker.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import abc +import dataclasses +from collections.abc import Callable, Sequence +from typing import Any, Generic, TypeVar + +import pytest +from graph_retriever import Node, atraverse, traverse +from graph_retriever.adapters import Adapter +from graph_retriever.edges import EdgeFunction, EdgeSpec +from graph_retriever.strategies import Strategy + + +class SyncOrAsync(abc.ABC): + @abc.abstractmethod + async def _traverse(self, **kwargs: Any) -> list[Node]: ... 
+ + def traverse_sorted_ids( + self, + store: Adapter, + query: str | None = None, + edges: list[EdgeSpec] | EdgeFunction | None = None, + strategy: Strategy | None = None, + ) -> TraversalCall[list[str]]: + return TraversalCall( + transform=lambda nodes: sorted([n.id for n in nodes]), + sync_or_async=self, + store=store, + query=query, + edges=edges, + strategy=strategy, + ) + + def traverse( + self, + store: Adapter, + query: str | None = None, + edges: list[EdgeSpec] | EdgeFunction | None = None, + strategy: Strategy | None = None, + ) -> TraversalCall[list[Node]]: + return TraversalCall( + transform=lambda nodes: nodes, + sync_or_async=self, + store=store, + query=query, + edges=edges, + strategy=strategy, + ) + + +class SyncTraversal(SyncOrAsync): + async def _traverse(self, **kwargs): + return traverse(**kwargs) + + +class AsyncTraversal(SyncOrAsync): + async def _traverse(self, **kwargs): + return await atraverse(**kwargs) + + +T = TypeVar("T") + + +def _pick(name: str, call: T | None, init: T | None) -> T: + value = call or init + if value is None: + raise ValueError(f"{name} must be set in call or init") + return value + + +class TraversalCall(Generic[T]): + def __init__( + self, + transform: Callable[[list[Node]], T], + sync_or_async: SyncOrAsync, + store: Adapter, + query: str | None = None, + edges: list[EdgeSpec] | EdgeFunction | None = None, + strategy: Strategy | None = None, + ) -> None: + self.transform = transform + self.sync_or_async = sync_or_async + self.store = store + self.query = query + self.edges = edges + self.strategy = strategy + + async def __call__( + self, + query: str | None = None, + edges: list[EdgeSpec] | EdgeFunction | None = None, + strategy: Strategy | None = None, + metadata_filter: dict[str, Any] | None = None, + initial_root_ids: Sequence[str] = (), + **kwargs: Any, + ) -> T: + strategy = _pick("strategy", strategy, self.strategy) + strategy = dataclasses.replace(strategy, **kwargs) + + results = await 
self.sync_or_async._traverse( + query=_pick("query", query, self.query), + store=self.store, + edges=_pick("edges", edges, self.edges), + strategy=strategy, + metadata_filter=metadata_filter, + initial_root_ids=initial_root_ids, + ) + return self.transform(results) + + +@pytest.fixture(scope="function", params=["sync", "async"]) +def sync_or_async(request: pytest.FixtureRequest) -> SyncOrAsync: + if request.param == "sync": + return SyncTraversal() + elif request.param == "async": + return AsyncTraversal() + else: + raise ValueError(f"Unexpected value '{request.param}'") diff --git a/packages/graph-retriever/tests/utils/test_batched.py b/packages/graph-retriever/tests/utils/test_batched.py new file mode 100644 index 00000000..e778f0f4 --- /dev/null +++ b/packages/graph-retriever/tests/utils/test_batched.py @@ -0,0 +1,7 @@ +from graph_retriever.utils.batched import batched + + +def test_batched(): + assert tuple(batched([], 2)) == () + assert tuple(batched([0, 1, 2, 3, 4], 2)) == ((0, 1), (2, 3), (4,)) + assert tuple(batched([0, 1, 2, 3, 4, 5], 2)) == ((0, 1), (2, 3), (4, 5)) diff --git a/packages/graph-retriever/tests/utils/test_merge.py b/packages/graph-retriever/tests/utils/test_merge.py new file mode 100644 index 00000000..23e75d4d --- /dev/null +++ b/packages/graph-retriever/tests/utils/test_merge.py @@ -0,0 +1,94 @@ +import asyncio +from collections.abc import AsyncIterator + +import pytest +from graph_retriever.utils.merge import amerge + + +async def async_generator( + values: list[int], event: asyncio.Event | None = None +) -> AsyncIterator[int]: + """Helper async generator that yields values with an optional delay.""" + for value in values: + if event: + await event.wait() + event.clear() + yield value + + +async def test_amerge_basic(): + """Test merging two basic async iterators.""" + gen1 = async_generator([1, 3, 5]) + gen2 = async_generator([2, 4, 6]) + + result = [val async for val in amerge(gen1, gen2)] + assert sorted(result) == [1, 2, 3, 4, 5, 6] + 
+ +async def test_merge_empty_iterators(): + """Test merging when one of the iterators is empty.""" + gen1 = async_generator([]) + gen2 = async_generator([1, 2, 3]) + + result = [val async for val in amerge(gen1, gen2)] + assert result == [1, 2, 3] # Should return only the non-empty iterator's items + + +async def test_merge_all_empty(): + """Test merging when all iterators are empty.""" + gen1 = async_generator([]) + gen2 = async_generator([]) + + result = [val async for val in amerge(gen1, gen2)] + assert result == [] # Should return an empty list + + +async def test_merge_large_iterators(): + """Test merging large iterators.""" + gen1 = async_generator(range(100)) + gen2 = async_generator(range(100, 200)) + + result = [val async for val in amerge(gen1, gen2)] + result.sort() + assert result == list(range(200)) # Ensure all items are included + + +async def test_merge_unordered_iterators(): + """Ensure iterators are merged in order of availability, not sorting.""" + e1 = asyncio.Event() + e2 = asyncio.Event() + gen1 = async_generator([10, 30, 50], e1) + gen2 = async_generator([20, 40], e2) + + it = amerge(gen1, gen2) + e1.set() + assert await anext(it) == 10 + e1.set() + assert await anext(it) == 30 + e1.set() + assert await anext(it) == 50 + e1.set() + e2.set() + assert await anext(it) == 20 + e2.set() + assert await anext(it) == 40 + e1.set() + e2.set() + assert await anext(it, None) is None + + +async def test_merge_exception_handling(): + """Ensure that an exception in one iterator does not break everything.""" + + async def faulty_generator(): + """Async generator that raises an exception mid-way.""" + yield 1 + yield 2 + raise ValueError("Test exception") + yield 3 # Should never be reached + + gen1 = async_generator([10, 20, 30]) + gen2 = faulty_generator() + + with pytest.raises(ValueError, match="Test exception"): + _result = [val async for val in amerge(gen1, gen2)] diff --git a/packages/langchain-graph-retriever/pyproject.toml 
b/packages/langchain-graph-retriever/pyproject.toml index 93aba587..de2fa26a 100644 --- a/packages/langchain-graph-retriever/pyproject.toml +++ b/packages/langchain-graph-retriever/pyproject.toml @@ -40,30 +40,66 @@ classifiers = [ ] dependencies = [ + "backoff>=2.2.1", + "graph-retriever", + "immutabledict>=4.2.1", "langchain-core>=0.3.29", "networkx>=3.4.2", - "numpy>=1.26.4", "pydantic>=2.10.4", "typing-extensions>=4.12.2", ] [project.urls] -"Homepage" = "https://github.com/datastax/graph-rag" +"Homepage" = "https://datastax.github.io/graph-rag" +"GitHub" = "https://github.com/datastax/graph-rag" "Bug Reports" = "https://github.com/datastax/graph-rag/issues" -"Documentation" = "https://datastax.github.com/graph-rag" [tool.deptry.package_module_name_map] +astrapy = "astrapy" +backoff = "backoff" beautifulsoup4 = "bs4" -opensearch-py = "opensearchpy" +cassio = "cassio" +chromadb = "chromadb" +gliner = "gliner" +graph-retriever = "graph_retriever" +httpx = "httpx" +immutabledict = "immutabledict" +keybert = "keybert" +langchain-astradb = "langchain_astradb" +langchain-chroma = "langchain_chroma" +langchain-community = "langchain_community" +langchain-core = "langchain_core" +langchain-tests = "langchain_tests" +mypy = "mypy" +networkx = "networkx" +networkx-stubs = "networkx_stubs" +numpy = "numpy" +opensearch-py = "opensearchpy" +pydantic = "pydantic" +pytest = "pytest" +pytest-asyncio = "pytest_asyncio" +pytest-cov = "pytest_cov" +python-dotenv = "dotenv" +simsimd = "simsimd" +spacy = "spacy" +testcontainers = "testcontainers" +tqdm = "tqdm" +types-beautifulsoup4 = "types_beautifulsoup4" +typing-extensions = "typing_extensions" [tool.deptry.per_rule_ignores] -DEP002 = ["cassio"] +DEP002 = ["cassio", "opensearch-py"] + +[tool.uv.sources] +graph-retriever = { workspace = true } [project.optional-dependencies] astra = [ - "langchain-astradb>=0.5.2", + "astrapy>=2.0.0", + "httpx>=0.28.1", + "langchain-astradb>=0.6.0", ] -beautifulsoup4 = [ +html = [
"beautifulsoup4>=4.12.3", ] cassandra = [ @@ -84,8 +120,8 @@ opensearch = [ "langchain-community>=0.3.14", "opensearch-py>=2.8.0", ] -simsimd = [ - "simsimd>=6.2.1", +spacy = [ + "spacy>=3.8.4", ] [build-system] @@ -95,10 +131,9 @@ build-backend = "hatchling.build" [dependency-groups] dev = [ "cassio>=0.1.10", - "langchain-astradb>=0.5.2", + "langchain-astradb>=0.5.3", "langchain-chroma>=0.2.0", "langchain-community>=0.3.14", - "langchain-opensearch>=0.0.2", "mypy>=1.14.1", "networkx-stubs>=0.0.1", "pytest>=8.3.4", @@ -107,5 +142,5 @@ dev = [ "simsimd>=6.2.1", "testcontainers>=4.9.0", "langchain-tests>=0.3.8", + "types-beautifulsoup4>=4.12.0.20250204", ] - diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/__init__.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/__init__.py index 38a5e524..ed632cd7 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/__init__.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/__init__.py @@ -1,7 +1,5 @@ -from .edges.metadata import Id from .graph_retriever import GraphRetriever __all__ = [ "GraphRetriever", - "Id", ] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/_conversion.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/_conversion.py new file mode 100644 index 00000000..f2e77bb0 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/_conversion.py @@ -0,0 +1,45 @@ +from graph_retriever.content import Content +from graph_retriever.types import Node +from langchain_core.documents import Document + +METADATA_EMBEDDING_KEY = "__embedding" + + +def node_to_doc(node: Node) -> Document: + return Document( + id=node.id, + page_content=node.content, + metadata={**node.extra_metadata, **node.metadata}, + ) + + +def doc_to_content(doc: Document, *, embedding: list[float] | None = None) -> Content: + """ + Convert a LangChain document to a `Content`. 
+ + Parameters + ---------- + doc : + The document to convert. + + embedding : + The embedding of the document. If not provided, the + `doc.metadata[METADATA_EMBEDDING_KEY]` should be set to the embedding. + + Returns + ------- + : + The converted content. + """ + assert doc.id is not None + + if embedding is None: + embedding = doc.metadata.pop(METADATA_EMBEDDING_KEY) + assert embedding is not None + + return Content( + id=doc.id, + content=doc.page_content, + embedding=embedding, + metadata=doc.metadata, + ) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/_traversal.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/_traversal.py deleted file mode 100644 index 57607127..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/_traversal.py +++ /dev/null @@ -1,495 +0,0 @@ -"""Implements the traversal logic for graph-based document retrieval.""" - -from collections.abc import Iterable, Sequence -from typing import Any - -from langchain_core.documents import Document - -from langchain_graph_retriever.adapters.base import METADATA_EMBEDDING_KEY, Adapter -from langchain_graph_retriever.edges.metadata import EdgeSpec, MetadataEdgeFunction -from langchain_graph_retriever.strategies import Strategy -from langchain_graph_retriever.types import Edge, EdgeFunction, Node - - -class Traversal: - """ - Handles a single traversal operation for a graph-based retrieval system. - - The `Traversal` class manages the process of discovering, visiting, and selecting - nodes within a graph, based on a query and a traversal strategy. It supports - synchronous and asynchronous traversal, enabling retrieval of documents in a - controlled, iterative manner. - - This class should not be reused between traversals. - - Parameters - ---------- - query : str - The query string for the traversal. 
- edges : list[EdgeSpec] | EdgeFunction - An `EdgeFunction` or the edge specifications to use for creating a - `MetadataEdgeFunction`. - strategy : Strategy - The traversal strategy that defines how nodes are discovered, selected, - and finalized. - store : Adapter - The vector store adapter used for similarity searches and document - retrieval. - metadata_filter : dict[str, Any], optional - Optional filter for metadata during traversal. - initial_root_ids : Sequence[str], optional - IDs of the initial root nodes for the traversal. - store_kwargs : dict[str, Any], optional - Additional arguments passed to the store adapter. - """ - - def __init__( - self, - query: str, - *, - edges: list[EdgeSpec] | EdgeFunction, - strategy: Strategy, - store: Adapter, - metadata_filter: dict[str, Any] | None = None, - initial_root_ids: Sequence[str] = (), - store_kwargs: dict[str, Any] = {}, - ) -> None: - self.query = query - - self.edge_function: EdgeFunction - if isinstance(edges, list): - self.edge_function = MetadataEdgeFunction(edges) - elif callable(edges): - self.edge_function = edges - else: - raise ValueError(f"Invalid edges: {edges}") - - self.strategy = strategy - self.store = store - self.metadata_filter = metadata_filter - self.initial_root_ids = initial_root_ids - self.store_kwargs = store_kwargs - - self._used = False - self._visited_edges: set[Edge] = set() - self._edge_depths: dict[Edge, int] = {} - self._doc_cache: dict[str, Document] = {} - self._node_cache: dict[str, Node] = {} - self._selected_nodes: dict[str, Node] = {} - - def _check_first_use(self): - assert not self._used, "Traversals cannot be re-used." - self._used = True - - def traverse(self) -> list[Document]: - """ - Execute the traversal synchronously. - - This method retrieves initial candidates, discovers and visits nodes, - and explores edges iteratively until the traversal is complete. - - Returns - ------- - list[Document] - The final set of documents resulting from the traversal. 
- """ - self._check_first_use() - - # Retrieve initial candidates. - initial_docs = self._fetch_initial_candidates() - self.add_docs(initial_docs, depth=0) - - if self.initial_root_ids: - neighborhood_adjacent_docs = self._fetch_neighborhood_candidates() - self.add_docs(neighborhood_adjacent_docs, depth=0) - - while True: - # Select the next batch of nodes, and (new) outgoing edges. - next_outgoing_edges = self.select_next_edges() - if next_outgoing_edges is None: - break - elif next_outgoing_edges: - # Find the (new) document with incoming edges from those edges. - adjacent_docs = self._fetch_adjacent(next_outgoing_edges) - self.add_docs(adjacent_docs) - - return self.finish() - - async def atraverse(self) -> list[Document]: - """ - Execute the traversal asynchronously. - - This method retrieves initial candidates, discovers and visits nodes, - and explores edges iteratively until the traversal is complete. - - Returns - ------- - list[Document] - The final set of documents resulting from the traversal. - """ - self._check_first_use() - - # Retrieve initial candidates. - initial_docs = await self._afetch_initial_candidates() - self.add_docs(initial_docs, depth=0) - - if self.initial_root_ids: - neighborhood_adjacent_docs = await self._afetch_neighborhood_candidates() - self.add_docs(neighborhood_adjacent_docs, depth=0) - - while True: - # Select the next batch of nodes, and (new) outgoing edges. - next_outgoing_edges = self.select_next_edges() - if next_outgoing_edges is None: - break - elif next_outgoing_edges: - # Find the (new) document with incoming edges from those edges. - adjacent_docs = await self._afetch_adjacent(next_outgoing_edges) - self.add_docs(adjacent_docs) - - return self.finish() - - def _fetch_initial_candidates(self) -> list[Document]: - """ - Retrieve initial candidates based on the query and strategy. - - Returns - ------- - list[Document] - The initial set of documents retrieved via similarity search. 
- """ - query_embedding, docs = self.store.similarity_search_with_embedding( - query=self.query, - k=self.strategy.start_k, - filter=self.metadata_filter, - **self.store_kwargs, - ) - self.strategy._query_embedding = query_embedding - return docs - - async def _afetch_initial_candidates(self) -> list[Document]: - query_embedding, docs = await self.store.asimilarity_search_with_embedding( - query=self.query, - k=self.strategy.start_k, - filter=self.metadata_filter, - **self.store_kwargs, - ) - self.strategy._query_embedding = query_embedding - return docs - - def _fetch_neighborhood_candidates(self) -> Iterable[Document]: - """ - Retrieve neighborhood candidates for traversal. - - This method fetches initial root documents, converts them to nodes, and records - their outgoing edges as visited. It then fetches additional candidates adjacent - to these nodes. - - Returns - ------- - Iterable[Document] - The set of documents adjacent to the initial root nodes. - """ - neighborhood_docs = self.store.get(self.initial_root_ids) - neighborhood_nodes = self.add_docs(neighborhood_docs) - - # Record the neighborhood nodes (specifically the outgoing edges from the - # neighborhood) as visited. - outgoing_edges = self.visit_nodes(neighborhood_nodes.values()) - - # Fetch the candidates. - return self._fetch_adjacent(outgoing_edges) - - async def _afetch_neighborhood_candidates( - self, - ) -> Iterable[Document]: - """ - Asynchronously retrieve neighborhood candidates for traversal. - - This method fetches initial root documents, converts them to nodes, and records - their outgoing edges as visited. It then fetches additional candidates adjacent - to these nodes. - - Returns - ------- - Iterable[Document] - The set of documents adjacent to the initial root nodes. 
- """ - neighborhood_docs = await self.store.aget(self.initial_root_ids) - neighborhood_nodes = self.add_docs(neighborhood_docs) - - # Record the neighborhood nodes (specifically the outgoing edges from the - # neighborhood) as visited. - outgoing_edges = self.visit_nodes(neighborhood_nodes.values()) - - # Fetch the candidates. - return await self._afetch_adjacent(outgoing_edges) - - def _fetch_adjacent(self, outgoing_edges: set[Edge]) -> Iterable[Document]: - """ - Retrieve documents adjacent to the specified outgoing edges. - - This method uses the vector store adapter to fetch documents connected to - the provided edges. - - Parameters - ---------- - outgoing_edges : set[Edge] - The edges whose adjacent documents need to be fetched. - - Returns - ------- - Iterable[Document] - The set of documents adjacent to the specified edges. - """ - return self.store.get_adjacent( - outgoing_edges=outgoing_edges, - query_embedding=self.strategy._query_embedding, - adjacent_k=self.strategy.adjacent_k, - filter=self.metadata_filter, - **self.store_kwargs, - ) - - async def _afetch_adjacent(self, outgoing_edges: set[Edge]) -> Iterable[Document]: - """ - Asynchronously retrieve documents adjacent to the specified outgoing edges. - - This method uses the vector store adapter to fetch documents connected to - the provided edges. - - Parameters - ---------- - outgoing_edges : set[Edge] - The edges whose adjacent documents need to be fetched. - - Returns - ------- - Iterable[Document] - The set of documents adjacent to the specified edges. - """ - return await self.store.aget_adjacent( - outgoing_edges=outgoing_edges, - query_embedding=self.strategy._query_embedding, - adjacent_k=self.strategy.adjacent_k, - filter=self.metadata_filter, - **self.store_kwargs, - ) - - def _doc_to_new_node( - self, doc: Document, *, depth: int | None = None - ) -> Node | None: - """ - Convert a document into a new node for the traversal. 
- - This method checks whether the document has already been processed. If not, - it creates a new `Node` instance, associates it with the document's metadata, - and calculates its depth based on the incoming edges. - - Parameters - ---------- - doc : Document - The document to convert into a node. - depth : int, optional - The depth of the node. If None, the depth is calculated based on the - incoming edges. - - Returns - ------- - Node | None: - The newly created node, or None if the document has already been - processed. - - Raises - ------ - ValueError - If the document does not have an ID. - """ - if doc.id is None: - raise ValueError("All documents should have ids") - if doc.id in self._node_cache: - return None - - doc = self._doc_cache.setdefault(doc.id, doc) - assert doc.id is not None - - # This is a bit weird. To focus on nodes we want to create it - # before calling the `edge_function`. But that means we don't - # know the depth - node = Node( - id=doc.id, - depth=0, - embedding=doc.metadata[METADATA_EMBEDDING_KEY], - metadata=doc.metadata, - ) - - # Determine incoming/outgoing edges. - edges = self.edge_function(node) - - # Compute the depth - if depth is None: - depth = min( - [ - d - for e in edges.incoming - if (d := self._edge_depths.get(e, None)) is not None - ], - default=0, - ) - - # Now, set the `incoming_edges`, `outgoing_edges` and `depth`. - node.incoming_edges = edges.incoming - node.outgoing_edges = edges.outgoing - node.depth = depth - - self._node_cache[doc.id] = node - - return node - - def add_docs( - self, docs: Iterable[Document], *, depth: int | None = None - ) -> dict[str, Node]: - """ - Add a batch of documents to the traversal and convert them into nodes. - - This method records the depth of new nodes, filters them based on the - strategy's maximum depth, and updates the strategy with the discovered nodes. - - Parameters - ---------- - docs : Iterable[Document] - The documents to add. 
- depth : int, optional - The depth to assign to the nodes. If None, the depth is inferred - based on the incoming edges. - - Returns - ------- - dict[str, Node] - A dictionary of node IDs to the newly created nodes. - """ - # Record the depth of new nodes. - nodes = { - node.id: node - for doc in docs - if (node := self._doc_to_new_node(doc, depth=depth)) is not None - if ( - self.strategy.max_depth is None or node.depth <= self.strategy.max_depth - ) - } - self.strategy.discover_nodes(nodes) - return nodes - - def visit_nodes(self, nodes: Iterable[Node]) -> set[Edge]: - """ - Mark nodes as visited and return their new outgoing edges. - - This method updates the traversal state by marking the provided nodes as visited - and recording their outgoing edges. Outgoing edges that have not been visited - before are identified and added to the set of edges to explore in subsequent - traversal steps. - - Parameters - ---------- - nodes : Iterable[Node] - The nodes to mark as visited. - - Returns - ------- - set[Edge] - The set of new outgoing edges that need to be explored. - - Notes - ----- - - The `new_outgoing_edges` dictionary tracks the depth of each outgoing - edge. - - If a node's outgoing edge leads to a lower depth, the edge's depth is - updated to reflect the shortest path. - - The `_visited_edges` set is updated to include all outgoing edges - from the provided nodes. 
- """ - new_outgoing_edges: dict[Edge, int] = {} - for node in nodes: - node_new_outgoing_edges = node.outgoing_edges - self._visited_edges - for edge in node_new_outgoing_edges: - depth = new_outgoing_edges.setdefault(edge, node.depth + 1) - if node.depth + 1 < depth: - new_outgoing_edges[edge] = node.depth + 1 - - self._edge_depths.update(new_outgoing_edges) - - new_outgoing_edge_set = set(new_outgoing_edges.keys()) - self._visited_edges.update(new_outgoing_edge_set) - return new_outgoing_edge_set - - def select_next_edges(self) -> set[Edge] | None: - """ - Select the next set of edges to explore. - - This method uses the traversal strategy to select the next batch of nodes - and identifies new outgoing edges for exploration. - - Returns - ------- - set[Edge] | None - The set of new edges to explore, or None if the traversal is - complete. - """ - remaining = self.strategy.k - len(self._selected_nodes) - - if remaining <= 0: - return None - - next_nodes = self.strategy.select_nodes(limit=remaining) - if not next_nodes: - return None - - next_nodes = [n for n in next_nodes if n.id not in self._selected_nodes] - if len(next_nodes) == 0: - return None - - self._selected_nodes.update({n.id: n for n in next_nodes}) - new_outgoing_edges = self.visit_nodes(next_nodes) - return new_outgoing_edges - - def finish(self) -> list[Document]: - """ - Finalize the traversal and return the final set of documents. - - This method finalizes the selected nodes using the traversal strategy, - processes their metadata, and assembles the final list of documents. - - Returns - ------- - list[Document] - The final set of documents resulting from the traversal. - - Raises - ------ - RuntimeError - If unexpected situations arise, such as a document missing from the cache. 
- """ - final_nodes = self.strategy.finalize_nodes(self._selected_nodes.values()) - docs = [] - for node in final_nodes: - doc = self._doc_cache.get(node.id, None) - if doc is None: - raise RuntimeError( - f"unexpected, cache should contain doc id: {node.id}" - ) - # Compute new metadata from extra metadata and metadata. - # This allows us to avoid modifying the orginal metadata. - metadata = { - "depth": node.depth, - **node.extra_metadata, - **doc.metadata, - } - # Remove the metadata embedding key. TODO: Find a better way to do this. - metadata.pop(METADATA_EMBEDDING_KEY, None) - docs.append( - Document( - id=node.id, - page_content=doc.page_content, - metadata=metadata, - ) - ) - return docs diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/__init__.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/__init__.py index d4e62b5f..e69de29b 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/__init__.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/__init__.py @@ -1,7 +0,0 @@ -from .base import Adapter -from .inference import infer_adapter - -__all__ = [ - "Adapter", - "infer_adapter", -] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/astra.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/astra.py index 7d871433..644b0938 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/astra.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/astra.py @@ -1,196 +1,440 @@ """Provides an adapter for AstraDB vector store integration.""" -from collections.abc import Sequence -from typing import Any +from __future__ import annotations +from collections.abc import AsyncIterable, AsyncIterator, Iterable, Iterator, Sequence +from typing import Any, Literal, cast, overload + +import backoff +from graph_retriever import 
Content +from graph_retriever.edges import Edge, IdEdge, MetadataEdge +from graph_retriever.utils import merge +from graph_retriever.utils.batched import batched +from graph_retriever.utils.top_k import top_k +from immutabledict import immutabledict from typing_extensions import override try: from langchain_astradb import AstraDBVectorStore + from langchain_astradb.vectorstores import AstraDBQueryResult except (ImportError, ModuleNotFoundError): raise ImportError("please `pip install langchain-astradb`") -from langchain_core.documents import Document -from .base import METADATA_EMBEDDING_KEY, Adapter +try: + import astrapy +except (ImportError, ModuleNotFoundError): + raise ImportError("please `pip install astrapy`") +import httpx +from graph_retriever.adapters import Adapter + +_EXCEPTIONS_TO_RETRY = ( + httpx.TransportError, + astrapy.exceptions.DataAPIException, +) +_MAX_RETRIES = 3 + + +def _extract_queries(edges: set[Edge]) -> tuple[dict[str, Iterable[Any]], set[str]]: + metadata: dict[str, set[Any]] = {} + ids: set[str] = set() + for edge in edges: + if isinstance(edge, MetadataEdge): + metadata.setdefault(edge.incoming_field, set()).add(edge.value) + elif isinstance(edge, IdEdge): + ids.add(edge.id) + else: + raise ValueError(f"Unsupported edge {edge}") -class AstraAdapter(Adapter[AstraDBVectorStore]): + return (cast(dict[str, Iterable[Any]], metadata), ids) + + +def _metadata_queries( + user_filters: dict[str, Any] | None, + metadata: dict[str, Iterable[Any]] = {}, +) -> Iterator[dict[str, Any]]: """ - Adapter for AstraDBVectorStore. + Generate queries for matching all user_filters and any `metadata`. + + The results of the queries can be merged to produce the results. - This adapter provides DataStax AstraDB support for the graph retriever - system, enabling similarity search and document retrieval. + Results will match at least one metadata value in one of the metadata fields.
- It supports normalized metadata (collections of values) without - denormalization. + Results will also match all of the `user_filters`. Parameters ---------- - vector_store : AstraDBVectorStore - The AstraDB vector store instance. + user_filters : + User filters that all results must match. + metadata : + An item matches the queries if it matches all user filters, and + there exists a `key` such that `metadata[key]` has a non-empty + intersection with the actual values of `item.metadata[key]`. + + Yields + ------ + : + Queries corresponding to `user_filters AND metadata`. """ + if user_filters: + + def with_user_filters(filter: dict[str, Any]) -> dict[str, Any]: + return {"$and": [filter, user_filters]} + else: + + def with_user_filters(filter: dict[str, Any]) -> dict[str, Any]: + return filter + + def process_value(v: Any) -> Any: + if isinstance(v, immutabledict): + return dict(v) + else: + return v + + for k, v in metadata.items(): + for v_batch in batched(v, 100): + batch = [process_value(v) for v in v_batch] + if isinstance(batch[0], dict): + if len(batch) == 1: + yield with_user_filters({k: {"$all": [batch[0]]}}) + else: + yield with_user_filters( + {"$or": [{k: {"$all": [v]}} for v in batch]} + ) + else: + if len(batch) == 1: + yield (with_user_filters({k: batch[0]})) + else: + yield (with_user_filters({k: {"$in": batch}})) + + +async def empty_async_iterable() -> AsyncIterable[AstraDBQueryResult]: + """Create an empty async iterable.""" + if False: + yield + + +class AstraAdapter(Adapter): + """ + Adapter for the [AstraDB](https://www.datastax.com/products/datastax-astra) vector store. + + This class integrates the LangChain AstraDB vector store with the graph + retriever system, providing functionality for similarity search and document + retrieval. + + Parameters + ---------- + vector_store : + The AstraDB vector store instance. 
+ """ # noqa: E501 + + def __init__(self, vector_store: AstraDBVectorStore) -> None: + self.vector_store = vector_store.copy( + component_name="langchain_graph_retriever" + ) + + def _build_content(self, result: AstraDBQueryResult) -> Content: + assert result.embedding is not None + return Content( + id=result.id, + content=result.document.page_content, + metadata=result.document.metadata, + embedding=result.embedding, + ) - def _build_docs( - self, docs_with_embeddings: list[tuple[Document, list[float]]] - ) -> list[Document]: - docs: list[Document] = [] - for doc, embedding in docs_with_embeddings: - doc.metadata[METADATA_EMBEDDING_KEY] = embedding - docs.append(doc) - return docs + def _build_content_iter( + self, results: Iterable[AstraDBQueryResult] + ) -> Iterable[Content]: + for result in results: + yield self._build_content(result) + + async def _abuild_content_iter( + self, results: AsyncIterable[AstraDBQueryResult] + ) -> AsyncIterable[Content]: + async for result in results: + yield self._build_content(result) + + @overload + def _run_query( + self, + *, + n: int, + include_sort_vector: Literal[False] = False, + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, + sort: dict[str, Any] | None = None, + ) -> Iterable[Content]: ... + + @overload + def _run_query( + self, + *, + n: int, + include_sort_vector: Literal[True], + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, + sort: dict[str, Any] | None = None, + ) -> tuple[list[float], Iterable[Content]]: ... + + @backoff.on_exception(backoff.expo, _EXCEPTIONS_TO_RETRY, max_tries=_MAX_RETRIES) + def _run_query( + self, + *, + n: int, + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, # noqa: A002 + sort: dict[str, Any] | None = None, + include_sort_vector: bool = False, + ) -> tuple[list[float], Iterable[Content]] | Iterable[Content]: + if include_sort_vector: + # Work around the fact that `k == 0` is rejected by Astra. 
+ # AstraDBVectorStore has a similar work around for non-vectorize path, but + # we want it to apply in both cases. + query_n = n if n > 0 else 1 + + query_embedding, results = self.vector_store.run_query( + n=query_n, + ids=ids, + filter=filter, + sort=sort, + include_sort_vector=True, + include_embeddings=True, + include_similarity=False, + ) + assert query_embedding is not None + if n == 0: + return query_embedding, self._build_content_iter([]) + return query_embedding, self._build_content_iter(results) + else: + results = self.vector_store.run_query( + n=n, + ids=ids, + filter=filter, + sort=sort, + include_sort_vector=False, + include_embeddings=True, + include_similarity=False, + ) + return self._build_content_iter(results) + + @overload + async def _arun_query( + self, + *, + n: int, + include_sort_vector: Literal[False] = False, + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, + sort: dict[str, Any] | None = None, + ) -> AsyncIterable[Content]: ... + + @overload + async def _arun_query( + self, + *, + n: int, + include_sort_vector: Literal[True], + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, + sort: dict[str, Any] | None = None, + ) -> tuple[list[float], AsyncIterable[Content]]: ... + + @backoff.on_exception(backoff.expo, _EXCEPTIONS_TO_RETRY, max_tries=_MAX_RETRIES) + async def _arun_query( + self, + *, + n: int, + ids: list[str] | None = None, + filter: dict[str, Any] | None = None, # noqa: A002 + sort: dict[str, Any] | None = None, + include_sort_vector: bool = False, + ) -> tuple[list[float], AsyncIterable[Content]] | AsyncIterable[Content]: + if include_sort_vector: + # Work around the fact that `k == 0` is rejected by Astra. + # AstraDBVectorStore has a similar work around for non-vectorize path, but + # we want it to apply in both cases. 
+ query_n = n if n > 0 else 1 + + query_embedding, results = await self.vector_store.arun_query( + n=query_n, + ids=ids, + filter=filter, + sort=sort, + include_sort_vector=True, + include_embeddings=True, + include_similarity=False, + ) + assert query_embedding is not None + if n == 0: + return query_embedding, self._abuild_content_iter( + empty_async_iterable() + ) + return query_embedding, self._abuild_content_iter(results) + else: + results = await self.vector_store.arun_query( + n=n, + ids=ids, + filter=filter, + sort=sort, + include_sort_vector=False, + include_embeddings=True, + include_similarity=False, + ) + return self._abuild_content_iter(results) + + def _vector_sort_from_embedding( + self, + embedding: list[float], + ) -> dict[str, Any]: + return self.vector_store.document_codec.encode_vector_sort(vector=embedding) + + def _get_sort_and_optional_embedding( + self, query: str, k: int + ) -> tuple[None | list[float], dict[str, Any] | None]: + if self.vector_store.document_codec.server_side_embeddings: + sort = self.vector_store.document_codec.encode_vectorize_sort(query) + return None, sort + else: + embedding = self.vector_store._get_safe_embedding().embed_query(query) + if k == 0: + return embedding, None # signal that we should short-circuit + sort = self._vector_sort_from_embedding(embedding) + return embedding, sort @override - def similarity_search_with_embedding( + def search_with_embedding( self, query: str, k: int = 4, filter: dict[str, str] | None = None, **kwargs: Any, - ) -> tuple[list[float], list[Document]]: - query_embedding, docs_with_embeddings = ( - self.vector_store.similarity_search_with_embedding( - query=query, - k=k, - filter=filter, - **kwargs, - ) - ) - return query_embedding, self._build_docs( - docs_with_embeddings=docs_with_embeddings + ) -> tuple[list[float], list[Content]]: + query_embedding, sort = self._get_sort_and_optional_embedding(query, k) + if sort is None and query_embedding is not None: + return query_embedding, [] 
+ + query_embedding, results = self._run_query( + n=k, filter=filter, sort=sort, include_sort_vector=True ) + return query_embedding, list(results) @override - async def asimilarity_search_with_embedding( + async def asearch_with_embedding( self, query: str, k: int = 4, filter: dict[str, str] | None = None, **kwargs: Any, - ) -> tuple[list[float], list[Document]]: - ( - query_embedding, - docs_with_embeddings, - ) = await self.vector_store.asimilarity_search_with_embedding( - query=query, - k=k, - filter=filter, - **kwargs, - ) - return query_embedding, self._build_docs( - docs_with_embeddings=docs_with_embeddings + ) -> tuple[list[float], list[Content]]: + query_embedding, sort = self._get_sort_and_optional_embedding(query, k) + if sort is None and query_embedding is not None: + return query_embedding, [] + + query_embedding, results = await self._arun_query( + n=k, filter=filter, sort=sort, include_sort_vector=True ) + return query_embedding, [r async for r in results] @override - def _similarity_search_with_embedding_by_vector( + def search( self, embedding: list[float], k: int = 4, filter: dict[str, str] | None = None, **kwargs: Any, - ) -> list[Document]: - docs_with_embeddings = ( - self.vector_store.similarity_search_with_embedding_by_vector( - embedding=embedding, - k=k, - filter=filter, - **kwargs, - ) - ) - return self._build_docs(docs_with_embeddings=docs_with_embeddings) + ) -> list[Content]: + if k == 0: + return [] + sort = self._vector_sort_from_embedding(embedding) + results = self._run_query(n=k, filter=filter, sort=sort) + return list(results) @override - async def _asimilarity_search_with_embedding_by_vector( + async def asearch( self, embedding: list[float], k: int = 4, filter: dict[str, str] | None = None, **kwargs: Any, - ) -> list[Document]: - docs_with_embeddings = ( - await self.vector_store.asimilarity_search_with_embedding_by_vector( - embedding=embedding, - k=k, - filter=filter, - **kwargs, - ) - ) - return 
self._build_docs(docs_with_embeddings=docs_with_embeddings) + ) -> list[Content]: + if k == 0: + return [] + sort = self._vector_sort_from_embedding(embedding) + results = await self._arun_query(n=k, filter=filter, sort=sort) + return [r async for r in results] @override - def _get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: - docs: list[Document] = [] - for id in ids: - doc = self._get_by_id_with_embedding(id) - if doc is not None: - docs.append(doc) - return docs - - def _get_by_id_with_embedding(self, document_id: str) -> Document | None: - """ - Retrieve a document by its ID, including its embedding. - - Parameters - ---------- - document_id : str - The document ID. - - Returns - ------- - Document | None - The retrieved document with embedding, or `None` if not found. - """ - self.vector_store.astra_env.ensure_db_setup() - - hit = self.vector_store.astra_env.collection.find_one( - {"_id": document_id}, - projection=self.vector_store.document_codec.full_projection, - ) - if hit is None: - return None - document = self.vector_store.document_codec.decode(hit) - if document is None: - return None - document.metadata[METADATA_EMBEDDING_KEY] = ( - self.vector_store.document_codec.decode_vector(hit) - ) - return document + def get( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Content]: + results = self._run_query(n=len(ids), ids=list(ids), filter=filter) + return list(results) @override - async def _aget(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: - docs: list[Document] = [] - # TODO: Do this asynchronously? - for id in ids: - doc = await self._aget_by_id_with_embedding(id) - if doc is not None: - docs.append(doc) - return docs - - async def _aget_by_id_with_embedding(self, document_id: str) -> Document | None: - """ - Asynchronously retrieve a document by its ID, including its embedding. - - Parameters - ---------- - document_id : str - The document ID. 
- - Returns - ------- - Document | None - The retrieved document with embedding, or `None` if not found. - """ - await self.vector_store.astra_env.aensure_db_setup() - - hit = await self.vector_store.astra_env.async_collection.find_one( - {"_id": document_id}, - projection=self.vector_store.document_codec.full_projection, - ) - if hit is None: - return None - document = self.vector_store.document_codec.decode(hit) - if document is None: - return None - document.metadata[METADATA_EMBEDDING_KEY] = ( - self.vector_store.document_codec.decode_vector(hit) - ) - return document + async def aget( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Content]: + results = await self._arun_query(n=len(ids), ids=list(ids), filter=filter) + return [r async for r in results] + + @override + def adjacent( + self, + edges: set[Edge], + query_embedding: list[float], + k: int, + filter: dict[str, Any] | None, + **kwargs: Any, + ) -> Iterable[Content]: + sort = self._vector_sort_from_embedding(query_embedding) + metadata, ids = _extract_queries(edges) + + metadata_queries = _metadata_queries(user_filters=filter, metadata=metadata) + + results: dict[str, Content] = {} + for metadata_query in metadata_queries: + # TODO: Look at a thread-pool for this. 
+ for result in self._run_query(n=k, filter=metadata_query, sort=sort): + results[result.id] = result + + for id_batch in batched(ids, 100): + for result in self._run_query( + n=k, ids=list(id_batch), filter=filter, sort=sort + ): + results[result.id] = result + + return top_k(results.values(), embedding=query_embedding, k=k) + + @override + async def aadjacent( + self, + edges: set[Edge], + query_embedding: list[float], + k: int, + filter: dict[str, Any] | None, + **kwargs: Any, + ) -> Iterable[Content]: + sort = self._vector_sort_from_embedding(query_embedding) + metadata, ids = _extract_queries(edges) + + metadata_queries = _metadata_queries(user_filters=filter, metadata=metadata) + + iterables = [] + for metadata_query in metadata_queries: + iterables.append( + await self._arun_query(n=k, filter=metadata_query, sort=sort) + ) + for id_batch in batched(ids, 100): + iterables.append( + await self._arun_query( + n=k, ids=list(id_batch), filter=filter, sort=sort + ) + ) + + iterators: list[AsyncIterator[Content]] = [it.__aiter__() for it in iterables] + + results: dict[str, Content] = {} + async for result in merge.amerge(*iterators): + results[result.id] = result + + return top_k(results.values(), embedding=query_embedding, k=k) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/base.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/base.py deleted file mode 100644 index 838278e3..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/base.py +++ /dev/null @@ -1,681 +0,0 @@ -"""Defines the base class for vector store adapters.""" - -import abc -import asyncio -from collections.abc import Iterable, Sequence -from typing import Any, Generic, TypeVar - -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings -from langchain_core.runnables import run_in_executor -from langchain_core.vectorstores import VectorStore -from 
typing_extensions import ( - override, -) - -from langchain_graph_retriever.document_transformers.metadata_denormalizer import ( - MetadataDenormalizer, -) -from langchain_graph_retriever.types import Edge, IdEdge, MetadataEdge - -StoreT = TypeVar("StoreT", bound=VectorStore) - -METADATA_EMBEDDING_KEY = "__embedding" - - -class Adapter(Generic[StoreT], abc.ABC): - """ - Base adapter for integrating vector stores with the graph retriever system. - - This class provides a foundation for custom adapters, enabling consistent - interaction with various vector store implementations. - - Parameters - ---------- - vector_store : T - The vector store instance. - """ - - def __init__( - self, - vector_store: StoreT, - ): - """ - Initialize the base adapter. - - Parameters - ---------- - vector_store : T - The vector store instance. - """ - self.vector_store = vector_store - - @property - def _safe_embedding(self) -> Embeddings: - if not self.vector_store.embeddings: - msg = "Missing embedding" - raise ValueError(msg) - return self.vector_store.embeddings - - def similarity_search_with_embedding( - self, - query: str, - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> tuple[list[float], list[Document]]: - """ - Return docs (with embeddings) most similar to the query. - - Also returns the embedded query vector. - - Parameters - ---------- - query : str - Input text. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - query_embedding : list[float] - The embedded query vector - documents : list[Document] - List of up to `k` documents most similar to the query - vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. 
- """ - query_embedding = self._safe_embedding.embed_query(text=query) - docs = self.similarity_search_with_embedding_by_vector( - embedding=query_embedding, - k=k, - filter=filter, - **kwargs, - ) - return query_embedding, docs - - async def asimilarity_search_with_embedding( - self, - query: str, - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> tuple[list[float], list[Document]]: - """ - Asynchronously return docs (with embeddings) most similar to the query. - - Also returns the embedded query vector. - - Parameters - ---------- - query : str - Input text. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - query_embedding : list[float] - The embedded query vector - documents : list[Document] - List of up to `k` documents most similar to the query - vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. - """ - return await run_in_executor( - None, self.similarity_search_with_embedding, query, k, filter, **kwargs - ) - - def update_filter_hook( - self, filter: dict[str, str] | None - ) -> dict[str, str] | None: - """ - Update the metadata filter before executing the query. - - Parameters - ---------- - filter : dict[str, str], optional - Filter on the metadata to update. - - Returns - ------- - dict[str, str] | None - The updated filter on the metadata to apply. - """ - return filter - - def format_documents_hook(self, docs: list[Document]) -> list[Document]: - """ - Format the documents filter after executing the query. 
- - Parameters - ---------- - docs : list[Document] - The documents returned from the vector store - - Returns - ------- - list[Document] - The formatted documents - """ - return docs - - @abc.abstractmethod - def _similarity_search_with_embedding_by_vector( - self, - embedding: list[float], - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> list[Document]: - """ - Return docs (with embeddings) most similar to the query vector. - - Parameters - ---------- - embedding : list[float] - Embedding to look up documents similar to. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - list[Document] - List of Documents most similar to the query vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. - """ - - def similarity_search_with_embedding_by_vector( - self, - embedding: list[float], - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> list[Document]: - """ - Return docs (with embeddings) most similar to the query vector. - - Parameters - ---------- - embedding : list[float] - Embedding to look up documents similar to. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - list[Document] - List of Documents most similar to the query vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. 
- """ - docs = self._similarity_search_with_embedding_by_vector( - embedding=embedding, - k=k, - filter=self.update_filter_hook(filter), - **kwargs, - ) - return self.format_documents_hook(docs) - - async def _asimilarity_search_with_embedding_by_vector( - self, - embedding: list[float], - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> list[Document]: - """ - Asynchronously return docs (with embeddings) most similar to the query vector. - - Parameters - ---------- - embedding : list[float] - Embedding to look up documents similar to. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - list[Document] - List of Documents most similar to the query vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. - """ - return await run_in_executor( - None, - self._similarity_search_with_embedding_by_vector, - embedding, - k, - filter, - **kwargs, - ) - - async def asimilarity_search_with_embedding_by_vector( - self, - embedding: list[float], - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> list[Document]: - """ - Asynchronously return docs (with embeddings) most similar to the query vector. - - Parameters - ---------- - embedding : list[float] - Embedding to look up documents similar to. - k : int, default 4 - Number of Documents to return. - filter : dict[str, str], optional - Filter on the metadata to apply. - **kwargs : Any - Additional keyword arguments. - - Returns - ------- - list[Document] - List of Documents most similar to the query vector. - - Documents should have their embedding added to the - metadata under the `METADATA_EMBEDDING_KEY` key. 
- """ - docs = await self._asimilarity_search_with_embedding_by_vector( - embedding=embedding, - k=k, - filter=self.update_filter_hook(filter), - **kwargs, - ) - return self.format_documents_hook(docs) - - def _remove_duplicates(self, ids: Sequence[str]) -> list[str]: - """ - Remove duplicate ids while preserving order. - - Parameters - ---------- - ids : Sequence[str] - List of IDs to get. - - Returns - ------- - Sequence[str] - List of IDs with duplicates removed - """ - seen = set() - return [id_ for id_ in ids if id_ not in seen and not seen.add(id_)] # type: ignore - - def get( - self, - ids: Sequence[str], - /, - **kwargs: Any, - ) -> list[Document]: - """ - Get documents by id. - - Fewer documents may be returned than requested if some IDs are not found - or if there are duplicated IDs. This method should **NOT** raise - exceptions if no documents are found for some IDs. - - Users should not assume that the order of the returned documents matches - the order of the input IDs. Instead, users should rely on the ID field - of the returned documents. - - Parameters - ---------- - ids : Sequence[str] - List of IDs to get. - **kwargs : Any - Additional keyword arguments. These are up to the implementation. - - Returns - ------- - list[Document] - List of documents that were found. - """ - docs = self._get(self._remove_duplicates(ids), **kwargs) - return self.format_documents_hook(docs) - - @abc.abstractmethod - def _get( - self, - ids: Sequence[str], - /, - **kwargs: Any, - ) -> list[Document]: - """ - Get documents by id. - - Fewer documents may be returned than requested if some IDs are not found - or if there are duplicated IDs. This method should **NOT** raise - exceptions if no documents are found for some IDs. - - Users should not assume that the order of the returned documents matches - the order of the input IDs. Instead, users should rely on the ID field - of the returned documents. - - Parameters - ---------- - ids : Sequence[str] - List of IDs to get. 
- **kwargs : Any - Additional keyword arguments. These are up to the implementation. - - Returns - ------- - list[Document] - List of documents that were found. - """ - - async def aget( - self, - ids: Sequence[str], - /, - **kwargs: Any, - ) -> list[Document]: - """ - Asynchronously get documents by id. - - Fewer documents may be returned than requested if some IDs are not found - or if there are duplicated IDs. This method should **NOT** raise - exceptions if no documents are found for some IDs. - - Users should not assume that the order of the returned documents matches - the order of the input IDs. Instead, users should rely on the ID field - of the returned documents. - - Parameters - ---------- - ids : Sequence[str] - List of IDs to get. - **kwargs : Any - Additional keyword arguments. These are up to the implementation. - - Returns - ------- - list[Document] - List of documents that were found. - """ - docs = await self._aget(self._remove_duplicates(ids), **kwargs) - return self.format_documents_hook(docs) - - async def _aget( - self, - ids: Sequence[str], - /, - **kwargs: Any, - ) -> list[Document]: - """ - Asynchronously get documents by id. - - Fewer documents may be returned than requested if some IDs are not found - or if there are duplicated IDs. This method should **NOT** raise - exceptions if no documents are found for some IDs. - - Users should not assume that the order of the returned documents matches - the order of the input IDs. Instead, users should rely on the ID field - of the returned documents. - - Parameters - ---------- - ids : Sequence[str] - List of IDs to get. - **kwargs : Any - Additional keyword arguments. These are up to the implementation. - - Returns - ------- - list[Document] - List of documents that were found. 
- """ - return await run_in_executor( - None, - self._get, - ids, - **kwargs, - ) - - def get_adjacent( - self, - outgoing_edges: set[Edge], - query_embedding: list[float], - adjacent_k: int, - filter: dict[str, Any] | None, - **kwargs: Any, - ) -> Iterable[Document]: - """ - Return the docs with incoming edges from any of the given edges. - - Parameters - ---------- - outgoing_edges : set[Edge] - The edges to look for. - query_embedding : list[float] - The query embedding used for selecting the most relevant nodes. - adjacent_k : int - The numebr of relevant nodes to select for the edges. - strategy : Strategy - The traversal strategy being used. - filter : dict[str, Any], optional - Optional metadata to filter the results. - **kwargs : Any - Keyword arguments to pass to the similarity search. - - Returns - ------- - Iterable[Document] - Iterable of adjacent nodes. - - Raises - ------ - ValueError - If unsupported edge types are encountered. - """ - results: list[Document] = [] - - ids = [] - for outgoing_edge in outgoing_edges: - if isinstance(outgoing_edge, MetadataEdge): - docs = self.similarity_search_with_embedding_by_vector( - embedding=query_embedding, - k=adjacent_k, - filter=self._get_metadata_filter( - base_filter=filter, edge=outgoing_edge - ), - **kwargs, - ) - results.extend(docs) - elif isinstance(outgoing_edge, IdEdge): - ids.append(outgoing_edge.id) - else: - raise ValueError(f"Unsupported edge: {outgoing_edge}") - - if ids: - results.extend(self.get(ids)) - return results - - async def aget_adjacent( - self, - outgoing_edges: set[Edge], - query_embedding: list[float], - adjacent_k: int, - filter: dict[str, Any] | None, - **kwargs: Any, - ) -> Iterable[Document]: - """ - Asynchronously return the docs with incoming edges from any of the given edges. - - Parameters - ---------- - outgoing_edges : set[Edge] - The edges to look for. - query_embedding : list[float] - The query embedding used for selecting the most relevant nodes. 
- adjacent_k : int - The numebr of relevant nodes to select for the edges. - filter : dict[str, Any], optional - Optional metadata to filter the results. - **kwargs : Any - Keyword arguments to pass to the similarity search. - - Returns - ------- - Iterable[Document] - Iterable of adjacent nodes. - - Raises - ------ - ValueError - If unsupported edge types are encountered. - """ - tasks = [] - ids = [] - for outgoing_edge in outgoing_edges: - if isinstance(outgoing_edge, MetadataEdge): - tasks.append( - self.asimilarity_search_with_embedding_by_vector( - embedding=query_embedding, - k=adjacent_k, - filter=self._get_metadata_filter( - base_filter=filter, edge=outgoing_edge - ), - **kwargs, - ) - ) - elif isinstance(outgoing_edge, IdEdge): - ids.append(outgoing_edge.id) - else: - raise ValueError(f"Unsupported edge: {outgoing_edge}") - - if ids: - tasks.append(self.aget(ids)) - - results: list[Document] = [] - for completed_task in asyncio.as_completed(tasks): - docs = await completed_task - results.extend(docs) - return results - - def _get_metadata_filter( - self, - base_filter: dict[str, Any] | None = None, - edge: Edge | None = None, - ) -> dict[str, Any]: - """ - Return a filter for the `base_filter` and incoming edges from `edge`. - - Parameters - ---------- - base_filter : dict[str, Any] - Any base metadata filter that should be used for search. - Generally corresponds to the user specified filters for the entire - traversal. Should be combined with the filters necessary to support - nodes with an *incoming* edge matching `edge`. - edge : Edge, optional - An optional edge which should be added to the filter. - - Returns - ------- - dict[str, Any] - The metadata dictionary to use for the given filter. 
- """ - metadata_filter = {**(base_filter or {})} - assert isinstance(edge, MetadataEdge) - if edge is None: - metadata_filter - else: - metadata_filter[edge.incoming_field] = edge.value - return metadata_filter - - -class DenormalizedAdapter(Adapter[StoreT]): - """ - Base adapter for integrating vector stores with the graph retriever system. - - This class provides a foundation for custom adapters, enabling consistent - interaction with various vector store implementations that do not support - searching on list-based metadata values. - - Parameters - ---------- - vector_store : T - The vector store instance. - metadata_denormalizer: MetadataDenormalizer | None - (Optional) An instance of the MetadataDenormalizer used for doc insertion. - If not passed then a default instance of MetadataDenormalizer is used. - """ - - def __init__( - self, - vector_store: StoreT, - metadata_denormalizer: MetadataDenormalizer | None = None, - nested_metadata_fields: set[str] = set(), - ): - """ - Initialize the base adapter. - - Parameters - ---------- - vector_store : T - The vector store instance. - metadata_denormalizer: MetadataDenormalizer | None - (Optional) An instance of the MetadataDenormalizer used for doc insertion. - If not passed then a default instance of MetadataDenormalizer is used. - nested_metadata_fields: set[str] - The set of metadata fields that contain nested values. 
- """ - super().__init__(vector_store=vector_store) - self.metadata_denormalizer = ( - MetadataDenormalizer() - if metadata_denormalizer is None - else metadata_denormalizer - ) - self.nested_metadata_fields = nested_metadata_fields - - @override - def update_filter_hook( - self, filter: dict[str, str] | None - ) -> dict[str, str] | None: - if filter is None: - return None - denormalized_filter = {} - for key, value in filter.items(): - if key in self.nested_metadata_fields: - denormalized_filter[ - self.metadata_denormalizer.denormalized_key(key, value) - ] = self.metadata_denormalizer.denormalized_value() - else: - denormalized_filter[key] = value - return denormalized_filter - - @override - def format_documents_hook(self, docs: list[Document]) -> list[Document]: - return list(self.metadata_denormalizer.revert_documents(documents=docs)) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/cassandra.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/cassandra.py index 044851f4..5cc5acd4 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/cassandra.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/cassandra.py @@ -6,33 +6,35 @@ from typing_extensions import override try: - from langchain_community.vectorstores import Cassandra + from langchain_community.vectorstores.cassandra import Cassandra except (ImportError, ModuleNotFoundError): raise ImportError("please `pip install langchain-community cassio`") from langchain_core.documents import Document -from .base import METADATA_EMBEDDING_KEY, DenormalizedAdapter +from langchain_graph_retriever._conversion import METADATA_EMBEDDING_KEY +from langchain_graph_retriever.adapters.langchain import ShreddedLangchainAdapter -class CassandraAdapter(DenormalizedAdapter[Cassandra]): +class CassandraAdapter(ShreddedLangchainAdapter[Cassandra]): """ - Adapter for Cassandra vector store. 
+ Adapter for the [Apache Cassandra](https://cassandra.apache.org/) vector store. - This class integrates the Cassandra vector store with the graph retriever system, - providing functionality for similarity search and document retrieval. + This class integrates the LangChain Cassandra vector store with the graph + retriever system, providing functionality for similarity search and document + retrieval. Parameters ---------- - vector_store : Cassandra + vector_store : The Cassandra vector store instance. - metadata_denormalizer: MetadataDenormalizer | None - (Optional) An instance of the MetadataDenormalizer used for doc insertion. - If not passed then a default instance of MetadataDenormalizer is used. + shredder: ShreddingTransformer, optional + An instance of the ShreddingTransformer used for doc insertion. + If not passed then a default instance of ShreddingTransformer is used. """ @override - def _similarity_search_with_embedding_by_vector( # type: ignore + def _search( # type: ignore self, embedding: list[float], k: int = 4, @@ -83,27 +85,7 @@ def _similarity_search_with_embedding_id_by_vector( ] @override - async def asimilarity_search_with_embedding( - self, - query: str, - k: int = 4, - filter: dict[str, str] | None = None, - **kwargs: Any, - ) -> tuple[list[float], list[Document]]: - query_embedding = self._safe_embedding.embed_query(text=query) - if k == 0: - return query_embedding, [] - - docs = await self.asimilarity_search_with_embedding_by_vector( - embedding=query_embedding, - k=k, - filter=filter, - **kwargs, - ) - return query_embedding, docs - - @override - async def _asimilarity_search_with_embedding_by_vector( # type: ignore + async def _asearch( # type: ignore self, **kwargs: Any ) -> list[Document]: results = ( @@ -119,19 +101,31 @@ async def _asimilarity_search_with_embedding_by_vector( # type: ignore return docs @override - def _get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: + def _get( + self, ids: Sequence[str], filter: 
dict[str, Any] | None = None, **kwargs: Any + ) -> list[Document]: + filter = self.update_filter_hook(filter) docs: list[Document] = [] for id in ids: - row = self.vector_store.table.get(row_id=id) + args: dict[str, Any] = {"row_id": id} + if filter: + args["metadata"] = filter + row = self.vector_store.table.get(**args) if row is not None: docs.append(self._row_to_doc(row)) return docs @override - async def _aget(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: + async def _aget( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Document]: + filter = self.update_filter_hook(filter) docs: list[Document] = [] for id in ids: - row = await self.vector_store.table.aget(row_id=id) + args: dict[str, Any] = {"row_id": id} + if filter: + args["metadata"] = filter + row = await self.vector_store.table.aget(**args) if row is not None: docs.append(self._row_to_doc(row)) return docs diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/chroma.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/chroma.py index 358f750c..8c436079 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/chroma.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/chroma.py @@ -6,7 +6,8 @@ from langchain_core.documents import Document from typing_extensions import override -from .base import METADATA_EMBEDDING_KEY, DenormalizedAdapter +from langchain_graph_retriever._conversion import METADATA_EMBEDDING_KEY +from langchain_graph_retriever.adapters.langchain import ShreddedLangchainAdapter try: from langchain_chroma import Chroma @@ -15,24 +16,35 @@ raise ImportError(msg) -class ChromaAdapter(DenormalizedAdapter[Chroma]): +class ChromaAdapter(ShreddedLangchainAdapter[Chroma]): """ - Adapter for Chroma vector store. + Adapter for [Chroma](https://www.trychroma.com/) vector store. 
- This adapter integrates the Chroma vector store with the graph retriever system, - allowing for similarity search and document retrieval. + This adapter integrates the LangChain Chroma vector store with the + graph retriever system, allowing for similarity search and document retrieval. Parameters ---------- - vector_store : Chroma + vector_store : The Chroma vector store instance. - metadata_denormalizer: MetadataDenormalizer | None - (Optional) An instance of the MetadataDenormalizer used for doc insertion. - If not passed then a default instance of MetadataDenormalizer is used. + shredder: ShreddingTransformer, optional + An instance of the ShreddingTransformer used for doc insertion. + If not passed then a default instance of ShreddingTransformer is used. """ @override - def _similarity_search_with_embedding_by_vector( + def update_filter_hook( + self, filter: dict[str, Any] | None + ) -> dict[str, Any] | None: + filter = super().update_filter_hook(filter) + if not filter or len(filter) <= 1: + return filter + + conjoined = [{k: v} for k, v in filter.items()] + return {"$and": conjoined} + + @override + def _search( self, embedding: list[float], k: int = 4, @@ -83,10 +95,13 @@ def _similarity_search_with_embedding_by_vector( return docs @override - def _get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]: + def _get( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Document]: results = self.vector_store.get( ids=list(ids), include=["embeddings", "metadatas", "documents"], + where=self.update_filter_hook(filter), **kwargs, ) docs = [ diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/in_memory.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/in_memory.py index 73666d38..f7af1d4b 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/in_memory.py +++ 
b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/in_memory.py @@ -4,47 +4,55 @@ from typing import Any from langchain_core.documents import Document -from langchain_core.vectorstores import InMemoryVectorStore +from langchain_core.vectorstores.in_memory import InMemoryVectorStore from typing_extensions import override -from .base import METADATA_EMBEDDING_KEY, Adapter +from langchain_graph_retriever._conversion import METADATA_EMBEDDING_KEY +from langchain_graph_retriever.adapters.langchain import LangchainAdapter SENTINEL = object() -class InMemoryAdapter(Adapter[InMemoryVectorStore]): +class InMemoryAdapter(LangchainAdapter[InMemoryVectorStore]): """ Adapter for InMemoryVectorStore vector store. - This adapter integrates the in-memory vector store with the graph + This adapter integrates the LangChain In-Memory vector store with the graph retriever system, enabling similarity search and document retrieval. Parameters ---------- - vector_store : InMemoryVectorStore + vector_store : The in-memory vector store instance. 
""" @override - def _get(self, ids: Sequence[str], /, **kwargs) -> list[Document]: + def _get( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs + ) -> list[Document]: docs: list[Document] = [] + filter_method = self._filter_method(filter) for doc_id in ids: - doc = self.vector_store.store.get(doc_id) - if doc: - metadata = doc["metadata"] - metadata[METADATA_EMBEDDING_KEY] = doc["vector"] - docs.append( - Document( - id=doc["id"], - page_content=doc["text"], - metadata=metadata, - ) + hit = self.vector_store.store.get(doc_id) + if hit: + metadata = hit["metadata"] + metadata[METADATA_EMBEDDING_KEY] = hit["vector"] + + doc = Document( + id=hit["id"], + page_content=hit["text"], + metadata=metadata, ) + + if not filter_method(doc): + continue + + docs.append(doc) return docs @override - def _similarity_search_with_embedding_by_vector( + def _search( self, embedding: list[float], k: int = 4, @@ -54,7 +62,7 @@ def _similarity_search_with_embedding_by_vector( results = self.vector_store._similarity_search_with_score_by_vector( embedding=embedding, k=k, - filter=self._filter_method(filter_dict=filter), + filter=self._filter_method(filter), **kwargs, ) docs = [ @@ -78,20 +86,24 @@ def _equals_or_contains( Parameters ---------- - key : str + key : Metadata key to look for. - value : Any + value : Value to check for equality or containment. - metadata : dict[str, Any] + metadata : Metadata dictionary to inspect. Returns ------- - bool + : True if and only if `metadata[key] == value` or `metadata[key]` is a list containing `value`. """ - actual = metadata.get(key, SENTINEL) + actual = metadata + for key_part in key.split("."): + actual = actual.get(key_part, SENTINEL) + if actual is SENTINEL: + break if actual == value: return True @@ -112,12 +124,12 @@ def _filter_method( Parameters ---------- - filter_dict : dict[str, str], optional + filter_dict : Dictionary specifying the filter criteria. 
Returns ------- - Callable[[Document], bool] + : A function that determines if a document matches the filter criteria. """ if filter_dict is None: diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/inference.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/inference.py index a311240f..e758c3ad 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/inference.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/inference.py @@ -2,9 +2,8 @@ import importlib -from langchain_core.vectorstores import VectorStore - -from .base import Adapter +from graph_retriever.adapters import Adapter +from langchain_core.vectorstores.base import VectorStore ADAPTERS_PKG = "langchain_graph_retriever.adapters" _KNOWN_STORES = { @@ -42,32 +41,9 @@ def _full_class_name(cls: type) -> str: return f"{cls.__module__}.{cls.__name__}" -def infer_adapter(store: Adapter | VectorStore) -> Adapter: - """ - Dynamically infer the adapter for a given vector store. - - This function identifies the correct adapter based on the vector store type - and instantiates it with the provided arguments. - - Parameters - ---------- - vector_store : VectorStore - The vector store instance. - - Returns - ------- - Any - The initialized adapter for the given vector store. - - Raises - ------ - ValueError - If the vector store type is not recognized. 
- """ - if isinstance(store, Adapter): - return store - - store_classes = [store.__class__] +def _infer_adapter_name(cls: type) -> tuple[str, str]: + """Return the module and class of the adapter or raise.""" + store_classes = [cls] while store_classes: store_class = store_classes.pop() @@ -77,17 +53,40 @@ def infer_adapter(store: Adapter | VectorStore) -> Adapter: adapter = _KNOWN_STORES.get(store_class_name, None) if adapter is not None: - module_name, class_name = adapter - adapter_module = importlib.import_module(module_name) - adapter_class = getattr(adapter_module, class_name) - return adapter_class(store) + return adapter # If we didn't find it yet, and the naem wasn't a stopping point, # we queue up the base classes for consideration. This allows # matching subclasses of supported vector stores. store_classes.extend(store_class.__bases__) - store_class_name = _full_class_name(store.__class__) + store_class_name = _full_class_name(cls) raise ValueError( f"Expected adapter or supported vector store, but got {store_class_name}" ) + + +def infer_adapter(store: Adapter | VectorStore) -> Adapter: + """ + Dynamically infer the adapter for a given vector store. + + This function identifies the correct adapter based on the vector store type + and instantiates it with the provided arguments. + + Parameters + ---------- + store : + The vector store instance. + + Returns + ------- + : + The initialized adapter for the given vector store. 
+ """ + if isinstance(store, Adapter): + return store + + module_name, class_name = _infer_adapter_name(store.__class__) + adapter_module = importlib.import_module(module_name) + adapter_class = getattr(adapter_module, class_name) + return adapter_class(store) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/langchain.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/langchain.py new file mode 100644 index 00000000..0b1c88f0 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/langchain.py @@ -0,0 +1,411 @@ +"""Defines the base class for vector store adapters.""" + +import abc +from collections.abc import Sequence +from typing import Any, Generic, TypeVar + +from graph_retriever import Content +from graph_retriever.adapters import Adapter +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.runnables import run_in_executor +from langchain_core.vectorstores.base import VectorStore +from typing_extensions import ( + override, +) + +from langchain_graph_retriever._conversion import doc_to_content +from langchain_graph_retriever.transformers import ShreddingTransformer + +StoreT = TypeVar("StoreT", bound=VectorStore) + + +class LangchainAdapter(Generic[StoreT], Adapter): + """ + Base adapter for integrating vector stores with the graph retriever system. + + This class provides a foundation for custom adapters, enabling consistent + interaction with various vector store implementations. + + Parameters + ---------- + vector_store : + The vector store instance. 
+ """ + + def __init__( + self, + vector_store: StoreT, + ): + """Initialize the base adapter.""" + self.vector_store = vector_store + + @property + def _safe_embedding(self) -> Embeddings: + if not self.vector_store.embeddings: + msg = "Missing embedding" + raise ValueError(msg) + return self.vector_store.embeddings + + def embed_query(self, query: str): + """Return the embedding of the query.""" + return self._safe_embedding.embed_query(query) + + async def aembed_query(self, query: str): + """Return the embedding of the query.""" + return await self._safe_embedding.aembed_query(query) + + def update_filter_hook( + self, filter: dict[str, Any] | None + ) -> dict[str, Any] | None: + """ + Update the metadata filter before executing the query. + + Parameters + ---------- + filter : + Filter on the metadata to update. + + Returns + ------- + : + The updated filter on the metadata to apply. + """ + return filter + + def format_documents_hook(self, docs: list[Document]) -> list[Content]: + """ + Format the documents as content after executing the query. + + Parameters + ---------- + docs : + The documents returned from the vector store + + Returns + ------- + : + The formatted content. 
+ """ + return [doc_to_content(doc) for doc in docs] + + @override + def search_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + query_embedding = self.embed_query(query) + docs = self.search( + embedding=query_embedding, + k=k, + filter=filter, + **kwargs, + ) + return query_embedding, docs + + @override + async def asearch_with_embedding( + self, + query: str, + k: int = 4, + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> tuple[list[float], list[Content]]: + query_embedding = await self.aembed_query(query) + docs = await self.asearch( + embedding=query_embedding, + k=k, + filter=filter, + **kwargs, + ) + return query_embedding, docs + + @abc.abstractmethod + def _search( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, str] | None = None, + **kwargs: Any, + ) -> list[Document]: + """ + Return docs (with embeddings) most similar to the query vector. + + Parameters + ---------- + embedding : + Embedding to look up documents similar to. + k : + Number of Documents to return. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + : + List of Documents most similar to the query vector. + + Documents should have their embedding added to the + metadata under the `METADATA_EMBEDDING_KEY` key. + """ + + @override + def search( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, str] | None = None, + **kwargs: Any, + ) -> list[Content]: + """ + Return contents most similar to the query vector. + + Parameters + ---------- + embedding : + Embedding to look up documents similar to. + k : + Number of Documents to return. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + : + List of Contents most similar to the query vector. 
+ """ + if k == 0: + return [] + + docs = self._search( + embedding=embedding, + k=k, + filter=self.update_filter_hook(filter), + **kwargs, + ) + return self.format_documents_hook(docs) + + async def _asearch( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, str] | None = None, + **kwargs: Any, + ) -> list[Document]: + """ + Asynchronously return docs (with embeddings) most similar to the query vector. + + Parameters + ---------- + embedding : + Embedding to look up documents similar to. + k : + Number of Documents to return. + filter : + Filter on the metadata to apply. + kwargs : + Additional keyword arguments. + + Returns + ------- + : + List of Documents most similar to the query vector. + + Documents should have their embedding added to the + metadata under the `METADATA_EMBEDDING_KEY` key. + """ + return await run_in_executor( + None, + self._search, + embedding, + k, + filter, + **kwargs, + ) + + @override + async def asearch( + self, + embedding: list[float], + k: int = 4, + filter: dict[str, str] | None = None, + **kwargs: Any, + ) -> list[Content]: + if k == 0: + return [] + + docs = await self._asearch( + embedding=embedding, + k=k, + filter=self.update_filter_hook(filter), + **kwargs, + ) + return self.format_documents_hook(docs) + + def _remove_duplicates(self, ids: Sequence[str]) -> list[str]: + """ + Remove duplicate ids while preserving order. + + Parameters + ---------- + ids : + List of IDs to get. + + Returns + ------- + : + List of IDs with duplicates removed + """ + return list({k: True for k in ids}.keys()) + + @override + def get( + self, + ids: Sequence[str], + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Content]: + docs = self._get(self._remove_duplicates(ids), filter, **kwargs) + return self.format_documents_hook(docs) + + @abc.abstractmethod + def _get( + self, + ids: Sequence[str], + filter: dict[str, Any] | None = None, + **kwargs: Any, + ) -> list[Document]: + """ + Get documents by ID. 
+
+        Fewer documents may be returned than requested if some IDs are not found
+        or if there are duplicated IDs. This method should **NOT** raise
+        exceptions if no documents are found for some IDs.
+
+        Users should not assume that the order of the returned documents matches
+        the order of the input IDs. Instead, users should rely on the ID field
+        of the returned documents.
+
+        Parameters
+        ----------
+        ids :
+            List of IDs to get.
+        filter :
+            Filter to apply to the records.
+        kwargs :
+            Additional keyword arguments. These are up to the implementation.
+
+        Returns
+        -------
+        :
+            List of documents that were found.
+        """
+
+    @override
+    async def aget(
+        self,
+        ids: Sequence[str],
+        filter: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Content]:
+        docs = await self._aget(self._remove_duplicates(ids), filter, **kwargs)
+        return self.format_documents_hook(docs)
+
+    async def _aget(
+        self,
+        ids: Sequence[str],
+        filter: dict[str, Any] | None = None,
+        **kwargs: Any,
+    ) -> list[Document]:
+        """
+        Asynchronously get documents by ID.
+
+        Fewer documents may be returned than requested if some IDs are not found
+        or if there are duplicated IDs. This method should **NOT** raise
+        exceptions if no documents are found for some IDs.
+
+        Users should not assume that the order of the returned documents matches
+        the order of the input IDs. Instead, users should rely on the ID field
+        of the returned documents.
+
+        Parameters
+        ----------
+        ids :
+            List of IDs to get.
+        filter :
+            Filter to apply to the documents.
+        kwargs :
+            Additional keyword arguments. These are up to the implementation.
+
+        Returns
+        -------
+        :
+            List of documents that were found.
+        """
+        return await run_in_executor(
+            None,
+            self._get,
+            ids,
+            filter,
+            **kwargs,
+        )
+
+
+class ShreddedLangchainAdapter(LangchainAdapter[StoreT]):
+    """
+    Base adapter for integrating vector stores with the graph retriever system.
+ + This class provides a foundation for custom adapters, enabling consistent + interaction with various vector store implementations that do not support + searching on list-based metadata values. + + Parameters + ---------- + vector_store : + The vector store instance. + shredder: ShreddingTransformer, optional + An instance of the ShreddingTransformer used for doc insertion. + If not passed then a default instance of ShreddingTransformer is used. + nested_metadata_fields: set[str] + The set of metadata fields that contain nested values. + """ + + def __init__( + self, + vector_store: StoreT, + shredder: ShreddingTransformer | None = None, + nested_metadata_fields: set[str] = set(), + ): + """Initialize the base adapter.""" + super().__init__(vector_store=vector_store) + self.shredder = ShreddingTransformer() if shredder is None else shredder + self.nested_metadata_fields = nested_metadata_fields + + @override + def update_filter_hook( + self, filter: dict[str, str] | None + ) -> dict[str, str] | None: + if filter is None: + return None + + shredded_filter = {} + for key, value in filter.items(): + if key in self.nested_metadata_fields: + shredded_filter[self.shredder.shredded_key(key, value)] = ( + self.shredder.shredded_value() + ) + else: + shredded_filter[key] = value + return shredded_filter + + @override + def format_documents_hook(self, docs: list[Document]) -> list[Content]: + restored = list(self.shredder.restore_documents(documents=docs)) + return super().format_documents_hook(restored) diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/open_search.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/open_search.py index b3246d0a..67253918 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/open_search.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/adapters/open_search.py @@ -6,26 +6,36 @@ from typing_extensions import override try: 
- from langchain_community.vectorstores import OpenSearchVectorSearch + from langchain_community.vectorstores.opensearch_vector_search import ( + OpenSearchVectorSearch, + ) except (ImportError, ModuleNotFoundError): raise ImportError("please `pip install langchain-community opensearch-py`") from langchain_core.documents import Document -from .base import METADATA_EMBEDDING_KEY, Adapter +from langchain_graph_retriever._conversion import METADATA_EMBEDDING_KEY +from langchain_graph_retriever.adapters.langchain import LangchainAdapter -class OpenSearchAdapter(Adapter[OpenSearchVectorSearch]): +class OpenSearchAdapter(LangchainAdapter[OpenSearchVectorSearch]): """ Adapter to traverse OpenSearch vector stores. This adapter enables similarity search and document retrieval using an - OpenSearch vector store. It supports both "lucene" and "faiss" engines. + OpenSearch vector store. Parameters ---------- - vector_store : OpenSearchVectorSearch + vector_store : The OpenSearch vector store instance. + + Notes + ----- + Graph Traversal is only supported when using either the `"lucene"` or + `"faiss"` engine. + + For more info, see the [OpenSearch Documentation](https://opensearch.org/docs/latest/search-plugins/knn/knn-index#method-definitions) """ def __init__(self, vector_store: OpenSearchVectorSearch): @@ -38,35 +48,47 @@ def __init__(self, vector_store: OpenSearchVectorSearch): raise ValueError(msg) super().__init__(vector_store) + if vector_store.is_aoss: + self._id_field = "id" + else: + self._id_field = "_id" + def _build_filter( - self, filter: dict[str, str] | None = None + self, filter: dict[str, Any] | None = None ) -> list[dict[str, Any]] | None: """ Build a filter query for OpenSearch based on metadata. Parameters ---------- - filter : dict[str, str], optional + filter : Metadata filter to apply. Returns ------- - list[dict[str, Any]] | None + : Filter query for OpenSearch. + + Raises + ------ + ValueError + If the query is not supported by OpenSearch adapter. 
"""
         if filter is None:
             return None
-        return [
-            {
-                "terms" if isinstance(value, list) else "term": {
-                    f"metadata.{key}.keyword": value
-                }
-            }
-            for key, value in filter.items()
-        ]
+
+        filters = []
+        for key, value in filter.items():
+            if isinstance(value, list):
+                filters.append({"terms": {f"metadata.{key}": value}})
+            elif isinstance(value, dict):
+                raise ValueError("Open Search doesn't support dictionary searches.")
+            else:
+                filters.append({"term": {f"metadata.{key}": value}})
+        return filters
 
     @override
-    def _similarity_search_with_embedding_by_vector(
+    def _search(
         self,
         embedding: list[float],
         k: int = 4,
@@ -77,9 +99,8 @@ def _similarity_search_with_embedding_by_vector(
         # use an efficient_filter to collect results that
         # are near the embedding vector until up to 'k'
         # documents that match the filter are found.
-        kwargs["efficient_filter"] = {
-            "bool": {"must": self._build_filter(filter=filter)}
-        }
+        query = {"bool": {"must": self._build_filter(filter=filter)}}
+        kwargs["efficient_filter"] = query
 
         if k == 0:
             return []
@@ -110,32 +131,34 @@ def _similarity_search_with_embedding_by_vector(
     ]
 
     @override
-    def _get(self, ids: Sequence[str], /, **kwargs: Any) -> list[Document]:
-        try:
-            from opensearchpy.exceptions import NotFoundError
-        except (ImportError, ModuleNotFoundError):
-            msg = "please `pip install opensearch-py`."
- raise ImportError(msg) - - docs: list[Document] = [] - for id in ids: - try: - hit = self.vector_store.client.get( - index=self.vector_store.index_name, - id=id, - _source_includes=["text", "metadata", "vector_field"], - **kwargs, - ) - docs.append( - Document( - page_content=hit["_source"]["text"], - metadata={ - METADATA_EMBEDDING_KEY: hit["_source"]["vector_field"], - **hit["_source"]["metadata"], - }, - id=hit["_id"], - ) - ) - except NotFoundError: - pass - return docs + def _get( + self, ids: Sequence[str], filter: dict[str, Any] | None = None, **kwargs: Any + ) -> list[Document]: + query: dict[str, Any] = {"ids": {"values": ids}} + + if filter: + query = { + "bool": {"must": [query, *(self._build_filter(filter=filter) or [])]} + } + + response = self.vector_store.client.search( + body={ + "query": query, + }, + index=self.vector_store.index_name, + _source_includes=["text", "metadata", "vector_field"], + size=len(ids), + **kwargs, + ) + + return [ + Document( + page_content=hit["_source"]["text"], + metadata={ + METADATA_EMBEDDING_KEY: hit["_source"]["vector_field"], + **hit["_source"]["metadata"], + }, + id=hit["_id"], + ) + for hit in response["hits"]["hits"] + ] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_graph.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_graph.py index e60633be..88dbf8b0 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_graph.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_graph.py @@ -1,14 +1,12 @@ """Utilities for creating and analyzing a graph of documents.""" -from collections.abc import Iterable, Sequence -from typing import Any +from collections.abc import Sequence import networkx as nx +from graph_retriever import Content +from graph_retriever.edges import Edge, EdgeFunction, EdgeSpec, MetadataEdgeFunction from langchain_core.documents import Document -from 
langchain_graph_retriever.edges.metadata import EdgeSpec, MetadataEdgeFunction
-from langchain_graph_retriever.types import Edge, EdgeFunction, Node
-
 
 def _best_communities(graph: nx.DiGraph) -> list[list[str]]:
     """
@@ -19,19 +17,24 @@
 
     Parameters
     ----------
-    graph : nx.DiGraph
+    graph :
         The directed graph to analyze.
 
     Returns
     -------
-    list[list[str]]
+    :
         A list of communities, where each community is a list of node IDs.
     """
     # TODO: Also continue running until the size of communities is below
     # a specified threshold?
-    best_modularity = float("-inf")
     best_communities = [[node] for node in graph]
+    if graph.number_of_edges() == 0:
+        # If there are no edges, then we can't do any better.
+        return best_communities
+
+    # TODO: stop running if we reach a target modularity and/or number of communities?
+    best_modularity = float("-inf")
     for new_communities in nx.algorithms.community.girvan_newman(graph):
         new_modularity = nx.algorithms.community.modularity(graph, new_communities)
         if new_modularity > best_modularity:
@@ -42,39 +45,6 @@
     return best_communities
 
 
-def _get_md_values(metadata: dict[str, Any], field: str) -> Iterable[Any]:
-    """
-    Retrieve metadata values for a specific field.
-
-    This function extracts values from the metadata dictionary for the given field,
-    handling cases where the value is a single string, a list, or another iterable.
-
-    Parameters
-    ----------
-    metadata : dict[str, Any]
-        The metadata dictionary.
-    field : str
-        The field to extract values from.
-
-    Returns
-    -------
-    Iterable[Any]
-        A list of values for the specified field. If no values are found, an
-        empty list is returned.
- """ - value = metadata.get(field, None) - if value is None: - return [] - if isinstance(value, str): - return [value] - if field not in metadata: - return [] - try: - return list(value) - except TypeError as _: - return [value] - - def create_graph( documents: Sequence[Document], *, @@ -88,14 +58,14 @@ def create_graph( Parameters ---------- - documents : Sequence[Document] + documents : A sequence of documents to add as nodes. - edges : list[EdgeSpec] | EdgeFunction + edges : Definitions of edges to use for creating the graph or edge function to use. Returns ------- - nx.DiGraph + : The created directed graph with documents as nodes and metadata relationships as edges. @@ -122,9 +92,9 @@ def create_graph( graph.add_node(document.id, doc=document) document_edges = edge_function( - Node( + Content( id=document.id, - depth=0, + content=document.page_content, embedding=[], metadata=document.metadata, ) @@ -155,12 +125,12 @@ def group_by_community(graph: nx.DiGraph) -> list[list[Document]]: Paramaters ---------- - graph : nx.DiGraph + graph : The directed graph of documents. Returns ------- - list[list[Document]] + : A list of communities, where each community is a list of documents. """ # Find communities and output documents grouped by community. diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/gliner.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/gliner.py deleted file mode 100644 index 68123e47..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/gliner.py +++ /dev/null @@ -1,113 +0,0 @@ -from collections.abc import Sequence -from typing import Any - -from langchain_core.documents import BaseDocumentTransformer, Document -from typing_extensions import override - - -class GLiNEREntityExtractor(BaseDocumentTransformer): - """ - Add metadata to documents about named entities using `GLiNER`_. 
- - `GLiNER`_ is a Named Entity Recognition (NER) model capable of identifying any - entity type using a bidirectional transformer encoder (BERT-like). - - Preliminaries - ------------- - - Install the ``gliner`` package. - - Note that ``bs4`` is also installed to support the WebBaseLoader in the example, - but not needed by the GLiNEREntityExtractor itself. - - .. code-block:: bash - - pip install -q langchain_community bs4 gliner - - Example - ------- - We load the ``state_of_the_union.txt`` file, chunk it, then for each chunk we - add named entities to the metadata. - - .. code-block:: python - - from langchain_community.document_loaders import WebBaseLoader - from langchain_community.document_transformers import GLiNEREntityExtractor - from langchain_text_splitters import CharacterTextSplitter - - loader = WebBaseLoader( - "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt" - ) - raw_documents = loader.load() - - text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) - documents = text_splitter.split_documents(raw_documents) - - extractor = GLiNEREntityExtractor(labels=["person", "topic"]) - documents = extractor.transform_documents(documents) - - print(documents[0].metadata) - - .. code-block:: output - - {'source': 'https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt', 'person': ['president zelenskyy', 'vladimir putin']} - - Parameters - ---------- - labels : list[str] - List of entity kinds to extract. - batch_size : int, default 8 - The number of documents to process in each batch. - metadata_key_prefix : str, default "" - A prefix to add to metadata keys outputted by the extractor. - This will be prepended to the label, with the value (or values) holding the - generated keywords for that entity kind. - model : str, default "urchade/gliner_mediumv2.1" - The GLiNER model to use. 
- - """ # noqa: E501 - - def __init__( - self, - labels: list[str], - *, - batch_size: int = 8, - metadata_key_prefix: str = "", - model: str = "urchade/gliner_mediumv2.1", - ): - try: - from gliner import GLiNER # type: ignore - - self._model = GLiNER.from_pretrained(model) - - except ImportError: - raise ImportError( - "gliner is required for the GLiNEREntityExtractor. " - "Please install it with `pip install gliner`." - ) from None - - self._batch_size = batch_size - self._labels = labels - self.metadata_key_prefix = metadata_key_prefix - - @override - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - for i in range(0, len(documents), self._batch_size): - batch = documents[i : i + self._batch_size] - texts = [item.page_content for item in batch] - extracted = self._model.batch_predict_entities( - texts=texts, labels=self._labels, **kwargs - ) - for i, entities in enumerate(extracted): - labels = set() - for entity in entities: - label = self.metadata_key_prefix + entity["label"] - labels.add(label) - batch[i].metadata.setdefault(label, set()).add( - entity["text"].lower() - ) - for label in labels: - batch[i].metadata[label] = list(batch[i].metadata[label]) - return documents diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/hierarchy.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/hierarchy.py deleted file mode 100644 index fd3e47f5..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/hierarchy.py +++ /dev/null @@ -1,105 +0,0 @@ -from collections.abc import Sequence -from typing import Any - -from langchain_core.documents import BaseDocumentTransformer, Document -from typing_extensions import override - - -class HierarchyLinkExtractor(BaseDocumentTransformer): - r""" - Extract links from a document hierarchy. - - Example - ------- - - .. 
code-block:: python - - # Given three paths (in this case, within the "Root" document): - h1 = ["Root", "H1"] - h1a = ["Root", "H1", "a"] - h1b = ["Root", "H1", "b"] - - # Parent links `h1a` and `h1b` to `h1`. - # Child links `h1` to `h1a` and `h1b`. - # Sibling links `h1a` and `h1b` together (both directions). - - Example use with documents - -------------------------- - .. code_block: python - transformer = LinkExtractorTransformer([ - HierarchyLinkExtractor().as_document_extractor( - # Assumes the "path" to each document is in the metadata. - # Could split strings, etc. - lambda doc: doc.metadata.get("path", []) - ) - ]) - linked = transformer.transform_documents(docs) - - Parameters - ---------- - path_metadata_key : str, default "path" - Metadata key containing the path. - This may correspond to paths in a file system, hierarchy in a document, etc. - path_delimiter : str, default "\" - Delimiter of items in the path. - parent_links : bool, default True - Whether to link each document to it's parent. - If `True`, `metadata["parent_out"]` will be populated to link to - `metadata["parent_in"]`. - child_links: bool, default False - Whether to link each document from a section to its children. - If `True`, `metadata["child_out"]` will be populated to link to - `metadata["child_in"]`. - sibling_links : bool, default False - Whether to link each document to sibling (adjacent) documents. - If `True`, `metadata["sibling"]` will be populated. 
- """ - - def __init__( - self, - *, - path_metadata_key: str = "path", - path_delimiter: str = "\\", - parent_links: bool = True, - child_links: bool = False, - sibling_links: bool = False, - ): - self._path_metadata_key = path_metadata_key - self._path_delimiter = path_delimiter - self._parent_links = parent_links - self._child_links = child_links - self._sibling_links = sibling_links - - @override - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - for document in documents: - if self._path_metadata_key not in document.metadata: - msg = ( - f"path not found in metadata at {self._path_metadata_key}", - f" for document id: {document.id}", - ) - raise ValueError(msg) - - path: str = document.metadata[self._path_metadata_key] - if self._parent_links: - # This is linked from everything with this parent path. - document.metadata["parent_in"] = path - if self._child_links: - # This is linked to every child with this as it's "parent" path. - document.metadata["child_out"] = path - - path_parts = path.split(self._path_delimiter) - if len(path_parts) >= 1: - parent_path = self._path_delimiter.join(path_parts[0:-1]) - if self._parent_links and len(path_parts) > 1: - # This is linked to the nodes with the given parent path. - document.metadata["parent_out"] = parent_path - if self._child_links and len(path_parts) > 1: - # This is linked from every node with the given parent path. - document.metadata["child_in"] = parent_path - if self._sibling_links: - # This is a sibling of everything with the same parent. 
- document.metadata["sibling"] = parent_path - return documents diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/html_hyperlink.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/html_hyperlink.py deleted file mode 100644 index c97416c4..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/html_hyperlink.py +++ /dev/null @@ -1,279 +0,0 @@ -from __future__ import annotations - -from collections.abc import Sequence -from typing import TYPE_CHECKING, Any -from urllib.parse import urldefrag, urljoin, urlparse - -from langchain_core._api import beta -from langchain_core.documents import BaseDocumentTransformer, Document -from typing_extensions import override - -if TYPE_CHECKING: - from bs4 import BeautifulSoup # type: ignore - from bs4.element import Tag # type: ignore - - -@beta() -class HtmlHyperlinkExtractor(BaseDocumentTransformer): - """ - Extract hyperlinks from HTML content. - - Expects each document to contain its URL in its metadata. - - Example:: - - extractor = HtmlHyperlinkExtractor() - results = extractor.extract_one(HtmlInput(html, url)) - - .. seealso:: - - - :mod:`How to use a graph vector store <langchain_community.graph_vectorstores>` - - :class:`How to create links between documents <langchain_community.graph_vectorstores.links.Link>` - - How to link Documents on hyperlinks in HTML - =========================================== - - Preliminaries - ------------- - - Install the ``beautifulsoup4`` package: - - .. code-block:: bash - - pip install -q langchain_community beautifulsoup4 - - Usage - ----- - - For this example, we'll scrape 2 HTML pages that have an hyperlink from one - page to the other using an ``AsyncHtmlLoader``. - Then we use the ``HtmlLinkExtractor`` to create the links in the documents. 
- - Using extract_one() - ^^^^^^^^^^^^^^^^^^^ - - We can use :meth:`extract_one` on a document to get the links and add the links - to the document metadata with - :meth:`~langchain_community.graph_vectorstores.links.add_links`:: - - from langchain_community.document_loaders import AsyncHtmlLoader - from langchain_community.graph_vectorstores.extractors import ( - HtmlInput, - HtmlLinkExtractor, - ) - from langchain_community.graph_vectorstores.links import add_links - from langchain_core.documents import Document - - loader = AsyncHtmlLoader( - [ - "https://python.langchain.com/docs/integrations/providers/astradb/", - "https://docs.datastax.com/en/astra/home/astra.html", - ] - ) - - documents = loader.load() - - html_extractor = HtmlLinkExtractor() - - for doc in documents: - links = html_extractor.extract_one(HtmlInput(doc.page_content, url)) - add_links(doc, links) - - documents[0].metadata["links"][:5] - - .. code-block:: output - - [Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/spreedly/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/nvidia/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/ray_serve/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/bageldb/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/introduction/')] - - Using as_document_extractor() - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - If you use a document loader that returns the raw HTML and that sets the source - key in the document metadata such as ``AsyncHtmlLoader``, - you can simplify by using :meth:`as_document_extractor` that takes directly a - ``Document`` as input:: - - from langchain_community.document_loaders import AsyncHtmlLoader - from langchain_community.graph_vectorstores.extractors import HtmlLinkExtractor - from 
langchain_community.graph_vectorstores.links import add_links - - loader = AsyncHtmlLoader( - [ - "https://python.langchain.com/docs/integrations/providers/astradb/", - "https://docs.datastax.com/en/astra/home/astra.html", - ] - ) - documents = loader.load() - html_extractor = HtmlLinkExtractor().as_document_extractor() - - for document in documents: - links = html_extractor.extract_one(document) - add_links(document, links) - - documents[0].metadata["links"][:5] - - .. code-block:: output - - [Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/spreedly/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/nvidia/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/ray_serve/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/bageldb/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/introduction/')] - - Using LinkExtractorTransformer - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - Using the :class:`~langchain_community.graph_vectorstores.extractors.link_extractor_transformer.LinkExtractorTransformer`, - we can simplify the link extraction:: - - from langchain_community.document_loaders import AsyncHtmlLoader - from langchain_community.graph_vectorstores.extractors import ( - HtmlLinkExtractor, - LinkExtractorTransformer, - ) - from langchain_community.graph_vectorstores.links import add_links - - loader = AsyncHtmlLoader( - [ - "https://python.langchain.com/docs/integrations/providers/astradb/", - "https://docs.datastax.com/en/astra/home/astra.html", - ] - ) - - documents = loader.load() - transformer = LinkExtractorTransformer( - [HtmlLinkExtractor().as_document_extractor()] - ) - documents = transformer.transform_documents(documents) - - documents[0].metadata["links"][:5] - - .. 
code-block:: output - - [Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/spreedly/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/nvidia/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/ray_serve/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/integrations/providers/bageldb/'), - Link(kind='hyperlink', direction='out', tag='https://python.langchain.com/docs/introduction/')] - - We can check that there is a link from the first document to the second:: - - for doc_to in documents: - for link_to in doc_to.metadata["links"]: - if link_to.direction == "in": - for doc_from in documents: - for link_from in doc_from.metadata["links"]: - if ( - link_to.direction == "in" - and link_from.direction == "out" - and link_to.tag == link_from.tag - ): - print( - f"Found link from {doc_from.metadata['source']} to {doc_to.metadata['source']}." - ) - - .. code-block:: output - - Found link from https://python.langchain.com/docs/integrations/providers/astradb/ to https://docs.datastax.com/en/astra/home/astra.html. - - The documents with URL links can then be added to a :class:`~langchain_community.graph_vectorstores.base.GraphVectorStore`:: - - from langchain_community.graph_vectorstores import CassandraGraphVectorStore - - store = CassandraGraphVectorStore.from_documents(documents=documents, embedding=...) - - Parameters - ---------- - url_metadata_key : str, default "url" - The metadata field containing the URL of the document. Must be set - before transforming. Needed to resolve relative paths. - metadata_key : str, default "hyperlink" - The metadata field to populate with documents linked from this content. - drop_fragments : bool, default True - Whether fragments in URLs and links should be dropped. 
- - """ # noqa: E501 - - def __init__( - self, - *, - url_metadata_key: str = "url", - metadata_key: str = "hyperlink", - drop_fragments: bool = True, - ): - try: - from bs4 import BeautifulSoup # noqa:F401 - except ImportError as e: - raise ImportError( - "BeautifulSoup4 is required for HtmlHyperlinkExtractor. " - "Please install it with `pip install beautifulsoup4`." - ) from e - - self._url_metadata_key = url_metadata_key - self._metadata_key = metadata_key - self._drop_fragments = drop_fragments - - @staticmethod - def _parse_url(link: Tag, page_url: str, drop_fragments: bool = True) -> str | None: - href = link.get("href") - if href is None: - return None - url = urlparse(href) - if url.scheme not in ["http", "https", ""]: - return None - - # Join the HREF with the page_url to convert relative paths to absolute. - url = str(urljoin(page_url, href)) - - # Fragments would be useful if we chunked a page based on section. - # Then, each chunk would have a different URL based on the fragment. - # Since we aren't doing that yet, they just "break" links. So, drop - # the fragment. - if drop_fragments: - return urldefrag(url).url - return url - - @staticmethod - def _parse_urls( - soup: BeautifulSoup, page_url: str, drop_fragments: bool = True - ) -> list[str]: - soup_links: list[Tag] = soup.find_all("a") - urls: set[str] = set() - - for link in soup_links: - parsed_url = HtmlHyperlinkExtractor._parse_url( - link, page_url=page_url, drop_fragments=drop_fragments - ) - # Remove self links and entries for any 'a' tag that failed to parse - # (didn't have href, or invalid domain, etc.) 
- if parsed_url and parsed_url != page_url: - urls.add(parsed_url) - - return list(urls) - - @override - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - for document in documents: - if self._url_metadata_key not in document.metadata: - msg = ( - f"html document url not found in metadata at " - f"{self._url_metadata_key} for document id: {document.id}" - ) - raise ValueError(msg) - - page_url = document.metadata[self._url_metadata_key] - if self._drop_fragments: - page_url = urldefrag(page_url).url - - soup = BeautifulSoup(document.page_content, "html.parser", **kwargs) - - document.metadata[self._metadata_key] = self._parse_urls( - soup=soup, page_url=page_url, drop_fragments=self._drop_fragments - ) - return documents diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/keybert.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/keybert.py deleted file mode 100644 index 6d375d6d..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/keybert.py +++ /dev/null @@ -1,105 +0,0 @@ -from collections.abc import Sequence -from typing import Any - -from langchain_core.documents import BaseDocumentTransformer, Document -from typing_extensions import override - - -class KeybertKeywordExtractor(BaseDocumentTransformer): - """ - Add metadata to documents about keywords using `KeyBERT <https://maartengr.github.io/KeyBERT/>`_. - - KeyBERT is a minimal and easy-to-use keyword extraction technique that - leverages BERT embeddings to create keywords and keyphrases that are most - similar to a document. - - The KeybertKeywordExtractor uses KeyBERT add a list of keywords to a - document's metadata. - - Preliminaries - ------------- - - Install the ``keybert`` package. 
- - Note that ``bs4`` is also installed to support the WebBaseLoader in the example, - but not needed by the KeybertKeywordExtractor itself. - - .. code-block:: bash - - pip install -q langchain_community bs4 keybert - - Example - ------- - We load the ``state_of_the_union.txt`` file, chunk it, then for each chunk we - add keywords to the metadata. - - .. code-block:: python - - from langchain_community.document_loaders import WebBaseLoader - from langchain_community.document_transformers import KeybertKeywordExtractor - from langchain_text_splitters import CharacterTextSplitter - - loader = WebBaseLoader( - "https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt" - ) - raw_documents = loader.load() - - text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) - documents = text_splitter.split_documents(raw_documents) - - extractor = KeybertKeywordExtractor() - documents = extractor.transform_documents(documents) - - print(documents[0].metadata) - - .. code-block:: output - - {'source': 'https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt', 'keywords': ['putin', 'vladimir', 'ukrainian', 'russia', 'ukraine']} - - Parameters - ---------- - batch_size : int, default 8 - The number of documents to process in each batch. - metadata_key : str, default "keywords" - The name of the key used in the metadata output. - model : str, default "all-MiniLM-L6-v2" - The KeyBERT model to use. - """ # noqa: E501 - - def __init__( - self, - *, - batch_size: int = 8, - metadata_key: str = "keywords", - model: str = "all-MiniLM-L6-v2", - ): - try: - import keybert # type: ignore - - self._kw_model = keybert.KeyBERT(model=model) - except ImportError: - raise ImportError( - "keybert is required for the KeybertLinkExtractor. " - "Please install it with `pip install keybert`." 
- ) from None - - self._batch_size = batch_size - self._metadata_key = metadata_key - - @override - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - for i in range(0, len(documents), self._batch_size): - batch = documents[i : i + self._batch_size] - texts = [item.page_content for item in batch] - extracted = self._kw_model.extract_keywords(docs=texts, **kwargs) - if len(texts) == 1: - # Even though we pass a list, if it contains one item, keybert will - # flatten it. This means it's easier to just call the special case - # for one item. - batch[0].metadata[self._metadata_key] = [kw[0] for kw in extracted] - else: - for i, keywords in enumerate(extracted): - batch[i].metadata[self._metadata_key] = [kw[0] for kw in keywords] - return documents diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/metadata_denormalizer.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/metadata_denormalizer.py deleted file mode 100644 index b1f47380..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/document_transformers/metadata_denormalizer.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Denormalizer for sequence-based metadata fields.""" - -import json -from collections.abc import Sequence -from typing import Any - -from langchain_core.documents import BaseDocumentTransformer, Document -from typing_extensions import override - -DENORMALIZED_KEYS_KEY = "__denormalized_keys" -DEFAULT_PATH_DELIMITER = "→" # unicode 2192 -DEFAULT_STATIC_VALUE = "§" # unicode 00A7 - - -class MetadataDenormalizer(BaseDocumentTransformer): - """ - Denormalizes sequence-based metadata fields. - - Certain vector stores do not support storing or searching on metadata fields - with sequence-based values. This transformer converts sequence-based fields - into simple metadata values. - - Example: - ------- - - .. 
code-block:: python - - from langchain_core.documents import Document - from langchain_community.document_transformers.metadata_denormalizer import ( - MetadataDenormalizer, - ) - - doc = Document( - page_content="test", - metadata={"place": ["berlin", "paris"], "topic": ["weather"]}, - ) - - de_normalizer = MetadataDenormalizer() - - docs = de_normalizer.transform_documents([doc]) - - print(docs[0].metadata) - - - .. code-block:: output - - {'place.berlin': True, 'place.paris': True, 'topic.weather': True} - - Parameters - ---------- - keys : set[str], optional: - A set of metadata keys to denormalize. - If empty, all sequence-based fields will be denormalized. - path_delimiter : str, default "→" (unicode 2192) - The path delimiter to use when building denormalized keys. - static_value : str, default "§" (unicode 00A7) - The value to set on each denormalized key. - - """ # noqa: E501 - - def __init__( - self, - *, - keys: set[str] = set(), - path_delimiter: str = DEFAULT_PATH_DELIMITER, - static_value: Any = DEFAULT_STATIC_VALUE, - ): - self.keys = keys - self.path_delimiter = path_delimiter - self.static_value = static_value - - @override - def transform_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - transformed_docs = [] - for document in documents: - new_doc = Document(id=document.id, page_content=document.page_content) - denormalized_keys: list[str] = [] - for key, value in document.metadata.items(): - is_normalized = isinstance(value, Sequence) and not isinstance( - value, str | bytes - ) - should_denormalize = (not self.keys) or (key in self.keys) - if is_normalized and should_denormalize: - denormalized_keys.append(key) - for item in value: - new_doc.metadata[self.denormalized_key(key=key, value=item)] = ( - self.denormalized_value() - ) - else: - new_doc.metadata[key] = value - if len(denormalized_keys) > 0: - new_doc.metadata[DENORMALIZED_KEYS_KEY] = json.dumps(denormalized_keys) - 
transformed_docs.append(new_doc) - - return transformed_docs - - def revert_documents( - self, documents: Sequence[Document], **kwargs: Any - ) -> Sequence[Document]: - """ - Revert documents transformed by the MetadataDenormalizer. - - Reverts documents transformed by the MetadataDenormalizer back to their original - state before denormalization. - - Note that any non-string values inside lists will be converted to strings - after reverting. - - Args: - documents: A sequence of Documents to be transformed. - - Returns - ------- - Sequence[Document] - A sequence of transformed Documents. - """ - reverted_docs = [] - for document in documents: - new_doc = Document(id=document.id, page_content=document.page_content) - denormalized_keys = set( - json.loads(document.metadata.pop(DENORMALIZED_KEYS_KEY, "[]")) - ) - - for key, value in document.metadata.items(): - # Check if the key belongs to a denormalized group - split_key = key.split(self.path_delimiter, 1) - if ( - len(split_key) == 2 - and split_key[0] in denormalized_keys - and value == self.static_value - ): - original_key, original_value = split_key - if original_key not in new_doc.metadata: - new_doc.metadata[original_key] = [] - new_doc.metadata[original_key].append(original_value) - else: - # Retain non-denormalized metadata as is - new_doc.metadata[key] = value - - reverted_docs.append(new_doc) - - return reverted_docs - - def denormalized_key(self, key: str, value: Any) -> str: - """ - Get the denormalized key for a key/value pair. - - Parameters - ---------- - key : str - The metadata key to denormalize - value : Any - The metadata value to denormalize - - Returns - ------- - str - the denormalized key - """ - return f"{key}{self.path_delimiter}{value}" - - def denormalized_value(self) -> str: - """ - Get the denormalized value for a key/value pair. 
- - Returns - ------- - str - the denormalized value - """ - return self.static_value diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/graph_retriever.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/graph_retriever.py index 157d2e68..a6cb381b 100644 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/graph_retriever.py +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/graph_retriever.py @@ -1,23 +1,24 @@ """Provides a graph-based retriever combining vector search and graph traversal.""" +import dataclasses from collections.abc import Sequence from functools import cached_property from typing import ( Any, ) +from graph_retriever import atraverse, traverse +from graph_retriever.adapters import Adapter +from graph_retriever.edges import EdgeFunction, EdgeSpec +from graph_retriever.strategies import Eager, Strategy from langchain_core.documents import Document from langchain_core.retrievers import BaseRetriever -from langchain_core.vectorstores import VectorStore +from langchain_core.vectorstores.base import VectorStore from pydantic import ConfigDict, computed_field, model_validator from typing_extensions import Self -from langchain_graph_retriever._traversal import Traversal -from langchain_graph_retriever.adapters.base import Adapter +from langchain_graph_retriever._conversion import node_to_doc from langchain_graph_retriever.adapters.inference import infer_adapter -from langchain_graph_retriever.edges.metadata import EdgeSpec -from langchain_graph_retriever.strategies import Eager, Strategy -from langchain_graph_retriever.types import EdgeFunction # this class uses pydantic, so store must be provided at init time. @@ -25,31 +26,21 @@ class GraphRetriever(BaseRetriever): """ Retriever combining vector search and graph traversal. 
- The `GraphRetriever` class performs retrieval by first using vector search to find - relevant documents, and then applying graph traversal to explore connected - documents. It supports multiple traversal strategies and integrates seamlessly - with LangChain's retriever framework. - - Parameters - ---------- - store : Adapter | VectorStore - The vector store or adapter used for document retrieval. - edges : list[EdgeSpec] | EdgeFunction, default [] - Function to use for extracting edges from nodes. May be passed a list of - arguments to construct a `MetadataEdgeFunction` from, or an - `EdgeFunction`. - strategy : Strategy, default Eager() - The traversal strategy to use. - Defaults to an `Eager` (breadth-first) strategy which explores - the top `adjacent_k` for each edge. + The [GraphRetriever][langchain_graph_retriever.GraphRetriever] class + retrieves documents by first performing a vector search to identify relevant + documents, followed by graph traversal to explore their connections. It + supports multiple traversal strategies and integrates seamlessly with + LangChain's retriever framework. Attributes ---------- - store : Adapter | VectorStore - The vector store or adapter used for document retrieval. - edges : list[str | tuple[str, str | Id]] | EdgeFunction - Definitions of edges used for graph traversal. - strategy : Strategy + store : + The adapter or vector store used for document retrieval. + edges : + A list of [EdgeSpec][graph_retriever.edges.EdgeSpec] for use in creating a + [MetadataEdgeFunction][graph_retriever.edges.MetadataEdgeFunction], + or an [EdgeFunction][graph_retriever.edges.EdgeFunction]. + strategy : The traversal strategy to use. """ @@ -64,21 +55,21 @@ class GraphRetriever(BaseRetriever): @model_validator(mode="after") def apply_extra(self) -> Self: """ - Apply extra configuration to the traversal strpategy. + Apply extra configuration to the traversal strategy. 
- This method captures additional fields provided in `model_extra` and applies - them to the current traversal strategy. Any extra fields are cleared after - they are applied. + This method captures additional fields provided in the `model_extra` argument + and applies them to the current traversal strategy. Any extra fields are + cleared after they are applied. Returns ------- - Self - The updated `GraphRetriever` instance. + : + The updated GraphRetriever instance. """ if self.model_extra: - self.strategy = self.strategy.model_validate( - {**self.strategy.model_dump(), **self.model_extra} - ) + if "k" in self.model_extra: + self.model_extra["select_k"] = self.model_extra.pop("k") + self.strategy = dataclasses.replace(self.strategy, **self.model_extra) self.model_extra.clear() return self @@ -104,25 +95,29 @@ def _get_relevant_documents( This method first retrieves documents based on similarity to the query, and then applies a traversal strategy to explore connected nodes in the graph. + Notes + ----- + You can execute this method by calling `.invoke()` on the retriever. + Parameters ---------- - query : str + query : The query string to search for. - edges : list[EdgeSpec] | EdgeFunction, optional - Optional edge definitions for this retrieval. - initial_roots : Sequence[str] + edges : + Override edge definitions for this invocation. + initial_roots : Document IDs to use as initial roots. The top `adjacent_k` nodes connected to each root are included in the initial candidates. - filter : dict[str, Any], optional + filter : Optional metadata filter to apply. - store_kwargs : dict[str, Any], optional + store_kwargs : Additional keyword arguments for the store. - **kwargs : Any + kwargs : Additional arguments for configuring the traversal strategy. Returns ------- - list[Document] + : The retrieved documents. 
Raises @@ -134,7 +129,7 @@ def _get_relevant_documents( if edges is None: raise ValueError("'edges' must be provided in this call or the constructor") - traversal = Traversal( + nodes = traverse( query=query, edges=edges, strategy=Strategy.build(base_strategy=self.strategy, **kwargs), @@ -143,8 +138,7 @@ def _get_relevant_documents( initial_root_ids=initial_roots, store_kwargs=store_kwargs, ) - - return traversal.traverse() + return [node_to_doc(n) for n in nodes] async def _aget_relevant_documents( self, @@ -162,25 +156,29 @@ async def _aget_relevant_documents( This method first retrieves documents based on similarity to the query, and then applies a traversal strategy to explore connected nodes in the graph. + Notes + ----- + You can execute this method by calling `.ainvoke()` on the retriever. + Parameters ---------- - query : str + query : The query string to search for. - edges : list[EdgeSpec] | EdgeFunction, optional + edges : Override edge definitions for this invocation. - initial_roots : Sequence[str] + initial_roots : Document IDs to use as initial roots. The top `adjacent_k` nodes connected to each root are included in the initial candidates. - filter : dict[str, Any], optional + filter : Optional metadata filter to apply. - store_kwargs : dict[str, Any], optional + store_kwargs : Additional keyword arguments for the store. - **kwargs : Any + kwargs : Additional arguments for configuring the traversal strategy. Returns ------- - list[Document] + : The retrieved documents. 
Raises @@ -191,7 +189,7 @@ async def _aget_relevant_documents( edges = edges or self.edges if edges is None: raise ValueError("'edges' must be provided in this call or the constructor") - traversal = Traversal( + nodes = await atraverse( query=query, edges=edges, strategy=Strategy.build(base_strategy=self.strategy, **kwargs), @@ -200,5 +198,4 @@ async def _aget_relevant_documents( initial_root_ids=initial_roots, store_kwargs=store_kwargs, ) - - return await traversal.atraverse() + return [node_to_doc(n) for n in nodes] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/__init__.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/__init__.py deleted file mode 100644 index 8f480c13..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Provide traversal strategies guiding which nodes are selected.""" - -from .base import Strategy -from .eager import Eager -from .mmr import Mmr - -__all__ = [ - "Strategy", - "Eager", - "Mmr", -] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/base.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/base.py deleted file mode 100644 index c57f6ce9..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/base.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Define the base traversal strategy.""" - -from __future__ import annotations - -import abc -import warnings -from collections.abc import Iterable -from typing import Any - -from pydantic import BaseModel - -from langchain_graph_retriever.types import Node - - -class Strategy(BaseModel, abc.ABC): - """ - Interface for configuring node selection and traversal strategies. - - This base class defines how nodes are selected, traversed, and finalized during - a graph traversal. 
Implementations can customize behaviors like limiting the depth - of traversal, scoring nodes, or selecting the next set of nodes for exploration. - - Parameters - ---------- - k : int, default 5 - Maximum number of nodes to retrieve during traversal. - start_k : int, default 4 - Number of documents to fetch via similarity for starting the traversal. - Added to any initial roots provided to the traversal. - adjacent_k : int, default 10 - Number of documents to fetch for each outgoing edge. - max_depth : int, optional - Maximum traversal depth. If `None`, there is no limit. - - Attributes - ---------- - k : int - Maximum number of nodes to retrieve during traversal. - start_k : int - Number of documents to fetch via similarity for starting the traversal. - Added to any initial roots provided to the traversal. - adjacent_k : int - Number of documents to fetch for each outgoing edge. - max_depth : int - Maximum traversal depth. If `None`, there is no limit. - """ - - k: int = 5 - start_k: int = 4 - adjacent_k: int = 10 - max_depth: int | None = None - - _query_embedding: list[float] = [] - - @abc.abstractmethod - def discover_nodes(self, nodes: dict[str, Node]) -> None: - """ - Add discovered nodes to the strategy. - - This method updates the strategy's state with nodes discovered during - the traversal process. - - Parameters - ---------- - nodes : dict[str, Node] - Discovered nodes keyed by their IDs. - """ - ... - - @abc.abstractmethod - def select_nodes(self, *, limit: int) -> Iterable[Node]: - """ - Select discovered nodes to visit in the next iteration. - - This method determines which nodes will be traversed next. If it returns - an empty list, traversal ends even if fewer than `k` nodes have been selected. - - Parameters - ---------- - limit : - Maximum number of nodes to select. - - Returns - ------- - Iterable[Node] - Selected nodes for the next iteration. Traversal ends if this is empty. - """ - ... 
- - def finalize_nodes(self, nodes: Iterable[Node]) -> Iterable[Node]: - """ - Finalize the selected nodes. - - This method is called before returning the final set of nodes. - - Parameters - ---------- - nodes : Iterable[Node] - Nodes selected for finalization. - - Returns - ------- - Iterable[Node] - Finalized nodes. - """ - return nodes - - @staticmethod - def build( - base_strategy: Strategy, - **kwargs: Any, - ) -> Strategy: - """ - Build a strategy for a retrieval operation. - - Combines a base strategy with any provided keyword arguments to - create a customized traversal strategy. - - Parameters - ---------- - base_strategy : Strategy - The base strategy to start with. - **kwargs : Any - Additional configuration options for the strategy. - - Returns - ------- - Strategy - A configured strategy instance. - - Raises - ------ - ValueError - If 'strategy' is set incorrectly or extra arguments are invalid. - """ - # Check if there is a new strategy to use. Otherwise, use the base. - strategy: Strategy - if "strategy" in kwargs: - if next(iter(kwargs.keys())) != "strategy": - raise ValueError("Error: 'strategy' must be set before other args.") - strategy = kwargs.pop("strategy") - if not isinstance(strategy, Strategy): - raise ValueError( - f"Unsupported 'strategy' type {type(strategy).__name__}." - " Must be a sub-class of Strategy" - ) - elif base_strategy is not None: - strategy = base_strategy - else: - raise ValueError("'strategy' must be set in `__init__` or invocation") - - # Warn if any of the kwargs don't exist in the strategy. - # Note: We could rely on Pydantic with forbidden extra arguments to - # handle this, however the experience isn't as nice (Validation error - # rather than warning, no indication of which field, etc.). - assert strategy is not None - invalid_keys = _invalid_keys(strategy, kwargs) - if invalid_keys is not None: - warnings.warn(f"Unsupported key(s) {invalid_keys} set.") - - # Apply the kwargs to update the strategy. 
- # This uses `model_validate` rather than `model_copy`` to re-apply validation. - strategy = strategy.model_validate( - {**strategy.model_dump(), **kwargs}, - ) - - return strategy - - -def _invalid_keys(model: BaseModel, dict: dict[str, Any]) -> str | None: - """ - Identify invalid keys in the given dictionary for a Pydantic model. - - Parameters - ---------- - model : BaseModel - The Pydantic model to validate against. - dict : dict[str, Any] - The dictionary to check. - - Returns - ------- - str | None - A comma-separated string of invalid keys, if any. - """ - invalid_keys = set(dict.keys()) - set(model.model_fields.keys()) - if invalid_keys: - return ", ".join([f"'{k}'" for k in invalid_keys]) - else: - return None diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/eager.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/eager.py deleted file mode 100644 index 55f7ae4d..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/strategies/eager.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Provide eager (breadth-first) traversal strategy.""" - -from collections.abc import Iterable - -from typing_extensions import override - -from langchain_graph_retriever.strategies.base import Strategy -from langchain_graph_retriever.types import Node - - -class Eager(Strategy): - """ - Eager traversal strategy (breadth-first). - - This strategy selects all discovered nodes at each traversal step. It ensures - breadth-first traversal by processing nodes layer by layer, which is useful for - scenarios where all nodes at the current depth should be explored before proceeding - to the next depth. - - Parameters - ---------- - k : int, default 5 - Maximum number of nodes to retrieve during traversal. - start_k : int, default 4 - Number of documents to fetch via similarity for starting the traversal. - Added to any initial roots provided to the traversal. 
- adjacent_k : int, default 10 - Number of documents to fetch for each outgoing edge. - max_depth : int, optional - Maximum traversal depth. If `None`, there is no limit. - - Attributes - ---------- - k : int - Maximum number of nodes to retrieve during traversal. - start_k : int - Number of documents to fetch via similarity for starting the traversal. - Added to any initial roots provided to the traversal. - adjacent_k : int - Number of documents to fetch for each outgoing edge. - max_depth : int - Maximum traversal depth. If `None`, there is no limit. - """ - - _nodes: list[Node] = [] - - @override - def discover_nodes(self, nodes: dict[str, Node]) -> None: - self._nodes.extend(nodes.values()) - - @override - def select_nodes(self, *, limit: int) -> Iterable[Node]: - nodes = self._nodes[:limit] - self._nodes = [] - return nodes diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/__init__.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/__init__.py new file mode 100644 index 00000000..39da5a6f --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/__init__.py @@ -0,0 +1,17 @@ +""" +Package containing useful Document Transformers. + +Many of these add metadata that could be useful for linking content, such as +extracting named entities or keywords from the page content. + +Also includes a transformer for shredding metadata, for use with stores +that do not support querying on elements of lists. 
+""" + +from .parent import ParentTransformer +from .shredding import ShreddingTransformer + +__all__ = [ + "ShreddingTransformer", + "ParentTransformer", +] diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/gliner.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/gliner.py new file mode 100644 index 00000000..cd25aad5 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/gliner.py @@ -0,0 +1,96 @@ +from collections.abc import Sequence +from typing import Any + +from gliner import GLiNER # type: ignore +from langchain_core.documents import BaseDocumentTransformer, Document +from typing_extensions import override + + +class GLiNERTransformer(BaseDocumentTransformer): + """ + Add metadata to documents about named entities using **GLiNER**. + + Extracts structured entity labels from text, identifying key attributes and + categories to enrich document metadata with semantic information. + + [**GLiNER**](https://github.com/urchade/GLiNER) is a Named Entity + Recognition (NER) model capable of identifying any entity type using a + bidirectional transformer encoder (BERT-like). + + Prerequisites + ------------- + + This transformer requires the `gliner` extra to be installed. + + ``` + pip install -qU langchain_graph_retriever[gliner] + ``` + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#glinertransformer) in the guide. + + + Parameters + ---------- + labels : + List of entity kinds to extract. + batch_size : + The number of documents to process in each batch. + metadata_key_prefix : + A prefix to add to metadata keys outputted by the extractor. + This will be prepended to the label, with the value (or values) holding the + generated keywords for that entity kind. + model : + The GLiNER model to use. Pass the name of a model to load or + pass an instantiated GLiNER model instance. 
+ + """ # noqa: E501 + + def __init__( + self, + labels: list[str], + *, + batch_size: int = 8, + metadata_key_prefix: str = "", + model: str | GLiNER = "urchade/gliner_mediumv2.1", + ): + if isinstance(model, GLiNER): + self._model = model + elif isinstance(model, str): + self._model = GLiNER.from_pretrained(model) + else: + raise ValueError(f"Invalid model: {model}") + + self._batch_size = batch_size + self._labels = labels + self.metadata_key_prefix = metadata_key_prefix + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + results: list[Document] = [] + for i in range(0, len(documents), self._batch_size): + batch = documents[i : i + self._batch_size] + texts = [item.page_content for item in batch] + extracted = self._model.batch_predict_entities( + texts=texts, labels=self._labels, **kwargs + ) + for j, entities in enumerate(extracted): + new_metadata: dict[str, Any] = {} + for entity in entities: + label = self.metadata_key_prefix + entity["label"] + new_metadata.setdefault(label, set()).add(entity["text"].lower()) + + result = Document( + id=batch[j].id, + page_content=batch[j].page_content, + metadata=batch[j].metadata.copy(), + ) + for key in new_metadata.keys(): + result.metadata[key] = list(new_metadata[key]) + + results.append(result) + return results diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/html.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/html.py new file mode 100644 index 00000000..89b7a751 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/html.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any +from urllib.parse import urldefrag, urljoin, urlparse + +from bs4 import BeautifulSoup +from bs4.element import Tag +from langchain_core.documents import BaseDocumentTransformer, Document 
+from typing_extensions import override + + +class HyperlinkTransformer(BaseDocumentTransformer): + """ + Extracts hyperlinks from HTML content and stores them in document metadata. + + Prerequisites + ------------- + + This transformer requires the `html` extra to be installed. + + ``` + pip install -qU langchain_graph_retriever[html] + ``` + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#hyperlinktransformer) in the guide. + + Parameters + ---------- + url_metadata_key : + The metadata field containing the URL of the document. Must be set + before transforming. Needed to resolve relative paths. + metadata_key : + The metadata field to populate with documents linked from this content. + drop_fragments : + Whether fragments in URLs and links should be dropped. + + Notes + ----- + Expects each document to contain its _URL_ in its metadata. + + """ # noqa: E501 + + def __init__( + self, + *, + url_metadata_key: str = "url", + metadata_key: str = "hyperlink", + drop_fragments: bool = True, + ): + self._url_metadata_key = url_metadata_key + self._metadata_key = metadata_key + self._drop_fragments = drop_fragments + + @staticmethod + def _parse_url(link: Tag, page_url: str, drop_fragments: bool = True) -> str | None: + href = link.get("href") + if href is None: + return None + if isinstance(href, list) and len(href) == 1: + href = href[0] + if not isinstance(href, str): + return None + + url = urlparse(href) + if url.scheme not in ["http", "https", ""]: + return None + + # Join the HREF with the page_url to convert relative paths to absolute. + joined_url = str(urljoin(page_url, href)) + + # Fragments would be useful if we chunked a page based on section. + # Then, each chunk would have a different URL based on the fragment. + # Since we aren't doing that yet, they just "break" links. So, drop + # the fragment. 
+ if drop_fragments: + return urldefrag(joined_url).url + return joined_url + + @staticmethod + def _parse_urls( + soup: BeautifulSoup, page_url: str, drop_fragments: bool = True + ) -> list[str]: + soup_links: list[Tag] = soup.find_all("a") + urls: set[str] = set() + + for link in soup_links: + parsed_url = HyperlinkTransformer._parse_url( + link, page_url=page_url, drop_fragments=drop_fragments + ) + # Remove self links and entries for any 'a' tag that failed to parse + # (didn't have href, or invalid domain, etc.) + if parsed_url and parsed_url != page_url: + urls.add(parsed_url) + + return list(urls) + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + results: list[Document] = [] + for document in documents: + if self._url_metadata_key not in document.metadata: + msg = ( + f"html document url not found in metadata at " + f"{self._url_metadata_key} for document id: {document.id}" + ) + raise ValueError(msg) + + page_url = document.metadata[self._url_metadata_key] + if self._drop_fragments: + page_url = urldefrag(page_url).url + + soup = BeautifulSoup(document.page_content, "html.parser", **kwargs) + parsed_urls = self._parse_urls( + soup=soup, page_url=page_url, drop_fragments=self._drop_fragments + ) + + results.append( + Document( + id=document.id, + page_content=document.page_content, + metadata={self._metadata_key: parsed_urls, **document.metadata}, + ) + ) + return results diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/keybert.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/keybert.py new file mode 100644 index 00000000..719b3728 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/keybert.py @@ -0,0 +1,92 @@ +from collections.abc import Sequence +from typing import Any + +from keybert import KeyBERT # type: ignore +from langchain_core.documents import 
BaseDocumentTransformer, Document +from typing_extensions import override + + +class KeyBERTTransformer(BaseDocumentTransformer): + """ + Add metadata to documents about keywords using **KeyBERT**. + + Extracts key topics and concepts from text, generating metadata that highlights + the most relevant terms to describe the content. + + [**KeyBERT**](https://maartengr.github.io/KeyBERT) is a minimal and easy-to-use + keyword extraction technique that leverages BERT embeddings to create keywords and + keyphrases that are most similar to a document. + + Prerequisites + ------------- + + This transformer requires the `keybert` extra to be installed. + + ``` + pip install -qU langchain_graph_retriever[keybert] + ``` + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#keyberttransformer) in the guide. + + Parameters + ---------- + batch_size + The number of documents to process in each batch. + metadata_key : + The name of the key used in the metadata output. + model : + The KeyBERT model to use. Pass the name of a model to load + or pass an instantiated KeyBERT model instance. + """ + + def __init__( + self, + *, + batch_size: int = 8, + metadata_key: str = "keywords", + model: str | KeyBERT = "all-MiniLM-L6-v2", + ): + if isinstance(model, KeyBERT): + self._kw_model = model + elif isinstance(model, str): + self._kw_model = KeyBERT(model=model) + else: + raise ValueError(f"Invalid model: {model}") + self._batch_size = batch_size + self._metadata_key = metadata_key + + def _extract_keywords( + self, docs: list[str], **kwargs + ) -> list[list[tuple[str, float]]]: + """Wrap the function to always return a list of responses.""" + extracted = self._kw_model.extract_keywords(docs=docs, **kwargs) + if len(docs) == 1: + # Even if we pass a list, if it contains one item, keybert will flatten it. 
+ return [extracted] + else: + return extracted + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + results: list[Document] = [] + for i in range(0, len(documents), self._batch_size): + batch = documents[i : i + self._batch_size] + texts = [item.page_content for item in batch] + extracted = self._extract_keywords(docs=texts, **kwargs) + for j, keywords in enumerate(extracted): + results.append( + Document( + id=batch[j].id, + page_content=batch[j].page_content, + metadata={ + self._metadata_key: [kw[0] for kw in keywords], + **batch[j].metadata, + }, + ) + ) + return results diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/parent.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/parent.py new file mode 100644 index 00000000..a3d0c4c9 --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/parent.py @@ -0,0 +1,63 @@ +from collections.abc import Sequence +from typing import Any + +from langchain_core.documents import BaseDocumentTransformer, Document +from typing_extensions import override + + +class ParentTransformer(BaseDocumentTransformer): + """ + Adds the hierarchal Parent path to the document metadata. + + Parameters + ---------- + path_metadata_key : + Metadata key containing the path. + This may correspond to paths in a file system, hierarchy in a document, etc. + parent_metadata_key: + Metadata key for the added parent path + path_delimiter : + Delimiter of items in the path. + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#parenttransformer) in the guide. + + Notes + ----- + Expects each document to contain its _path_ in its metadata. 
+ """ + + def __init__( + self, + *, + path_metadata_key: str = "path", + parent_metadata_key: str = "parent", + path_delimiter: str = "\\", + ): + self._path_metadata_key = path_metadata_key + self._parent_metadata_key = parent_metadata_key + self._path_delimiter = path_delimiter + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + results: list[Document] = [] + for document in documents: + if self._path_metadata_key not in document.metadata: + msg = ( + f"path not found in metadata at {self._path_metadata_key}", + f" for document id: {document.id}", + ) + raise ValueError(msg) + + path: str = document.metadata[self._path_metadata_key] + path_parts = path.split(self._path_delimiter) + result = document.model_copy() + if len(path_parts) > 1: + parent_path = self._path_delimiter.join(path_parts[0:-1]) + result.metadata[self._parent_metadata_key] = parent_path + results.append(result) + return results diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/shredding.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/shredding.py new file mode 100644 index 00000000..c18c34bf --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/shredding.py @@ -0,0 +1,152 @@ +"""Shredding Transformer for sequence-based metadata fields.""" + +import json +from collections.abc import Sequence +from typing import Any + +from langchain_core.documents import BaseDocumentTransformer, Document +from typing_extensions import override + +SHREDDED_KEYS_KEY = "__shredded_keys" +DEFAULT_PATH_DELIMITER = "→" # unicode 2192 +DEFAULT_STATIC_VALUE = "§" # unicode 00A7 + + +class ShreddingTransformer(BaseDocumentTransformer): + """ + Shreds sequence-based metadata fields. + + Certain vector stores do not support storing or searching on metadata fields + with sequence-based values. 
This transformer converts sequence-based fields + into simple metadata values. + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#shreddingtransformer) in the guide. + + Parameters + ---------- + keys : + A set of metadata keys to shred. + If empty, all sequence-based fields will be shredded. + path_delimiter : + The path delimiter to use when building shredded keys. + static_value : + The value to set on each shredded key. + """ + + def __init__( + self, + *, + keys: set[str] = set(), + path_delimiter: str = DEFAULT_PATH_DELIMITER, + static_value: Any = DEFAULT_STATIC_VALUE, + ): + self.keys = keys + self.path_delimiter = path_delimiter + self.static_value = static_value + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + transformed_docs = [] + for document in documents: + new_doc = Document(id=document.id, page_content=document.page_content) + shredded_keys: list[str] = [] + for key, value in document.metadata.items(): + is_nested_sequence = isinstance(value, Sequence) and not isinstance( + value, str | bytes + ) + should_shred = (not self.keys) or (key in self.keys) + if is_nested_sequence and should_shred: + shredded_keys.append(key) + for item in value: + new_doc.metadata[self.shredded_key(key=key, value=item)] = ( + self.shredded_value() + ) + else: + new_doc.metadata[key] = value + if len(shredded_keys) > 0: + new_doc.metadata[SHREDDED_KEYS_KEY] = json.dumps(shredded_keys) + transformed_docs.append(new_doc) + + return transformed_docs + + def restore_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + """ + Restore documents transformed by the ShreddingTransformer. + + Restore documents transformed by the ShreddingTransformer back to + their original state before shredding. + + Note that any non-string values inside lists will be converted to strings + after restoring. 
+ + Args: + documents: A sequence of Documents to be transformed. + + Returns + ------- + Sequence[Document] + A sequence of transformed Documents. + """ + restored_docs = [] + for document in documents: + new_doc = Document(id=document.id, page_content=document.page_content) + shredded_keys = set( + json.loads(document.metadata.pop(SHREDDED_KEYS_KEY, "[]")) + ) + + for key, value in document.metadata.items(): + # Check if the key belongs to a shredded group + split_key = key.split(self.path_delimiter, 1) + if ( + len(split_key) == 2 + and split_key[0] in shredded_keys + and value == self.static_value + ): + original_key, original_value = split_key + value = json.loads(original_value) + if original_key not in new_doc.metadata: + new_doc.metadata[original_key] = [] + new_doc.metadata[original_key].append(value) + else: + # Retain non-shredded metadata as is + new_doc.metadata[key] = value + + restored_docs.append(new_doc) + + return restored_docs + + def shredded_key(self, key: str, value: Any) -> str: + """ + Get the shredded key for a key/value pair. + + Parameters + ---------- + key : + The metadata key to shred + value : + The metadata value to shred + + Returns + ------- + str + the shredded key + """ + return f"{key}{self.path_delimiter}{json.dumps(value)}" + + def shredded_value(self) -> str: + """ + Get the shredded value for a key/value pair. 
+ + Returns + ------- + str + the shredded value + """ + return self.static_value diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/spacy.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/spacy.py new file mode 100644 index 00000000..04bebb1c --- /dev/null +++ b/packages/langchain-graph-retriever/src/langchain_graph_retriever/transformers/spacy.py @@ -0,0 +1,106 @@ +from collections.abc import Sequence +from typing import Any + +import spacy # type: ignore +from langchain_core.documents import BaseDocumentTransformer, Document +from spacy.language import Language # type: ignore +from typing_extensions import override + + +class SpacyNERTransformer(BaseDocumentTransformer): + """ + Add metadata to documents about named entities using **spaCy**. + + Identifies and labels named entities in text, extracting structured + metadata such as organizations, locations, dates, and other key entity types. + + [**spaCy**](https://spacy.io/) is a library for Natural Language Processing + in Python. Here it is used for Named Entity Recognition (NER) to extract values + from document content. + + Prerequisites + ------------- + + This transformer requires the `spacy` extra to be installed. + + ``` + pip install -qU langchain_graph_retriever[spacy] + ``` + + Example + ------- + An example of how to use this transformer exists + [HERE](../../guide/transformers.md#spacynertransformer) in the guide. + + Parameters + ---------- + include_labels : + Set of entity labels to include. Will include all labels if empty. + exclude_labels : + Set of entity labels to exclude. Will not exclude anything if empty. + metadata_key : + The metadata key to store the extracted entities in. + model : + The spaCy model to use. Pass the name of a model to load + or pass an instantiated spaCy model instance. + + Notes + ----- + See spaCy docs for the selected model to determine what NER labels will be + used. 
The default model + [en_core_web_sm](https://spacy.io/models/en#en_core_web_sm-labels) produces: + CARDINAL, DATE, EVENT, FAC, GPE, LANGUAGE, LAW, LOC, MONEY, NORP, ORDINAL, + ORG, PERCENT, PERSON, PRODUCT, QUANTITY, TIME, WORK_OF_ART. + + """ + + def __init__( + self, + *, + include_labels: set[str] = set(), + exclude_labels: set[str] = set(), + limit: int | None = None, + metadata_key: str = "entities", + model: str | Language = "en_core_web_sm", + ): + self.include_labels = include_labels + self.exclude_labels = exclude_labels + self.limit = limit + self.metadata_key = metadata_key + + if isinstance(model, str): + if not spacy.util.is_package(model): + spacy.cli.download(model) # type: ignore + self.model = spacy.load(model) + elif isinstance(model, Language): + self.model = model + else: + raise ValueError(f"Invalid model: {model}") + + @override + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> Sequence[Document]: + docs = [] + for doc in documents: + results = self.model(doc.page_content).ents + # Filter and de-duplicate entities. + entities = list( + { + f"{e.label_}: {e.text}" + for e in results + if not self.include_labels or e.label_ in self.include_labels + if not self.exclude_labels or e.label_ not in self.exclude_labels + } + ) + # Limit it, if necessary. 
+ if self.limit: + entities = entities[: self.limit] + docs.append( + Document( + id=doc.id, + page_content=doc.page_content, + metadata={self.metadata_key: entities, **doc.metadata}, + ) + ) + return docs diff --git a/packages/langchain-graph-retriever/src/langchain_graph_retriever/types.py b/packages/langchain-graph-retriever/src/langchain_graph_retriever/types.py deleted file mode 100644 index 1597d49d..00000000 --- a/packages/langchain-graph-retriever/src/langchain_graph_retriever/types.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Defines the `Node` class used during graph traversal.""" - -from __future__ import annotations - -import abc -from collections.abc import Callable -from dataclasses import dataclass, field -from typing import Any, TypeAlias - - -@dataclass -class Node: - """ - Represents a node in the traversal graph. - - The `Node` class contains information about a document during graph traversal, - including its depth, embedding, edges, and metadata. - - Attributes - ---------- - id : str - The unique identifier of the document represented by this node. - depth : int - The depth (number of edges) through which this node was discovered. This - depth may not reflect the true depth in the full graph if only a subset - of edges is retrieved. - embedding : list[float]) - The embedding vector of the document, used for similarity calculations. - metadata : dict[str, Any] - Metadata from the original document. This is a reference to the original - document metadata and should not be modified directly. Any updates to - metadata should be made to `extra_metadata`. - extra_metadata : dict[str, Any] - Additional metadata to override or augment the original document - metadata during traversal. 
- """ - - id: str - depth: int - embedding: list[float] - metadata: dict[str, Any] = field(default_factory=dict) - - incoming_edges: set[Edge] = field(default_factory=set) - outgoing_edges: set[Edge] = field(default_factory=set) - - extra_metadata: dict[str, Any] = field(default_factory=dict) - - -class Edge(abc.ABC): - """ - An edge identifies properties necessary for finding matching nodes. - - Sub-classes should be hashable. - """ - - pass - - -@dataclass(frozen=True) -class MetadataEdge(Edge): - """ - Link to nodes with specific metadata. - - A `MetadataEdge` defines nodes with `node.metadata[field] == value` or - `node.metadata[field] CONTAINS value` (if the metadata is a collection). - - Attributes - ---------- - incoming_field : str - The name of the metadata field storing incoming edges. - value : Any - The value associated with the key for this edge - """ - - incoming_field: str - value: Any - - -@dataclass(frozen=True) -class IdEdge(Edge): - """ - Nodes with `node.id == id`. - - Attributes - ---------- - id : str - The ID of the node to link to. - """ - - id: str - - -@dataclass -class Edges: - """ - Information about the incoming and outgoing edges. - - Attributes - ---------- - incoming : set[Edge] - Incoming edges that link to this node. - outgoing : set[Edge] - Edges that this node link to. These edges should be defined in terms of - the *incoming* `Edge` they match. For instance, a link from "mentions" - to "id" would link to `IdEdge(...)`. - """ - - incoming: set[Edge] - outgoing: set[Edge] - - -EdgeFunction: TypeAlias = Callable[[Node], Edges] -"""A function for extracting edges from nodes. - -Implementations should be deterministic. 
-""" diff --git a/packages/langchain-graph-retriever/tests/adapters/__init__.py b/packages/langchain-graph-retriever/tests/adapters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/packages/langchain-graph-retriever/tests/adapters/test_astra.py b/packages/langchain-graph-retriever/tests/adapters/test_astra.py new file mode 100644 index 00000000..9de00227 --- /dev/null +++ b/packages/langchain-graph-retriever/tests/adapters/test_astra.py @@ -0,0 +1,277 @@ +import dataclasses +import os +import time +from collections.abc import Iterable, Iterator +from typing import Any + +import pytest +from graph_retriever.testing.adapter_tests import ( + AdapterComplianceCase, + AdapterComplianceSuite, +) +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_graph_retriever.adapters.astra import AstraAdapter, _metadata_queries +from typing_extensions import override + + +def create_metadata_queries( + user_filters: dict[str, Any], + metadata: dict[str, Iterable[Any]] = {}, +) -> list[dict[str, Any]]: + return list( + _metadata_queries( + user_filters=user_filters, + metadata=metadata, + ) + ) + + +def create_metadata_query( + user_filters: dict[str, Any], + metadata: dict[str, Iterable[Any]] = {}, +) -> dict[str, Any]: + queries = create_metadata_queries(user_filters=user_filters, metadata=metadata) + assert len(queries) == 1 + return queries[0] + + +def test_create_metadata_query_no_user() -> None: + assert create_metadata_queries({}, metadata={}) == [] + + assert create_metadata_query({}, metadata={"foo": [5]}) == {"foo": 5} + + assert create_metadata_query({}, metadata={"foo": [5, 6]}) == { + "foo": {"$in": [5, 6]} + } + + assert create_metadata_queries({}, metadata={"foo": [5], "bar": [7]}) == [ + {"foo": 5}, + {"bar": 7}, + ] + + assert create_metadata_queries( + {}, + metadata={"foo": [5, 6], "bar": [7, 8]}, + ) == [ + {"foo": {"$in": [5, 6]}}, + {"bar": {"$in": [7, 8]}}, + ] + + assert 
create_metadata_queries( + {}, metadata={"foo": list(range(0, 200)), "bar": [7]} + ) == [ + {"foo": {"$in": list(range(0, 100))}}, + {"foo": {"$in": list(range(100, 200))}}, + {"bar": 7}, + ] + + +def test_create_metadata_query_user() -> None: + USER = {"answer": 42} + assert create_metadata_queries(USER, metadata={}) == [] + assert create_metadata_queries(USER, metadata={"foo": []}) == [] + assert create_metadata_query(USER, metadata={"foo": [5]}) == { + "$and": [ + {"foo": 5}, + {"answer": 42}, + ], + } + + assert create_metadata_query(USER, metadata={"foo": [5, 6]}) == { + "$and": [ + {"foo": {"$in": [5, 6]}}, + {"answer": 42}, + ], + } + + assert create_metadata_queries(USER, metadata={"foo": [5], "bar": [7]}) == [ + { + "$and": [ + {"foo": 5}, + {"answer": 42}, + ], + }, + { + "$and": [ + {"bar": 7}, + {"answer": 42}, + ], + }, + ] + + assert create_metadata_queries( + USER, + metadata={"foo": [5, 6], "bar": [7, 8]}, + ) == [ + { + "$and": [ + {"foo": {"$in": [5, 6]}}, + {"answer": 42}, + ], + }, + { + "$and": [ + {"bar": {"$in": [7, 8]}}, + {"answer": 42}, + ], + }, + ] + + assert create_metadata_queries( + USER, metadata={"foo": list(range(0, 200)), "bar": [7, 8]} + ) == [ + { + "$and": [ + {"foo": {"$in": list(range(0, 100))}}, + {"answer": 42}, + ] + }, + { + "$and": [ + {"foo": {"$in": list(range(100, 200))}}, + {"answer": 42}, + ] + }, + { + "$and": [ + {"bar": {"$in": [7, 8]}}, + {"answer": 42}, + ] + }, + ] + + +@dataclasses.dataclass +class _AstraConfig: + token: str + keyspace: str + api_endpoint: str + + +@pytest.fixture(scope="module") +def astra_config(enabled_stores: set[str]) -> Iterator[_AstraConfig | None]: + if "astra" not in enabled_stores: + pytest.skip("Pass --stores=astra to test Astra") + return + + from astrapy import DataAPIClient + from dotenv import load_dotenv + + load_dotenv() + + token = os.environ["ASTRA_DB_APPLICATION_TOKEN"] + keyspace = os.environ.get("ASTRA_DB_KEYSPACE", "default_keyspace") + api_endpoint = 
os.environ["ASTRA_DB_API_ENDPOINT"] + + my_client = DataAPIClient(token=token) + admin = my_client.get_admin().get_database_admin(api_endpoint) + admin.create_keyspace(keyspace) + + # Sometimes the creation of the store fails because the keyspace isn't + # created yet. To avoid that, poll the list of keyspaces until we + # confirm it is created. + found = False + t_end = time.time() + 5 # run 5 seconds + while time.time() < t_end: + keyspaces = admin.list_keyspaces() + if keyspace in keyspaces: + found = True + break + + print(f"Waiting for keyspace '{keyspace}'...") # noqa: T201 + time.sleep(0.01) + + assert found, f"Keyspace '{keyspace}' not created" + yield _AstraConfig(token=token, keyspace=keyspace, api_endpoint=api_endpoint) + + if keyspace != "default_keyspace": + admin.drop_keyspace(keyspace) + + +class TestAstraAdapter(AdapterComplianceSuite): + @pytest.fixture(scope="class") + def adapter( + self, + animal_embeddings: Embeddings, + animal_docs: list[Document], + astra_config: _AstraConfig, + ) -> Iterator["AstraAdapter"]: + from langchain_astradb import AstraDBVectorStore + + store = AstraDBVectorStore( + embedding=animal_embeddings, + collection_name="animals", + namespace=astra_config.keyspace, + token=astra_config.token, + api_endpoint=astra_config.api_endpoint, + pre_delete_collection=True, + ) + store.add_documents(animal_docs) + + yield AstraAdapter(store) + + store.delete_collection() + + +VECTORIZE_EXPECTATION_OVERRIDES: dict[tuple[str, str], list[str]] = { + ("search", "basic"): ["alpaca", "cat", "chicken", "horse"], + ("asearch", "basic"): ["alpaca", "cat", "chicken", "horse"], + ("search_with_embedding", "basic"): ["alpaca", "cat", "chicken", "horse"], + ("asearch_with_embedding", "basic"): ["alpaca", "cat", "chicken", "horse"], + ("adjacent", "metadata_and_id"): [ + "cat", + "cobra", + "crocodile", + "gecko", + "iguana", + "lizard", + ], + ("aadjacent", "metadata_and_id"): [ + "cat", + "cobra", + "crocodile", + "gecko", + "iguana", + "lizard", 
+ ], +} + + +class TestAstraVectorizeAdapter(AdapterComplianceSuite): + @override + def expected(self, method: str, case: AdapterComplianceCase) -> list[str]: + # Since vectorize currently requires a server-side embedding model, we + # need to change the expectations a little to reflect the embeddings + # that are actually computed. + return VECTORIZE_EXPECTATION_OVERRIDES.get((method, case.id), []) or ( + super().expected(method, case) + ) + + @pytest.fixture(scope="class") + def adapter( + self, + animal_docs: list[Document], + astra_config: _AstraConfig, + ) -> Iterator["AstraAdapter"]: + from astrapy.info import VectorServiceOptions + from langchain_astradb import AstraDBVectorStore + + service_options = VectorServiceOptions( + provider="nvidia", + model_name="NV-Embed-QA", + ) + + store = AstraDBVectorStore( + collection_name="animals_vectorize", + collection_vector_service_options=service_options, + namespace=astra_config.keyspace, + token=astra_config.token, + api_endpoint=astra_config.api_endpoint, + pre_delete_collection=True, + ) + store.add_documents(animal_docs) + + yield AstraAdapter(store) + + store.delete_collection() diff --git a/packages/langchain-graph-retriever/tests/adapters/test_cassandra.py b/packages/langchain-graph-retriever/tests/adapters/test_cassandra.py new file mode 100644 index 00000000..a5c2a645 --- /dev/null +++ b/packages/langchain-graph-retriever/tests/adapters/test_cassandra.py @@ -0,0 +1,89 @@ +import typing +from collections.abc import Iterator +from typing import Union + +import pytest +from graph_retriever.adapters import Adapter +from graph_retriever.testing.adapter_tests import AdapterComplianceSuite +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_graph_retriever.transformers import ShreddingTransformer + +if typing.TYPE_CHECKING: + from cassandra.cluster import Cluster # type: ignore + + +@pytest.fixture(scope="module") +def cluster( + request: 
pytest.FixtureRequest, enabled_stores: set[str], testcontainers: set[str] +) -> Iterator[Union["Cluster", None]]: + if "cassandra" not in enabled_stores: + pytest.skip("Pass --stores=cassandra to test Cassandra") + return + + import os + + from cassandra.cluster import Cluster # type: ignore + + if "cassandra" in testcontainers: + from testcontainers.cassandra import CassandraContainer # type: ignore + + container = CassandraContainer(image="cassandra:5.0.2") + container.start() + + request.addfinalizer(lambda: container.stop()) + contact_points = container.get_contact_points() + elif "CASSANDRA_CONTACT_POINTS" in os.environ: + contact_points = [ + cp.strip() + for cp in os.environ["CASSANDRA_CONTACT_POINTS"].split(",") + if cp.strip() + ] + else: + contact_points = None + + cluster = Cluster(contact_points) + yield cluster + cluster.shutdown() + + +class TestCassandraAdapter(AdapterComplianceSuite): + def supports_nested_metadata(self) -> bool: + return False + + @pytest.fixture(scope="class") + def adapter( + self, + cluster: "Cluster", + animal_embeddings: Embeddings, + animal_docs: list[Document], + ) -> Iterator[Adapter]: + from langchain_community.vectorstores.cassandra import Cassandra + from langchain_graph_retriever.adapters.cassandra import ( + CassandraAdapter, + ) + + session = cluster.connect() + + KEYSPACE = "graph_test_keyspace" + session.execute( + f"CREATE KEYSPACE IF NOT EXISTS {KEYSPACE}" + " WITH replication = " + "{'class': 'SimpleStrategy', 'replication_factor': 1}" + ) + + shredder = ShreddingTransformer() + session = cluster.connect() + session.execute(f"DROP TABLE IF EXISTS {KEYSPACE}.animals") + store = Cassandra( + embedding=animal_embeddings, + session=session, + keyspace=KEYSPACE, + table_name="animals", + ) + docs = list(shredder.transform_documents(animal_docs)) + store.add_documents(docs) + yield CassandraAdapter(store, shredder, {"keywords", "tags"}) + + if session: + session.shutdown() diff --git 
class TestChroma(AdapterComplianceSuite):
    """Runs the adapter compliance suite against the Chroma adapter."""

    def supports_nested_metadata(self) -> bool:
        # The suite's nested-metadata cases are disabled for this store.
        return False

    @pytest.fixture(scope="class")
    def adapter(
        self,
        enabled_stores: set[str],
        animal_embeddings: Embeddings,
        animal_docs: list[Document],
    ) -> Iterator[Adapter]:
        if "chroma" not in enabled_stores:
            pytest.skip("Pass --stores=chroma to test Chroma")

        from langchain_chroma.vectorstores import Chroma
        from langchain_graph_retriever.adapters.chroma import (
            ChromaAdapter,
        )

        shredder = ShreddingTransformer()

        # Chroma doesn't even support *writing* nested data currently, so we
        # filter it out.
        def strip_nested(doc: Document) -> Document:
            trimmed = {k: v for k, v in doc.metadata.items() if k != "nested"}
            return Document(id=doc.id, page_content=doc.page_content, metadata=trimmed)

        shredded = list(
            shredder.transform_documents([strip_nested(doc) for doc in animal_docs])
        )
        store = Chroma.from_documents(
            shredded,
            animal_embeddings,
            collection_name="animals",
            # Use `cosine` metric for consistency with other systems.
            # Default was L2.
            collection_metadata={"hnsw:space": "cosine"},
        )

        yield ChromaAdapter(
            store, shredder, nested_metadata_fields={"keywords", "tags"}
        )

        store.delete_collection()


class TestInMemory(AdapterComplianceSuite):
    """Runs the adapter compliance suite against the in-memory adapter."""

    @pytest.fixture(scope="class")
    def adapter(
        self,
        enabled_stores: set[str],
        animal_embeddings: Embeddings,
        animal_docs: list[Document],
    ) -> Adapter:
        if "mem" not in enabled_stores:
            pytest.skip("Pass --stores=mem to test InMemory")

        from langchain_graph_retriever.adapters.in_memory import (
            InMemoryAdapter,
        )

        # No teardown needed: the store lives only in process memory.
        vector_store = InMemoryVectorStore.from_documents(
            animal_docs, animal_embeddings
        )
        return InMemoryAdapter(vector_store)
def test_infer_in_memory():
    """`infer_adapter` recognizes the built-in InMemoryVectorStore."""
    adapter = infer_adapter(InMemoryVectorStore(FakeEmbeddings(size=4)))
    assert isinstance(adapter, Adapter)


@pytest.mark.parametrize(
    "cls,adapter_cls",
    [
        (langchain_astradb.AstraDBVectorStore, AstraAdapter),
        (Cassandra, CassandraAdapter),
        (Chroma, ChromaAdapter),
        (OpenSearchVectorSearch, OpenSearchAdapter),
    ],
)
def test_infer_adapter_name(cls: type, adapter_cls: type) -> None:
    """Each supported store class maps to its adapter's module and class name."""
    module_name, class_name = _infer_adapter_name(cls)
    assert module_name == adapter_cls.__module__
    assert class_name == adapter_cls.__name__


class UnsupportedVectorStore(VectorStore):
    """Minimal store with no registered adapter, used to exercise the error path."""

    @classmethod
    @override
    def from_texts(
        cls,
        texts: list[str],
        embedding: Embeddings,
        metadatas: list[dict[Any, Any]] | None = None,
        *,
        ids: list[str] | None = None,
        **kwargs: Any,
    ) -> UnsupportedVectorStore:
        return UnsupportedVectorStore()

    @override
    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> list[Document]:
        return []


def test_infer_store_unknown():
    """Inferring an adapter for an unknown store raises a descriptive error."""
    expected = (
        "Expected adapter or supported vector store, but got"
        f" {__name__}.UnsupportedVectorStore"
    )
    with pytest.raises(ValueError, match=expected):
        infer_adapter(UnsupportedVectorStore())
class TestOpenSearch(AdapterComplianceSuite):
    """Runs the adapter compliance suite against the OpenSearch adapter."""

    def supports_nested_metadata(self) -> bool:
        # The suite's nested-metadata cases are disabled for this store.
        return False

    def supports_dict_in_list(self) -> bool:
        # Dicts inside list-valued metadata are likewise excluded.
        return False

    @pytest.fixture(scope="class")
    def adapter(
        self,
        request: pytest.FixtureRequest,
        enabled_stores: set[str],
        testcontainers: set[str],
        animal_embeddings: Embeddings,
        animal_docs: list[Document],
    ) -> Iterable[Adapter]:
        if "opensearch" not in enabled_stores:
            pytest.skip("Pass --stores=opensearch to test OpenSearch")

        from langchain_community.vectorstores import OpenSearchVectorSearch
        from langchain_graph_retriever.adapters.open_search import (
            OpenSearchAdapter,
        )

        if "opensearch" in testcontainers:
            from testcontainers.opensearch import OpenSearchContainer  # type: ignore

            # If the admin password doesn't pass the length and regex
            # requirements, starting the container will hang
            # (`docker ps <container_id>` to debug).
            container = OpenSearchContainer(
                image="opensearchproject/opensearch:2.18.0",
                initial_admin_password="SomeRandomP4ssword",
            )
            container.start()
            request.addfinalizer(lambda: container.stop())

            cfg = container.get_config()
            opensearch_url = f"http://{cfg['host']}:{cfg['port']}"
            extra_kwargs = {"http_auth": (cfg["username"], cfg["password"])}
        else:
            # Assume a locally running instance when no container is used.
            opensearch_url = "http://localhost:9200"
            extra_kwargs = {}

        store = OpenSearchVectorSearch(
            opensearch_url=opensearch_url,
            index_name="animals",
            embedding_function=animal_embeddings,
            engine="faiss",
            **extra_kwargs,
        )
        store.add_documents(animal_docs)

        yield OpenSearchAdapter(store)

        if store.index_exists():
            store.delete_index()
ALL_STORES = ["mem", "opensearch", "astra", "cassandra", "chroma"]
TESTCONTAINER_STORES = ["opensearch", "cassandra"]


def pytest_configure(config):
    """Registers the custom `extra` marker used by optional-dependency tests."""
    config.addinivalue_line(
        "markers", "extra: mark test as requiring an `extra` package"
    )


def pytest_collection_modifyitems(config, items):
    """Skips tests marked `extra` unless `--runextras` was passed."""
    if config.getoption("--runextras"):
        # --runextras given in cli: do not skip extras
        return
    skip_extras = pytest.mark.skip(reason="need --runextras option to run")
    for item in items:
        if "extra" in item.keywords:
            item.add_marker(skip_extras)


@pytest.fixture(scope="session")
def enabled_stores(request: pytest.FixtureRequest) -> set[str]:
    """Stores selected via `--stores`; defaults to just the in-memory store."""
    # TODO: Use StrEnum?
    selected = request.config.getoption("--stores")
    if not selected:
        return {"mem"}
    if "all" in selected:
        return set(ALL_STORES)
    return set(selected)


@pytest.fixture(scope="session")
def testcontainers(request: pytest.FixtureRequest) -> set[str]:
    """Stores to back with testcontainers; defaults to all container-capable ones."""
    # TODO: Use StrEnum?
    selected = set(request.config.getoption("--testcontainer") or [])
    if "none" in selected:
        return set()
    if not selected:
        return set(TESTCONTAINER_STORES)
    return selected


def load_animal_docs() -> list[Document]:
    """Loads the shared animal corpus from `data/animals.jsonl`."""
    path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "../../../data/animals.jsonl")
    )
    documents = []
    with open(path) as file:
        for line in file:
            # One JSON object per line: {"id": ..., "text": ..., "metadata": ...}
            data = json.loads(line.strip())
            documents.append(
                Document(
                    id=data["id"],
                    page_content=data["text"],
                    metadata=data["metadata"],
                )
            )
    return documents


@pytest.fixture(scope="session")
def animal_docs() -> list[Document]:
    """Session-wide fixture exposing the animal corpus."""
    return load_animal_docs()


@pytest.fixture(scope="session")
def animal_embeddings() -> Embeddings:
    """Deterministic embeddings for the animal corpus."""
    # This must be imported late (after registering the rewrites)
    from graph_retriever.testing.embeddings import AnimalEmbeddings

    return BaseEmbeddings(AnimalEmbeddings())
class BaseEmbeddings(Embeddings):
    """Adapts a plain `str -> vector` callable to the LangChain `Embeddings` API."""

    def __init__(self, embedding: Callable[[str], list[float]]) -> None:
        # The wrapped callable; invoked once per input text.
        self.embedding = embedding

    def embed_query(self, text: str) -> list[float]:
        """Embeds a single query string."""
        return self.embedding(text)

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embeds each document text independently, preserving order."""
        return [self.embedding(t) for t in texts]
METADATA_EMBEDDING_KEY in doc.metadata - assert_is_embedding(doc.metadata[METADATA_EMBEDDING_KEY]) - - -def assert_is_embedding(value): - assert isinstance(value, list) - for item in value: - assert isinstance(item, float) - - -def assert_valid_results(docs: Iterable[Document]): - for doc in docs: - assert_valid_result(doc) - - -def assert_ids_any_order(results: Iterable[Document], expected: list[str]) -> None: - assert_valid_results(results) - - result_ids = [r.id for r in results] - assert len(set(result_ids)) == len(result_ids), "should not contain duplicates" - assert set(result_ids) == set(expected), "should contain exactly expected IDs" - - -@dataclass -class GetCase: - request: list[str] - expected: list[str] - - -GET_CASES: dict[str, GetCase] = { - # Currently, this is not required for `get` implementations since the - # traversal skips making `get` calls with no IDs. Some stores (such as chroma) - # fail in this case. - # "none": GetCase([], []), - "one": GetCase(["boar"], ["boar"]), - "many": GetCase(["boar", "chinchilla", "cobra"], ["boar", "chinchilla", "cobra"]), - "missing": GetCase( - ["boar", "chinchilla", "unicorn", "cobra"], ["boar", "chinchilla", "cobra"] - ), - "duplicate": GetCase( - ["boar", "chinchilla", "boar", "cobra"], ["boar", "chinchilla", "cobra"] - ), -} - - -@pytest.fixture(params=GET_CASES.keys()) -def get_case(request) -> GetCase: - return GET_CASES[request.param] - - -@dataclass -class SimilaritySearchCase: - query: str - expected: list[str] - k: int | None = None - filter: dict[str, str] | None = None - - skips: dict[str, str] = dataclasses.field(default_factory=dict) - - @property - def kwargs(self): - kwargs = {} - if self.k is not None: - kwargs["k"] = self.k - if self.filter is not None: - kwargs["filter"] = self.filter - return kwargs - - -SIMILARITY_SEARCH_CASES: dict[str, SimilaritySearchCase] = { - "basic": SimilaritySearchCase( - "domesticated hunters", ["cat", "horse", "chicken", "llama"] - ), - "k_2": 
SimilaritySearchCase("domesticated hunters", k=2, expected=["cat", "horse"]), - # Many stores fail in this case. Generally it doesn't happen in the code, since - # no IDs means we don't need to make the call. Not currently part of the contract. - # "k_0": SimilaritySearchCase("domesticated hunters", k=0, expected=[]), - "value_filter": SimilaritySearchCase( - "domesticated hunters", - filter={"type": "mammal"}, - expected=["cat", "dog", "horse", "llama"], - ), - "list_filter": SimilaritySearchCase( - "domesticated hunters", filter={"keywords": "hunting"}, expected=["cat"] - ), - "two_filters": SimilaritySearchCase( - "domesticated hunters", - filter={"type": "mammal", "diet": "carnivorous"}, - expected=["cat", "dingo", "ferret"], - skips={"chroma": "does not support multiple filters"}, - ), - # OpenSearch supports filtering on multiple values, but it is not currently - # relied on. Since no other adapters support it, we don't test it nor should - # traversal depend on it. - # "multi_list_filter": SimilaritySearchCase( - # "domesticated hunters", - # filter={"keywords": ["hunting", "agile"]}, - # expected=["cat", "fox", "gazelle", "mongoose"] - # ), -} - - -@pytest.fixture(params=SIMILARITY_SEARCH_CASES.keys()) -def similarity_search_case(store_param: str, request) -> SimilaritySearchCase: - case = SIMILARITY_SEARCH_CASES[request.param] - skip = case.skips.get(store_param, None) - if skip is not None: - pytest.skip(skip) - return case - - -@dataclass -class GetAdjacentCase: - query: str - outgoing_edges: set[Edge] - expected: list[str] - - adjacent_k: int = 4 - filter: dict[str, Any] | None = None - - -GET_ADJACENT_CASES: dict[str, GetAdjacentCase] = { - "one_edge": GetAdjacentCase( - "domesticated hunters", - outgoing_edges={MetadataEdge("type", "mammal")}, - expected=["horse", "llama", "dog", "cat"], - ), - # Note: Currently, all stores implement get adjacent by performing a - # separate search for each edge. 
This means that it returns up to - # `adjacent_k * len(outgoing_edges)` results. This will not be true if some - # stores (eg., OpenSearch) implement get adjacent more efficiently. We may - # wish to have `get_adjacent` select the top `adjacent_k` by sorting by - # similarity internally to better reflect this. - "two_edges_same_field": GetAdjacentCase( - "domesticated hunters", - outgoing_edges={ - MetadataEdge("type", "mammal"), - MetadataEdge("type", "crustacean"), - }, - expected=[ - "cat", - "crab", - "dog", - "horse", - "llama", - "lobster", - ], - ), - "ids": GetAdjacentCase( - "domesticated hunters", - outgoing_edges={ - IdEdge("cat"), - IdEdge("dog"), - IdEdge("unicorn"), - IdEdge("crab"), - }, - expected=[ - "cat", - "dog", - "crab", - ], - ), -} - - -@pytest.fixture(params=GET_ADJACENT_CASES.keys()) -def get_adjacent_case(request) -> GetAdjacentCase: - return GET_ADJACENT_CASES[request.param] - - -class AdapterComplianceSuite: - def test_get(self, adapter: Adapter, get_case: GetCase) -> None: - results = adapter.get(get_case.request) - assert_ids_any_order(results, get_case.expected) - - async def test_aget(self, adapter: Adapter, get_case: GetCase) -> None: - results = await adapter.aget(get_case.request) - assert_ids_any_order(results, get_case.expected) - - def test_similarity_search_with_embedding( - self, adapter: Adapter, similarity_search_case: SimilaritySearchCase - ) -> None: - embedding, results = adapter.similarity_search_with_embedding( - similarity_search_case.query, **similarity_search_case.kwargs - ) - assert_is_embedding(embedding) - assert_ids_any_order(results, similarity_search_case.expected) - - async def test_asimilarity_search_with_embedding( - self, adapter: Adapter, similarity_search_case: SimilaritySearchCase - ) -> None: - embedding, results = await adapter.asimilarity_search_with_embedding( - similarity_search_case.query, **similarity_search_case.kwargs - ) - assert_is_embedding(embedding) - assert_ids_any_order(results, 
similarity_search_case.expected) - - def test_similarity_search_with_embedding_by_vector( - self, adapter: Adapter, similarity_search_case: SimilaritySearchCase - ) -> None: - embedding = adapter._safe_embedding.embed_query( - text=similarity_search_case.query - ) - results = adapter.similarity_search_with_embedding_by_vector( - embedding, **similarity_search_case.kwargs - ) - assert_ids_any_order(results, similarity_search_case.expected) - - async def test_asimilarity_search_with_embedding_by_vector( - self, adapter: Adapter, similarity_search_case: SimilaritySearchCase - ) -> None: - embedding = adapter._safe_embedding.embed_query( - text=similarity_search_case.query - ) - results = await adapter.asimilarity_search_with_embedding_by_vector( - embedding, **similarity_search_case.kwargs - ) - assert_ids_any_order(results, similarity_search_case.expected) - - async def test_get_adjacent( - self, adapter: Adapter, get_adjacent_case: GetAdjacentCase - ) -> None: - embedding = adapter._safe_embedding.embed_query(text=get_adjacent_case.query) - results = adapter.get_adjacent( - outgoing_edges=get_adjacent_case.outgoing_edges, - query_embedding=embedding, - adjacent_k=get_adjacent_case.adjacent_k, - filter=get_adjacent_case.filter, - ) - assert_ids_any_order(results, get_adjacent_case.expected) - - async def test_aget_adjacent( - self, adapter: Adapter, get_adjacent_case: GetAdjacentCase - ) -> None: - embedding = adapter._safe_embedding.embed_query(text=get_adjacent_case.query) - results = await adapter.aget_adjacent( - outgoing_edges=get_adjacent_case.outgoing_edges, - query_embedding=embedding, - adjacent_k=get_adjacent_case.adjacent_k, - filter=get_adjacent_case.filter, - ) - assert_ids_any_order(results, get_adjacent_case.expected) - - -class TestBuiltinAdapters(AdapterComplianceSuite): - @pytest.fixture(scope="class") - def adapter( - self, adapter_factory: AdapterFactory, request: pytest.FixtureRequest - ) -> Adapter: - return adapter_factory.create( - request, - 
embedding=AnimalEmbeddings(), - docs=load_animal_docs(), - nested_metadata_fields={"keywords"}, - ) - - -class TestAdapterCompliance(abc.ABC, AdapterComplianceSuite): - """ - Run the AdapterComplianceSuite on a the adapter created by `make`. - - To use this, instantiate it in your `pytest` code and implement `make` to create. - """ - - @abc.abstractmethod - def make(self, embedding: Embeddings, docs: list[Document]) -> Adapter: ... - - @pytest.fixture(scope="class") - def adapter(self) -> Adapter: - return self.make(embedding=AnimalEmbeddings(), docs=load_animal_docs()) diff --git a/packages/langchain-graph-retriever/tests/integration_tests/adapters/test_inference.py b/packages/langchain-graph-retriever/tests/integration_tests/adapters/test_inference.py deleted file mode 100644 index a8521c1e..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/adapters/test_inference.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import annotations - -from typing import Any - -import pytest -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings, FakeEmbeddings -from langchain_core.vectorstores import VectorStore -from langchain_graph_retriever.adapters import Adapter, infer_adapter -from typing_extensions import override - -from tests.integration_tests.stores import AdapterFactory - - -def test_infer_store(adapter_factory: AdapterFactory) -> None: - # Some vector stores require at least one document to be created. 
- doc = Document( - id="doc", - page_content="lorem ipsum and whatnot", - ) - store = adapter_factory._create_store("foo", [doc], FakeEmbeddings(size=8)) - - adapter = infer_adapter(store) - - assert isinstance(adapter, Adapter) - if adapter_factory._teardown: - adapter_factory._teardown(store) - - -class UnsupportedVectorStore(VectorStore): - @classmethod - @override - def from_texts( - cls, - texts: list[str], - embedding: Embeddings, - metadatas: list[dict[Any, Any]] | None = None, - *, - ids: list[str] | None = None, - **kwargs: Any, - ) -> UnsupportedVectorStore: - return UnsupportedVectorStore() - - @override - def similarity_search( - self, query: str, k: int = 4, **kwargs: Any - ) -> list[Document]: - return [] - - -def test_infer_store_unknown(): - EXPECTED = ( - "Expected adapter or supported vector store, but got" - f" {__name__}.UnsupportedVectorStore" - ) - with pytest.raises(ValueError, match=EXPECTED): - infer_adapter(UnsupportedVectorStore()) diff --git a/packages/langchain-graph-retriever/tests/integration_tests/assertions.py b/packages/langchain-graph-retriever/tests/integration_tests/assertions.py deleted file mode 100644 index 9c51ec24..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/assertions.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections.abc import Iterable - -from langchain_core.documents import Document - - -def sorted_doc_ids(docs: Iterable[Document]) -> list[str]: - return sorted([doc.id for doc in docs if doc.id is not None]) - - -def assert_document_format(doc: Document) -> None: - assert doc.id is not None - assert doc.page_content is not None - assert doc.metadata is not None - assert "__embedding" not in doc.metadata diff --git a/packages/langchain-graph-retriever/tests/integration_tests/conftest.py b/packages/langchain-graph-retriever/tests/integration_tests/conftest.py deleted file mode 100644 index 35b6250a..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/conftest.py +++ 
/dev/null @@ -1,34 +0,0 @@ -import pytest -from langchain_core.documents import Document -from langchain_graph_retriever.adapters import Adapter - -from tests.embeddings import AnimalEmbeddings -from tests.integration_tests.invoker import invoker - -# Imports for definitions. -from tests.integration_tests.stores import ( - AdapterFactory, - adapter_factory, - enabled_stores, - store_param, -) - -# Mark these imports as used so they don't removed. -# They need to be imported here so the fixtures are available. -_ = ( - adapter_factory, - store_param, - enabled_stores, - invoker, -) - - -@pytest.fixture(scope="session") -def animal_store( - request: pytest.FixtureRequest, - adapter_factory: AdapterFactory, - animal_docs: list[Document], -) -> Adapter: - return adapter_factory.create( - request, AnimalEmbeddings(), animal_docs, nested_metadata_fields={"keywords"} - ) diff --git a/packages/langchain-graph-retriever/tests/integration_tests/invoker.py b/packages/langchain-graph-retriever/tests/integration_tests/invoker.py deleted file mode 100644 index 233ea992..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/invoker.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Any - -import pytest -from langchain_core.retrievers import BaseRetriever - - -@pytest.fixture(scope="function", params=["sync", "async"]) -def invoker(request: pytest.FixtureRequest): - async def sync_invoker( - retriever: BaseRetriever, input: str, *args: Any, **kwargs: Any - ): - return retriever.invoke(input, *args, **kwargs) - - async def async_invoker( - retriever: BaseRetriever, input: str, *args: Any, **kwargs: Any - ): - return await retriever.ainvoke(input, *args, **kwargs) - - if request.param == "sync": - return sync_invoker - elif request.param == "async": - return async_invoker - else: - raise ValueError(f"Unexpected value '{request.param}'") diff --git a/packages/langchain-graph-retriever/tests/integration_tests/stores.py 
b/packages/langchain-graph-retriever/tests/integration_tests/stores.py deleted file mode 100644 index dd4a15a3..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/stores.py +++ /dev/null @@ -1,309 +0,0 @@ -import abc -from collections.abc import Callable -from typing import Generic, TypeVar - -import pytest -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings -from langchain_core.vectorstores import InMemoryVectorStore, VectorStore -from langchain_graph_retriever.adapters import Adapter -from langchain_graph_retriever.document_transformers.metadata_denormalizer import ( - MetadataDenormalizer, -) - -ALL_STORES = ["mem", "astra", "cassandra", "chroma", "opensearch"] -TESTCONTAINER_STORES = ["cassandra", "opensearch"] - - -@pytest.fixture(scope="session") -def enabled_stores(request: pytest.FixtureRequest) -> set[str]: - # TODO: Use StrEnum? - stores = request.config.getoption("--stores") - - if stores and "all" in stores: - return set(ALL_STORES) - elif stores: - return set(stores) - else: - return {"mem"} - - -def use_testcontainer(request: pytest.FixtureRequest, store: str) -> bool: - testcontainers = request.config.getoption("--testcontainer") - if testcontainers and "none" in testcontainers: - return False - elif testcontainers: - return store in testcontainers - else: - return True - - -@pytest.fixture(scope="session", params=ALL_STORES) -def store_param(request: pytest.FixtureRequest, enabled_stores: set[str]) -> str: - store: str = request.param - if store not in enabled_stores: - pytest.skip(f"'{store}' is not enabled") - return store - - -T = TypeVar("T", bound=VectorStore) - - -class AdapterFactory(abc.ABC, Generic[T]): - def __init__( - self, - create_store: Callable[[str, list[Document], Embeddings], T], - create_adapter: Callable[[T, set[str]], Adapter], - teardown: Callable[[T], None] | None = None, - ): - self._create_store = create_store - self._create_adapter = create_adapter - 
self._teardown = teardown - self._index = 0 - - def create( - self, - request: pytest.FixtureRequest, - embedding: Embeddings, - docs: list[Document], - nested_metadata_fields: set[str] = set(), - ) -> Adapter: - name = f"test_{self._index}" - self._index += 1 - store = self._create_store(name, docs, embedding) - - if self._teardown is not None: - # make a local copy of the non-None teardown. This makes `mypy` happy. - # Otherwise, it (correctly) recognizes that `self._teardown` could be not - # `None` and `None` later (when the finalizer is called) - teardown = self._teardown - request.addfinalizer(lambda: teardown(store)) - - return self._create_adapter(store, nested_metadata_fields) - - -def _cassandra_store_factory(request: pytest.FixtureRequest): - import os - - from cassandra.cluster import Cluster # type: ignore - from langchain_community.vectorstores.cassandra import Cassandra - from langchain_graph_retriever.adapters.cassandra import ( - CassandraAdapter, - ) - - if use_testcontainer(request, "cassandra"): - from testcontainers.cassandra import CassandraContainer # type: ignore - - container = CassandraContainer(image="cassandra:5.0.2") - container.start() - request.addfinalizer(lambda: container.stop()) - contact_points = container.get_contact_points() - elif "CASSANDRA_CONTACT_POINTS" in os.environ: - contact_points = [ - cp.strip() - for cp in os.environ["CASSANDRA_CONTACT_POINTS"].split(",") - if cp.strip() - ] - else: - contact_points = None - - cluster = Cluster(contact_points) - session = cluster.connect() - - KEYSPACE = "graph_test_keyspace" - session.execute( - f"CREATE KEYSPACE IF NOT EXISTS {KEYSPACE}" - " WITH replication = " - "{'class': 'SimpleStrategy', 'replication_factor': 1}" - ) - - request.addfinalizer(lambda: cluster.shutdown()) - - metadata_denormalizer = MetadataDenormalizer() - - def create_cassandra( - name: str, docs: list[Document], embedding: Embeddings - ) -> Cassandra: - session = cluster.connect() - session.execute(f"DROP 
TABLE IF EXISTS {KEYSPACE}.{name}") - - store = Cassandra( - embedding=embedding, - session=session, - keyspace=KEYSPACE, - table_name=name, - ) - docs = list(metadata_denormalizer.transform_documents(docs)) - store.add_documents(docs) - return store - - def teardown_cassandra(cassandra: Cassandra): - assert cassandra.session is not None - cassandra.session.shutdown() - - return AdapterFactory[Cassandra]( - create_store=create_cassandra, - create_adapter=lambda store, nested_metadata_fields: CassandraAdapter( - store, - metadata_denormalizer=metadata_denormalizer, - nested_metadata_fields=nested_metadata_fields, - ), - teardown=teardown_cassandra, - ) - - -def _opensearch_store_factory(request: pytest.FixtureRequest): - from langchain_community.vectorstores import OpenSearchVectorSearch - from langchain_graph_retriever.adapters.open_search import ( - OpenSearchAdapter, - ) - - if use_testcontainer(request, "opensearch"): - from testcontainers.opensearch import OpenSearchContainer # type: ignore - - # If the admin password doesn't pass the length and regex requirements - # starting the container will hang (`docker ps <container_id>` to debug). 
- container = OpenSearchContainer( - image="opensearchproject/opensearch:2.18.0", - initial_admin_password="SomeRandomP4ssword", - ) - container.start() - request.addfinalizer(lambda: container.stop()) - - config = container.get_config() - opensearch_url = f"http://{config['host']}:{config['port']}" - kwargs = {"http_auth": (config["username"], config["password"])} - else: - opensearch_url = "http://localhost:9200" - kwargs = {} - - def create_open_search( - name: str, docs: list[Document], embedding: Embeddings - ) -> OpenSearchVectorSearch: - store = OpenSearchVectorSearch( - opensearch_url=opensearch_url, - index_name=name, - embedding_function=embedding, - engine="faiss", - **kwargs, - ) - store.add_documents(docs) - return store - - def teardown_open_search(store: OpenSearchVectorSearch) -> None: - if store.index_exists(): - store.delete_index() - - return AdapterFactory[OpenSearchVectorSearch]( - create_store=create_open_search, - create_adapter=lambda store, _nested_metadata_fields: OpenSearchAdapter(store), - teardown=teardown_open_search, - ) - - -def _astra_store_factory(_request: pytest.FixtureRequest) -> AdapterFactory: - import os - - from astrapy import AstraDBDatabaseAdmin - from astrapy.authentication import StaticTokenProvider - from dotenv import load_dotenv - from langchain_astradb import AstraDBVectorStore - from langchain_graph_retriever.adapters.astra import ( - AstraAdapter, - ) - - load_dotenv() - - token = StaticTokenProvider(os.environ["ASTRA_DB_APPLICATION_TOKEN"]) - keyspace = os.environ.get("ASTRA_DB_KEYSPACE", "default_keyspace") - api_endpoint = os.environ["ASTRA_DB_API_ENDPOINT"] - - admin = AstraDBDatabaseAdmin(api_endpoint=api_endpoint, token=token) - admin.create_keyspace(keyspace) - - def create_astra( - name: str, docs: list[Document], embedding: Embeddings - ) -> AstraDBVectorStore: - try: - from langchain_astradb import AstraDBVectorStore - - store = AstraDBVectorStore( - embedding=embedding, - collection_name=name, - 
namespace=keyspace, - token=token, - api_endpoint=api_endpoint, - ) - store.add_documents(docs) - return store - - except (ImportError, ModuleNotFoundError): - msg = ( - "to test graph-traversal with AstraDB, please" - " install langchain-astradb and python-dotenv" - ) - raise ImportError(msg) - - def teardown_astra(store: AstraDBVectorStore): - store.delete_collection() - - return AdapterFactory[AstraDBVectorStore]( - create_store=create_astra, - create_adapter=lambda store, _nested_metadata_fields: AstraAdapter(store), - teardown=teardown_astra, - ) - - -def _in_memory_store_factory(_request: pytest.FixtureRequest) -> AdapterFactory: - from langchain_graph_retriever.adapters.in_memory import ( - InMemoryAdapter, - ) - - def create_in_memory( - _name: str, docs: list[Document], emb: Embeddings - ) -> InMemoryVectorStore: - return InMemoryVectorStore.from_documents(docs, emb) - - return AdapterFactory[InMemoryVectorStore]( - create_store=create_in_memory, - create_adapter=lambda store, _nested_metadata_fields: InMemoryAdapter(store), - ) - - -def _chroma_store_factory(_request: pytest.FixtureRequest) -> AdapterFactory: - from langchain_chroma.vectorstores import Chroma - from langchain_graph_retriever.adapters.chroma import ( - ChromaAdapter, - ) - - metadata_denormalizer = MetadataDenormalizer() - - def create_chroma(name: str, docs: list[Document], emb: Embeddings) -> Chroma: - docs = list(metadata_denormalizer.transform_documents(docs)) - return Chroma.from_documents(docs, emb, collection_name=name) - - return AdapterFactory[Chroma]( - create_store=create_chroma, - create_adapter=lambda store, nested_metadata_fields: ChromaAdapter( - store, - metadata_denormalizer=metadata_denormalizer, - nested_metadata_fields=nested_metadata_fields, - ), - teardown=lambda store: store.delete_collection(), - ) - - -@pytest.fixture(scope="session") -def adapter_factory(store_param: str, request: pytest.FixtureRequest) -> AdapterFactory: - if store_param == "mem": - return 
_in_memory_store_factory(request) - elif store_param == "chroma": - return _chroma_store_factory(request) - elif store_param == "astra": - return _astra_store_factory(request) - elif store_param == "cassandra": - return _cassandra_store_factory(request) - elif store_param == "opensearch": - return _opensearch_store_factory(request) - else: - pytest.fail(f"Unsupported store: {store_param}") diff --git a/packages/langchain-graph-retriever/tests/integration_tests/test_graph_retriever.py b/packages/langchain-graph-retriever/tests/integration_tests/test_graph_retriever.py deleted file mode 100644 index 18a411f4..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/test_graph_retriever.py +++ /dev/null @@ -1,21 +0,0 @@ -from langchain_core.documents import Document -from langchain_core.embeddings import FakeEmbeddings -from langchain_core.vectorstores.in_memory import InMemoryVectorStore -from langchain_graph_retriever import GraphRetriever -from langchain_graph_retriever.adapters.in_memory import InMemoryAdapter - - -def test_infers_adapter() -> None: - # Some vector stores require at least one document to be created. 
- doc = Document( - id="doc", - page_content="lorem ipsum and whatnot", - ) - store = InMemoryVectorStore.from_documents([doc], FakeEmbeddings(size=8)) - - retriever = GraphRetriever( - store=store, - edges=[], - ) - - assert isinstance(retriever.adapter, InMemoryAdapter) diff --git a/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_eager.py b/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_eager.py deleted file mode 100644 index 00277994..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_eager.py +++ /dev/null @@ -1,405 +0,0 @@ -import pytest -from langchain_core.documents import Document -from langchain_core.vectorstores import InMemoryVectorStore -from langchain_graph_retriever import ( - GraphRetriever, -) -from langchain_graph_retriever.adapters.in_memory import InMemoryAdapter -from langchain_graph_retriever.edges.metadata import Id -from langchain_graph_retriever.strategies import ( - Eager, -) -from langchain_graph_retriever.types import Edges, MetadataEdge, Node - -from tests.animal_docs import ( - ANIMALS_DEPTH_0_EXPECTED, - ANIMALS_QUERY, -) -from tests.embeddings.simple_embeddings import ( - Angular2DEmbeddings, - EarthEmbeddings, - ParserEmbeddings, -) -from tests.integration_tests.assertions import assert_document_format, sorted_doc_ids -from tests.integration_tests.stores import Adapter, AdapterFactory - - -async def test_animals_bidir_collection_eager(animal_store: Adapter, invoker): - # test graph-search on a normalized bi-directional edge - retriever = GraphRetriever( - store=animal_store, - edges=[("keywords", "keywords")], - strategy=Eager(k=100, start_k=2, max_depth=0), - ) - - docs: list[Document] = await invoker(retriever, ANIMALS_QUERY, max_depth=0) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker(retriever, ANIMALS_QUERY, max_depth=1) - assert sorted_doc_ids(docs) == [ - "cat", - "coyote", - "fox", - "gazelle", - 
"hyena", - "jackal", - "mongoose", - ] - - docs = await invoker(retriever, ANIMALS_QUERY, max_depth=2) - assert sorted_doc_ids(docs) == [ - "alpaca", - "bison", - "cat", - "chicken", - "cockroach", - "coyote", - "crow", - "dingo", - "dog", - "fox", - "gazelle", - "horse", - "hyena", - "jackal", - "llama", - "mongoose", - "ostrich", - ] - - -async def test_animals_bidir_item(animal_store: Adapter, invoker): - retriever = GraphRetriever( - store=animal_store, - edges=[("habitat", "habitat")], - ) - - docs: list[Document] = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=1) - ) - assert sorted_doc_ids(docs) == [ - "bobcat", - "cobra", - "deer", - "elk", - "fox", - "mongoose", - ] - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=2) - ) - assert sorted_doc_ids(docs) == [ - "bobcat", - "cobra", - "deer", - "elk", - "fox", - "mongoose", - ] - - -async def test_animals_initial_roots(animal_store: Adapter, invoker): - retriever = GraphRetriever( - store=animal_store, - edges=[("keywords", "keywords")], - strategy=Eager(k=10, start_k=0), - ) - - docs = await invoker( - retriever, - ANIMALS_QUERY, - initial_roots=["bobcat"], - max_depth=0, - ) - - # bobcat is included (initial roots). 
- # everything adjacent to bobcat is depth 0 (immediately reachable) - assert sorted_doc_ids(docs) == [ - "bear", - "bobcat", - ] - - docs = await invoker( - retriever, - ANIMALS_QUERY, - initial_roots=["bobcat"], - max_depth=1, - ) - - assert sorted_doc_ids(docs) == [ - "bear", - "bobcat", - "moose", - "ostrich", - ] - - docs = await invoker( - retriever, - ANIMALS_QUERY, - initial_roots=["bobcat", "cheetah"], - k=20, - max_depth=1, - ) - - assert sorted_doc_ids(docs) == [ - "bear", - "bobcat", - "cassowary", - "cheetah", - "dingo", - "eagle", - "emu", - "falcon", - "hawk", - "jaguar", - "kangaroo", - "leopard", - "moose", - "ostrich", - ] - - -async def test_animals_item_to_collection(animal_store: Adapter, invoker): - retriever = GraphRetriever( - store=animal_store, - edges=[("habitat", "keywords")], - ) - - docs: list[Document] = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=1) - ) - assert sorted_doc_ids(docs) == ["bear", "bobcat", "fox", "mongoose"] - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Eager(k=10, start_k=2, max_depth=2) - ) - assert sorted_doc_ids(docs) == ["bear", "bobcat", "caribou", "fox", "mongoose"] - - -async def test_parser( - request: pytest.FixtureRequest, adapter_factory: AdapterFactory, invoker -): - """ - This is a test of set of Documents to pre-populate, - a graph vector store with entries placed in a certain way. - - Space of the entries (under Euclidean similarity): - - A0 (*) - .... AL AR <.... - : | : - : | ^ : - v | . v - | : - TR | : BL - T0 --------------x-------------- B0 - TL | : BR - | : - | . - | . - | - FL FR - F0 - - the query point is meant to be at (*). - the A are bidirectionally with B - the A are outgoing to T - the A are incoming from F - The links are like: L with L, 0 with 0 and R with R. 
- """ - - docs_a = [ - Document(id="AL", page_content="[-1, 9]", metadata={"label": "AL"}), - Document(id="A0", page_content="[0, 10]", metadata={"label": "A0"}), - Document(id="AR", page_content="[1, 9]", metadata={"label": "AR"}), - ] - docs_b = [ - Document(id="BL", page_content="[9, 1]", metadata={"label": "BL"}), - Document(id="B0", page_content="[10, 0]", metadata={"label": "B0"}), - Document(id="BR", page_content="[9, -1]", metadata={"label": "BR"}), - ] - docs_f = [ - Document(id="FL", page_content="[1, -9]", metadata={"label": "FL"}), - Document(id="F0", page_content="[0, -10]", metadata={"label": "F0"}), - Document(id="FR", page_content="[-1, -9]", metadata={"label": "FR"}), - ] - docs_t = [ - Document(id="TL", page_content="[-9, -1]", metadata={"label": "TL"}), - Document(id="T0", page_content="[-10, 0]", metadata={"label": "T0"}), - Document(id="TR", page_content="[-9, 1]", metadata={"label": "TR"}), - ] - for doc_a, suffix in zip(docs_a, ["l", "0", "r"]): - doc_a.metadata["tag"] = f"ab_{suffix}" - doc_a.metadata["out"] = f"at_{suffix}" - doc_a.metadata["in"] = f"af_{suffix}" - for doc_b, suffix in zip(docs_b, ["l", "0", "r"]): - doc_b.metadata["tag"] = f"ab_{suffix}" - for doc_t, suffix in zip(docs_t, ["l", "0", "r"]): - doc_t.metadata["in"] = f"at_{suffix}" - for doc_f, suffix in zip(docs_f, ["l", "0", "r"]): - doc_f.metadata["out"] = f"af_{suffix}" - documents = docs_a + docs_b + docs_f + docs_t - - retriever = GraphRetriever( - store=adapter_factory.create(request, ParserEmbeddings(dimension=2), documents), - edges=[("out", "in"), ("tag", "tag")], - strategy=Eager(k=10, start_k=2, max_depth=2), - ) - - docs: list[Document] = await invoker( - retriever, "[2, 10]", strategy=Eager(k=10, start_k=2, max_depth=0) - ) - ss_labels = {doc.metadata["label"] for doc in docs} - assert ss_labels == {"AR", "A0"} - assert_document_format(docs[0]) - - docs = await invoker(retriever, "[2, 10]") - # this is a set, as some of the internals of trav.search are 
set-driven - # so ordering is not deterministic: - ts_labels = {doc.metadata["label"] for doc in docs} - assert ts_labels == {"AR", "A0", "BR", "B0", "TR", "T0"} - assert_document_format(docs[0]) - - -async def test_earth( - request: pytest.FixtureRequest, adapter_factory: AdapterFactory, invoker -): - greetings = Document( - id="greetings", - page_content="Typical Greetings", - metadata={ - "incoming": "parent", - }, - ) - - doc1 = Document( - id="doc1", - page_content="Hello World", - metadata={"outgoing": "parent", "keywords": ["greeting", "world"]}, - ) - - doc2 = Document( - id="doc2", - page_content="Hello Earth", - metadata={"outgoing": "parent", "keywords": ["greeting", "earth"]}, - ) - - retriever = GraphRetriever( - store=adapter_factory.create( - request, - EarthEmbeddings(), - [greetings, doc1, doc2], - nested_metadata_fields={"keywords"}, - ), - edges=[("outgoing", "incoming"), ("keywords", "keywords")], - strategy=Eager(k=10, start_k=2, max_depth=0), - ) - - docs: list[Document] = await invoker( - retriever, "Earth", strategy=Eager(k=10, start_k=1, max_depth=0) - ) - assert sorted_doc_ids(docs) == ["doc2"] - - docs = await invoker( - retriever, "Earth", strategy=Eager(k=10, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ["doc1", "doc2"] - - docs = await invoker( - retriever, "Earth", strategy=Eager(k=10, start_k=1, max_depth=1) - ) - assert sorted_doc_ids(docs) == ["doc1", "doc2", "greetings"] - - -async def test_mentions_to_ids(invoker) -> None: - v0 = Document(id="v0", page_content="-0.124") - v1 = Document(id="v1", page_content="+0.127", metadata={"mentions": ["v0"]}) - v2 = Document(id="v2", page_content="+0.250", metadata={"mentions": ["v1", "v3"]}) - v3 = Document(id="v3", page_content="+1.000") - - store = InMemoryVectorStore(embedding=Angular2DEmbeddings()) - store.add_documents([v0, v1, v2, v3]) - - retriever = GraphRetriever( - store=InMemoryAdapter(vector_store=store), - edges=[("mentions", Id())], - ) - - docs: list[Document] 
= await invoker(retriever, "+0.249", start_k=1, max_depth=0) - assert sorted_doc_ids(docs) == ["v2"] - - docs = await invoker(retriever, "+0.249", start_k=1, max_depth=1) - assert sorted_doc_ids(docs) == ["v1", "v2", "v3"] - - docs = await invoker(retriever, "+0.249", start_k=1, max_depth=3) - assert sorted_doc_ids(docs) == ["v0", "v1", "v2", "v3"] - - -async def test_ids_to_mentions(invoker) -> None: - v0 = Document(id="v0", page_content="-0.124") - v1 = Document(id="v1", page_content="+0.127", metadata={"mentions": ["v0"]}) - v2 = Document(id="v2", page_content="+0.250", metadata={"mentions": ["v1", "v3"]}) - v3 = Document(id="v3", page_content="+1.000") - - store = InMemoryVectorStore(embedding=Angular2DEmbeddings()) - store.add_documents([v0, v1, v2, v3]) - - retriever = GraphRetriever( - store=InMemoryAdapter(vector_store=store), - edges=[(Id(), "mentions")], - ) - - docs: list[Document] = await invoker(retriever, "-0.125", start_k=1, max_depth=0) - assert sorted_doc_ids(docs) == ["v0"] - - docs = await invoker(retriever, "-0.125", start_k=1, max_depth=1) - assert sorted_doc_ids(docs) == ["v0", "v1"] - - docs = await invoker(retriever, "-0.125", start_k=1, max_depth=3) - assert sorted_doc_ids(docs) == ["v0", "v1", "v2"] - - -async def test_edge_function(invoker) -> None: - v0 = Document( - id="v0", - page_content="-0.124", - metadata={"links": [("a", 5.0)], "incoming": ["a"]}, - ) - v1 = Document( - id="v1", - page_content="+1.000", - metadata={"links": [("a", 6.0)], "incoming": ["a"]}, - ) - - store = InMemoryVectorStore(embedding=Angular2DEmbeddings()) - store.add_documents([v0, v1]) - - def link_function(node: Node) -> Edges: - links = node.metadata.get("links", []) - incoming = node.metadata.get("incoming", []) - return Edges( - incoming={MetadataEdge("incoming", v) for v in incoming}, - outgoing={MetadataEdge("incoming", v) for v, _weight in links}, - ) - - retriever = GraphRetriever( - store=InMemoryAdapter(vector_store=store), - edges=link_function, - ) 
- - docs: list[Document] = await invoker(retriever, "-0.125", start_k=1, max_depth=0) - assert sorted_doc_ids(docs) == ["v0"] - - docs = await invoker(retriever, "-0.125", start_k=1, max_depth=1) - assert sorted_doc_ids(docs) == ["v0", "v1"] diff --git a/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_mmr.py b/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_mmr.py deleted file mode 100644 index 88f51c9b..00000000 --- a/packages/langchain-graph-retriever/tests/integration_tests/test_traversal_mmr.py +++ /dev/null @@ -1,164 +0,0 @@ -from langchain_core.documents import Document -from langchain_core.vectorstores import InMemoryVectorStore -from langchain_graph_retriever import GraphRetriever -from langchain_graph_retriever.adapters.in_memory import ( - InMemoryAdapter, -) -from langchain_graph_retriever.strategies import ( - Mmr, -) - -from tests.animal_docs import ( - ANIMALS_DEPTH_0_EXPECTED, - ANIMALS_QUERY, -) -from tests.embeddings.simple_embeddings import Angular2DEmbeddings -from tests.integration_tests.assertions import sorted_doc_ids -from tests.integration_tests.stores import Adapter - - -async def test_animals_bidir_collection(animal_store: Adapter, invoker): - # test graph-search on a normalized bi-directional edge - retriever = GraphRetriever( - store=animal_store, - edges=[("keywords", "keywords")], - ) - - docs: list[Document] = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=4, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=4, start_k=2, max_depth=1) - ) - assert sorted_doc_ids(docs) == ["cat", "gazelle", "hyena", "mongoose"] - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=6, start_k=2, max_depth=2) - ) - assert sorted_doc_ids(docs) == [ - "bison", - "cat", - "fox", - "gazelle", - "hyena", - "mongoose", - ] - - -async def test_animals_bidir_item(animal_store: 
Adapter, invoker): - retriever = GraphRetriever( - store=animal_store, - edges=[("habitat", "habitat")], - ) - - docs: list[Document] = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=1) - ) - assert sorted_doc_ids(docs) == [ - "bobcat", - "cobra", - "deer", - "elk", - "fox", - "mongoose", - ] - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=2) - ) - assert sorted_doc_ids(docs) == [ - "bobcat", - "cobra", - "deer", - "elk", - "fox", - "mongoose", - ] - - -async def test_animals_item_to_collection(animal_store: Adapter, invoker): - retriever = GraphRetriever( - store=animal_store, - edges=[("habitat", "keywords")], - ) - - docs: list[Document] = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=0) - ) - assert sorted_doc_ids(docs) == ANIMALS_DEPTH_0_EXPECTED - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=1) - ) - assert sorted_doc_ids(docs) == ["bear", "bobcat", "fox", "mongoose"] - - docs = await invoker( - retriever, ANIMALS_QUERY, strategy=Mmr(k=10, start_k=2, max_depth=2) - ) - assert sorted_doc_ids(docs) == ["bear", "bobcat", "caribou", "fox", "mongoose"] - - -async def test_traversal_mem(invoker) -> None: - """ Test end to end construction and MMR search. - The embedding function used here ensures `texts` become - the following vectors on a circle (numbered v0 through v3): - - ______ v2 - // \\ - // \\ v1 - v3 || . || query - \\ // v0 - \\______// (N.B. very crude drawing) - - With start_k==2 and k==2, when query is at (1, ), - one expects that v2 and v0 are returned (in some order) - because v1 is "too close" to v0 (and v0 is closer than v1)). - - Both v2 and v3 are reachable via edges from v0, so once it is - selected, those are both considered. 
- """ - v0 = Document(id="v0", page_content="-0.124") - v1 = Document(id="v1", page_content="+0.127") - v2 = Document(id="v2", page_content="+0.25") - v3 = Document(id="v3", page_content="+1.0") - - v0.metadata["outgoing"] = "link" - v2.metadata["incoming"] = "link" - v3.metadata["incoming"] = "link" - - store = InMemoryVectorStore(embedding=Angular2DEmbeddings()) - store.add_documents([v0, v1, v2, v3]) - - strategy = Mmr(k=2, start_k=2, max_depth=2) - retriever = GraphRetriever( - store=InMemoryAdapter(vector_store=store), - edges=[("outgoing", "incoming")], - strategy=strategy, - ) - - docs: list[Document] = await invoker(retriever, "0.0") - assert sorted_doc_ids(docs) == ["v0", "v2"] - - # With max depth 0, no edges are traversed, so this doesn't reach v2 or v3. - # So it ends up picking "v1" even though it's similar to "v0". - docs = await invoker(retriever, "0.0", max_depth=0) - assert sorted_doc_ids(docs) == ["v0", "v1"] - - # With max depth 0 but higher `start_k`, we encounter v2 - docs = await invoker(retriever, "0.0", start_k=3, max_depth=0) - assert sorted_doc_ids(docs) == ["v0", "v2"] - - # v0 score is .46, v2 score is 0.16 so it won't be chosen. - docs = await invoker(retriever, "0.0", score_threshold=0.2) - assert sorted_doc_ids(docs) == ["v0"] - - # with k=4 we should get all of the documents. 
- docs = await invoker(retriever, "0.0", k=4) - assert sorted_doc_ids(docs) == ["v0", "v1", "v2", "v3"] diff --git a/packages/langchain-graph-retriever/tests/unit_tests/test_document_graph.py b/packages/langchain-graph-retriever/tests/test_document_graph.py similarity index 90% rename from packages/langchain-graph-retriever/tests/unit_tests/test_document_graph.py rename to packages/langchain-graph-retriever/tests/test_document_graph.py index 3069ee67..11c9b1a8 100644 --- a/packages/langchain-graph-retriever/tests/unit_tests/test_document_graph.py +++ b/packages/langchain-graph-retriever/tests/test_document_graph.py @@ -3,7 +3,6 @@ import networkx as nx import pytest from langchain_core.documents import Document -from langchain_graph_retriever import Id from langchain_graph_retriever.document_graph import create_graph, group_by_community @@ -72,7 +71,7 @@ def test_create_graph(docs: list[Document]): graph = create_graph( docs, - edges=[("mentions", Id())], + edges=[("mentions", "$id")], ) assert sorted(nx.to_edgelist(graph)) == [ ("doc1", "doc0", {}), @@ -164,7 +163,6 @@ def test_communities(animal_docs: list[Document]): ["lark"], ["lemur"], ["lizard"], - ["llama"], ["lobster"], ["magpie"], ["manatee"], @@ -173,3 +171,14 @@ def test_communities(animal_docs: list[Document]): ["narwhal"], ["octopus"], ] + + +def test_communities_disconnected_graph(): + graph = nx.DiGraph() + graph.add_node("n1", doc=Document(id="n1", page_content="n1")) + graph.add_node("n2", doc=Document(id="n2", page_content="n2")) + + communities = group_by_community(graph) + + community_ids = [sorted([cast(str, d.id) for d in c]) for c in communities] + assert community_ids == [["n1"], ["n2"]] diff --git a/packages/langchain-graph-retriever/tests/test_graph_retriever.py b/packages/langchain-graph-retriever/tests/test_graph_retriever.py new file mode 100644 index 00000000..a67832e8 --- /dev/null +++ b/packages/langchain-graph-retriever/tests/test_graph_retriever.py @@ -0,0 +1,70 @@ +import copy 
+from collections.abc import Iterable + +from graph_retriever.strategies import Mmr +from langchain_core.documents import Document +from langchain_core.embeddings import FakeEmbeddings +from langchain_core.vectorstores.in_memory import InMemoryVectorStore +from langchain_graph_retriever import GraphRetriever +from langchain_graph_retriever.adapters.in_memory import InMemoryAdapter + + +def test_mmr_parameters() -> None: + # Makes sure that copying the MMR strategy creates new embeddings. + mmr1 = Mmr() + mmr1._query_embedding = [0.25, 0.5, 0.75] + assert id(mmr1._nd_query_embedding) == id(mmr1._nd_query_embedding) + + mmr2 = copy.deepcopy(mmr1) + assert id(mmr1._nd_query_embedding) != id(mmr2._nd_query_embedding) + + +def test_init_parameters_override_strategy() -> None: + store = InMemoryAdapter(vector_store=InMemoryVectorStore(FakeEmbeddings(size=8))) + retriever = GraphRetriever(store=store, edges=[], k=87) # type: ignore[call-arg] + assert retriever.strategy.select_k == 87 + + +def test_infers_adapter() -> None: + # Some vector stores require at least one document to be created. 
+ doc = Document( + id="doc", + page_content="lorem ipsum and whatnot", + ) + store = InMemoryVectorStore.from_documents([doc], FakeEmbeddings(size=8)) + + retriever = GraphRetriever( + store=store, + edges=[], + ) + + assert isinstance(retriever.adapter, InMemoryAdapter) + + +def sorted_ids(docs: Iterable[Document]) -> list[str]: + ids = [] + for doc in docs: + assert doc.id is not None + ids.append(doc.id) + return sorted(ids) + + +async def test_invoke() -> None: + doc1 = Document( + id="doc1", + page_content="lorem ipsum and whatnot", + ) + doc2 = Document( + id="doc2", + page_content="lorem ipsum and some more", + metadata={"mentions": ["doc1"]}, + ) + store = InMemoryVectorStore.from_documents([doc1, doc2], FakeEmbeddings(size=8)) + + retriever = GraphRetriever(store=store, edges=[("mentions", "$id")]) + + assert sorted_ids(retriever.invoke("lorem")) == ["doc1", "doc2"] + assert sorted_ids(await retriever.ainvoke("lorem")) == [ + "doc1", + "doc2", + ] diff --git a/packages/langchain-graph-retriever/tests/integration_tests/test_langchain.py b/packages/langchain-graph-retriever/tests/test_langchain.py similarity index 68% rename from packages/langchain-graph-retriever/tests/integration_tests/test_langchain.py rename to packages/langchain-graph-retriever/tests/test_langchain.py index 6b783b3a..cbf4e68b 100644 --- a/packages/langchain-graph-retriever/tests/integration_tests/test_langchain.py +++ b/packages/langchain-graph-retriever/tests/test_langchain.py @@ -1,9 +1,10 @@ -from langchain_core.vectorstores import InMemoryVectorStore +from graph_retriever.testing.embeddings import AnimalEmbeddings +from langchain_core.vectorstores.in_memory import InMemoryVectorStore from langchain_graph_retriever import GraphRetriever from langchain_tests.integration_tests import RetrieversIntegrationTests -from tests.animal_docs import load_animal_docs -from tests.embeddings.simple_embeddings import AnimalEmbeddings +from tests.conftest import load_animal_docs +from 
tests.embeddings import BaseEmbeddings class TestGraphTraversalRetriever(RetrieversIntegrationTests): @@ -13,7 +14,8 @@ def retriever_constructor(self) -> type[GraphRetriever]: @property def retriever_constructor_params(self) -> dict: - store = InMemoryVectorStore(embedding=AnimalEmbeddings()) + embedding = BaseEmbeddings(AnimalEmbeddings()) + store = InMemoryVectorStore(embedding=embedding) store.add_documents(load_animal_docs()) return { "store": store, diff --git a/packages/langchain-graph-retriever/tests/transformers/test_gliner.py b/packages/langchain-graph-retriever/tests/transformers/test_gliner.py new file mode 100644 index 00000000..727caaaf --- /dev/null +++ b/packages/langchain-graph-retriever/tests/transformers/test_gliner.py @@ -0,0 +1,35 @@ +from typing import Any + +import pytest +from langchain_core.documents import Document + + +@pytest.mark.extra +def test_transform_documents(animal_docs: list[Document]): + from gliner import GLiNER # type: ignore + from langchain_graph_retriever.transformers.gliner import GLiNERTransformer + + class FakeGLiNER(GLiNER): + def __init__(self): + pass + + def batch_predict_entities( + self, texts: list[str], **kwargs: Any + ) -> list[list[dict[str, str]]]: + return [[{"text": text.split()[0], "label": "first"}] for text in texts] + + first_doc = animal_docs[0].model_copy() + + fake_model = FakeGLiNER() + transformer = GLiNERTransformer( + ["first"], model=fake_model, metadata_key_prefix="prefix_" + ) + + transformed_docs = transformer.transform_documents(animal_docs) + assert "prefix_first" in transformed_docs[0].metadata + + with pytest.raises(ValueError, match="Invalid model"): + GLiNERTransformer([], model={}) + + # confirm original docs aren't modified + assert first_doc == animal_docs[0] diff --git a/packages/langchain-graph-retriever/tests/transformers/test_html.py b/packages/langchain-graph-retriever/tests/transformers/test_html.py new file mode 100644 index 00000000..2448b933 --- /dev/null +++ 
b/packages/langchain-graph-retriever/tests/transformers/test_html.py @@ -0,0 +1,55 @@ +import pytest +from langchain_core.documents import Document + +test_html = """ +<!DOCTYPE html> +<html><head><title>Animals of the World</title></head> +<body> + <h1>Animals of the World</h1> + <p>Explore the fascinating world of animals.</p> + + <h2>Mammals</h2> + <p>The <a href="https://example.com/lion">lion</a> is the king of the jungle.</p> + <p>The <a href="https://example.com/elephant">elephant</a> is a large animal.</p> + + <h2>Birds</h2> + <p>The <a href="https://example.com/eagle">eagle</a> soars high in the sky.</p> + <p>The <a href="https://example.com/penguin">penguin</a> thrives in icy areas.</p> + + <h2>Reptiles</h2> + <p>The <a href="https://example.com/crocodile">crocodile</a> is a predator.</p> + <p>The <a href="https://example.com/gecko">gecko</a> can climb walls.</p> +</body></html> +""" + + +@pytest.mark.extra +def test_transform_documents(): + from langchain_graph_retriever.transformers.html import ( + HyperlinkTransformer, + ) + + doc = Document( + id="animal_html", + page_content=test_html, + metadata={"_url": "https://example.com/animals"}, + ) + + original_doc = doc.model_copy() + + transformer = HyperlinkTransformer( + url_metadata_key="_url", + metadata_key="_hyperlinks", + ) + + transformed_doc = transformer.transform_documents([doc])[0] + assert "_hyperlinks" in transformed_doc.metadata + assert "https://example.com/gecko" in transformed_doc.metadata["_hyperlinks"] + assert len(transformed_doc.metadata["_hyperlinks"]) == 6 + + transformer = HyperlinkTransformer() + with pytest.raises(ValueError, match="html document url not found in metadata"): + transformer.transform_documents([doc]) + + # confirm original docs aren't modified + assert original_doc == doc diff --git a/packages/langchain-graph-retriever/tests/transformers/test_keybert.py b/packages/langchain-graph-retriever/tests/transformers/test_keybert.py new file mode 100644 index 
00000000..cec87ecb --- /dev/null +++ b/packages/langchain-graph-retriever/tests/transformers/test_keybert.py @@ -0,0 +1,36 @@ +from typing import Any + +import pytest +from langchain_core.documents import Document + + +@pytest.mark.extra +def test_transform_documents(animal_docs: list[Document]): + from keybert import KeyBERT # type: ignore + from langchain_graph_retriever.transformers.keybert import KeyBERTTransformer + + class FakeKeyBERT(KeyBERT): + def __init__(self): + pass + + def extract_keywords( + self, docs: list[str], **kwargs: Any + ) -> list[list[tuple[str, float]]]: + return [ + [(word, len(word)) for word in set(doc.split()) if len(word) > 5] + for doc in docs + ] + + first_doc = animal_docs[0].model_copy() + + fake_model = FakeKeyBERT() + transformer = KeyBERTTransformer(model=fake_model, metadata_key="keybert") + + transformed_docs = transformer.transform_documents(animal_docs) + assert "keybert" in transformed_docs[0].metadata + + with pytest.raises(ValueError, match="Invalid model"): + KeyBERTTransformer(model={}) + + # confirm original docs aren't modified + assert first_doc == animal_docs[0] diff --git a/packages/langchain-graph-retriever/tests/transformers/test_parent.py b/packages/langchain-graph-retriever/tests/transformers/test_parent.py new file mode 100644 index 00000000..9ee8d35d --- /dev/null +++ b/packages/langchain-graph-retriever/tests/transformers/test_parent.py @@ -0,0 +1,31 @@ +import pytest +from langchain_core.documents import Document +from langchain_graph_retriever.transformers.parent import ParentTransformer + + +def test_transform_documents(): + root = Document(id="root", page_content="test", metadata={"_path": "root"}) + h1 = Document(id="h1", page_content="test", metadata={"_path": "root.h1"}) + h1a = Document(id="h1a", page_content="test", metadata={"_path": "root.h1.a"}) + + original_h1 = h1.model_copy() + + transformer = ParentTransformer( + path_metadata_key="_path", parent_metadata_key="_parent", path_delimiter="." 
+ ) + + transformed_root = transformer.transform_documents([root])[0] + assert "_parent" not in transformed_root.metadata + + transformed_h1 = transformer.transform_documents([h1])[0] + assert transformed_h1.metadata["_parent"] == "root" + + transformed_h1a = transformer.transform_documents([h1a])[0] + assert transformed_h1a.metadata["_parent"] == "root.h1" + + transformer = ParentTransformer() + with pytest.raises(ValueError, match="path not found in metadata"): + transformer.transform_documents([root]) + + # confirm original docs aren't modified + assert original_h1 == h1 diff --git a/packages/langchain-graph-retriever/tests/transformers/test_shredding.py b/packages/langchain-graph-retriever/tests/transformers/test_shredding.py new file mode 100644 index 00000000..9ed4359b --- /dev/null +++ b/packages/langchain-graph-retriever/tests/transformers/test_shredding.py @@ -0,0 +1,56 @@ +import json + +from langchain_core.documents import Document +from langchain_graph_retriever.transformers.shredding import ( + DEFAULT_PATH_DELIMITER, + DEFAULT_STATIC_VALUE, + SHREDDED_KEYS_KEY, + ShreddingTransformer, +) + + +def test_transform_documents(animal_docs: list[Document]): + first_doc = animal_docs[0].model_copy() + original_docs = [animal_docs[0]] + transformer = ShreddingTransformer() + + # confirm "keywords" contains a list value in original document + list_key = "keywords" + original_metadata = original_docs[0].metadata + assert isinstance(original_metadata[list_key], list) + + # pull the first keyword in that list + first_keyword = original_metadata[list_key][0] + + # transform the document + shredded_docs = transformer.transform_documents(original_docs) + shredded_metadata = shredded_docs[0].metadata + + # confirm "keywords" no longer exists as a metadata key + assert list_key not in shredded_metadata + + # confirm "keywords" exists as a shredded key + assert list_key in json.loads(shredded_metadata[SHREDDED_KEYS_KEY]) + + # confirm the shredded key has the expected 
value + shredded_key = f'{list_key}{DEFAULT_PATH_DELIMITER}"{first_keyword}"' + assert shredded_key in shredded_metadata + assert shredded_metadata[shredded_key] == DEFAULT_STATIC_VALUE + + # confirm original docs aren't modified + assert first_doc == animal_docs[0] + + +def test_restore_documents(animal_docs: list[Document]): + first_doc = animal_docs[0].model_copy() + original_docs = [animal_docs[0]] + transformer = ShreddingTransformer() + + shredded_docs = transformer.transform_documents(original_docs) + + restored_docs = transformer.restore_documents(shredded_docs) + + assert original_docs[0] == restored_docs[0] + + # confirm original docs aren't modified + assert first_doc == animal_docs[0] diff --git a/packages/langchain-graph-retriever/tests/transformers/test_spacy.py b/packages/langchain-graph-retriever/tests/transformers/test_spacy.py new file mode 100644 index 00000000..827d40e1 --- /dev/null +++ b/packages/langchain-graph-retriever/tests/transformers/test_spacy.py @@ -0,0 +1,64 @@ +import pytest +from langchain_core.documents import Document + + +@pytest.mark.extra +def test_transform_documents(animal_docs: list[Document]): + from langchain_graph_retriever.transformers.spacy import SpacyNERTransformer + from spacy.language import Language # type: ignore + from spacy.tokens import Doc, Span # type: ignore + from spacy.vocab import Vocab # type: ignore + + class FakeLanguage(Language): + def __init__(self): + pass + + def __call__(self, text: str | Doc, **kwargs) -> Doc: + vocab = Vocab() + assert isinstance(text, str) + doc = Doc(vocab=vocab, words=text.split()) + doc.ents = [ + Span(doc, start=0, end=1, label="first"), + Span(doc, start=1, end=2, label="second"), + Span(doc, start=2, end=3, label="third"), + ] + return doc + + first_doc = animal_docs[0].model_copy() + + fake_model = FakeLanguage() + + transformer = SpacyNERTransformer(model=fake_model, metadata_key="spacey") + transformed_docs = transformer.transform_documents(animal_docs) + assert 
"spacey" in transformed_docs[0].metadata + assert "first: the" in transformed_docs[0].metadata["spacey"] + assert "second: aardvark" in transformed_docs[0].metadata["spacey"] + assert "third: is" in transformed_docs[0].metadata["spacey"] + + transformer = SpacyNERTransformer( + model=fake_model, metadata_key="spacey", include_labels=set(["first"]) + ) + transformed_docs = transformer.transform_documents(animal_docs) + assert "spacey" in transformed_docs[0].metadata + assert "first: the" in transformed_docs[0].metadata["spacey"] + assert "second: aardvark" not in transformed_docs[0].metadata["spacey"] + assert "third: is" not in transformed_docs[0].metadata["spacey"] + + transformer = SpacyNERTransformer( + model=fake_model, metadata_key="spacey", exclude_labels=set(["first"]) + ) + transformed_docs = transformer.transform_documents(animal_docs) + assert "spacey" in transformed_docs[0].metadata + assert "first: the" not in transformed_docs[0].metadata["spacey"] + assert "second: aardvark" in transformed_docs[0].metadata["spacey"] + assert "third: is" in transformed_docs[0].metadata["spacey"] + + transformer = SpacyNERTransformer(model=fake_model, metadata_key="spacey", limit=1) + transformed_docs = transformer.transform_documents(animal_docs) + assert len(transformed_docs[0].metadata["spacey"]) == 1 + + with pytest.raises(ValueError, match="Invalid model"): + SpacyNERTransformer(model={}) # type: ignore + + # confirm original docs aren't modified + assert first_doc == animal_docs[0] diff --git a/packages/langchain-graph-retriever/tests/unit_tests/document_transformers/test_metadata_denormalizer.py b/packages/langchain-graph-retriever/tests/unit_tests/document_transformers/test_metadata_denormalizer.py deleted file mode 100644 index 92658b31..00000000 --- a/packages/langchain-graph-retriever/tests/unit_tests/document_transformers/test_metadata_denormalizer.py +++ /dev/null @@ -1,48 +0,0 @@ -import json - -from langchain_core.documents import Document -from 
langchain_graph_retriever.document_transformers.metadata_denormalizer import ( - DEFAULT_PATH_DELIMITER, - DEFAULT_STATIC_VALUE, - DENORMALIZED_KEYS_KEY, - MetadataDenormalizer, -) - - -def test_transform_documents(animal_docs: list[Document]): - original_docs = [animal_docs[0]] - transformer = MetadataDenormalizer() - - # confirm "keywords" contains a list value in original document - list_key = "keywords" - original_metadata = original_docs[0].metadata - assert isinstance(original_metadata[list_key], list) - - # pull the first keyword in that list - first_keyword = original_metadata[list_key][0] - - # transform the document - denormalized_docs = transformer.transform_documents(original_docs) - denormalized_metadata = denormalized_docs[0].metadata - - # confirm "keywords" no longer exists as a metadata key - assert list_key not in denormalized_metadata - - # confirm "keywords" exists as a denormalized key - assert list_key in json.loads(denormalized_metadata[DENORMALIZED_KEYS_KEY]) - - # confirm the denormalized key has the expected value - denormalized_key = f"{list_key}{DEFAULT_PATH_DELIMITER}{first_keyword}" - assert denormalized_key in denormalized_metadata - assert denormalized_metadata[denormalized_key] == DEFAULT_STATIC_VALUE - - -def test_revert_documents(animal_docs: list[Document]): - original_docs = [animal_docs[0]] - transformer = MetadataDenormalizer() - - denormalized_docs = transformer.transform_documents(original_docs) - - reverted_docs = transformer.revert_documents(denormalized_docs) - - assert original_docs[0] == reverted_docs[0] diff --git a/packages/langchain-graph-retriever/tests/unit_tests/strategy/test_base.py b/packages/langchain-graph-retriever/tests/unit_tests/strategy/test_base.py deleted file mode 100644 index fff31ef8..00000000 --- a/packages/langchain-graph-retriever/tests/unit_tests/strategy/test_base.py +++ /dev/null @@ -1,85 +0,0 @@ -import pytest -from langchain_graph_retriever.strategies import ( - Eager, - Mmr, -) -from 
langchain_graph_retriever.strategies.base import Strategy - - -def test_build_strategy_base(): - base_strategy = Eager(k=6, start_k=5, adjacent_k=9, max_depth=2) - - # base strategy with no changes - strategy = Strategy.build(base_strategy=base_strategy) - assert strategy == base_strategy - - # base strategy with changed k - strategy = Strategy.build(base_strategy=base_strategy, k=7) - assert strategy == Eager(k=7, start_k=5, adjacent_k=9, max_depth=2) - - # base strategy with invalid kwarg - with pytest.warns(UserWarning, match=r"Unsupported key\(s\) 'invalid_kwarg' set."): - strategy = Strategy.build(base_strategy=base_strategy, invalid_kwarg=4) - assert strategy == base_strategy - - -def test_build_strategy_base_override(): - base_strategy = Eager(k=6, start_k=5, adjacent_k=9, max_depth=2) - override_strategy = Eager(k=7, start_k=4, adjacent_k=8, max_depth=3) - - # override base strategy - strategy = Strategy.build( - base_strategy=base_strategy, strategy=override_strategy, k=4 - ) - assert strategy == override_strategy.model_copy(update={"k": 4}) - - # override base strategy and change params - strategy = Strategy.build( - base_strategy=base_strategy, strategy=override_strategy, k=3, adjacent_k=7 - ) - assert strategy == Eager(k=3, start_k=4, adjacent_k=7, max_depth=3) - - # override base strategy and invalid kwarg - with pytest.warns(UserWarning, match=r"Unsupported key\(s\) 'invalid_kwarg' set."): - strategy = Strategy.build( - base_strategy=base_strategy, - strategy=override_strategy, - k=4, - invalid_kwarg=4, - ) - assert strategy == override_strategy.model_copy(update={"k": 4}) - - # attempt override base strategy with dict - with pytest.raises(ValueError, match="Unsupported 'strategy'"): - strategy = Strategy.build( - base_strategy=base_strategy, - strategy={"k": 9, "start_k": 7, "adjacent_k": 11}, - ) - - -def test_build_strategy_base_override_mmr(): - base_strategy = Eager(k=6, start_k=5, adjacent_k=9, max_depth=2) - override_strategy = Mmr(k=7, 
start_k=4, adjacent_k=8, max_depth=3, lambda_mult=0.3) - - # override base strategy with mmr kwarg - with pytest.warns(UserWarning, match=r"Unsupported key\(s\) 'lambda_mult' set."): - strategy = Strategy.build(base_strategy=base_strategy, lambda_mult=0.2) - assert strategy == base_strategy - - # override base strategy with mmr strategy - strategy = Strategy.build( - base_strategy=base_strategy, strategy=override_strategy, k=4 - ) - assert strategy == override_strategy.model_copy(update={"k": 4}) - - # override base strategy with mmr strategy and mmr arg - strategy = Strategy.build( - base_strategy=base_strategy, strategy=override_strategy, k=4, lambda_mult=0.2 - ) - assert strategy == Mmr(k=4, start_k=4, adjacent_k=8, max_depth=3, lambda_mult=0.2) - - # start with override strategy, change to base, try to set mmr arg - with pytest.warns(UserWarning, match=r"Unsupported key\(s\) 'lambda_mult' set."): - Strategy.build( - base_strategy=override_strategy, strategy=base_strategy, lambda_mult=0.2 - ) diff --git a/packages/langchain-graph-retriever/tests/unit_tests/test_graph_retriever.py b/packages/langchain-graph-retriever/tests/unit_tests/test_graph_retriever.py deleted file mode 100644 index 85dddb56..00000000 --- a/packages/langchain-graph-retriever/tests/unit_tests/test_graph_retriever.py +++ /dev/null @@ -1,21 +0,0 @@ -from langchain_core.embeddings import FakeEmbeddings -from langchain_core.vectorstores.in_memory import InMemoryVectorStore -from langchain_graph_retriever import GraphRetriever -from langchain_graph_retriever.adapters.in_memory import InMemoryAdapter -from langchain_graph_retriever.strategies import Mmr - - -def test_mmr_parameters() -> None: - # Makes sure that copying the MMR strategy creates new embeddings. 
- mmr1 = Mmr() - mmr1._query_embedding = [0.25, 0.5, 0.75] - assert id(mmr1._nd_query_embedding) == id(mmr1._nd_query_embedding) - - mmr2 = mmr1.model_copy(deep=True) - assert id(mmr1._nd_query_embedding) != id(mmr2._nd_query_embedding) - - -def test_init_parameters_override_strategy() -> None: - store = InMemoryAdapter(vector_store=InMemoryVectorStore(FakeEmbeddings(size=8))) - retriever = GraphRetriever(store=store, edges=[], k=87) # type: ignore[call-arg] - assert retriever.strategy.k == 87 diff --git a/pyproject.toml b/pyproject.toml index e18ab308..19d0ff41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,9 @@ ignore = ["D100", "D104", "D107"] [tool.ruff.lint.pydocstyle] convention = "numpy" +[tool.ruff.lint.pydoclint] +ignore-one-line-docstrings = true + [tool.ruff.lint.per-file-ignores] "**/{tests,tools,scripts}/*" = ["D", "DOC"] "*.ipynb" = ["D", "DOC"] @@ -49,20 +52,28 @@ dev = [ "pytest-cov>=4.0.0", "pytest-asyncio>=0.25.2", "poethepoet>=0.32.1", + "nbstripout>=0.8.1", + "nbmake>=1.5.5", ] docs = [ - "quartodoc>=0.9.1", - "nbformat>=5.10.4", - "nbclient>=0.10.2", - "langchain-openai>=0.3.1", + "graph-rag-example-helpers", + "griffe-inherited-docstrings>=1.1.1", + "griffe-pydantic>=1.1.0", + "griffe>=1.5.5", "ipykernel>=6.29.5", - "langchain-astradb>=0.5.2", - "langchain-graph-retriever>=0.1.0", + "langchain-astradb>=0.5.3", + "langchain-graph-retriever", + "langchain-openai>=0.3.1", "matplotlib>=3.10.0", - "jupyterlab>=4.3.4", + "mkdocs-jupyter>=0.25.1", + "mkdocs-material[imaging]>=9.6.2", + "mkdocs>=1.6.1", + "mkdocstrings[python]>=0.28.0", "pip>=25.0", - "graph-rag-example-helpers", + "mkdocs-table-reader-plugin>=3.1.0", + "ruff>=0.9.4", + "mkdocs-macros-plugin>=1.3.7", ] [tool.coverage.report] @@ -73,23 +84,19 @@ branch = true source = [ "packages/graph-retriever/src/graph_retriever", "packages/langchain-graph-retriever/src/langchain_graph_retriever", - ] +] [tool.poe] # comment this out to debug poe tasks verbosity = -1 [tool.poe.env] 
-RUFF = "ruff@0.8.6" +RUFF = "ruff@0.9.4" DEPTRY = "deptry@0.22.0" -[tool.poe.tasks.install-deps] +[tool.poe.tasks.sync] help = "Install dependencies from all packages and all extras" -cmd = "uv sync --all-packages --all-extras" - -[tool.poe.tasks.install-docs-deps] -help = "Install dependencies from all packages and all extras" -cmd = "uv sync --all-packages --all-extras --group docs" +cmd = "uv sync --group=docs --all-packages --all-extras" [tool.poe.tasks.lock-check] help = "Runs `uv lock --locked` to check uv.lock file consistency (fix with `lock-fix`)" @@ -132,58 +139,72 @@ sequence = [ {cmd = "uvx ${DEPTRY} src tests", cwd = "packages/graph-rag-example-helpers"}, ] +[tool.poe.tasks.nbstripout] +help = "Runs `nbstripout` to remove output" +shell = "find . -name '*.ipynb' | awk 'system(\"git check-ignore --quiet \" $0)' | xargs nbstripout" + +[tool.poe.tasks.nbstripout-check] +help = "Runs `nbstripout` to remove output" +shell = "find . -name '*.ipynb' | awk 'system(\"git check-ignore --quiet \" $0)' | xargs nbstripout --verify" + [tool.poe.tasks.lint] help = "Runs all formatting, lints, and checks (fixing where possible)" -sequence = [ "lock-fix", "fmt-fix", "lint-fix", "type-check", "dep-check"] +sequence = [ "lock-fix", "fmt-fix", "lint-fix", "type-check", "dep-check", "docs-build" ] -[tool.poe.tasks.integration] -help = "Runs integration tests (against in-memory stores)" -cwd = "packages/langchain-graph-retriever" -args = [{ name = "path", positional = true, default = "tests/integration_tests"}] -cmd = "uv run pytest -vs $path" +[tool.poe.tasks.test-gr] +help = "Runs graph-retriever tests (against in-memory stores)" +cwd = "packages/graph-retriever" +cmd = "uv run pytest -vs ." 
-[tool.poe.tasks.unit] -help = "Runs unit tests" +[tool.poe.tasks.test-lgr] +help = "Runs langchain-graph-retriever tests (against in-memory stores)" cwd = "packages/langchain-graph-retriever" -args = [{ name = "path", positional = true, default = "tests/unit_tests"}] -cmd = "uv run pytest -vs $path" +cmd = "uv run pytest -vs --runextras ." [tool.poe.tasks.test] help = "Runs all tests (against in-memory stores)" -cwd = "packages/langchain-graph-retriever" -cmd = "uv run pytest -vs ." +sequence = [ "test-gr", "test-lgr"] + +[tool.poe.tasks.test-nb] +help = "Runs tests on notebooks" +shell = "find . -name '*.ipynb' | awk 'system(\"git check-ignore --quiet \" $0)' | xargs | uv run pytest docs --nbmake" [tool.poe.tasks.test-all] help = "Runs all tests (against all stores)" -cwd = "packages/langchain-graph-retriever" -cmd = "uv run pytest -vs . --stores=all" +sequence = [ + "test-gr", + { cmd = "uv run pytest -vs . --stores=all", cwd = "packages/langchain-graph-retriever" }, + "test-nb", +] [tool.poe.tasks.build] help = "Builds all the packages" sequence = [ - "install-deps", + "sync", {cmd = "uv build --all-packages"}, ] -[tool.poe.tasks.docs-api] -help = "Updates the package installation and generates the API docs" +[tool.poe.tasks.docs-serve] +help = "Starts a live preview of the docs site" sequence = [ - {cmd = "uv pip install packages/* --force-reinstall"}, - {cmd = "uv run quartodoc build", cwd="docs"}, + "sync", + {cmd = "uv run mkdocs serve -o"}, ] -[tool.poe.tasks.docs-preview] -help = "Starts a live preview of the docs site" +[tool.poe.tasks.docs-build] +help = "Build the docs" sequence = [ - "docs-api", - {cmd = "uv run quarto preview", cwd="docs"}, + "sync", + {cmd = "uv run mkdocs build --strict"}, ] -[tool.poe.tasks.docs-build] -help = "Builds the docs site in `_site`" +[tool.poe.tasks.coverage] +help = "Runs tests and reports coverage." 
sequence = [ - "docs-api", - {cmd = "uv run quarto render", cwd="docs"}, + {cmd = "uv run coverage run -m pytest packages/graph-retriever" }, + {cmd = "uv run coverage run -a -m pytest packages/langchain-graph-retriever --stores=all" }, + {cmd = "uv run coverage html" }, # make htmlcov/index.html + {cmd = "uv run coverage report" }, # print terminal summary ] [tool.poe.tasks.help] diff --git a/scripts/drop-astra-keyspace.py b/scripts/drop-astra-keyspace.py index 38844e86..87843eca 100644 --- a/scripts/drop-astra-keyspace.py +++ b/scripts/drop-astra-keyspace.py @@ -1,13 +1,18 @@ import os +import sys -from astrapy import AstraDBDatabaseAdmin -from astrapy.authentication import StaticTokenProvider +from astrapy import DataAPIClient + +token = os.environ["ASTRA_DB_APPLICATION_TOKEN"] +keyspace = os.environ.get("ASTRA_DB_KEYSPACE") +if keyspace is None: + print("No keyspace to drop.") # noqa: T201 + sys.exit() -token = StaticTokenProvider(os.environ["ASTRA_DB_APPLICATION_TOKEN"]) -keyspace = os.environ.get("ASTRA_DB_KEYSPACE", "default_keyspace") api_endpoint = os.environ["ASTRA_DB_API_ENDPOINT"] -admin = AstraDBDatabaseAdmin(api_endpoint=api_endpoint, token=token) +my_client = DataAPIClient(token=token) +admin = my_client.get_admin().get_database_admin(api_endpoint) keyspaces = admin.list_keyspaces() if keyspace in keyspaces: print(f"Dropping keyspace '{keyspace}'") # noqa: T201 diff --git a/scripts/langchain-interlinks.py b/scripts/langchain-interlinks.py new file mode 100644 index 00000000..3ea72cce --- /dev/null +++ b/scripts/langchain-interlinks.py @@ -0,0 +1,33 @@ +import json +from typing import Any + +file = "docs/_inv/langchain_objects.json" + +langchain_class_remapping = { + "langchain_astradb.vectorstores.AstraDBVectorStore": "langchain_astradb.AstraDBVectorStore", # noqa: E501 + "langchain_chroma.vectorstores.Chroma": "langchain_chroma.Chroma", # noqa: E501 + "langchain_core.documents.base.Document": "langchain_core.documents.Document", # noqa: E501 + 
"langchain_core.vectorstores.in_memory.InMemoryVectorStore": "langchain_core.vectorstores.InMemoryVectorStore", # noqa: E501 + "langchain_core.vectorstores.base.VectorStore": "langchain_core.vectorstores.VectorStore", # noqa: E501 + "langchain_community.vectorstores.opensearch_vector_search.OpenSearchVectorSearch": "langchain_community.vectorstores.OpenSearchVectorSearch", # noqa: E501 + "langchain_community.vectorstores.cassandra.Cassandra": "langchain_community.vectorstores.Cassandra", # noqa: E501 +} + +objects: dict[str, Any] = {} +with open(file) as f: + objects = json.load(f) + +items: list[dict[str, Any]] = objects["items"] + +new_items = [] + +for item in items: + if item["name"] in langchain_class_remapping.keys(): + new_item = item.copy() + new_item["name"] = langchain_class_remapping[item["name"]] + new_items.append(new_item) + +items.extend(new_items) + +with open(file, "w") as f: + json.dump(obj=objects, fp=f) diff --git a/uv.lock b/uv.lock index 1166ddbc..87ca1d39 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.12.4'", @@ -147,52 +148,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, ] -[[package]] -name = "argon2-cffi" -version = "23.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "argon2-cffi-bindings" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/31/fa/57ec2c6d16ecd2ba0cf15f3c7d1c3c2e7b5fcb83555ff56d7ab10888ec8f/argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", size = 42798 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea", size = 15124 }, -] - -[[package]] -name = "argon2-cffi-bindings" -version = "21.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b9/e9/184b8ccce6683b0aa2fbb7ba5683ea4b9c5763f1356347f1312c32e3c66e/argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", size = 1779911 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/13/838ce2620025e9666aa8f686431f67a29052241692a3dd1ae9d3692a89d3/argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", size = 29658 }, - { url = "https://files.pythonhosted.org/packages/b3/02/f7f7bb6b6af6031edb11037639c697b912e1dea2db94d436e681aea2f495/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", size = 80583 }, - { url = "https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", size = 86168 }, - { url = "https://files.pythonhosted.org/packages/74/f6/4a34a37a98311ed73bb80efe422fed95f2ac25a4cacc5ae1d7ae6a144505/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c", size = 82709 }, - { url = 
"https://files.pythonhosted.org/packages/74/2b/73d767bfdaab25484f7e7901379d5f8793cccbb86c6e0cbc4c1b96f63896/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", size = 83613 }, - { url = "https://files.pythonhosted.org/packages/4f/fd/37f86deef67ff57c76f137a67181949c2d408077e2e3dd70c6c42912c9bf/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", size = 84583 }, - { url = "https://files.pythonhosted.org/packages/6f/52/5a60085a3dae8fded8327a4f564223029f5f54b0cb0455a31131b5363a01/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", size = 88475 }, - { url = "https://files.pythonhosted.org/packages/8b/95/143cd64feb24a15fa4b189a3e1e7efbaeeb00f39a51e99b26fc62fbacabd/argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", size = 27698 }, - { url = "https://files.pythonhosted.org/packages/37/2c/e34e47c7dee97ba6f01a6203e0383e15b60fb85d78ac9a15cd066f6fe28b/argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", size = 30817 }, - { url = "https://files.pythonhosted.org/packages/5a/e4/bf8034d25edaa495da3c8a3405627d2e35758e44ff6eaa7948092646fdcc/argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", size = 53104 }, -] - -[[package]] -name = "arrow" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "python-dateutil" }, - { name = "types-python-dateutil" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/2e/00/0f6e8fcdb23ea632c866620cc872729ff43ed91d284c866b515c6342b173/arrow-1.3.0.tar.gz", hash = 
"sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85", size = 131960 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", size = 66419 }, -] - [[package]] name = "asgiref" version = "3.8.1" @@ -207,18 +162,19 @@ wheels = [ [[package]] name = "astrapy" -version = "1.5.2" +version = "2.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecation" }, { name = "httpx", extra = ["http2"] }, { name = "pymongo" }, { name = "toml" }, + { name = "typing-extensions" }, { name = "uuid6" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/29/cc/5996efd0598b97d44699f04b5c025080fb1665f30794010f06543f3322a7/astrapy-1.5.2.tar.gz", hash = "sha256:eaf703628b0d03891ae7c391ef04ff3aec1005837fdfa47c19f2ed4478c45a4a", size = 163233 } +sdist = { url = "https://files.pythonhosted.org/packages/0c/c9/5c9488664f99f2b9738d8e4823f8a0474897f39a17c0676430f548834adb/astrapy-2.0.1.tar.gz", hash = "sha256:3a35ebd7af5c24f0abe400f9b2778dc5e8812c78ae247f97704be27e6cb9dc5a", size = 252460 } wheels = [ - { url = "https://files.pythonhosted.org/packages/db/48/684c270724bc3f8d12714556d201aa4610623da919505a6a09e56f50ef6a/astrapy-1.5.2-py3-none-any.whl", hash = "sha256:598b86de723727a11ec43e1c7fe682ecb42d63d37a94165fb08de41c20103f56", size = 177128 }, + { url = "https://files.pythonhosted.org/packages/90/6d/1481615ec3b97e1b8c9058c43c804e02612642d5114a26812894e42fb9b4/astrapy-2.0.1-py3-none-any.whl", hash = "sha256:4fa37f2955e7543a29e78565833de0dc8e39316ccc2f66263a4665b5871b7739", size = 300495 }, ] [[package]] @@ -230,18 +186,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = 
"sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, ] -[[package]] -name = "async-lru" -version = "2.0.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/80/e2/2b4651eff771f6fd900d233e175ddc5e2be502c7eb62c0c42f975c6d36cd/async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627", size = 10019 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/9f/3c3503693386c4b0f245eaf5ca6198e3b28879ca0a40bde6b0e319793453/async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224", size = 6111 }, -] - [[package]] name = "async-timeout" version = "4.0.3" @@ -310,15 +254,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/05/2546085c6dc07a45627460a39e6291b82382b434fff2bd0167ff3bc31eb1/bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e158009a54c4c8bc91d5e0da80920d048f918c61a581f0a63e4e93bb556d362f", size = 274652 }, ] -[[package]] -name = "beartype" -version = "0.19.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/e1/00515b97afa3993b4a314e4bc168fbde0917fd5845435cb6f16a19770746/beartype-0.19.0.tar.gz", hash = "sha256:de42dfc1ba5c3710fde6c3002e3bd2cad236ed4d2aabe876345ab0b4234a6573", size = 1294480 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/69/f6db6e4cb2fe2f887dead40b76caa91af4844cb647dd2c7223bb010aa416/beartype-0.19.0-py3-none-any.whl", hash = "sha256:33b2694eda0daf052eb2aff623ed9a8a586703bbf0a90bbc475a83bbf427f699", size = 1039760 }, -] - [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -331,40 +266,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, ] -[[package]] -name = "black" -version = "24.10.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d8/0d/cc2fb42b8c50d80143221515dd7e4766995bd07c56c9a3ed30baf080b6dc/black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", size = 645813 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/f3/465c0eb5cddf7dbbfe1fecd9b875d1dcf51b88923cd2c1d7e9ab95c6336b/black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812", size = 1623211 }, - { url = "https://files.pythonhosted.org/packages/df/57/b6d2da7d200773fdfcc224ffb87052cf283cec4d7102fab450b4a05996d8/black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea", size = 1457139 }, - { url = "https://files.pythonhosted.org/packages/6e/c5/9023b7673904a5188f9be81f5e129fff69f51f5515655fbd1d5a4e80a47b/black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f", size = 1753774 }, - { url = "https://files.pythonhosted.org/packages/e1/32/df7f18bd0e724e0d9748829765455d6643ec847b3f87e77456fc99d0edab/black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e", 
size = 1414209 }, - { url = "https://files.pythonhosted.org/packages/c2/cc/7496bb63a9b06a954d3d0ac9fe7a73f3bf1cd92d7a58877c27f4ad1e9d41/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", size = 1607468 }, - { url = "https://files.pythonhosted.org/packages/2b/e3/69a738fb5ba18b5422f50b4f143544c664d7da40f09c13969b2fd52900e0/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", size = 1437270 }, - { url = "https://files.pythonhosted.org/packages/c9/9b/2db8045b45844665c720dcfe292fdaf2e49825810c0103e1191515fc101a/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", size = 1737061 }, - { url = "https://files.pythonhosted.org/packages/a3/95/17d4a09a5be5f8c65aa4a361444d95edc45def0de887810f508d3f65db7a/black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", size = 1423293 }, - { url = "https://files.pythonhosted.org/packages/90/04/bf74c71f592bcd761610bbf67e23e6a3cff824780761f536512437f1e655/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", size = 1644256 }, - { url = "https://files.pythonhosted.org/packages/4c/ea/a77bab4cf1887f4b2e0bce5516ea0b3ff7d04ba96af21d65024629afedb6/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", size = 1448534 }, - { url = "https://files.pythonhosted.org/packages/4e/3e/443ef8bc1fbda78e61f79157f303893f3fddf19ca3c8989b163eb3469a12/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", size = 1761892 }, - { url = 
"https://files.pythonhosted.org/packages/52/93/eac95ff229049a6901bc84fec6908a5124b8a0b7c26ea766b3b8a5debd22/black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8", size = 1434796 }, - { url = "https://files.pythonhosted.org/packages/d0/a0/a993f58d4ecfba035e61fca4e9f64a2ecae838fc9f33ab798c62173ed75c/black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981", size = 1643986 }, - { url = "https://files.pythonhosted.org/packages/37/d5/602d0ef5dfcace3fb4f79c436762f130abd9ee8d950fa2abdbf8bbc555e0/black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b", size = 1448085 }, - { url = "https://files.pythonhosted.org/packages/47/6d/a3a239e938960df1a662b93d6230d4f3e9b4a22982d060fc38c42f45a56b/black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2", size = 1760928 }, - { url = "https://files.pythonhosted.org/packages/dd/cf/af018e13b0eddfb434df4d9cd1b2b7892bab119f7a20123e93f6910982e8/black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b", size = 1436875 }, - { url = "https://files.pythonhosted.org/packages/8d/a7/4b27c50537ebca8bec139b872861f9d2bf501c5ec51fcf897cb924d9e264/black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d", size = 206898 }, -] - [[package]] name = "bleach" version = "6.2.0" @@ -382,6 +283,38 @@ css = [ { name = "tinycss2" }, ] +[[package]] +name = "blis" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e5/69/d4829c5b5cd244e6140a0754a0f73cc725c6e138f609b4c5d1982e699906/blis-1.2.0.tar.gz", hash = "sha256:f25f99d7f3cad72c86a7499212ee833fb5062d80ad1763a935e0e498bc147c69", size = 2375955 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/ff/c55d9d42a622b95fca27f82d4674cd19ad86941dc893f0898ebcccdab105/blis-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:76998702acbb782e9bb298a5c446aaa1ed4652dbade853baa6a7a26f7b98105b", size = 6973751 }, + { url = "https://files.pythonhosted.org/packages/fd/bc/5993eb63fc8a2784fb3a82320bd65df958d7250047f77f467508da896296/blis-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c290c1ba6cb5b633abe59b2fb9ae2ea5dcd7508202f65658fe816bb7e129485", size = 1280762 }, + { url = "https://files.pythonhosted.org/packages/ff/65/3dae66f7aec4fe92726f33180cb8780d6a9bc49de25b3ee413275ff1aaf3/blis-1.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd81489e4b1a4a6bc51f5578795bc9150a2e8b9babead1074ca51398aff51852", size = 3186927 }, + { url = "https://files.pythonhosted.org/packages/4c/96/a420114cb430a790a038ca5a67171b5b124b2b1b0463be2e93bfa8c3378d/blis-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4896cc4c10c9856c9faaf89401dcb87894da06a18b4b986064acd737a6ed3e60", size = 11526130 }, + { url = "https://files.pythonhosted.org/packages/a1/a3/a626f0e90683667a83cb735fe9638e4ffd0004a188287868a79771fb257f/blis-1.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:60a29dcb1bba49cae70088d480b95042d4fbbe6b380f2f7c9e70b2781dc126dd", size = 4225925 }, + { url = "https://files.pythonhosted.org/packages/c9/70/655b6017396074b1c05010d9127c18eb5f404b22e2b819f1e6da50f202fe/blis-1.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fc1de26073302a3713e487ea85d1ecd0bce204f6b102da498c3cd08528a1d69e", size = 14694282 }, + { url = 
"https://files.pythonhosted.org/packages/af/e0/4ac06562b5dce221fbe20a1f0acd47f67454c377d00b1de0dd44de67116d/blis-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cc2aa5ce96f33162779e88add93b5051437f9c2701d24ee0d2dd89da9a9c23b1", size = 6247380 }, + { url = "https://files.pythonhosted.org/packages/3c/3f/62bc963d7cad6d5d4038ca0fed236559abd67c1afca33a2d5644412470f7/blis-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:debafb46ad8b5e2d18932770639aa1d22b61580a07ec718e9efcf50c76e180d6", size = 6976662 }, + { url = "https://files.pythonhosted.org/packages/70/4e/4d030d66d3de8dbe12217b4bb0fc67264df9befea07f6c164d33a23b0b09/blis-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb27e94b9dbd9c23595b95155607a57ad814bebd3cc1bf8551bee4af60e1b5d7", size = 1281766 }, + { url = "https://files.pythonhosted.org/packages/c8/32/9994aa6a2cc00f97a71cb6079364c3108da35e19203affcd9c541309728a/blis-1.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f8ed98669144fb8ee30052f7259d0cb78b7b3755d9589d98cbb7986d22473ab7", size = 3304018 }, + { url = "https://files.pythonhosted.org/packages/73/e7/95ae571ccfe5c43fb65fce5921e8a6213c4443a2e18a9ca5b6bad2fc8aab/blis-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08f62b6f114370d8449b4836ebd157980a5718a5c39266af9cdff67a9602a421", size = 11659934 }, + { url = "https://files.pythonhosted.org/packages/21/09/e99e3575eb3609db01948a4bbc3abce03e47be53c18338aa7a657bc92f1b/blis-1.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cc5c25fb12fd134812ea47e3fcbbd64d46d0717d307c5c2fb32a45ac8daf3226", size = 4360532 }, + { url = "https://files.pythonhosted.org/packages/fc/94/2575e8e7716f25265ea17a7272c4dc5b0d32b4d2c52aafbf5425cfbf998c/blis-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:78a6498c748a42494a2cf58be489616a42ba0b925bc92ab23c3721dc779a4739", size = 14827891 }, + { url = 
"https://files.pythonhosted.org/packages/c4/d9/b647ef53c33c82c1fa2ed217c5793de551a38fb1e5b2430f59c3ecba4c86/blis-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:5ad68bc972f210a0227d9742bf6325600bb95c8188f97850634f6d97c3a08107", size = 6230482 }, + { url = "https://files.pythonhosted.org/packages/fb/98/79df1711d96b38a3cf72b2abad412191fe2ada986b6203a1237dcd7aac9a/blis-1.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:99df869b8998303cf78e9f408f0350b0c5cd12d733caa8df99682f046b83ea35", size = 6989668 }, + { url = "https://files.pythonhosted.org/packages/1e/bb/3f84de3303873783f6c2dee121d0a36fae641332db73b046cc93cb7b717e/blis-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4001df564c43c8f2260b13c4f06327dee23831b178f65884c22b879062ebca14", size = 1282523 }, + { url = "https://files.pythonhosted.org/packages/91/4d/d0a599555fd97d3229d3c3fd8c7e5b531ca5863421370e99b46d70bce883/blis-1.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6af5dec35acfc044e29b89bb9202e74edc747344f5a46fc27e8a8998f8229610", size = 3260765 }, + { url = "https://files.pythonhosted.org/packages/4d/59/b7571c5fa57b2198b5240f8cd790daf5749491cc17706e3a4b1528a75185/blis-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:986f125ad0215e975a0895505728644dff2669a739f6c2faf89436e3fcae21ac", size = 11616436 }, + { url = "https://files.pythonhosted.org/packages/fe/50/9c1311aa73d9812e3c78ebeec7c4fb0b15fdecfcc9a4866f1e3c06d0f331/blis-1.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ea1f4ce1541cddbc9b0574a5969df2a518c5a6d4aa8787782dab5d82233a1458", size = 4309834 }, + { url = "https://files.pythonhosted.org/packages/d1/12/02f3afacf790a93e4d9f367cc5cdd95ed0348e5d2927bc4d9c7d1d70d1ae/blis-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6358168c4218a36e49c244c714f50248a1ef981874ae7bc785d68e76d55c57b5", size = 14789989 }, + { url = 
"https://files.pythonhosted.org/packages/c0/3a/ce0a98664d6283276fa986685e308c1dc1feb634241b2d3828ceaaa5a128/blis-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f4f99fb3dc0cf50bbbf0ee4b850f13e64fbb84fdaab0864fd97af0bee0ced", size = 6258036 }, +] + [[package]] name = "build" version = "1.2.2.post1" @@ -407,6 +340,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/07/14f8ad37f2d12a5ce41206c21820d8cb6561b728e51fad4530dff0552a67/cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292", size = 9524 }, ] +[[package]] +name = "cairocffi" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/c5/1a4dc131459e68a173cbdab5fad6b524f53f9c1ef7861b7698e998b837cc/cairocffi-1.7.1.tar.gz", hash = "sha256:2e48ee864884ec4a3a34bfa8c9ab9999f688286eb714a15a43ec9d068c36557b", size = 88096 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/d8/ba13451aa6b745c49536e87b6bf8f629b950e84bd0e8308f7dc6883b67e2/cairocffi-1.7.1-py3-none-any.whl", hash = "sha256:9803a0e11f6c962f3b0ae2ec8ba6ae45e957a146a004697a1ac1bbf16b073b3f", size = 75611 }, +] + +[[package]] +name = "cairosvg" +version = "2.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cairocffi" }, + { name = "cssselect2" }, + { name = "defusedxml" }, + { name = "pillow" }, + { name = "tinycss2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/e6/ec5900b724e3c44af7f6f51f719919137284e5da4aabe96508baec8a1b40/CairoSVG-2.7.1.tar.gz", hash = "sha256:432531d72347291b9a9ebfb6777026b607563fd8719c46ee742db0aef7271ba0", size = 8399085 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/a5/1866b42151f50453f1a0d28fc4c39f5be5f412a2e914f33449c42daafdf1/CairoSVG-2.7.1-py3-none-any.whl", hash = "sha256:8a5222d4e6c3f86f1f7046b63246877a63b49923a1cd202184c3a634ef546b3b", size = 
43235 }, +] + [[package]] name = "cassandra-driver" version = "3.29.2" @@ -450,6 +411,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/25347dca6ca4032d421cd674fdb35bb02985baf5dcb8cbfac0cc95dfae49/cassio-0.1.10-py3-none-any.whl", hash = "sha256:9eebe5f18b627d0f328de4dbbf22c68cc76dbeecf46d846c0277e410de5cb1dc", size = 45726 }, ] +[[package]] +name = "catalogue" +version = "2.0.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325 }, +] + [[package]] name = "certifi" version = "2024.12.14" @@ -653,6 +623,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 }, ] +[[package]] +name = "cloudpathlib" +version = "0.20.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/0b/a47d78ed2816db100543b504fdbfc2070f422aac858e6bcf775713e37b8a/cloudpathlib-0.20.0.tar.gz", hash = "sha256:f6ef7ca409a510f7ba4639ba50ab3fc5b6dee82d6dff0d7f5715fd0c9ab35891", size = 45149 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/6e/b64600156934dab14cc8b403095a9ea8bd722aad2e775673c68346b76220/cloudpathlib-0.20.0-py3-none-any.whl", hash = 
"sha256:7af3bcefbf73392ae7f31c08b3660ec31607f8c01b7f6262d4d73469a845f641", size = 52547 }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -686,6 +668,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 }, ] +[[package]] +name = "confection" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "srsly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/51/d3/57c6631159a1b48d273b40865c315cf51f89df7a9d1101094ef12e3a37c2/confection-0.1.5.tar.gz", hash = "sha256:8e72dd3ca6bd4f48913cd220f10b8275978e740411654b6e8ca6d7008c590f0e", size = 38924 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/00/3106b1854b45bd0474ced037dfe6b73b90fe68a68968cef47c23de3d43d2/confection-0.1.5-py3-none-any.whl", hash = "sha256:e29d3c3f8eac06b3f77eb9dfb4bf2fc6bcc9622a98ca00a698e3d019c6430b14", size = 35451 }, +] + [[package]] name = "contourpy" version = "1.3.1" @@ -814,6 +809,19 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = "cssselect2" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tinycss2" }, + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/fc/326cb6f988905998f09bb54a3f5d98d4462ba119363c0dfad29750d48c09/cssselect2-0.7.0.tar.gz", hash = "sha256:1ccd984dab89fc68955043aca4e1b03e0cf29cad9880f6e28e3ba7a74b14aa5a", size = 35888 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/3a/e39436efe51894243ff145a37c4f9a030839b97779ebcc4f13b3ba21c54e/cssselect2-0.7.0-py3-none-any.whl", hash = "sha256:fd23a65bfd444595913f02fc71f6b286c29261e354c41d722ca7a261a49b5969", size = 15586 }, +] + [[package]] name = "cycler" version = "0.12.1" @@ 
-823,6 +831,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 }, ] +[[package]] +name = "cymem" +version = "2.0.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/4a/1acd761fb6ac4c560e823ce40536a62f886f2d59b2763b5c3fc7e9d92101/cymem-2.0.11.tar.gz", hash = "sha256:efe49a349d4a518be6b6c6b255d4a80f740a341544bde1a807707c058b88d0bd", size = 10346 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/55/f453f2b2f560e057f20eb2acdaafbf6488d72a6e8a36a4aef30f6053a51c/cymem-2.0.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1b4dd8f8c2475c7c9948eefa89c790d83134600858d8d43b90276efd8df3882e", size = 41886 }, + { url = "https://files.pythonhosted.org/packages/a6/9d/03299eff35bd4fd80db33e4fd516661b82bb7b898cb677829acf22391ede/cymem-2.0.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d46ba0d2e0f749195297d16f2286b55af7d7c084db2b853fdfccece2c000c5dc", size = 41696 }, + { url = "https://files.pythonhosted.org/packages/d3/0c/90aa41f258a67ea210886c5c73f88dc9f120b7a20e6b5d92c5ce73a68276/cymem-2.0.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:739c4336b9d04ce9761851e9260ef77508d4a86ee3060e41302bfb6fa82c37de", size = 203719 }, + { url = "https://files.pythonhosted.org/packages/52/d1/dc4a72aa2049c34a53a220290b1a59fadae61929dff3a6e1a830a22971fe/cymem-2.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a69c470c2fb118161f49761f9137384f46723c77078b659bba33858e19e46b49", size = 204763 }, + { url = "https://files.pythonhosted.org/packages/69/51/86ed323585530558bcdda1324c570abe032db2c1d5afd1c5e8e3e8fde63a/cymem-2.0.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:40159f6c92627438de970fd761916e745d70dfd84a7dcc28c1627eb49cee00d8", size = 193964 }, + { url = "https://files.pythonhosted.org/packages/ed/0c/aee4ad2996a4e24342228ccf44d7835c7784042f0ee0c47ad33be1443f18/cymem-2.0.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f503f98e6aa333fffbe657a6854f13a9c3de68860795ae21171284213b9c5c09", size = 195002 }, + { url = "https://files.pythonhosted.org/packages/eb/d5/eda823d639258d2ed1db83403c991a9a57d5a4ddea3bf08e59060809a9aa/cymem-2.0.11-cp310-cp310-win_amd64.whl", hash = "sha256:7f05ed5920cc92d6b958ec5da55bd820d326fe9332b90660e6fa67e3b476ceb1", size = 39079 }, + { url = "https://files.pythonhosted.org/packages/03/e3/d98e3976f4ffa99cddebc1ce379d4d62e3eb1da22285267f902c99cc3395/cymem-2.0.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3ee54039aad3ef65de82d66c40516bf54586287b46d32c91ea0530c34e8a2745", size = 42005 }, + { url = "https://files.pythonhosted.org/packages/41/b4/7546faf2ab63e59befc95972316d62276cec153f7d4d60e7b0d5e08f0602/cymem-2.0.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c05ef75b5db217be820604e43a47ccbbafea98ab6659d07cea92fa3c864ea58", size = 41747 }, + { url = "https://files.pythonhosted.org/packages/7d/4e/042f372e5b3eb7f5f3dd7677161771d301de2b6fa3f7c74e1cebcd502552/cymem-2.0.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d5381e5793ce531bac0dbc00829c8381f18605bb67e4b61d34f8850463da40", size = 217647 }, + { url = "https://files.pythonhosted.org/packages/48/cb/2207679e4b92701f78cf141e1ab4f81f55247dbe154eb426b842a0a993de/cymem-2.0.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b9d3f42d7249ac81802135cad51d707def058001a32f73fc7fbf3de7045ac7", size = 218857 }, + { url = "https://files.pythonhosted.org/packages/31/7a/76ae3b7a39ab2531029d281e43fcfcaad728c2341b150a81a3a1f5587cf3/cymem-2.0.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:39b78f2195d20b75c2d465732f6b8e8721c5d4eb012777c2cb89bdb45a043185", size = 206148 
}, + { url = "https://files.pythonhosted.org/packages/25/f9/d0fc0191ac79f15638ddb59237aa76f234691374d7d7950e10f384bd8a25/cymem-2.0.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2203bd6525a80d8fd0c94654a263af21c0387ae1d5062cceaebb652bf9bad7bc", size = 207112 }, + { url = "https://files.pythonhosted.org/packages/56/c8/75f75889401b20f4c3a7c5965dda09df42913e904ddc2ffe7ef3bdf25061/cymem-2.0.11-cp311-cp311-win_amd64.whl", hash = "sha256:aa54af7314de400634448da1f935b61323da80a49484074688d344fb2036681b", size = 39360 }, + { url = "https://files.pythonhosted.org/packages/71/67/0d74f7e9d79f934368a78fb1d1466b94bebdbff14f8ae94dd3e4ea8738bb/cymem-2.0.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a0fbe19ce653cd688842d81e5819dc63f911a26e192ef30b0b89f0ab2b192ff2", size = 42621 }, + { url = "https://files.pythonhosted.org/packages/4a/d6/f7a19c63b48efc3f00a3ee8d69070ac90202e1e378f6cf81b8671f0cf762/cymem-2.0.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de72101dc0e6326f6a2f73e05a438d1f3c6110d41044236d0fbe62925091267d", size = 42249 }, + { url = "https://files.pythonhosted.org/packages/d7/60/cdc434239813eef547fb99b6d0bafe31178501702df9b77c4108c9a216f6/cymem-2.0.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee4395917f6588b8ac1699499128842768b391fe8896e8626950b4da5f9a406", size = 224758 }, + { url = "https://files.pythonhosted.org/packages/1d/68/8fa6efae17cd3b2ba9a2f83b824867c5b65b06f7aec3f8a0d0cabdeffb9b/cymem-2.0.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b02f2b17d760dc3fe5812737b1ce4f684641cdd751d67761d333a3b5ea97b83", size = 227995 }, + { url = "https://files.pythonhosted.org/packages/e4/f3/ceda70bf6447880140602285b7c6fa171cb7c78b623d35345cc32505cd06/cymem-2.0.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:04ee6b4041ddec24512d6e969ed6445e57917f01e73b9dabbe17b7e6b27fef05", size = 215325 }, + { url = 
"https://files.pythonhosted.org/packages/d3/47/6915eaa521e1ce7a0ba480eecb6870cb4f681bcd64ced88c2f0ed7a744b4/cymem-2.0.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e1048dae7e627ee25f22c87bb670b13e06bc0aecc114b89b959a798d487d1bf4", size = 216447 }, + { url = "https://files.pythonhosted.org/packages/7b/be/8e02bdd31e557f642741a06c8e886782ef78f0b00daffd681922dc9bbc88/cymem-2.0.11-cp312-cp312-win_amd64.whl", hash = "sha256:0c269c7a867d74adeb9db65fa1d226342aacf44d64b7931282f0b0eb22eb6275", size = 39283 }, + { url = "https://files.pythonhosted.org/packages/bd/90/b064e2677e27a35cf3605146abc3285d4f599cc1b6c18fc445ae876dd1e3/cymem-2.0.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4a311c82f743275c84f708df89ac5bf60ddefe4713d532000c887931e22941f", size = 42389 }, + { url = "https://files.pythonhosted.org/packages/fd/60/7aa0561a6c1f0d42643b02c4fdeb2a16181b0ff4e85d73d2d80c6689e92a/cymem-2.0.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:02ed92bead896cca36abad00502b14fa651bdf5d8319461126a2d5ac8c9674c5", size = 41948 }, + { url = "https://files.pythonhosted.org/packages/5f/4e/88a29cc5575374982e527b4ebcab3781bdc826ce693c6418a0f836544246/cymem-2.0.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44ddd3588379f8f376116384af99e3fb5f90091d90f520c341942618bf22f05e", size = 219382 }, + { url = "https://files.pythonhosted.org/packages/9b/3a/8f96e167e93b7f7ec105ed7b25c77bbf215d15bcbf4a24082cdc12234cd6/cymem-2.0.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87ec985623624bbd298762d8163fc194a096cb13282731a017e09ff8a60bb8b1", size = 222974 }, + { url = "https://files.pythonhosted.org/packages/6a/fc/ce016bb0c66a4776345fac7508fddec3b739b9dd4363094ac89cce048832/cymem-2.0.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3385a47285435848e0ed66cfd29b35f3ed8703218e2b17bd7a0c053822f26bf", size = 213426 }, + { url = 
"https://files.pythonhosted.org/packages/5c/c8/accf7cc768f751447a5050b14a195af46798bc22767ac25f49b02861b1eb/cymem-2.0.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5461e65340d6572eb64deadce79242a446a1d39cb7bf70fe7b7e007eb0d799b0", size = 219195 }, + { url = "https://files.pythonhosted.org/packages/74/65/c162fbac63e867a055240b6600b92ef96c0eb7a1895312ac53c4be93d056/cymem-2.0.11-cp313-cp313-win_amd64.whl", hash = "sha256:25da111adf425c29af0cfd9fecfec1c71c8d82e2244a85166830a0817a66ada7", size = 39090 }, +] + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -1052,15 +1096,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/5d/29b126e12df844432e188d19e74f47c2578fa5a72a122b4f41819e1e0923/fonttools-4.55.4-py3-none-any.whl", hash = "sha256:d07ad8f31038c6394a0945752458313367a0ef8125d284ee59f99e68393a3c2d", size = 1111964 }, ] -[[package]] -name = "fqdn" -version = "1.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/30/3e/a80a8c077fd798951169626cde3e239adeba7dab75deb3555716415bd9b0/fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", size = 6015 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014", size = 9121 }, -] - [[package]] name = "frozenlist" version = "1.5.0" @@ -1152,6 +1187,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/81/156ca48f950f833ddc392f8e3677ca50a18cb9d5db38ccb4ecea55a9303f/geomet-0.2.1.post1-py3-none-any.whl", hash = "sha256:a41a1e336b381416d6cbed7f1745c848e91defaa4d4c1bdc1312732e46ffad2b", size = 18462 }, ] +[[package]] +name = "ghp-import" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/d9/29/d40217cbe2f6b1359e00c6c307bb3fc876ba74068cbab3dde77f03ca0dc4/ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343", size = 10943 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/ec/67fbef5d497f86283db54c22eec6f6140243aae73265799baaaa19cd17fb/ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619", size = 11034 }, +] + [[package]] name = "gliner" version = "0.2.13" @@ -1202,9 +1249,12 @@ source = { editable = "packages/graph-rag-example-helpers" } dependencies = [ { name = "astrapy" }, { name = "backoff" }, + { name = "graph-retriever" }, + { name = "griffe" }, { name = "httpx" }, { name = "langchain-core" }, { name = "python-dotenv" }, + { name = "requests" }, { name = "simsimd" }, { name = "tqdm" }, ] @@ -1216,15 +1266,19 @@ dev = [ { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "types-requests" }, ] [package.metadata] requires-dist = [ { name = "astrapy", specifier = ">=1.5.2" }, { name = "backoff", specifier = ">=2.2.1" }, + { name = "graph-retriever", editable = "packages/graph-retriever" }, + { name = "griffe", specifier = ">=1.5.7" }, { name = "httpx", specifier = ">=0.28.1" }, { name = "langchain-core", specifier = ">=0.3.29" }, { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "requests", specifier = ">=2.32.3" }, { name = "simsimd", specifier = ">=6.2.1" }, { name = "tqdm", specifier = ">=4.67.1" }, ] @@ -1236,17 +1290,46 @@ dev = [ { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "pytest-cov", specifier = ">=4.0.0" }, + { name = "types-requests", specifier = ">=2.32.0.20241016" }, ] [[package]] name = "graph-retriever" version = "0.1.0" source = { editable = "packages/graph-retriever" } +dependencies = [ + { name = "immutabledict" }, + { name = "numpy" }, + { name = 
"pytest" }, + { name = "typing-extensions" }, +] + +[package.optional-dependencies] +simsimd = [ + { name = "simsimd" }, +] +testing = [ + { name = "pytest" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pytest" }, +] [package.metadata] +requires-dist = [ + { name = "immutabledict", specifier = ">=4.2.1" }, + { name = "numpy", specifier = ">=1.26.4" }, + { name = "pytest", specifier = ">=8.3.4" }, + { name = "pytest", marker = "extra == 'testing'", specifier = ">=8.3.4" }, + { name = "simsimd", marker = "extra == 'simsimd'", specifier = ">=6.2.1" }, + { name = "typing-extensions", specifier = ">=4.12.2" }, +] +provides-extras = ["simsimd", "testing"] [package.metadata.requires-dev] -dev = [] +dev = [{ name = "pytest", specifier = ">=8.3.4" }] [[package]] name = "graph-retriever-workspace" @@ -1255,6 +1338,8 @@ source = { virtual = "." } [package.dev-dependencies] dev = [ + { name = "nbmake" }, + { name = "nbstripout" }, { name = "poethepoet" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -1262,22 +1347,30 @@ dev = [ ] docs = [ { name = "graph-rag-example-helpers" }, + { name = "griffe" }, + { name = "griffe-inherited-docstrings" }, + { name = "griffe-pydantic" }, { name = "ipykernel" }, - { name = "jupyterlab" }, { name = "langchain-astradb" }, { name = "langchain-graph-retriever" }, { name = "langchain-openai" }, { name = "matplotlib" }, - { name = "nbclient" }, - { name = "nbformat" }, + { name = "mkdocs" }, + { name = "mkdocs-jupyter" }, + { name = "mkdocs-macros-plugin" }, + { name = "mkdocs-material", extra = ["imaging"] }, + { name = "mkdocs-table-reader-plugin" }, + { name = "mkdocstrings", extra = ["python"] }, { name = "pip" }, - { name = "quartodoc" }, + { name = "ruff" }, ] [package.metadata] [package.metadata.requires-dev] dev = [ + { name = "nbmake", specifier = ">=1.5.5" }, + { name = "nbstripout", specifier = ">=0.8.1" }, { name = "poethepoet", specifier = ">=0.32.1" }, { name = "pytest", specifier = ">=8.3.4" }, { name = 
"pytest-asyncio", specifier = ">=0.25.2" }, @@ -1285,16 +1378,22 @@ dev = [ ] docs = [ { name = "graph-rag-example-helpers", editable = "packages/graph-rag-example-helpers" }, + { name = "griffe", specifier = ">=1.5.5" }, + { name = "griffe-inherited-docstrings", specifier = ">=1.1.1" }, + { name = "griffe-pydantic", specifier = ">=1.1.0" }, { name = "ipykernel", specifier = ">=6.29.5" }, - { name = "jupyterlab", specifier = ">=4.3.4" }, - { name = "langchain-astradb", specifier = ">=0.5.2" }, + { name = "langchain-astradb", specifier = ">=0.5.3" }, { name = "langchain-graph-retriever", editable = "packages/langchain-graph-retriever" }, { name = "langchain-openai", specifier = ">=0.3.1" }, { name = "matplotlib", specifier = ">=3.10.0" }, - { name = "nbclient", specifier = ">=0.10.2" }, - { name = "nbformat", specifier = ">=5.10.4" }, + { name = "mkdocs", specifier = ">=1.6.1" }, + { name = "mkdocs-jupyter", specifier = ">=0.25.1" }, + { name = "mkdocs-macros-plugin", specifier = ">=1.3.7" }, + { name = "mkdocs-material", extras = ["imaging"], specifier = ">=9.6.2" }, + { name = "mkdocs-table-reader-plugin", specifier = ">=3.1.0" }, + { name = "mkdocstrings", extras = ["python"], specifier = ">=0.28.0" }, { name = "pip", specifier = ">=25.0" }, - { name = "quartodoc", specifier = ">=0.9.1" }, + { name = "ruff", specifier = ">=0.9.4" }, ] [[package]] @@ -1350,14 +1449,38 @@ wheels = [ [[package]] name = "griffe" -version = "1.5.5" +version = "1.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5c/74/cd35a98cb11f79de0581e8e1e6fbd738aeeed1f2d90e9b5106728b63f5f7/griffe-1.5.5.tar.gz", hash = "sha256:35ee5b38b93d6a839098aad0f92207e6ad6b70c3e8866c08ca669275b8cba585", size = 391124 } +sdist = { url = "https://files.pythonhosted.org/packages/a0/1a/d467b93f5e0ea4edf3c1caef44cfdd53a4a498cb3a6bb722df4dd0fdd66a/griffe-1.6.0.tar.gz", hash = 
"sha256:eb5758088b9c73ad61c7ac014f3cdfb4c57b5c2fcbfca69996584b702aefa354", size = 391819 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/02/5a22bc98d0aebb68c15ba70d2da1c84a5ef56048d79634e5f96cd2ba96e9/griffe-1.6.0-py3-none-any.whl", hash = "sha256:9f1dfe035d4715a244ed2050dfbceb05b1f470809ed4f6bb10ece5a7302f8dd1", size = 128470 }, +] + +[[package]] +name = "griffe-inherited-docstrings" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "griffe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7c/9f/098599019b2715e1edad3618305b8acf253e2ee375cbd389507cb23a2a00/griffe_inherited_docstrings-1.1.1.tar.gz", hash = "sha256:d179b6a6b7dc260fb892ad5b857837afd6f9de6193fc26d14463c4e9975a0cd3", size = 24146 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/f9/51a3fd7460b95583ff470c7b4fd706bd21f3fda97d521f3770126dc6d1fc/griffe_inherited_docstrings-1.1.1-py3-none-any.whl", hash = "sha256:0cb613ade70793b3589c706269a2cc4ceb91cbc4cfdc651037839cb9506eabe6", size = 6008 }, +] + +[[package]] +name = "griffe-pydantic" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "griffe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/6b/351be1b24e7afb1d5185095870f7a88403db6edd7af644c253b6671fc81b/griffe_pydantic-1.1.0.tar.gz", hash = "sha256:9c5a701cc485dab087857c1ac960b44671acee5008aaae0752f610b2aa82b068", size = 36997 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/88/52c9422bc853cd7c2b6122090e887d17b5fad29b67f930e4277c9c557357/griffe-1.5.5-py3-none-any.whl", hash = "sha256:2761b1e8876c6f1f9ab1af274df93ea6bbadd65090de5f38f4cb5cc84897c7dd", size = 128221 }, + { url = "https://files.pythonhosted.org/packages/9d/4d/a667697bbe0da8810470e71d1034932f3ec590e47719c9a0b7a3dc0e54cf/griffe_pydantic-1.1.0-py3-none-any.whl", hash = "sha256:ac9cc2d9b016cf302d8d9f577c9b3ca2793d88060f500d0b2a65f33a4a785cf1", size = 10284 }, ] 
[[package]] @@ -1426,6 +1549,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/e5/db6d438da759efbb488c4f3fbdab7764492ff3c3f953132efa6b9f0e9e53/h2-4.1.0-py3-none-any.whl", hash = "sha256:03a46bcf682256c95b5fd9e9a99c1323584c3eec6440d379b9903d709476bc6d", size = 57488 }, ] +[[package]] +name = "hjson" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/e5/0b56d723a76ca67abadbf7fb71609fb0ea7e6926e94fcca6c65a85b36a0e/hjson-3.1.0.tar.gz", hash = "sha256:55af475a27cf83a7969c808399d7bccdec8fb836a07ddbd574587593b9cdcf75", size = 40541 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/7f/13cd798d180af4bf4c0ceddeefba2b864a63c71645abc0308b768d67bb81/hjson-3.1.0-py3-none-any.whl", hash = "sha256:65713cdcf13214fb554eb8b4ef803419733f4f5e551047c9b711098ab7186b89", size = 54018 }, +] + [[package]] name = "hpack" version = "4.0.0" @@ -1561,6 +1693,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, ] +[[package]] +name = "immutabledict" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/c5/4240186fbabc58fba41bbe17c5f0cd37ffd4c0b85a5029ab104f946df175/immutabledict-4.2.1.tar.gz", hash = "sha256:d91017248981c72eb66c8ff9834e99c2f53562346f23e7f51e7a5ebcf66a3bcc", size = 6228 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/56/25ca7b848164b7d93dbd5fc97dd7751700c93e324fe854afbeb562ee2f98/immutabledict-4.2.1-py3-none-any.whl", hash = "sha256:c56a26ced38c236f79e74af3ccce53772827cef5c3bce7cab33ff2060f756373", size = 4700 }, +] + [[package]] name = "importlib-metadata" version = "8.5.0" @@ -1637,18 +1778,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/04/60/d0feb6b6d9fe4ab89fe8fe5b47cbf6cd936bfd9f1e7ffa9d0015425aeed6/ipython-8.31.0-py3-none-any.whl", hash = "sha256:46ec58f8d3d076a61d128fe517a51eb730e3aaf0c184ea8c17d16e366660c6a6", size = 821583 }, ] -[[package]] -name = "isoduration" -version = "20.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "arrow" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7c/1a/3c8edc664e06e6bd06cce40c6b22da5f1429aa4224d0c590f3be21c91ead/isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", size = 11649 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321 }, -] - [[package]] name = "jedi" version = "0.19.2" @@ -1741,15 +1870,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, ] -[[package]] -name = "json5" -version = "0.10.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049 }, -] - [[package]] name = "jsonpatch" version = "1.33" @@ -1786,18 +1906,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/69/4a/4f9dbeb84e8850557c02365a0eee0649abe5eb1d84af92a25731c6c0f922/jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566", size = 88462 }, ] -[package.optional-dependencies] -format-nongpl = [ - { name = "fqdn" }, - { name = "idna" }, - { name = "isoduration" }, - { name = "jsonpointer" }, - { name = "rfc3339-validator" }, - { name = "rfc3986-validator" }, - { name = "uri-template" }, - { name = "webcolors" }, -] - [[package]] name = "jsonschema-specifications" version = "2024.10.1" @@ -1840,104 +1948,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, ] -[[package]] -name = "jupyter-events" -version = "0.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jsonschema", extra = ["format-nongpl"] }, - { name = "python-json-logger" }, - { name = "pyyaml" }, - { name = "referencing" }, - { name = "rfc3339-validator" }, - { name = "rfc3986-validator" }, - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/65/5791c8a979b5646ca29ea50e42b6708908b789f7ff389d1a03c1b93a1c54/jupyter_events-0.11.0.tar.gz", hash = "sha256:c0bc56a37aac29c1fbc3bcfbddb8c8c49533f9cf11f1c4e6adadba936574ab90", size = 62039 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/8c/9b65cb2cd4ea32d885993d5542244641590530836802a2e8c7449a4c61c9/jupyter_events-0.11.0-py3-none-any.whl", hash = "sha256:36399b41ce1ca45fe8b8271067d6a140ffa54cec4028e95491c93b78a855cacf", size = 19423 }, -] - -[[package]] -name = "jupyter-lsp" -version = "2.2.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jupyter-server" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/85/b4/3200b0b09c12bc3b72d943d923323c398eff382d1dcc7c0dbc8b74630e40/jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001", size = 48741 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/e0/7bd7cff65594fd9936e2f9385701e44574fc7d721331ff676ce440b14100/jupyter_lsp-2.2.5-py3-none-any.whl", hash = "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da", size = 69146 }, -] - -[[package]] -name = "jupyter-server" -version = "2.15.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "argon2-cffi" }, - { name = "jinja2" }, - { name = "jupyter-client" }, - { name = "jupyter-core" }, - { name = "jupyter-events" }, - { name = "jupyter-server-terminals" }, - { name = "nbconvert" }, - { name = "nbformat" }, - { name = "overrides" }, - { name = "packaging" }, - { name = "prometheus-client" }, - { name = "pywinpty", marker = "os_name == 'nt'" }, - { name = "pyzmq" }, - { name = "send2trash" }, - { name = "terminado" }, - { name = "tornado" }, - { name = "traitlets" }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/61/8c/df09d4ab646141f130f9977b32b206ba8615d1969b2eba6a2e84b7f89137/jupyter_server-2.15.0.tar.gz", hash = "sha256:9d446b8697b4f7337a1b7cdcac40778babdd93ba614b6d68ab1c0c918f1c4084", size = 725227 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/a2/89eeaf0bb954a123a909859fa507fa86f96eb61b62dc30667b60dbd5fdaf/jupyter_server-2.15.0-py3-none-any.whl", hash = "sha256:872d989becf83517012ee669f09604aa4a28097c0bd90b2f424310156c2cdae3", size = 385826 }, -] - -[[package]] -name = "jupyter-server-terminals" -version = "0.5.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pywinpty", marker = "os_name == 'nt'" }, - { name = "terminado" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/fc/d5/562469734f476159e99a55426d697cbf8e7eb5efe89fb0e0b4f83a3d3459/jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269", size = 31430 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa", size = 13656 }, -] - -[[package]] -name = "jupyterlab" -version = "4.3.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "async-lru" }, - { name = "httpx" }, - { name = "ipykernel" }, - { name = "jinja2" }, - { name = "jupyter-core" }, - { name = "jupyter-lsp" }, - { name = "jupyter-server" }, - { name = "jupyterlab-server" }, - { name = "notebook-shim" }, - { name = "packaging" }, - { name = "setuptools" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "tornado" }, - { name = "traitlets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a7/45/1052f842e066902b1d78126df7e2269b1b9408991e1344e167b2e429f9e1/jupyterlab-4.3.4.tar.gz", hash = "sha256:f0bb9b09a04766e3423cccc2fc23169aa2ffedcdf8713e9e0fb33cac0b6859d0", size = 21797583 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/61/48/af57263e53cfc220e522de047aa0993f53bab734fe812af1e03e33ac6d7c/jupyterlab-4.3.4-py3-none-any.whl", hash = "sha256:b754c2601c5be6adf87cb5a1d8495d653ffb945f021939f77776acaa94dae952", size = 11665373 }, -] - [[package]] name = "jupyterlab-pygments" version = "0.3.0" @@ -1948,21 +1958,20 @@ wheels = [ ] [[package]] -name = "jupyterlab-server" -version = "2.27.3" +name = "jupytext" +version = "1.16.6" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "babel" }, - { name = "jinja2" }, - { name = "json5" }, - { name = "jsonschema" }, - { name = "jupyter-server" }, + { name = 
"markdown-it-py" }, + { name = "mdit-py-plugins" }, + { name = "nbformat" }, { name = "packaging" }, - { name = "requests" }, + { name = "pyyaml" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0a/c9/a883ce65eb27905ce77ace410d83587c82ea64dc85a48d1f7ed52bcfa68d/jupyterlab_server-2.27.3.tar.gz", hash = "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4", size = 76173 } +sdist = { url = "https://files.pythonhosted.org/packages/10/e7/58d6fd374e1065d2bccefd07953d2f1f911d8de03fd7dc33dd5a25ac659c/jupytext-1.16.6.tar.gz", hash = "sha256:dbd03f9263c34b737003f388fc069e9030834fb7136879c4c32c32473557baa0", size = 3726029 } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/09/2032e7d15c544a0e3cd831c51d77a8ca57f7555b2e1b2922142eddb02a84/jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4", size = 59700 }, + { url = "https://files.pythonhosted.org/packages/f4/02/27191f18564d4f2c0e543643aa94b54567de58f359cd6a3bed33adb723ac/jupytext-1.16.6-py3-none-any.whl", hash = "sha256:900132031f73fee15a1c9ebd862e05eb5f51e1ad6ab3a2c6fdd97ce2f9c913b4", size = 154200 }, ] [[package]] @@ -2113,16 +2122,16 @@ wheels = [ [[package]] name = "langchain-astradb" -version = "0.5.2" +version = "0.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astrapy" }, { name = "langchain-community" }, { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/0d/fbb4e5d04d6da0327781de39c6409f32b1c2c1eb9004dac9c638753f3d6d/langchain_astradb-0.5.2.tar.gz", hash = "sha256:ff70c70ec72d6bce7f0fdad5ac561d1c6d1372100fc1e62f135010418ff9a203", size = 50321 } +sdist = { url = "https://files.pythonhosted.org/packages/0f/3c/e8cd9e7a091e5c9f459856b6f7270ae56f8b84ccf953c16506f3f0c3a276/langchain_astradb-0.6.0.tar.gz", hash = 
"sha256:5254a85923aa3b0a5850277aa467c3a281f6bafd9b6e1454de6c66c4062ad6d8", size = 64975 } wheels = [ - { url = "https://files.pythonhosted.org/packages/78/4e/6a92293669504831a34661aeb897ac5475af1320a39019c9a76ed1215436/langchain_astradb-0.5.2-py3-none-any.whl", hash = "sha256:da1d58f82825bc9505fce300fa6acac59abffd41b4ff435523c81d8dda049e9f", size = 56711 }, + { url = "https://files.pythonhosted.org/packages/88/d3/b540e6639d8e4f0907c6b9c9226e8f92756bfaeff6b5be58eadb2aed3a72/langchain_astradb-0.6.0-py3-none-any.whl", hash = "sha256:dc5886b29e50f24d6ab5356fdda6aa8b3e5b8c718e2493c8f263d9ecef42d9b4", size = 69654 }, ] [[package]] @@ -2186,20 +2195,21 @@ name = "langchain-graph-retriever" version = "0.1.0" source = { editable = "packages/langchain-graph-retriever" } dependencies = [ + { name = "backoff" }, + { name = "graph-retriever" }, + { name = "immutabledict" }, { name = "langchain-core" }, { name = "networkx" }, - { name = "numpy" }, { name = "pydantic" }, { name = "typing-extensions" }, ] [package.optional-dependencies] astra = [ + { name = "astrapy" }, + { name = "httpx" }, { name = "langchain-astradb" }, ] -beautifulsoup4 = [ - { name = "beautifulsoup4" }, -] cassandra = [ { name = "cassio" }, { name = "langchain-community" }, @@ -2211,6 +2221,9 @@ chroma = [ gliner = [ { name = "gliner" }, ] +html = [ + { name = "beautifulsoup4" }, +] keybert = [ { name = "keybert" }, ] @@ -2218,8 +2231,8 @@ opensearch = [ { name = "langchain-community" }, { name = "opensearch-py" }, ] -simsimd = [ - { name = "simsimd" }, +spacy = [ + { name = "spacy" }, ] [package.dev-dependencies] @@ -2228,7 +2241,6 @@ dev = [ { name = "langchain-astradb" }, { name = "langchain-chroma" }, { name = "langchain-community" }, - { name = "langchain-opensearch" }, { name = "langchain-tests" }, { name = "mypy" }, { name = "networkx-stubs" }, @@ -2237,35 +2249,40 @@ dev = [ { name = "pytest-cov" }, { name = "simsimd" }, { name = "testcontainers" }, + { name = "types-beautifulsoup4" }, ] 
[package.metadata] requires-dist = [ - { name = "beautifulsoup4", marker = "extra == 'beautifulsoup4'", specifier = ">=4.12.3" }, + { name = "astrapy", marker = "extra == 'astra'", specifier = ">=2.0.0" }, + { name = "backoff", specifier = ">=2.2.1" }, + { name = "beautifulsoup4", marker = "extra == 'html'", specifier = ">=4.12.3" }, { name = "cassio", marker = "extra == 'cassandra'", specifier = ">=0.1.10" }, { name = "chromadb", marker = "extra == 'chroma'", specifier = ">=0.5.23" }, { name = "gliner", marker = "extra == 'gliner'", specifier = "==0.2.13" }, + { name = "graph-retriever", editable = "packages/graph-retriever" }, + { name = "httpx", marker = "extra == 'astra'", specifier = ">=0.28.1" }, + { name = "immutabledict", specifier = ">=4.2.1" }, { name = "keybert", marker = "extra == 'keybert'", specifier = ">=0.8.5" }, - { name = "langchain-astradb", marker = "extra == 'astra'", specifier = ">=0.5.2" }, + { name = "langchain-astradb", marker = "extra == 'astra'", specifier = ">=0.6.0" }, { name = "langchain-chroma", marker = "extra == 'chroma'", specifier = ">=0.2.0" }, { name = "langchain-community", marker = "extra == 'cassandra'", specifier = ">=0.3.14" }, { name = "langchain-community", marker = "extra == 'opensearch'", specifier = ">=0.3.14" }, { name = "langchain-core", specifier = ">=0.3.29" }, { name = "networkx", specifier = ">=3.4.2" }, - { name = "numpy", specifier = ">=1.26.4" }, { name = "opensearch-py", marker = "extra == 'opensearch'", specifier = ">=2.8.0" }, { name = "pydantic", specifier = ">=2.10.4" }, - { name = "simsimd", marker = "extra == 'simsimd'", specifier = ">=6.2.1" }, + { name = "spacy", marker = "extra == 'spacy'", specifier = ">=3.8.4" }, { name = "typing-extensions", specifier = ">=4.12.2" }, ] +provides-extras = ["astra", "html", "cassandra", "chroma", "gliner", "keybert", "opensearch", "spacy"] [package.metadata.requires-dev] dev = [ { name = "cassio", specifier = ">=0.1.10" }, - { name = "langchain-astradb", specifier = 
">=0.5.2" }, + { name = "langchain-astradb", specifier = ">=0.5.3" }, { name = "langchain-chroma", specifier = ">=0.2.0" }, { name = "langchain-community", specifier = ">=0.3.14" }, - { name = "langchain-opensearch", specifier = ">=0.0.2" }, { name = "langchain-tests", specifier = ">=0.3.8" }, { name = "mypy", specifier = ">=1.14.1" }, { name = "networkx-stubs", specifier = ">=0.0.1" }, @@ -2274,6 +2291,7 @@ dev = [ { name = "pytest-cov", specifier = ">=4.0.0" }, { name = "simsimd", specifier = ">=6.2.1" }, { name = "testcontainers", specifier = ">=4.9.0" }, + { name = "types-beautifulsoup4", specifier = ">=4.12.0.20250204" }, ] [[package]] @@ -2290,22 +2308,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/9e/388aaa4a727e86c3f61238df059a0d438de1a68ee369957182436d976a8a/langchain_openai-0.3.1-py3-none-any.whl", hash = "sha256:5cf2a1e115b12570158d89c22832fa381803c3e1e11d1eb781195c8d9e454bd5", size = 54311 }, ] -[[package]] -name = "langchain-opensearch" -version = "0.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "langchain-community" }, - { name = "langchain-core" }, - { name = "langchain-text-splitters" }, - { name = "opensearch-py" }, - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5b/b6/2889c77bedc21d06b17f4bdddf96bc7d222258bfa9bd88b82891769745ba/langchain_opensearch-0.0.2.tar.gz", hash = "sha256:26d04175e85dab48af8761c154e4942695e0cf731a6d6676e1e9ba9da51a28c6", size = 3382 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/1a/9e7c1c13fe209c48f6ac5ac1efe5d3ae2e6422d2c46f9f237ad4a1e23dac/langchain_opensearch-0.0.2-py3-none-any.whl", hash = "sha256:16432ecd6c78232433fd8067e3bf91ba3365ca9cbf6bfac97c55f71eb0eef943", size = 4472 }, -] - [[package]] name = "langchain-tests" version = "0.3.8" @@ -2336,6 +2338,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/4b/83/f8081c3bea416bd9d9f0c26af795c74f42c24f9ad3c4fbf361b7d69de134/langchain_text_splitters-0.3.5-py3-none-any.whl", hash = "sha256:8c9b059827438c5fa8f327b4df857e307828a5ec815163c9b5c9569a3e82c8ee", size = 31620 }, ] +[[package]] +name = "langcodes" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "language-data" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/7a/5a97e327063409a5caa21541e6d08ae4a0f2da328447e9f2c7b39e179226/langcodes-3.5.0.tar.gz", hash = "sha256:1eef8168d07e51e131a2497ffecad4b663f6208e7c3ae3b8dc15c51734a6f801", size = 191030 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/6b/068c2ea7a712bf805c62445bd9e9c06d7340358ef2824150eceac027444b/langcodes-3.5.0-py3-none-any.whl", hash = "sha256:853c69d1a35e0e13da2f427bb68fb2fa4a8f4fb899e0c62ad8df8d073dcfed33", size = 182974 }, +] + [[package]] name = "langsmith" version = "0.2.10" @@ -2352,6 +2366,82 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/91/e72d13f6b57a0ea9d884ab1d3388f544d7fe3354dbe1d4dd67678693a9fd/langsmith-0.2.10-py3-none-any.whl", hash = "sha256:b02f2f174189ff72e54c88b1aa63343defd6f0f676c396a690c63a4b6495dcc2", size = 326432 }, ] +[[package]] +name = "language-data" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "marisa-trie" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/ce/3f144716a9f2cbf42aa86ebc8b085a184be25c80aa453eea17c294d239c1/language_data-1.3.0.tar.gz", hash = "sha256:7600ef8aa39555145d06c89f0c324bf7dab834ea0b0a439d8243762e3ebad7ec", size = 5129310 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/e9/5a5ffd9b286db82be70d677d0a91e4d58f7912bb8dd026ddeeb4abe70679/language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf", size = 5385760 }, +] + +[[package]] +name = "marisa-trie" +version = 
"1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/15/9d9743897e4450b2de199ee673b50cb018980c4ced477d41cf91304a85e3/marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d", size = 416124 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/83/ccf5b33f2123f3110705c608f8e0caa82002626511aafafc58f82e50d322/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a2eb41d2f9114d8b7bd66772c237111e00d2bae2260824560eaa0a1e291ce9e8", size = 362200 }, + { url = "https://files.pythonhosted.org/packages/9d/74/f7ce1fc2ee480c7f8ceadd9b992caceaba442a97e5e99d6aea00d3635a0b/marisa_trie-1.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9e956e6a46f604b17d570901e66f5214fb6f658c21e5e7665deace236793cef6", size = 192309 }, + { url = "https://files.pythonhosted.org/packages/e4/52/5dbbc13e57ce54c2ef0d04962d7d8f66edc69ed34310c734a2913199a581/marisa_trie-1.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bd45142501300e7538b2e544905580918b67b1c82abed1275fe4c682c95635fa", size = 174713 }, + { url = "https://files.pythonhosted.org/packages/57/49/2580372f3f980aea95c23d05b2c1d3bbb9ee1ab8cfd441545153e44f1be7/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8443d116c612cfd1961fbf76769faf0561a46d8e317315dd13f9d9639ad500c", size = 1314808 }, + { url = "https://files.pythonhosted.org/packages/5a/ba/e12a4d450f265414cc68df6a116a78beece72b95f774f04d29cd48e08d19/marisa_trie-1.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:875a6248e60fbb48d947b574ffa4170f34981f9e579bde960d0f9a49ea393ecc", size = 1346678 }, + { url = 
"https://files.pythonhosted.org/packages/b2/81/8e130cb1eea741fd17694d821096f7ec9841f0e3d3c69b740257f5eeafa8/marisa_trie-1.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:746a7c60a17fccd3cfcfd4326926f02ea4fcdfc25d513411a0c4fc8e4a1ca51f", size = 1307254 }, + { url = "https://files.pythonhosted.org/packages/d7/d0/3deb5ea2bf7e4d845339875dbb31f3c3f66c8d6568723db1d137fb08a91c/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e70869737cc0e5bd903f620667da6c330d6737048d1f44db792a6af68a1d35be", size = 2194712 }, + { url = "https://files.pythonhosted.org/packages/9c/5f/b38d728dd30954816497b53425cfaddaf7b93ac0912db5911888f191b07a/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06b099dd743676dbcd8abd8465ceac8f6d97d8bfaabe2c83b965495523b4cef2", size = 2355625 }, + { url = "https://files.pythonhosted.org/packages/7e/4f/61c0faa9ae9e53600a1b7a0c367bc9db1a4fdc625402ec232c755a05e094/marisa_trie-1.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d2a82eb21afdaf22b50d9b996472305c05ca67fc4ff5a026a220320c9c961db6", size = 2290290 }, + { url = "https://files.pythonhosted.org/packages/7c/7d/713b970fb3043248881ed776dbf4d54918398aa5dde843a38711d0d62c8f/marisa_trie-1.2.1-cp310-cp310-win32.whl", hash = "sha256:8951e7ce5d3167fbd085703b4cbb3f47948ed66826bef9a2173c379508776cf5", size = 130743 }, + { url = "https://files.pythonhosted.org/packages/cc/94/3d619cc82c30daeacd18a88674f4e6540ebfb7b4b7752ca0552793be80cf/marisa_trie-1.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:5685a14b3099b1422c4f59fa38b0bf4b5342ee6cc38ae57df9666a0b28eeaad3", size = 151891 }, + { url = "https://files.pythonhosted.org/packages/4a/93/ffb01dfa22b6eee918e798e0bc3487427036c608aa4c065725f31aaf4104/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98", size = 362823 }, + { url = 
"https://files.pythonhosted.org/packages/6d/1d/5c36500ac350c278c9bdfd88e17fa846fa4136d75597c167141ed973cdf2/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3", size = 192741 }, + { url = "https://files.pythonhosted.org/packages/e8/04/87dd0840f3f720e511eba56193c02bf64d7d96df1ca9f6d19994f55154be/marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281", size = 174995 }, + { url = "https://files.pythonhosted.org/packages/c9/51/9e903a7e13b7593e2e675d0ec4c390ca076dc5df1c1a0d5e85a513b886a3/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d", size = 1384728 }, + { url = "https://files.pythonhosted.org/packages/e8/3f/7362a5ac60c2b0aad0f52cd57e7bd0c708f20d2660d8df85360f3d8f1c4b/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de", size = 1412620 }, + { url = "https://files.pythonhosted.org/packages/1f/bc/aaa3eaf6875f78a204a8da9692d56e3a36f89997dad2c388628385614576/marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509", size = 1361555 }, + { url = "https://files.pythonhosted.org/packages/18/98/e11b5a6206c5d110f32adab37fa84a85410d684e9c731acdd5c9250e2ce4/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba", size = 2257717 }, + { url = "https://files.pythonhosted.org/packages/d2/9d/6b4a40867875e738a67c5b29f83e2e490a66bd9067ace3dd9a5c497e2b7f/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4", size = 
2417044 }, + { url = "https://files.pythonhosted.org/packages/fe/61/e25613c72f2931757334b8bcf6b501569ef713f5ee9c6c7688ec460bd720/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a", size = 2351960 }, + { url = "https://files.pythonhosted.org/packages/19/0a/a90ccaf3eb476d13ec261f80c6c52defaf10ebc7f35eb2bcd7dfb533aef7/marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571", size = 130446 }, + { url = "https://files.pythonhosted.org/packages/fc/98/574b4e143e0a2f5f71af8716b6c4a8a46220f75a6e0847ce7d11ee0ba4aa/marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b", size = 152037 }, + { url = "https://files.pythonhosted.org/packages/4e/bf/8bd4ac8436b33fd46c9e1ffe3c2a131cd9744cc1649dbbe13308f744ef2b/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec", size = 360041 }, + { url = "https://files.pythonhosted.org/packages/ab/dd/4d3151e302e66ae387885f6ec265bd189e096b0c43c1379bfd9a3b9d2543/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4", size = 190520 }, + { url = "https://files.pythonhosted.org/packages/00/28/ae5991c74fb90b173167a366a634c83445f948ad044d37287b478d6b457e/marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa", size = 174175 }, + { url = "https://files.pythonhosted.org/packages/5a/6a/fbfa89a8680eaabc6847a6c421e65427c43182db0c4bdb60e1516c81c822/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2a7d00f53f4945320b551bccb826b3fb26948bde1a10d50bb9802fabb611b10", size = 1354995 }, + { url = 
"https://files.pythonhosted.org/packages/9e/4c/2ba0b385e5f64ca4ddb0c10ec52ddf881bc4521f135948786fc339d1d6c8/marisa_trie-1.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98042040d1d6085792e8d0f74004fc0f5f9ca6091c298f593dd81a22a4643854", size = 1390989 }, + { url = "https://files.pythonhosted.org/packages/6b/22/0791ed3045c91d0938345a86be472fc7c188b894f16c5dfad2ef31e7f882/marisa_trie-1.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6532615111eec2c79e711965ece0bc95adac1ff547a7fff5ffca525463116deb", size = 1328810 }, + { url = "https://files.pythonhosted.org/packages/9d/7d/3f566e563abae6efce7fc311c63282a447c611739b3cd66c0e36077c86f8/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:20948e40ab2038e62b7000ca6b4a913bc16c91a2c2e6da501bd1f917eeb28d51", size = 2230222 }, + { url = "https://files.pythonhosted.org/packages/a5/0b/38fbb4611b5d1030242ddc2aa62e524438c8076e26f87395dbbf222dc62d/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:66b23e5b35dd547f85bf98db7c749bc0ffc57916ade2534a6bbc32db9a4abc44", size = 2383620 }, + { url = "https://files.pythonhosted.org/packages/ae/17/4553c63de29904d5d2521a24cad817bc7883cfa90506ab702ec4dae59a7b/marisa_trie-1.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6704adf0247d2dda42e876b793be40775dff46624309ad99bc7537098bee106d", size = 2329202 }, + { url = "https://files.pythonhosted.org/packages/45/08/6307a630e63cd763fe77ac56516faa67fa9cd342060691e40fabc84be6b0/marisa_trie-1.2.1-cp312-cp312-win32.whl", hash = "sha256:3ad356442c2fea4c2a6f514738ddf213d23930f942299a2b2c05df464a00848a", size = 129652 }, + { url = "https://files.pythonhosted.org/packages/a1/fe/67c357bfd92710d95a16b86e1453c663d565415d7f7838781c79ff7e1a7e/marisa_trie-1.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:f2806f75817392cedcacb24ac5d80b0350dde8d3861d67d045c1d9b109764114", size = 150845 }, + { url = 
"https://files.pythonhosted.org/packages/2a/a4/a110cd9952f0e72da7bafea1f0084b18b9e03952110d9083bfda52279f5c/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:b5ea16e69bfda0ac028c921b58de1a4aaf83d43934892977368579cd3c0a2554", size = 354439 }, + { url = "https://files.pythonhosted.org/packages/3c/a5/a6099eb1c3fd8d7e93408c45501e1d08536ac57dfef02ec331f78e1ace18/marisa_trie-1.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f627f4e41be710b6cb6ed54b0128b229ac9d50e2054d9cde3af0fef277c23cf", size = 188187 }, + { url = "https://files.pythonhosted.org/packages/7c/cc/f637127e2beffa920d21f7fc45b4029575bcd1b28a90c0d90cb2b08c2205/marisa_trie-1.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5e649f3dc8ab5476732094f2828cc90cac3be7c79bc0c8318b6fda0c1d248db4", size = 171484 }, + { url = "https://files.pythonhosted.org/packages/6d/0f/29f2ad7260b956570f69f25a542efa51ba76eb76ecd53c63ee9d21987c3d/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46e528ee71808c961baf8c3ce1c46a8337ec7a96cc55389d11baafe5b632f8e9", size = 1319770 }, + { url = "https://files.pythonhosted.org/packages/f2/12/0b69ed61fba59551a5f3d569af367afae614db7214ce1da12946ba9a433a/marisa_trie-1.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36aa4401a1180615f74d575571a6550081d84fc6461e9aefc0bb7b2427af098e", size = 1356488 }, + { url = "https://files.pythonhosted.org/packages/33/23/483b110db7ffe8729d6ebea2bf74258aef51f10fef5775f99e4bac7aef69/marisa_trie-1.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce59bcd2cda9bb52b0e90cc7f36413cd86c3d0ce7224143447424aafb9f4aa48", size = 1302334 }, + { url = "https://files.pythonhosted.org/packages/1c/6f/46c2be99ce925985127fdf78900f1673bce8cb72debfebee6dccd11032c6/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f4cd800704a5fc57e53c39c3a6b0c9b1519ebdbcb644ede3ee67a06eb542697d", size = 
2202624 }, + { url = "https://files.pythonhosted.org/packages/fd/b6/ef642327dbd4ec35be55d5682520b8f70fca98a54024f441ef2732f6b305/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2428b495003c189695fb91ceeb499f9fcced3a2dce853e17fa475519433c67ff", size = 2364206 }, + { url = "https://files.pythonhosted.org/packages/69/04/ef8197a79d0ab5043b781cc9b457bd11b81d4204fe78adf7625a67f48c21/marisa_trie-1.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:735c363d9aaac82eaf516a28f7c6b95084c2e176d8231c87328dc80e112a9afa", size = 2304801 }, + { url = "https://files.pythonhosted.org/packages/03/72/f87564d653daf31d8f33d9bf0121e99ccc21f18f5c485fb404ba06abc10e/marisa_trie-1.2.1-cp313-cp313-win32.whl", hash = "sha256:eba6ca45500ca1a042466a0684aacc9838e7f20fe2605521ee19f2853062798f", size = 128799 }, + { url = "https://files.pythonhosted.org/packages/27/40/5f9eb8b73030cc4b0d6817176e66079a62a2ddd9d5530da54f8011473428/marisa_trie-1.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:aa7cd17e1c690ce96c538b2f4aae003d9a498e65067dd433c52dd069009951d4", size = 149035 }, +] + +[[package]] +name = "markdown" +version = "3.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/28/3af612670f82f4c056911fbbbb42760255801b3068c48de792d354ff4472/markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2", size = 357086 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/08/83871f3c50fc983b88547c196d11cf8c3340e37c32d2e9d6152abe2c61f7/Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803", size = 106349 }, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -2487,36 +2577,236 @@ wheels = [ ] [[package]] -name = "matplotlib-inline" -version = "0.1.7" +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist 
= { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316 }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "mergedeep" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3a/41/580bb4006e3ed0361b8151a01d324fb03f420815446c7def45d02f74c270/mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8", size = 4661 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/19/04f9b178c2d8a15b076c8b5140708fa6ffc5601fb6f1e975537072df5b2a/mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307", size = 6354 }, +] + +[[package]] +name = "mistune" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/79/6e/96fc7cb3288666c5de2c396eb0e338dc95f7a8e4920e43e38783a22d0084/mistune-3.1.0.tar.gz", hash = "sha256:dbcac2f78292b9dc066cd03b7a3a26b62d85f8159f2ea5fd28e55df79908d667", size = 94401 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/b3/743ffc3f59da380da504d84ccd1faf9a857a1445991ff19bf2ec754163c2/mistune-3.1.0-py3-none-any.whl", hash = "sha256:b05198cf6d671b3deba6c87ec6cf0d4eb7b72c524636eddb6dbf13823b52cee1", size = 53694 }, +] + +[[package]] +name = "mkdocs" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "ghp-import" }, + { name = "jinja2" }, + { name = "markdown" }, + { name = "markupsafe" }, + { name = "mergedeep" }, + { name = "mkdocs-get-deps" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "pyyaml" }, + { name = "pyyaml-env-tag" }, + { name = "watchdog" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/c6/bbd4f061bd16b378247f12953ffcb04786a618ce5e904b8c5a01a0309061/mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2", size = 3889159 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/22/5b/dbc6a8cddc9cfa9c4971d59fb12bb8d42e161b7e7f8cc89e49137c5b279c/mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e", size = 3864451 }, +] + +[[package]] +name = "mkdocs-autorefs" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown" }, + { name = "markupsafe" }, + { name = "mkdocs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/18/fb1e17fb705228b51bf7b2f791adaf83c0fa708e51bbc003411ba48ae21e/mkdocs_autorefs-1.3.0.tar.gz", hash = "sha256:6867764c099ace9025d6ac24fd07b85a98335fbd30107ef01053697c8f46db61", size = 42597 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/4a/960c441950f98becfa5dd419adab20274939fd575ab848aee2c87e3599ac/mkdocs_autorefs-1.3.0-py3-none-any.whl", hash = "sha256:d180f9778a04e78b7134e31418f238bba56f56d6a8af97873946ff661befffb3", size = 17642 }, +] + +[[package]] +name = "mkdocs-get-deps" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mergedeep" }, + { name = "platformdirs" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/f5/ed29cd50067784976f25ed0ed6fcd3c2ce9eb90650aa3b2796ddf7b6870b/mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c", size = 10239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/d4/029f984e8d3f3b6b726bd33cafc473b75e9e44c0f7e80a5b29abc466bdea/mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134", size = 9521 }, +] + +[[package]] +name = "mkdocs-jupyter" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ipykernel" }, + { name = "jupytext" }, + { name = "mkdocs" }, + { name = "mkdocs-material" }, + { name = "nbconvert" }, + { name = "pygments" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/6c/23/6ffb8d2fd2117aa860a04c6fe2510b21bc3c3c085907ffdd851caba53152/mkdocs_jupyter-0.25.1.tar.gz", hash = "sha256:0e9272ff4947e0ec683c92423a4bfb42a26477c103ab1a6ab8277e2dcc8f7afe", size = 1626747 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/37/5f1fd5c3f6954b3256f8126275e62af493b96fb6aef6c0dbc4ee326032ad/mkdocs_jupyter-0.25.1-py3-none-any.whl", hash = "sha256:3f679a857609885d322880e72533ef5255561bbfdb13cfee2a1e92ef4d4ad8d8", size = 1456197 }, +] + +[[package]] +name = "mkdocs-macros-plugin" +version = "1.3.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hjson" }, + { name = "jinja2" }, + { name = "mkdocs" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "python-dateutil" }, + { name = "pyyaml" }, + { name = "super-collections" }, + { name = "termcolor" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/65/61a746c56788867221aebf07fe4b6b4c08ac99cf341fd51d728c89d1456e/mkdocs_macros_plugin-1.3.7.tar.gz", hash = "sha256:17c7fd1a49b94defcdb502fd453d17a1e730f8836523379d21292eb2be4cb523", size = 33466 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/cf/f03331298ee50a4da6fb72ccec79078041158c1f8b5fc24835c1be42232e/mkdocs_macros_plugin-1.3.7-py3-none-any.whl", hash = "sha256:02432033a5b77fb247d6ec7924e72fc4ceec264165b1644ab8d0dc159c22ce59", size = 37799 }, +] + +[[package]] +name = "mkdocs-material" +version = "9.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "traitlets" }, + { name = "babel" }, + { name = "colorama" }, + { name = "jinja2" }, + { name = "markdown" }, + { name = "mkdocs" }, + { name = "mkdocs-material-extensions" }, + { name = "paginate" }, + { name = "pygments" }, + { name = "pymdown-extensions" }, + { name = "regex" }, + { name = "requests" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +sdist = { url = "https://files.pythonhosted.org/packages/c9/75/fb8f772d4acf5439a446aedbe6e49b4c42a4bc4f8c866c930a7b0c3be2f8/mkdocs_material-9.6.2.tar.gz", hash = "sha256:a3de1c5d4c745f10afa78b1a02f917b9dce0808fb206adc0f5bb48b58c1ca21f", size = 3942567 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, + { url = "https://files.pythonhosted.org/packages/d1/17/b97aa245d43933acd416361d4f34612baec8ad4a6337339d45448cde728d/mkdocs_material-9.6.2-py3-none-any.whl", hash = "sha256:71d90dbd63b393ad11a4d90151dfe3dcbfcd802c0f29ce80bebd9bbac6abc753", size = 8688648 }, +] + +[package.optional-dependencies] +imaging = [ + { name = "cairosvg" }, + { name = "pillow" }, ] [[package]] -name = "mdurl" -version = "0.1.2" +name = "mkdocs-material-extensions" +version = "1.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } +sdist = { url = "https://files.pythonhosted.org/packages/79/9b/9b4c96d6593b2a541e1cb8b34899a6d021d208bb357042823d4d2cabdbe7/mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443", size = 11847 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = 
"sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, + { url = "https://files.pythonhosted.org/packages/5b/54/662a4743aa81d9582ee9339d4ffa3c8fd40a4965e033d77b9da9774d3960/mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31", size = 8728 }, ] [[package]] -name = "mistune" +name = "mkdocs-table-reader-plugin" version = "3.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "mkdocs" }, + { name = "pandas" }, + { name = "pyyaml" }, + { name = "tabulate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/1b/ca35e4b51a1814924153f7c8afa5a9c2f961688a9c275fa9f4afe7f5083a/mkdocs_table_reader_plugin-3.1.0.tar.gz", hash = "sha256:eb15688ee8c0cd1a842f506f18973b87be22bd7baa5e2e551089de6b7f9ec25b", size = 12510 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/6f/dcc966874f74f8580b99d2ffecbdc85dfd00c4a5039fedbee4ddd7fc8c7f/mkdocs_table_reader_plugin-3.1.0-py3-none-any.whl", hash = "sha256:50a1302661c14d96b90ba0434ae96110441e0c653ce23559e3c6911fe79e7bd2", size = 10564 }, +] + +[[package]] +name = "mkdocstrings" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "markdown" }, + { name = "markupsafe" }, + { name = "mkdocs" }, + { name = "mkdocs-autorefs" }, + { name = "mkdocs-get-deps" }, + { name = "pymdown-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/86/4b/70522427768a4637ffac376140f362dc3d159364fb64e698667e51053d57/mkdocstrings-0.28.0.tar.gz", hash = "sha256:df20afef1eafe36ba466ae20732509ecb74237653a585f5061937e54b553b4e0", size = 3392797 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/c3/e5a319d4de0867c1b59ff22abb93bf898f9812e934ab75dcf7fe94e85bb6/mkdocstrings-0.28.0-py3-none-any.whl", hash = "sha256:84cf3dc910614781fe0fee46ce8006fde7df6cc7cca2e3f799895fb8a9170b39", size = 
4700952 }, +] + +[package.optional-dependencies] +python = [ + { name = "mkdocstrings-python" }, +] + +[[package]] +name = "mkdocstrings-python" +version = "1.14.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "griffe" }, + { name = "mkdocs-autorefs" }, + { name = "mkdocstrings" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/79/6e/96fc7cb3288666c5de2c396eb0e338dc95f7a8e4920e43e38783a22d0084/mistune-3.1.0.tar.gz", hash = "sha256:dbcac2f78292b9dc066cd03b7a3a26b62d85f8159f2ea5fd28e55df79908d667", size = 94401 } +sdist = { url = "https://files.pythonhosted.org/packages/4e/00/75f8badeca7bcc06dd2ca0a09b98998b228beb2109f6dd4e9155ea6a6cc7/mkdocstrings_python-1.14.5.tar.gz", hash = "sha256:8582eeac8cce952f395d76ec636fc814757cba7d8458aa75ba0529a3aa10d98c", size = 421738 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/b3/743ffc3f59da380da504d84ccd1faf9a857a1445991ff19bf2ec754163c2/mistune-3.1.0-py3-none-any.whl", hash = "sha256:b05198cf6d671b3deba6c87ec6cf0d4eb7b72c524636eddb6dbf13823b52cee1", size = 53694 }, + { url = "https://files.pythonhosted.org/packages/17/1e/c970d43d2dc844b7dfabb5daf24bc1c8ffdb40c56e3ec65d6dc78879ce16/mkdocstrings_python-1.14.5-py3-none-any.whl", hash = "sha256:ac394f273ae298aeaa6be4506768f05e61bd7c8119437ea98553354b1185c469", size = 448584 }, ] [[package]] @@ -2681,6 +2971,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/99/b7/b9e70fde2c0f0c9af4cc5277782a89b66d35948ea3369ec9f598358c3ac5/multidict-6.1.0-py3-none-any.whl", hash = "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506", size = 10051 }, ] +[[package]] +name = "murmurhash" +version = "1.0.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/64/d9/e7c6a7d4e9b5320c17e54af6f9edd2f521c6f86bbbb72aba571f641a9793/murmurhash-1.0.12.tar.gz", hash = 
"sha256:467b7ee31c1f79f46d00436a1957fc52a0e5801369dd2f30eb7655f380735b5f", size = 13233 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/4c/bc0a79c7b0ebec63256ac547e2cecbae73badcd26e874231ff901665e8fc/murmurhash-1.0.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3f492bbf6f879b6eaf9da4be7471f4b68a3e3ae525aac0f35c2ae27ec91265c", size = 26857 }, + { url = "https://files.pythonhosted.org/packages/2c/dc/824bd5cf239d6b6997f83dd94c4a99a48f5f2a6267174cf191ddb844f997/murmurhash-1.0.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3493e0c10a64fa72026af2ea2271d8b3511a438de3c6a771b7a57771611b9c08", size = 26974 }, + { url = "https://files.pythonhosted.org/packages/51/b2/67f4e99f9b577187ec1376ff37478da87b88f2f8092c1f1351b18cb29fc6/murmurhash-1.0.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95989ddbb187b9934e5b0e7f450793a445814b6c293a7bf92df56913c3a87c1e", size = 126364 }, + { url = "https://files.pythonhosted.org/packages/4e/10/c7efbc91842ec6d519296129071cc55ba50d5e120e796dac536e4c299dc3/murmurhash-1.0.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efef9f9aad98ec915a830f0c53d14ce6807ccc6e14fd2966565ef0b71cfa086", size = 124315 }, + { url = "https://files.pythonhosted.org/packages/92/87/dc7dbca647909721006405b8f956628dfbd2fd4f7701f6dfcd5e40f29b4f/murmurhash-1.0.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b3147d171a5e5d2953b5eead21d15ea59b424844b4504a692c4b9629191148ed", size = 120355 }, + { url = "https://files.pythonhosted.org/packages/ff/36/cf13614b4bb62a956c62a3d8cd81fb4e0dd35e982dd7d7d028522ba4d9d9/murmurhash-1.0.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:736c869bef5023540dde52a9338085ac823eda3f09591ba1b4ed2c09c8b378db", size = 119979 }, + { url = "https://files.pythonhosted.org/packages/05/b9/06bfba06b9fb4855db2cbbeb72eac7a879209c2b989b3de5d3383c49ca04/murmurhash-1.0.12-cp310-cp310-win_amd64.whl", hash = 
"sha256:b81feb5bfd13bce638ccf910c685b04ad0537635918d04c83b291ce0441776da", size = 25373 }, + { url = "https://files.pythonhosted.org/packages/d3/f4/0208624de330224f3a8981c030007fc4a3583ca6b4d4dd3275364c1d06e6/murmurhash-1.0.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8b236b76a256690e745b63b679892878ec4f01deeeda8d311482a9b183d2d452", size = 26793 }, + { url = "https://files.pythonhosted.org/packages/2f/a4/a387486e79bcc04f3d3b123195fd4cca74a7ba439d6c45b35c5366c66586/murmurhash-1.0.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8bc3756dd657ed90c1354705e66513c11516929fe726e7bc91c79734d190f394", size = 26884 }, + { url = "https://files.pythonhosted.org/packages/9f/38/ec45a33c519feb802cdf0fe9dd1b1e6c15897c43d29c738eaae61da8ae5d/murmurhash-1.0.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd41e4c3d7936b69010d76e5edff363bf40fd918d86287a14e924363d7828522", size = 136101 }, + { url = "https://files.pythonhosted.org/packages/0b/d5/6f1b561d8b14ef01d28d9cec278870bec01d8a569cfbc694e68ac05a5615/murmurhash-1.0.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36be2831df750163495e471d24aeef6aca1b2a3c4dfb05f40114859db47ff3f2", size = 134309 }, + { url = "https://files.pythonhosted.org/packages/e8/78/2df6cdce439f6b8509d7947b8c47e7fe2589671899eb6399f4e2f602fe1f/murmurhash-1.0.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b078c10f9c82cbd144b1200061fbfa7f99af9d5d8d7f7d8a324370169e3da7c2", size = 131134 }, + { url = "https://files.pythonhosted.org/packages/43/0b/f0a5a622c505786d3d1dc1ad3e7f6b6fbfcae2665b205e07b3882185c39f/murmurhash-1.0.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:307ca8da5f038635ded9de722fe11f07f06a2b76442ae272dcccbff6086de487", size = 128630 }, + { url = "https://files.pythonhosted.org/packages/de/30/ceb9217cdba72bc0bf8466e373e12e5a42945cc85eda0a7c479e319e07ae/murmurhash-1.0.12-cp311-cp311-win_amd64.whl", hash = 
"sha256:1b4ab5ba5ba909959659989f3bf57903f31f49906fe40f00aec81e32eea69a88", size = 25417 }, + { url = "https://files.pythonhosted.org/packages/38/c7/0dc2914c24adb9466b69606dfdee7bbfed13476f4dda3753e0185cfbbe1f/murmurhash-1.0.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1a4c97c8ffbedb62b760c3c2f77b5b8cb0e0ac0ec83a74d2f289e113e3e92ed5", size = 27120 }, + { url = "https://files.pythonhosted.org/packages/ae/d7/aea56101f225eb021cfd47245d55680605665b556aba95eecee937b4d4d6/murmurhash-1.0.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9574f0b634f059158bb89734a811e435ac9ad2335c02a7abb59f1875dcce244c", size = 27081 }, + { url = "https://files.pythonhosted.org/packages/f4/68/4b723e0f318e92b0b4779f41ff5d9446e1dc0e68aca2f0043e1fab3fc1be/murmurhash-1.0.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:701cc0ce91809b4d7c2e0518be759635205e1e181325792044f5a8118019f716", size = 138552 }, + { url = "https://files.pythonhosted.org/packages/13/40/eed53da76a428f404ec9db6d0983691c61d2744fea7070c6b31caca31ac4/murmurhash-1.0.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1c9de2167a9d408d121ebc918bcb20b2718ec956f3aae0ded53d9bb224bb8e", size = 138589 }, + { url = "https://files.pythonhosted.org/packages/12/e8/1b9164e62f75bf23d6af5262421985f45bce9bd5c4970a62b83ea7cb62df/murmurhash-1.0.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:94a52972835bdae8af18147c67c398ff3ea1d875f5b8dca1e1aa0fadb892f546", size = 129244 }, + { url = "https://files.pythonhosted.org/packages/a7/20/c91f06d3692705bc7ec16c219143cc56062afd8080756d55e0678a7b704c/murmurhash-1.0.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cc88004c8615dcabe31d21142689f719fdf549ba782850bef389cf227a1df575", size = 128944 }, + { url = "https://files.pythonhosted.org/packages/b2/66/7d74a9f547dd719e86245e2a3d126140335861b8e362bdd22c7f9842a2b8/murmurhash-1.0.12-cp312-cp312-win_amd64.whl", hash = 
"sha256:8c5b8804c07a76f779e67f83aad37bc2189a0e65ebdd3f2b305242d489d31e03", size = 25554 }, + { url = "https://files.pythonhosted.org/packages/12/77/bec7e3f00b0e23bfa027704d4023fea808d8cc0e593dd4247f4579de6776/murmurhash-1.0.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:63f10c6d6ef9ee85073dd896d2c4e0ab161bc6b8e7e9201c69f8061f9f1b6468", size = 26474 }, + { url = "https://files.pythonhosted.org/packages/57/46/8dd3631cfb58435004678179a70352d3258b159c3f110e4f11fb23b1f776/murmurhash-1.0.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:66356f6308fd2a44a8ab056f020acd5bc22302f23ef5cce3705f2493e0fe9c3c", size = 26418 }, + { url = "https://files.pythonhosted.org/packages/50/8f/ae8ee91c1b9ecdc4d849382af64b10e5d3c79ee7fcf8af13400d32092ae7/murmurhash-1.0.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdb2104aa3471324724abf5a3a76fc94bcbeaf023bb6a6dd94da567b8633d8a6", size = 133342 }, + { url = "https://files.pythonhosted.org/packages/cd/19/3ce034b0c068e8f88ea0ac28e196359aacf3c52718fbce1f7dbcf088261f/murmurhash-1.0.12-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a7ef5fb37e72536458ac4a6f486fb374c60ac4c4862d9195d3d4b58239a91de", size = 133062 }, + { url = "https://files.pythonhosted.org/packages/a0/0c/f9a868eb39751b9dceb9e35d91e8cf321dad8ce9e3e965d067b0fff0547a/murmurhash-1.0.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bd5524de195991ce3551b14286ec0b730cc9dd2e10565dad2ae470eec082028", size = 124858 }, + { url = "https://files.pythonhosted.org/packages/b6/fc/52bcb3afc95733f30ec76e07cae4b5d0081fa049ade418303faeee619766/murmurhash-1.0.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:19de30edaaa2217cd0c41b6cf6bbfa418be5d7fdf267ca92e5e3710d4daac593", size = 124686 }, + { url = "https://files.pythonhosted.org/packages/cf/99/9cdea62dec8ea26a35a8231ee09c83b16f9dedf16280a7a15f41d2061706/murmurhash-1.0.12-cp313-cp313-win_amd64.whl", hash = 
"sha256:7dc4ebdfed7ef8ed70519962ac9b704e91978ee14e049f1ff37bca2f579ce84d", size = 24702 }, +] + [[package]] name = "mypy" version = "1.14.1" @@ -2783,6 +3109,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 }, ] +[[package]] +name = "nbmake" +version = "1.5.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ipykernel" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "pygments" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/9a/aae201cee5639e1d562b3843af8fd9f8d018bb323e776a2b973bdd5fc64b/nbmake-1.5.5.tar.gz", hash = "sha256:239dc868ea13a7c049746e2aba2c229bd0f6cdbc6bfa1d22f4c88638aa4c5f5c", size = 85929 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/be/b257e12f9710819fde40adc972578bee6b72c5992da1bc8369bef2597756/nbmake-1.5.5-py3-none-any.whl", hash = "sha256:c6fbe6e48b60cacac14af40b38bf338a3b88f47f085c54ac5b8639ff0babaf4b", size = 12818 }, +] + +[[package]] +name = "nbstripout" +version = "0.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nbformat" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/92/6e/05d7e0e35598bd0d423167295f978005912a2dcd137c88ebf36e34047dc7/nbstripout-0.8.1.tar.gz", hash = "sha256:eaac8b6b4e729e8dfe1e5df2c0f8ba44abc5a17a65448f0480141f80be230bb1", size = 26399 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/91/93b459c456b0e4389b2b3ddb3b82cd401d022691334a0f06e92c2046e780/nbstripout-0.8.1-py2.py3-none-any.whl", hash = "sha256:79a8c8da488d98c54c112fa87185045f0271a97d84f1d46918d6a3ee561b30e7", size = 16329 }, +] + [[package]] name = "nest-asyncio" version = "1.6.0" @@ -2813,18 +3167,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d1/d4/ba87f68af1ab26392dc64ef7ba7ee261bb2ab1df9a9cca1695b4454b8a16/networkx_stubs-0.0.1-py3-none-any.whl", hash = "sha256:ce58dff9b9dcbfdf895d0fce20fa8a73f2e82e581004596b7552086b5bb91366", size = 105780 }, ] -[[package]] -name = "notebook-shim" -version = "0.2.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jupyter-server" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/54/d2/92fa3243712b9a3e8bafaf60aac366da1cada3639ca767ff4b5b3654ec28/notebook_shim-0.2.4.tar.gz", hash = "sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb", size = 13167 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/33/bd5b9137445ea4b680023eb0469b2bb969d61303dedb2aac6560ff3d14a1/notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef", size = 13307 }, -] - [[package]] name = "numpy" version = "1.26.4" @@ -2945,7 +3287,6 @@ name = "nvidia-nccl-cu12" version = "2.20.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", size = 176238458 }, { url = "https://files.pythonhosted.org/packages/4b/2a/0a131f572aa09f741c30ccd45a8e56316e8be8dfc7bc19bf0ab7cfef7b19/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56", size = 176249402 }, ] @@ -2955,7 +3296,6 @@ version = "12.6.85" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/9d/d7/c5383e47c7e9bf1c99d5bd2a8c935af2b6d705ad831a7ec5c97db4d82f4f/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = 
"sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a", size = 19744971 }, - { url = "https://files.pythonhosted.org/packages/31/db/dc71113d441f208cdfe7ae10d4983884e13f464a6252450693365e166dcf/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41", size = 19270338 }, ] [[package]] @@ -3258,6 +3598,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] +[[package]] +name = "paginate" +version = "0.5.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/46/68dde5b6bc00c1296ec6466ab27dddede6aec9af1b99090e1107091b3b84/paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945", size = 19252 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/96/04b8e52da071d28f5e21a805b19cb9390aa17a47462ac87f5e2696b9566d/paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591", size = 13746 }, +] + +[[package]] +name = "pandas" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827 }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897 }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908 }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210 }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292 }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379 }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471 }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222 }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274 }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836 }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505 }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420 }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457 }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166 }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445 }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235 }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756 }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643 }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573 }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085 }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809 }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316 }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055 }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175 }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = 
"sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650 }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526 }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013 }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620 }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 }, +] + [[package]] name = "pandocfilters" version = "1.5.1" @@ -3308,69 +3705,61 @@ wheels = [ [[package]] name = "pillow" -version = "11.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/50/1c/2dcea34ac3d7bc96a1fd1bd0a6e06a57c67167fec2cff8d95d88229a8817/pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8", size = 3229983 }, - { url = "https://files.pythonhosted.org/packages/14/ca/6bec3df25e4c88432681de94a3531cc738bd85dea6c7aa6ab6f81ad8bd11/pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192", size = 3101831 }, - { url = "https://files.pythonhosted.org/packages/d4/2c/668e18e5521e46eb9667b09e501d8e07049eb5bfe39d56be0724a43117e6/pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2", size = 4314074 }, - { url = "https://files.pythonhosted.org/packages/02/80/79f99b714f0fc25f6a8499ecfd1f810df12aec170ea1e32a4f75746051ce/pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26", size = 4394933 }, - { url = "https://files.pythonhosted.org/packages/81/aa/8d4ad25dc11fd10a2001d5b8a80fdc0e564ac33b293bdfe04ed387e0fd95/pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07", size = 4353349 }, - { url = "https://files.pythonhosted.org/packages/84/7a/cd0c3eaf4a28cb2a74bdd19129f7726277a7f30c4f8424cd27a62987d864/pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482", size = 4476532 }, - { url = "https://files.pythonhosted.org/packages/8f/8b/a907fdd3ae8f01c7670dfb1499c53c28e217c338b47a813af8d815e7ce97/pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e", size = 4279789 }, - { url = 
"https://files.pythonhosted.org/packages/6f/9a/9f139d9e8cccd661c3efbf6898967a9a337eb2e9be2b454ba0a09533100d/pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269", size = 4413131 }, - { url = "https://files.pythonhosted.org/packages/a8/68/0d8d461f42a3f37432203c8e6df94da10ac8081b6d35af1c203bf3111088/pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49", size = 2291213 }, - { url = "https://files.pythonhosted.org/packages/14/81/d0dff759a74ba87715509af9f6cb21fa21d93b02b3316ed43bda83664db9/pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a", size = 2625725 }, - { url = "https://files.pythonhosted.org/packages/ce/1f/8d50c096a1d58ef0584ddc37e6f602828515219e9d2428e14ce50f5ecad1/pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65", size = 2375213 }, - { url = "https://files.pythonhosted.org/packages/dd/d6/2000bfd8d5414fb70cbbe52c8332f2283ff30ed66a9cde42716c8ecbe22c/pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", size = 3229968 }, - { url = "https://files.pythonhosted.org/packages/d9/45/3fe487010dd9ce0a06adf9b8ff4f273cc0a44536e234b0fad3532a42c15b/pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", size = 3101806 }, - { url = "https://files.pythonhosted.org/packages/e3/72/776b3629c47d9d5f1c160113158a7a7ad177688d3a1159cd3b62ded5a33a/pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", size = 4322283 }, - { url = 
"https://files.pythonhosted.org/packages/e4/c2/e25199e7e4e71d64eeb869f5b72c7ddec70e0a87926398785ab944d92375/pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070", size = 4402945 }, - { url = "https://files.pythonhosted.org/packages/c1/ed/51d6136c9d5911f78632b1b86c45241c712c5a80ed7fa7f9120a5dff1eba/pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6", size = 4361228 }, - { url = "https://files.pythonhosted.org/packages/48/a4/fbfe9d5581d7b111b28f1d8c2762dee92e9821bb209af9fa83c940e507a0/pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1", size = 4484021 }, - { url = "https://files.pythonhosted.org/packages/39/db/0b3c1a5018117f3c1d4df671fb8e47d08937f27519e8614bbe86153b65a5/pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2", size = 4287449 }, - { url = "https://files.pythonhosted.org/packages/d9/58/bc128da7fea8c89fc85e09f773c4901e95b5936000e6f303222490c052f3/pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96", size = 4419972 }, - { url = "https://files.pythonhosted.org/packages/5f/bb/58f34379bde9fe197f51841c5bbe8830c28bbb6d3801f16a83b8f2ad37df/pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f", size = 2291201 }, - { url = "https://files.pythonhosted.org/packages/3a/c6/fce9255272bcf0c39e15abd2f8fd8429a954cf344469eaceb9d0d1366913/pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761", size = 2625686 }, - { url = 
"https://files.pythonhosted.org/packages/c8/52/8ba066d569d932365509054859f74f2a9abee273edcef5cd75e4bc3e831e/pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71", size = 2375194 }, - { url = "https://files.pythonhosted.org/packages/95/20/9ce6ed62c91c073fcaa23d216e68289e19d95fb8188b9fb7a63d36771db8/pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", size = 3226818 }, - { url = "https://files.pythonhosted.org/packages/b9/d8/f6004d98579a2596c098d1e30d10b248798cceff82d2b77aa914875bfea1/pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", size = 3101662 }, - { url = "https://files.pythonhosted.org/packages/08/d9/892e705f90051c7a2574d9f24579c9e100c828700d78a63239676f960b74/pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", size = 4329317 }, - { url = "https://files.pythonhosted.org/packages/8c/aa/7f29711f26680eab0bcd3ecdd6d23ed6bce180d82e3f6380fb7ae35fcf3b/pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a", size = 4412999 }, - { url = "https://files.pythonhosted.org/packages/c8/c4/8f0fe3b9e0f7196f6d0bbb151f9fba323d72a41da068610c4c960b16632a/pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1", size = 4368819 }, - { url = "https://files.pythonhosted.org/packages/38/0d/84200ed6a871ce386ddc82904bfadc0c6b28b0c0ec78176871a4679e40b3/pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f", size = 4496081 }, - { url = 
"https://files.pythonhosted.org/packages/84/9c/9bcd66f714d7e25b64118e3952d52841a4babc6d97b6d28e2261c52045d4/pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91", size = 4296513 }, - { url = "https://files.pythonhosted.org/packages/db/61/ada2a226e22da011b45f7104c95ebda1b63dcbb0c378ad0f7c2a710f8fd2/pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c", size = 4431298 }, - { url = "https://files.pythonhosted.org/packages/e7/c4/fc6e86750523f367923522014b821c11ebc5ad402e659d8c9d09b3c9d70c/pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", size = 2291630 }, - { url = "https://files.pythonhosted.org/packages/08/5c/2104299949b9d504baf3f4d35f73dbd14ef31bbd1ddc2c1b66a5b7dfda44/pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", size = 2626369 }, - { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 }, - { url = "https://files.pythonhosted.org/packages/b3/31/9ca79cafdce364fd5c980cd3416c20ce1bebd235b470d262f9d24d810184/pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc", size = 3226640 }, - { url = "https://files.pythonhosted.org/packages/ac/0f/ff07ad45a1f172a497aa393b13a9d81a32e1477ef0e869d030e3c1532521/pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0", size = 3101437 }, - { url = 
"https://files.pythonhosted.org/packages/08/2f/9906fca87a68d29ec4530be1f893149e0cb64a86d1f9f70a7cfcdfe8ae44/pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1", size = 4326605 }, - { url = "https://files.pythonhosted.org/packages/b0/0f/f3547ee15b145bc5c8b336401b2d4c9d9da67da9dcb572d7c0d4103d2c69/pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec", size = 4411173 }, - { url = "https://files.pythonhosted.org/packages/b1/df/bf8176aa5db515c5de584c5e00df9bab0713548fd780c82a86cba2c2fedb/pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5", size = 4369145 }, - { url = "https://files.pythonhosted.org/packages/de/7c/7433122d1cfadc740f577cb55526fdc39129a648ac65ce64db2eb7209277/pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114", size = 4496340 }, - { url = "https://files.pythonhosted.org/packages/25/46/dd94b93ca6bd555588835f2504bd90c00d5438fe131cf01cfa0c5131a19d/pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352", size = 4296906 }, - { url = "https://files.pythonhosted.org/packages/a8/28/2f9d32014dfc7753e586db9add35b8a41b7a3b46540e965cb6d6bc607bd2/pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3", size = 4431759 }, - { url = "https://files.pythonhosted.org/packages/33/48/19c2cbe7403870fbe8b7737d19eb013f46299cdfe4501573367f6396c775/pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9", size = 2291657 }, - { url = 
"https://files.pythonhosted.org/packages/3b/ad/285c556747d34c399f332ba7c1a595ba245796ef3e22eae190f5364bb62b/pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c", size = 2626304 }, - { url = "https://files.pythonhosted.org/packages/e5/7b/ef35a71163bf36db06e9c8729608f78dedf032fc8313d19bd4be5c2588f3/pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65", size = 2375117 }, - { url = "https://files.pythonhosted.org/packages/79/30/77f54228401e84d6791354888549b45824ab0ffde659bafa67956303a09f/pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861", size = 3230060 }, - { url = "https://files.pythonhosted.org/packages/ce/b1/56723b74b07dd64c1010fee011951ea9c35a43d8020acd03111f14298225/pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081", size = 3106192 }, - { url = "https://files.pythonhosted.org/packages/e1/cd/7bf7180e08f80a4dcc6b4c3a0aa9e0b0ae57168562726a05dc8aa8fa66b0/pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c", size = 4446805 }, - { url = "https://files.pythonhosted.org/packages/97/42/87c856ea30c8ed97e8efbe672b58c8304dee0573f8c7cab62ae9e31db6ae/pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547", size = 4530623 }, - { url = "https://files.pythonhosted.org/packages/ff/41/026879e90c84a88e33fb00cc6bd915ac2743c67e87a18f80270dfe3c2041/pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab", size = 4465191 }, - { url = 
"https://files.pythonhosted.org/packages/e5/fb/a7960e838bc5df57a2ce23183bfd2290d97c33028b96bde332a9057834d3/pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9", size = 2295494 }, - { url = "https://files.pythonhosted.org/packages/d7/6c/6ec83ee2f6f0fda8d4cf89045c6be4b0373ebfc363ba8538f8c999f63fcd/pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe", size = 2631595 }, - { url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 }, - { url = "https://files.pythonhosted.org/packages/fa/c5/389961578fb677b8b3244fcd934f720ed25a148b9a5cc81c91bdf59d8588/pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90", size = 3198345 }, - { url = "https://files.pythonhosted.org/packages/c4/fa/803c0e50ffee74d4b965229e816af55276eac1d5806712de86f9371858fd/pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb", size = 3072938 }, - { url = "https://files.pythonhosted.org/packages/dc/67/2a3a5f8012b5d8c63fe53958ba906c1b1d0482ebed5618057ef4d22f8076/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442", size = 3400049 }, - { url = "https://files.pythonhosted.org/packages/e5/a0/514f0d317446c98c478d1872497eb92e7cde67003fed74f696441e647446/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83", size = 3422431 }, - { url = 
"https://files.pythonhosted.org/packages/cd/00/20f40a935514037b7d3f87adfc87d2c538430ea625b63b3af8c3f5578e72/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f", size = 3446208 }, - { url = "https://files.pythonhosted.org/packages/28/3c/7de681727963043e093c72e6c3348411b0185eab3263100d4490234ba2f6/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73", size = 3509746 }, - { url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353 }, +version = "10.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/74/ad3d526f3bf7b6d3f408b73fde271ec69dfac8b81341a318ce825f2b3812/pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06", size = 46555059 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/69/a31cccd538ca0b5272be2a38347f8839b97a14be104ea08b0db92f749c74/pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e", size = 3509271 }, + { url = "https://files.pythonhosted.org/packages/9a/9e/4143b907be8ea0bce215f2ae4f7480027473f8b61fcedfda9d851082a5d2/pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d", size = 3375658 }, + { url = "https://files.pythonhosted.org/packages/8a/25/1fc45761955f9359b1169aa75e241551e74ac01a09f487adaaf4c3472d11/pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856", size = 4332075 }, + { url = 
"https://files.pythonhosted.org/packages/5e/dd/425b95d0151e1d6c951f45051112394f130df3da67363b6bc75dc4c27aba/pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f", size = 4444808 }, + { url = "https://files.pythonhosted.org/packages/b1/84/9a15cc5726cbbfe7f9f90bfb11f5d028586595907cd093815ca6644932e3/pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b", size = 4356290 }, + { url = "https://files.pythonhosted.org/packages/b5/5b/6651c288b08df3b8c1e2f8c1152201e0b25d240e22ddade0f1e242fc9fa0/pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc", size = 4525163 }, + { url = "https://files.pythonhosted.org/packages/07/8b/34854bf11a83c248505c8cb0fcf8d3d0b459a2246c8809b967963b6b12ae/pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e", size = 4463100 }, + { url = "https://files.pythonhosted.org/packages/78/63/0632aee4e82476d9cbe5200c0cdf9ba41ee04ed77887432845264d81116d/pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46", size = 4592880 }, + { url = "https://files.pythonhosted.org/packages/df/56/b8663d7520671b4398b9d97e1ed9f583d4afcbefbda3c6188325e8c297bd/pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984", size = 2235218 }, + { url = "https://files.pythonhosted.org/packages/f4/72/0203e94a91ddb4a9d5238434ae6c1ca10e610e8487036132ea9bf806ca2a/pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141", size = 2554487 }, + { url = 
"https://files.pythonhosted.org/packages/bd/52/7e7e93d7a6e4290543f17dc6f7d3af4bd0b3dd9926e2e8a35ac2282bc5f4/pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1", size = 2243219 }, + { url = "https://files.pythonhosted.org/packages/a7/62/c9449f9c3043c37f73e7487ec4ef0c03eb9c9afc91a92b977a67b3c0bbc5/pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c", size = 3509265 }, + { url = "https://files.pythonhosted.org/packages/f4/5f/491dafc7bbf5a3cc1845dc0430872e8096eb9e2b6f8161509d124594ec2d/pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be", size = 3375655 }, + { url = "https://files.pythonhosted.org/packages/73/d5/c4011a76f4207a3c151134cd22a1415741e42fa5ddecec7c0182887deb3d/pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3", size = 4340304 }, + { url = "https://files.pythonhosted.org/packages/ac/10/c67e20445a707f7a610699bba4fe050583b688d8cd2d202572b257f46600/pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6", size = 4452804 }, + { url = "https://files.pythonhosted.org/packages/a9/83/6523837906d1da2b269dee787e31df3b0acb12e3d08f024965a3e7f64665/pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe", size = 4365126 }, + { url = "https://files.pythonhosted.org/packages/ba/e5/8c68ff608a4203085158cff5cc2a3c534ec384536d9438c405ed6370d080/pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319", size = 4533541 }, + { url = 
"https://files.pythonhosted.org/packages/f4/7c/01b8dbdca5bc6785573f4cee96e2358b0918b7b2c7b60d8b6f3abf87a070/pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d", size = 4471616 }, + { url = "https://files.pythonhosted.org/packages/c8/57/2899b82394a35a0fbfd352e290945440e3b3785655a03365c0ca8279f351/pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696", size = 4600802 }, + { url = "https://files.pythonhosted.org/packages/4d/d7/a44f193d4c26e58ee5d2d9db3d4854b2cfb5b5e08d360a5e03fe987c0086/pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496", size = 2235213 }, + { url = "https://files.pythonhosted.org/packages/c1/d0/5866318eec2b801cdb8c82abf190c8343d8a1cd8bf5a0c17444a6f268291/pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91", size = 2554498 }, + { url = "https://files.pythonhosted.org/packages/d4/c8/310ac16ac2b97e902d9eb438688de0d961660a87703ad1561fd3dfbd2aa0/pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22", size = 2243219 }, + { url = "https://files.pythonhosted.org/packages/05/cb/0353013dc30c02a8be34eb91d25e4e4cf594b59e5a55ea1128fde1e5f8ea/pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94", size = 3509350 }, + { url = "https://files.pythonhosted.org/packages/e7/cf/5c558a0f247e0bf9cec92bff9b46ae6474dd736f6d906315e60e4075f737/pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597", size = 3374980 }, + { url = 
"https://files.pythonhosted.org/packages/84/48/6e394b86369a4eb68b8a1382c78dc092245af517385c086c5094e3b34428/pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80", size = 4343799 }, + { url = "https://files.pythonhosted.org/packages/3b/f3/a8c6c11fa84b59b9df0cd5694492da8c039a24cd159f0f6918690105c3be/pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca", size = 4459973 }, + { url = "https://files.pythonhosted.org/packages/7d/1b/c14b4197b80150fb64453585247e6fb2e1d93761fa0fa9cf63b102fde822/pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef", size = 4370054 }, + { url = "https://files.pythonhosted.org/packages/55/77/40daddf677897a923d5d33329acd52a2144d54a9644f2a5422c028c6bf2d/pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a", size = 4539484 }, + { url = "https://files.pythonhosted.org/packages/40/54/90de3e4256b1207300fb2b1d7168dd912a2fb4b2401e439ba23c2b2cabde/pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b", size = 4477375 }, + { url = "https://files.pythonhosted.org/packages/13/24/1bfba52f44193860918ff7c93d03d95e3f8748ca1de3ceaf11157a14cf16/pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9", size = 4608773 }, + { url = "https://files.pythonhosted.org/packages/55/04/5e6de6e6120451ec0c24516c41dbaf80cce1b6451f96561235ef2429da2e/pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42", size = 2235690 }, + { url = 
"https://files.pythonhosted.org/packages/74/0a/d4ce3c44bca8635bd29a2eab5aa181b654a734a29b263ca8efe013beea98/pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a", size = 2554951 }, + { url = "https://files.pythonhosted.org/packages/b5/ca/184349ee40f2e92439be9b3502ae6cfc43ac4b50bc4fc6b3de7957563894/pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9", size = 2243427 }, + { url = "https://files.pythonhosted.org/packages/c3/00/706cebe7c2c12a6318aabe5d354836f54adff7156fd9e1bd6c89f4ba0e98/pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3", size = 3525685 }, + { url = "https://files.pythonhosted.org/packages/cf/76/f658cbfa49405e5ecbfb9ba42d07074ad9792031267e782d409fd8fe7c69/pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb", size = 3374883 }, + { url = "https://files.pythonhosted.org/packages/46/2b/99c28c4379a85e65378211971c0b430d9c7234b1ec4d59b2668f6299e011/pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70", size = 4339837 }, + { url = "https://files.pythonhosted.org/packages/f1/74/b1ec314f624c0c43711fdf0d8076f82d9d802afd58f1d62c2a86878e8615/pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be", size = 4455562 }, + { url = "https://files.pythonhosted.org/packages/4a/2a/4b04157cb7b9c74372fa867096a1607e6fedad93a44deeff553ccd307868/pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0", size = 4366761 }, + { url = 
"https://files.pythonhosted.org/packages/ac/7b/8f1d815c1a6a268fe90481232c98dd0e5fa8c75e341a75f060037bd5ceae/pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc", size = 4536767 }, + { url = "https://files.pythonhosted.org/packages/e5/77/05fa64d1f45d12c22c314e7b97398ffb28ef2813a485465017b7978b3ce7/pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a", size = 4477989 }, + { url = "https://files.pythonhosted.org/packages/12/63/b0397cfc2caae05c3fb2f4ed1b4fc4fc878f0243510a7a6034ca59726494/pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309", size = 4610255 }, + { url = "https://files.pythonhosted.org/packages/7b/f9/cfaa5082ca9bc4a6de66ffe1c12c2d90bf09c309a5f52b27759a596900e7/pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060", size = 2235603 }, + { url = "https://files.pythonhosted.org/packages/01/6a/30ff0eef6e0c0e71e55ded56a38d4859bf9d3634a94a88743897b5f96936/pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea", size = 2554972 }, + { url = "https://files.pythonhosted.org/packages/48/2c/2e0a52890f269435eee38b21c8218e102c621fe8d8df8b9dd06fabf879ba/pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", size = 2243375 }, + { url = "https://files.pythonhosted.org/packages/38/30/095d4f55f3a053392f75e2eae45eba3228452783bab3d9a920b951ac495c/pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4", size = 3493889 }, + { url = 
"https://files.pythonhosted.org/packages/f3/e8/4ff79788803a5fcd5dc35efdc9386af153569853767bff74540725b45863/pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da", size = 3346160 }, + { url = "https://files.pythonhosted.org/packages/d7/ac/4184edd511b14f760c73f5bb8a5d6fd85c591c8aff7c2229677a355c4179/pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026", size = 3435020 }, + { url = "https://files.pythonhosted.org/packages/da/21/1749cd09160149c0a246a81d646e05f35041619ce76f6493d6a96e8d1103/pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e", size = 3490539 }, + { url = "https://files.pythonhosted.org/packages/b6/f5/f71fe1888b96083b3f6dfa0709101f61fc9e972c0c8d04e9d93ccef2a045/pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5", size = 3476125 }, + { url = "https://files.pythonhosted.org/packages/96/b9/c0362c54290a31866c3526848583a2f45a535aa9d725fd31e25d318c805f/pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885", size = 3579373 }, + { url = "https://files.pythonhosted.org/packages/52/3b/ce7a01026a7cf46e5452afa86f97a5e88ca97f562cafa76570178ab56d8d/pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5", size = 2554661 }, ] [[package]] @@ -3400,20 +3789,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] 
-[[package]] -name = "plum-dispatch" -version = "2.5.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "beartype" }, - { name = "rich" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/46/ab3928e864b0a88a8ae6987b3da3b7ae32fe0a610264f33272139275dab5/plum_dispatch-2.5.7.tar.gz", hash = "sha256:a7908ad5563b93f387e3817eb0412ad40cfbad04bc61d869cf7a76cd58a3895d", size = 35452 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/31/21609a9be48e877bc33b089a7f495c853215def5aeb9564a31c210d9d769/plum_dispatch-2.5.7-py3-none-any.whl", hash = "sha256:06471782eea0b3798c1e79dca2af2165bafcfa5eb595540b514ddd81053b1ede", size = 42612 }, -] - [[package]] name = "poethepoet" version = "0.32.1" @@ -3445,12 +3820,30 @@ wheels = [ ] [[package]] -name = "prometheus-client" -version = "0.21.1" +name = "preshed" +version = "3.0.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/62/14/7d0f567991f3a9af8d1cd4f619040c93b68f09a02b6d0b6ab1b2d1ded5fe/prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb", size = 78551 } +dependencies = [ + { name = "cymem" }, + { name = "murmurhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/4e/76dbf784e7d4ed069f91a4c249b1d6ec6856ef0c0b2fd96992895d458b15/preshed-3.0.9.tar.gz", hash = "sha256:721863c5244ffcd2651ad0928951a2c7c77b102f4e11a251ad85d37ee7621660", size = 14478 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/c2/ab7d37426c179ceb9aeb109a85cda8948bb269b7561a0be870cc656eefe4/prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301", size = 54682 }, + { url = "https://files.pythonhosted.org/packages/38/7f/a7d3eeaee67ecebbe51866c1aae6310e34cefa0a64821aed963a0a167b51/preshed-3.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:4f96ef4caf9847b2bb9868574dcbe2496f974e41c2b83d6621c24fb4c3fc57e3", size = 132225 }, + { url = "https://files.pythonhosted.org/packages/61/4e/f251271ee9f0e0eb0ebe219a8df57ff8511a3b7a83e79e24d37105034164/preshed-3.0.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a61302cf8bd30568631adcdaf9e6b21d40491bd89ba8ebf67324f98b6c2a2c05", size = 127791 }, + { url = "https://files.pythonhosted.org/packages/eb/8b/6c8a153ea39b4750c20ed48dd9be4bf9d8c0b4e7822fc63c68cd2891703d/preshed-3.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99499e8a58f58949d3f591295a97bca4e197066049c96f5d34944dd21a497193", size = 150279 }, + { url = "https://files.pythonhosted.org/packages/42/59/8f65ad22c13020ff281529e415c32a56cfa691d24b0eca2eb3d756e4d644/preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea6b6566997dc3acd8c6ee11a89539ac85c77275b4dcefb2dc746d11053a5af8", size = 156914 }, + { url = "https://files.pythonhosted.org/packages/f3/72/108426ca3b6e7f16db30b3b9396e3fa45a3fd5a76f6532ab04beada2e4e3/preshed-3.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:bfd523085a84b1338ff18f61538e1cfcdedc4b9e76002589a301c364d19a2e36", size = 122224 }, + { url = "https://files.pythonhosted.org/packages/c0/1e/05fa559f53b635d96b233b63e93accb75215025b997486f7290991bec6c3/preshed-3.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7c2364da27f2875524ce1ca754dc071515a9ad26eb5def4c7e69129a13c9a59", size = 132972 }, + { url = "https://files.pythonhosted.org/packages/a8/b3/1a73ba16bab53043fd19dd0a7838ae05c705dccb329404dd4ad5925767f1/preshed-3.0.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182138033c0730c683a6d97e567ceb8a3e83f3bff5704f300d582238dbd384b3", size = 128751 }, + { url = "https://files.pythonhosted.org/packages/2c/9a/919d3708f6fa98d9eab1a186e6b30ab25a4595907bbc1fea5c1e8faa9b9d/preshed-3.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:345a10be3b86bcc6c0591d343a6dc2bfd86aa6838c30ced4256dfcfa836c3a64", size = 150050 }, + { url = "https://files.pythonhosted.org/packages/db/69/d9ab108dc670b5be9e292bbd555f39e6eb0a4baab25cd28f792850d5e65b/preshed-3.0.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51d0192274aa061699b284f9fd08416065348edbafd64840c3889617ee1609de", size = 157159 }, + { url = "https://files.pythonhosted.org/packages/e4/fc/78cdbdb79f5d6d45949e72c32445d6c060977ad50a1dcfc0392622165f7c/preshed-3.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:96b857d7a62cbccc3845ac8c41fd23addf052821be4eb987f2eb0da3d8745aa1", size = 122323 }, + { url = "https://files.pythonhosted.org/packages/fe/7e/a41595876f644d8bd2c3d5422d7211e876b1848a8cc0c03cce33d9cd048a/preshed-3.0.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4fe6720012c62e6d550d6a5c1c7ad88cacef8388d186dad4bafea4140d9d198", size = 133196 }, + { url = "https://files.pythonhosted.org/packages/e7/68/1b4772ff3232e71b63a9206936eb1f75e976ebf4e4e24dc9b3ea7b68369b/preshed-3.0.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e04f05758875be9751e483bd3c519c22b00d3b07f5a64441ec328bb9e3c03700", size = 128594 }, + { url = "https://files.pythonhosted.org/packages/f3/52/48eefe876a3841c5850bd955daf145d0e408567c8f46a997bce136dc259d/preshed-3.0.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a55091d0e395f1fdb62ab43401bb9f8b46c7d7794d5b071813c29dc1ab22fd0", size = 149220 }, + { url = "https://files.pythonhosted.org/packages/55/ea/9e6c1a7b1d623f6340379290d603a3b8a71ce52a93f842fbf7547f7f1812/preshed-3.0.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7de8f5138bcac7870424e09684dc3dd33c8e30e81b269f6c9ede3d8c7bb8e257", size = 156809 }, + { url = 
"https://files.pythonhosted.org/packages/db/e4/d074efb7e8a8873d346d2fb8dd43e19b1eae0697351c0d79cff947cba46e/preshed-3.0.9-cp312-cp312-win_amd64.whl", hash = "sha256:24229c77364628743bc29c5620c5d6607ed104f0e02ae31f8a030f99a78a5ceb", size = 122428 }, ] [[package]] @@ -3726,6 +4119,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] +[[package]] +name = "pymdown-extensions" +version = "10.14.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7c/44/e6de2fdc880ad0ec7547ca2e087212be815efbc9a425a8d5ba9ede602cbb/pymdown_extensions-10.14.3.tar.gz", hash = "sha256:41e576ce3f5d650be59e900e4ceff231e0aed2a88cf30acaee41e02f063a061b", size = 846846 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/f5/b9e2a42aa8f9e34d52d66de87941ecd236570c7ed2e87775ed23bbe4e224/pymdown_extensions-10.14.3-py3-none-any.whl", hash = "sha256:05e0bee73d64b9c71a4ae17c72abc2f700e8bc8403755a00580b49a4e9f189e9", size = 264467 }, +] + [[package]] name = "pymongo" version = "4.10.1" @@ -3882,12 +4288,12 @@ wheels = [ ] [[package]] -name = "python-json-logger" -version = "3.2.1" +name = "pytz" +version = "2025.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e3/c4/358cd13daa1d912ef795010897a483ab2f0b41c9ea1b35235a8b2f7d15a7/python_json_logger-3.2.1.tar.gz", hash = "sha256:8eb0554ea17cb75b05d2848bc14fb02fbdbd9d6972120781b974380bfa162008", size = 16287 } +sdist = { url = "https://files.pythonhosted.org/packages/5f/57/df1c9157c8d5a05117e455d66fd7cf6dbc46974f832b1058ed4856785d8a/pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e", size = 319617 
} wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/72/2f30cf26664fcfa0bd8ec5ee62ec90c03bd485e4a294d92aabc76c5203a5/python_json_logger-3.2.1-py3-none-any.whl", hash = "sha256:cdc17047eb5374bd311e748b42f99d71223f3b0e186f4206cc5d52aefe85b090", size = 14924 }, + { url = "https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57", size = 507930 }, ] [[package]] @@ -3909,18 +4315,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/df/2b63e3e4f2df0224f8aaf6d131f54fe4e8c96400eb9df563e2aae2e1a1f9/pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd", size = 7974986 }, ] -[[package]] -name = "pywinpty" -version = "2.0.14" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/82/90f8750423cba4b9b6c842df227609fb60704482d7abf6dd47e2babc055a/pywinpty-2.0.14.tar.gz", hash = "sha256:18bd9529e4a5daf2d9719aa17788ba6013e594ae94c5a0c27e83df3278b0660e", size = 27769 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/09/56376af256eab8cc5f8982a3b138d387136eca27fa1a8a68660e8ed59e4b/pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f", size = 1397115 }, - { url = "https://files.pythonhosted.org/packages/be/e2/af1a99c0432e4e58c9ac8e334ee191790ec9793d33559189b9d2069bdc1d/pywinpty-2.0.14-cp311-none-win_amd64.whl", hash = "sha256:cf2a43ac7065b3e0dc8510f8c1f13a75fb8fde805efa3b8cff7599a1ef497bc7", size = 1397223 }, - { url = "https://files.pythonhosted.org/packages/ad/79/759ae767a3b78d340446efd54dd1fe4f7dafa4fc7be96ed757e44bcdba54/pywinpty-2.0.14-cp312-none-win_amd64.whl", hash = "sha256:55dad362ef3e9408ade68fd173e4f9032b3ce08f68cfe7eacb2c263ea1179737", size = 1397207 }, - { url = 
"https://files.pythonhosted.org/packages/7d/34/b77b3c209bf2eaa6455390c8d5449241637f5957f41636a2204065d52bfa/pywinpty-2.0.14-cp313-none-win_amd64.whl", hash = "sha256:074fb988a56ec79ca90ed03a896d40707131897cefb8f76f926e3834227f2819", size = 1396698 }, -] - [[package]] name = "pyyaml" version = "6.0.2" @@ -3965,6 +4359,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, ] +[[package]] +name = "pyyaml-env-tag" +version = "0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/8e/da1c6c58f751b70f8ceb1eb25bc25d524e8f14fe16edcce3f4e3ba08629c/pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb", size = 5631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/66/bbb1dd374f5c870f59c5bb1db0e18cbe7fa739415a24cbd95b2d1f5ae0c4/pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069", size = 3911 }, +] + [[package]] name = "pyzmq" version = "26.2.0" @@ -4038,30 +4444,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 }, ] -[[package]] -name = "quartodoc" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "black" }, - { name = "click" }, - { name = "griffe" }, - { name = "importlib-metadata" }, - { name = "importlib-resources" }, - { name = "plum-dispatch" }, - { name = "pydantic" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sphobjinv" }, - { 
name = "tabulate" }, - { name = "typing-extensions" }, - { name = "watchdog" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/ad/1bdc6745d0d0527da0a2f30a235850873e98442a2c1afdd67ac7e164e0c4/quartodoc-0.9.1.tar.gz", hash = "sha256:418d9ff0f2d4481a87e926e3a3e5bc31efbe556c7f36912aa0a3f716c0eafa9c", size = 772465 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/47/de4c28e2002653226de3e34c9a3e989619796e5f0fe771284c570a016cec/quartodoc-0.9.1-py3-none-any.whl", hash = "sha256:78345002f75f086be6e1b1ccf3b4e3ada2fa3affb8a8fc0dbdb37890a5da03fa", size = 84275 }, -] - [[package]] name = "referencing" version = "0.36.1" @@ -4185,27 +4567,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481 }, ] -[[package]] -name = "rfc3339-validator" -version = "0.1.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/ea/a9387748e2d111c3c2b275ba970b735e04e15cdb1eb30693b6b5708c4dbd/rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", size = 5513 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa", size = 3490 }, -] - -[[package]] -name = "rfc3986-validator" -version = "0.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/da/88/f270de456dd7d11dcc808abfa291ecdd3f45ff44e3b549ffa01b126464d0/rfc3986_validator-0.1.1.tar.gz", hash = "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055", size = 6760 } 
-wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", size = 4242 }, -] - [[package]] name = "rich" version = "13.9.4" @@ -4317,6 +4678,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 }, ] +[[package]] +name = "ruff" +version = "0.9.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/17/529e78f49fc6f8076f50d985edd9a2cf011d1dbadb1cdeacc1d12afc1d26/ruff-0.9.4.tar.gz", hash = "sha256:6907ee3529244bb0ed066683e075f09285b38dd5b4039370df6ff06041ca19e7", size = 3599458 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/f8/3fafb7804d82e0699a122101b5bee5f0d6e17c3a806dcbc527bb7d3f5b7a/ruff-0.9.4-py3-none-linux_armv6l.whl", hash = "sha256:64e73d25b954f71ff100bb70f39f1ee09e880728efb4250c632ceed4e4cdf706", size = 11668400 }, + { url = "https://files.pythonhosted.org/packages/2e/a6/2efa772d335da48a70ab2c6bb41a096c8517ca43c086ea672d51079e3d1f/ruff-0.9.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6ce6743ed64d9afab4fafeaea70d3631b4d4b28b592db21a5c2d1f0ef52934bf", size = 11628395 }, + { url = "https://files.pythonhosted.org/packages/dc/d7/cd822437561082f1c9d7225cc0d0fbb4bad117ad7ac3c41cd5d7f0fa948c/ruff-0.9.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54499fb08408e32b57360f6f9de7157a5fec24ad79cb3f42ef2c3f3f728dfe2b", size = 11090052 }, + { url = "https://files.pythonhosted.org/packages/9e/67/3660d58e893d470abb9a13f679223368ff1684a4ef40f254a0157f51b448/ruff-0.9.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:37c892540108314a6f01f105040b5106aeb829fa5fb0561d2dcaf71485021137", size = 11882221 }, + { url = "https://files.pythonhosted.org/packages/79/d1/757559995c8ba5f14dfec4459ef2dd3fcea82ac43bc4e7c7bf47484180c0/ruff-0.9.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:de9edf2ce4b9ddf43fd93e20ef635a900e25f622f87ed6e3047a664d0e8f810e", size = 11424862 }, + { url = "https://files.pythonhosted.org/packages/c0/96/7915a7c6877bb734caa6a2af424045baf6419f685632469643dbd8eb2958/ruff-0.9.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87c90c32357c74f11deb7fbb065126d91771b207bf9bfaaee01277ca59b574ec", size = 12626735 }, + { url = "https://files.pythonhosted.org/packages/0e/cc/dadb9b35473d7cb17c7ffe4737b4377aeec519a446ee8514123ff4a26091/ruff-0.9.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:56acd6c694da3695a7461cc55775f3a409c3815ac467279dfa126061d84b314b", size = 13255976 }, + { url = "https://files.pythonhosted.org/packages/5f/c3/ad2dd59d3cabbc12df308cced780f9c14367f0321e7800ca0fe52849da4c/ruff-0.9.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0c93e7d47ed951b9394cf352d6695b31498e68fd5782d6cbc282425655f687a", size = 12752262 }, + { url = "https://files.pythonhosted.org/packages/c7/17/5f1971e54bd71604da6788efd84d66d789362b1105e17e5ccc53bba0289b/ruff-0.9.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1d4c8772670aecf037d1bf7a07c39106574d143b26cfe5ed1787d2f31e800214", size = 14401648 }, + { url = "https://files.pythonhosted.org/packages/30/24/6200b13ea611b83260501b6955b764bb320e23b2b75884c60ee7d3f0b68e/ruff-0.9.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfc5f1d7afeda8d5d37660eeca6d389b142d7f2b5a1ab659d9214ebd0e025231", size = 12414702 }, + { url = "https://files.pythonhosted.org/packages/34/cb/f5d50d0c4ecdcc7670e348bd0b11878154bc4617f3fdd1e8ad5297c0d0ba/ruff-0.9.4-py3-none-musllinux_1_2_aarch64.whl", hash = 
"sha256:faa935fc00ae854d8b638c16a5f1ce881bc3f67446957dd6f2af440a5fc8526b", size = 11859608 }, + { url = "https://files.pythonhosted.org/packages/d6/f4/9c8499ae8426da48363bbb78d081b817b0f64a9305f9b7f87eab2a8fb2c1/ruff-0.9.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a6c634fc6f5a0ceae1ab3e13c58183978185d131a29c425e4eaa9f40afe1e6d6", size = 11485702 }, + { url = "https://files.pythonhosted.org/packages/18/59/30490e483e804ccaa8147dd78c52e44ff96e1c30b5a95d69a63163cdb15b/ruff-0.9.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:433dedf6ddfdec7f1ac7575ec1eb9844fa60c4c8c2f8887a070672b8d353d34c", size = 12067782 }, + { url = "https://files.pythonhosted.org/packages/3d/8c/893fa9551760b2f8eb2a351b603e96f15af167ceaf27e27ad873570bc04c/ruff-0.9.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d612dbd0f3a919a8cc1d12037168bfa536862066808960e0cc901404b77968f0", size = 12483087 }, + { url = "https://files.pythonhosted.org/packages/23/15/f6751c07c21ca10e3f4a51ea495ca975ad936d780c347d9808bcedbd7182/ruff-0.9.4-py3-none-win32.whl", hash = "sha256:db1192ddda2200671f9ef61d9597fcef89d934f5d1705e571a93a67fb13a4402", size = 9852302 }, + { url = "https://files.pythonhosted.org/packages/12/41/2d2d2c6a72e62566f730e49254f602dfed23019c33b5b21ea8f8917315a1/ruff-0.9.4-py3-none-win_amd64.whl", hash = "sha256:05bebf4cdbe3ef75430d26c375773978950bbf4ee3c95ccb5448940dc092408e", size = 10850051 }, + { url = "https://files.pythonhosted.org/packages/c6/e6/3d6ec3bc3d254e7f005c543a661a41c3e788976d0e52a1ada195bd664344/ruff-0.9.4-py3-none-win_arm64.whl", hash = "sha256:585792f1e81509e38ac5123492f8875fbc36f3ede8185af0a26df348e5154f41", size = 10078251 }, +] + [[package]] name = "safetensors" version = "0.5.1" @@ -4427,15 +4813,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/df/989b2fd3f8ead6bcf89fc683fde94741eb3b291e41a3ce70cec08c80aa36/scipy-1.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:129f899ed275c0515d553b8d31696924e2ca87d1972421e46c376b9eb87de3d2", size = 43188844 }, 
] -[[package]] -name = "send2trash" -version = "1.8.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fd/3a/aec9b02217bb79b87bbc1a21bc6abc51e3d5dcf65c30487ac96c0908c722/Send2Trash-1.8.3.tar.gz", hash = "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf", size = 17394 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9", size = 18072 }, -] - [[package]] name = "sentence-transformers" version = "3.3.1" @@ -4589,6 +4966,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "smart-open" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/21/30/1f41c3d3b8cec82024b4b277bfd4e5b18b765ae7279eb9871fa25c503778/smart_open-7.1.0.tar.gz", hash = "sha256:a4f09f84f0f6d3637c6543aca7b5487438877a21360e7368ccf1f704789752ba", size = 72044 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/18/9a8d9f01957aa1f8bbc5676d54c2e33102d247e146c1a3679d3bd5cc2e3a/smart_open-7.1.0-py3-none-any.whl", hash = "sha256:4b8489bb6058196258bafe901730c7db0dcf4f083f316e97269c66f45502055b", size = 61746 }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -4608,17 +4997,64 @@ wheels = [ ] [[package]] -name = "sphobjinv" -version = "2.3.1.2" +name = "spacy" +version = "3.8.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "attrs" }, - { name = "certifi" }, - { name = "jsonschema" }, + { name = "catalogue" }, + { name = "cymem" }, + { 
name = "jinja2" }, + { name = "langcodes" }, + { name = "murmurhash" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "preshed" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "setuptools" }, + { name = "spacy-legacy" }, + { name = "spacy-loggers" }, + { name = "srsly" }, + { name = "thinc" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "wasabi" }, + { name = "weasel" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/51/c0862063e8338a2cc769e787f0448c92a87ac87abfe2987ecc84d8246f51/spacy-3.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bbe074f58ac21156dab821b5a145f72c2a50933098bc371ab5a63f47e8ac5fc6", size = 6611302 }, + { url = "https://files.pythonhosted.org/packages/0f/8c/9e565966f016371bff7d020eac41248085c3ebbc7b50c565d45e0f8a439c/spacy-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d0474188edcdad2be647df04cfdc4351dea877caf41bd8c93b36e3b9124794c7", size = 6306323 }, + { url = "https://files.pythonhosted.org/packages/0f/24/599a37ee61458471a1f812a4970c082f7d86089ff2ea8517ffeac0dba86d/spacy-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94c589ba39dbd035a7205c05f6efa2b60bdd1faae0f5fc9c0d657eb903b885ac", size = 29152257 }, + { url = "https://files.pythonhosted.org/packages/5f/3d/9364f30a7e768270adf3480fe8e198f425e2c9033aa24aa12f845fdaad3c/spacy-3.8.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:17a758d53761b54c9500aca34cd27399730848dce2b74edf76a0893b5c711ed3", size = 29913582 }, + { url = "https://files.pythonhosted.org/packages/1b/b9/31e2d2a3357fbb75aa9cb2dc5ff720077c4af6962034a9a3d8ac8550bc43/spacy-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:e34003ab0c8e447ec40f74f03b95fa018e8c81ef2fc067a2e8398edd96a6ab0a", size = 12206276 }, + { url = "https://files.pythonhosted.org/packages/4c/d9/439aed2e686b30aef877dea6181852810ae9dc2fcbc9003e9eedb543e463/spacy-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:276ef77ce197a1eb73f86e6b6f527886cfd8211f38688eb3c246e07e51f8ae22", size = 6574422 }, + { url = "https://files.pythonhosted.org/packages/90/95/0823540c856b61193cb2d0c8feb863d5130e1571c79140317004ad786612/spacy-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1080908078a78299f8924e5df6c61e9f9db2f6930ed945e91b048e9af41d58a1", size = 6265045 }, + { url = "https://files.pythonhosted.org/packages/f9/36/4f95922a22c32bd6fdda50ae5780c55b72d75ff76fd94cafa24950601330/spacy-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4540e4599df47e2d7525b8da1515d29da72db339ba8553b2f8d30842179806ea", size = 30570081 }, + { url = "https://files.pythonhosted.org/packages/d9/12/fc55ad34ef246f33951ecec0be86ba906830dbd443c763e90c6e19f51e1b/spacy-3.8.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e6b04dce1b52c9a890c651cc50a38349ffaf93f03740059653c124cbf2a3187d", size = 31112610 }, + { url = "https://files.pythonhosted.org/packages/ca/45/7b43e89b30fe73e32fd8b8ab80c407d326761530a88abd823ec8623772a6/spacy-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:5837d60329d2da90008ccb24e65b03fb53d6729918ed6fda9d15868d36961c00", size = 12201808 }, + { url = "https://files.pythonhosted.org/packages/bf/0b/a610dcf958c349b361bf36ca25208db4dd25b5d82a00670161fae56970c6/spacy-3.8.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:381e482cde64a4346cd5a33e3f30881def405ad97c609620b01f694ef77f4f68", size = 6293285 }, + { url = "https://files.pythonhosted.org/packages/64/20/1db5a7384dba9e979b912dae57a8fb8ab986e3859da60ab1cb086c430dbe/spacy-3.8.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e5ef210fef16bf95cbb2db381a53089e53aac240d79a401ec67a3f900f3ca538", size = 5978267 }, + { url = "https://files.pythonhosted.org/packages/f2/cb/b565f6e04fb9b2d19c3de105dec659c6d98cba17bfd371e455c11c206040/spacy-3.8.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96e70f3089f62c661895433991d1818678aa23ecd92c78eac3190892b3840787", size = 
31815281 }, + { url = "https://files.pythonhosted.org/packages/d0/e4/ede5b6741d1e180ab52efdcfede0abf81e69429c0ce5616c05a066f588da/spacy-3.8.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:10b63bd107230f214283c12b0e38c64978deb64f9fa4ed5538ec4951a019e629", size = 32106479 }, + { url = "https://files.pythonhosted.org/packages/73/94/d9a904e86021fdfa2d403a55e277c58911979aa894114abbf300ee8c1bfd/spacy-3.8.4-cp312-cp312-win_amd64.whl", hash = "sha256:3eba8fb4388f3e21e9a3af14462422ea432a8aa37327886de7603654267a1b2b", size = 11787648 }, +] + +[[package]] +name = "spacy-legacy" +version = "3.0.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/79/91f9d7cc8db5642acad830dcc4b49ba65a7790152832c4eceb305e46d681/spacy-legacy-3.0.12.tar.gz", hash = "sha256:b37d6e0c9b6e1d7ca1cf5bc7152ab64a4c4671f59c85adaf7a3fcb870357a774", size = 23806 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/55/12e842c70ff8828e34e543a2c7176dac4da006ca6901c9e8b43efab8bc6b/spacy_legacy-3.0.12-py2.py3-none-any.whl", hash = "sha256:476e3bd0d05f8c339ed60f40986c07387c0a71479245d6d0f4298dbd52cda55f", size = 29971 }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8c/5e/cf2c54fcc6cc999f64d3a34edeee1ca69d6e1f8aee40772c5f3f036296dc/sphobjinv-2.3.1.2.tar.gz", hash = "sha256:1c874a368460851352d45c92afa172b5c42653b47e2e70d78d61dac93308aa9d", size = 268026 } + +[[package]] +name = "spacy-loggers" +version = "1.0.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/67/3d/926db774c9c98acf66cb4ed7faf6c377746f3e00b84b700d0868b95d0712/spacy-loggers-1.0.5.tar.gz", hash = "sha256:d60b0bdbf915a60e516cc2e653baeff946f0cfc461b452d11a4d5458c6fe5f24", size = 20811 } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/4d/103e541e2533df159e1070cd4372b447a0b689e08a49d271b7b950e21f92/sphobjinv-2.3.1.2-py3-none-any.whl", hash = 
"sha256:66478d1787d28ef3ebeeedad57c592fdea04cf10eeed0df56307c85ab4eee789", size = 50820 }, + { url = "https://files.pythonhosted.org/packages/33/78/d1a1a026ef3af911159398c939b1509d5c36fe524c7b644f34a5146c4e16/spacy_loggers-1.0.5-py3-none-any.whl", hash = "sha256:196284c9c446cc0cdb944005384270d775fdeaf4f494d8e269466cfa497ef645", size = 22343 }, ] [[package]] @@ -4666,6 +5102,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/49/21633706dd6feb14cd3f7935fc00b60870ea057686035e1a99ae6d9d9d53/SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e", size = 1883787 }, ] +[[package]] +name = "srsly" +version = "2.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "catalogue" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/eb51b1349f50bac0222398af0942613fdc9d1453ae67cbe4bf9936a1a54b/srsly-2.5.1.tar.gz", hash = "sha256:ab1b4bf6cf3e29da23dae0493dd1517fb787075206512351421b89b4fc27c77e", size = 466464 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/08/448bcc87bb93bc19fccf70c2f0f993ac42aa41d5f44a19c60d00186aea09/srsly-2.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d0cda6f65cc0dd1daf47e856b0d6c5d51db8a9343c5007723ca06903dcfe367d", size = 636045 }, + { url = "https://files.pythonhosted.org/packages/03/8a/379dd9014e56460e71346cf512632fb8cbc89aa6dfebe31dff21c9eb37ba/srsly-2.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cf643e6f45c266cfacea54997a1f9cfe0113fadac1ac21a1ec5b200cfe477ba0", size = 634425 }, + { url = "https://files.pythonhosted.org/packages/95/69/46e672941b5f4403b0e2b14918d8e1393ca48e3338e2c01e549113261cdf/srsly-2.5.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:467ed25ddab09ca9404fda92519a317c803b5ea0849f846e74ba8b7843557df5", size = 1085032 }, + { url = 
"https://files.pythonhosted.org/packages/ce/d8/1039e663b87a06d2450148ebadc07eaf6f8b7dd7f7d5e2f4221050ce6702/srsly-2.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f8113d202664b7d31025bdbe40b9d3536e8d7154d09520b6a1955818fa6d622", size = 1089469 }, + { url = "https://files.pythonhosted.org/packages/e9/62/f819ac665ecca2659343a6c79174c582fe292829f481899f05e7a7301988/srsly-2.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:794d39fccd2b333d24f1b445acc78daf90f3f37d3c0f6f0167f25c56961804e7", size = 1052673 }, + { url = "https://files.pythonhosted.org/packages/a8/69/321a41fe4d549b96dd010b6a77657e84eb181034f9d125e2feebcd8f2e5c/srsly-2.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:df7fd77457c4d6c630f700b1019a8ad173e411e7cf7cfdea70e5ed86b608083b", size = 1062650 }, + { url = "https://files.pythonhosted.org/packages/d5/b8/3dfed2db5c7ecf275aaddb775e2ae17c576b09c848873188fce91e410129/srsly-2.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:1a4dddb2edb8f7974c9aa5ec46dc687a75215b3bbdc815ce3fc9ea68fe1e94b5", size = 632267 }, + { url = "https://files.pythonhosted.org/packages/df/9c/a248bb49de499fe0990e3cb0fb341c2373d8863ef9a8b5799353cade5731/srsly-2.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58f0736794ce00a71d62a39cbba1d62ea8d5be4751df956e802d147da20ecad7", size = 635917 }, + { url = "https://files.pythonhosted.org/packages/41/47/1bdaad84502df973ecb8ca658117234cf7fb20e1dec60da71dce82de993f/srsly-2.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7a8269c40859806d71920396d185f4f38dc985cdb6a28d3a326a701e29a5f629", size = 634374 }, + { url = "https://files.pythonhosted.org/packages/e5/2a/d73c71989fcf2a6d1fa518d75322aff4db01a8763f167f8c5e00aac11097/srsly-2.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889905900401fefc1032e22b73aecbed8b4251aa363f632b2d1f86fc16f1ad8e", size = 1108390 }, + { url = 
"https://files.pythonhosted.org/packages/35/a3/9eda9997a8bd011caed18fdaa5ce606714eb06d8dab587ed0522b3e92ab1/srsly-2.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf454755f22589df49c25dc799d8af7b47dce3d861dded35baf0f0b6ceab4422", size = 1110712 }, + { url = "https://files.pythonhosted.org/packages/8a/ef/4b50bc05d06349f905b27f824cc23b652098efd4be19aead3af4981df647/srsly-2.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cc0607c8a59013a51dde5c1b4e465558728e9e0a35dcfa73c7cbefa91a0aad50", size = 1081244 }, + { url = "https://files.pythonhosted.org/packages/90/af/d4a2512d9a5048d2b18efead39d4c4404bddd4972935bbc68211292a736c/srsly-2.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d5421ba3ab3c790e8b41939c51a1d0f44326bfc052d7a0508860fb79a47aee7f", size = 1091692 }, + { url = "https://files.pythonhosted.org/packages/bb/da/657a685f63028dcb00ccdc4ac125ed347c8bff6fa0dab6a9eb3dc45f3223/srsly-2.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:b96ea5a9a0d0379a79c46d255464a372fb14c30f59a8bc113e4316d131a530ab", size = 632627 }, + { url = "https://files.pythonhosted.org/packages/fb/f6/bebc20d75bd02121fc0f65ad8c92a5dd2570e870005e940faa55a263e61a/srsly-2.5.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:683b54ed63d7dfee03bc2abc4b4a5f2152f81ec217bbadbac01ef1aaf2a75790", size = 636717 }, + { url = "https://files.pythonhosted.org/packages/b6/e8/9372317a4742c70b87b413335adfcdfb2bee4f88f3faba89fabb9e6abf21/srsly-2.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:459d987130e57e83ce9e160899afbeb871d975f811e6958158763dd9a8a20f23", size = 634697 }, + { url = "https://files.pythonhosted.org/packages/d5/00/c6a7b99ab27b051a27bd26fe1a8c1885225bb8980282bf9cb99f70610368/srsly-2.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:184e3c98389aab68ff04aab9095bd5f1a8e5a72cc5edcba9d733bac928f5cf9f", size = 1134655 }, + { url = 
"https://files.pythonhosted.org/packages/c2/e6/861459e8241ec3b78c111081bd5efa414ef85867e17c45b6882954468d6e/srsly-2.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c2a3e4856e63b7efd47591d049aaee8e5a250e098917f50d93ea68853fab78", size = 1143544 }, + { url = "https://files.pythonhosted.org/packages/2d/85/8448fe874dd2042a4eceea5315cfff3af03ac77ff5073812071852c4e7e2/srsly-2.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:366b4708933cd8d6025c13c2cea3331f079c7bb5c25ec76fca392b6fc09818a0", size = 1098330 }, + { url = "https://files.pythonhosted.org/packages/ef/7e/04d0e1417da140b2ac4053a3d4fcfc86cd59bf4829f69d370bb899f74d5d/srsly-2.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c8a0b03c64eb6e150d772c5149befbadd981cc734ab13184b0561c17c8cef9b1", size = 1110670 }, + { url = "https://files.pythonhosted.org/packages/96/1a/a8cd627eaa81a91feb6ceab50155f4ceff3eef6107916cb87ef796958427/srsly-2.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:7952538f6bba91b9d8bf31a642ac9e8b9ccc0ccbb309feb88518bfb84bb0dc0d", size = 632598 }, + { url = "https://files.pythonhosted.org/packages/42/94/cab36845aad6e2c22ecee1178accaa365657296ff87305b805648fd41118/srsly-2.5.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84b372f7ef1604b4a5b3cee1571993931f845a5b58652ac01bcb32c52586d2a8", size = 634883 }, + { url = "https://files.pythonhosted.org/packages/67/8b/501f51f4eaee7e1fd7327764799cb0a42f5d0de042a97916d30dbff770fc/srsly-2.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6ac3944c112acb3347a39bfdc2ebfc9e2d4bace20fe1c0b764374ac5b83519f2", size = 632842 }, + { url = "https://files.pythonhosted.org/packages/07/be/5b8fce4829661e070a7d3e262d2e533f0e297b11b8993d57240da67d7330/srsly-2.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6118f9c4b221cde0a990d06a42c8a4845218d55b425d8550746fe790acf267e9", size = 1118516 }, + { url = 
"https://files.pythonhosted.org/packages/91/60/a34e97564eac352c0e916c98f44b6f566b7eb6a9fb60bcd60ffa98530762/srsly-2.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7481460110d9986781d9e4ac0f5f991f1d6839284a80ad268625f9a23f686950", size = 1127974 }, + { url = "https://files.pythonhosted.org/packages/70/a2/f642334db0cabd187fa86b8773257ee6993c6009338a6831d4804e2c5b3c/srsly-2.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e57b8138082f09e35db60f99757e16652489e9e3692471d8e0c39aa95180688", size = 1086098 }, + { url = "https://files.pythonhosted.org/packages/0d/9b/be48e185c5a010e71b5135e4cdf317ff56b8ac4bc08f394bbf882ac13b05/srsly-2.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bab90b85a63a1fe0bbc74d373c8bb9bb0499ddfa89075e0ebe8d670f12d04691", size = 1100354 }, + { url = "https://files.pythonhosted.org/packages/3a/e2/745aeba88a8513017fbac2fd2f9f07b8a36065e51695f818541eb795ec0c/srsly-2.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:e73712be1634b5e1de6f81c273a7d47fe091ad3c79dc779c03d3416a5c117cee", size = 630634 }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -4692,6 +5167,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/96/00/2b325970b3060c7cecebab6d295afe763365822b1306a12eeab198f74323/starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7", size = 73225 }, ] +[[package]] +name = "super-collections" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hjson" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b8/05/d1b50919a0d206d77255217d96dea9ab34bd1eb965a21559380c48f9517e/super_collections-0.5.3.tar.gz", hash = "sha256:94c1ec96c0a0d5e8e7d389ed8cde6882ac246940507c5e6b86e91945c2968d46", size = 10178 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6d/58de58c521e7fb79bceb4da90d55250070bb4adfa3c870b82519a561c79d/super_collections-0.5.3-py3-none-any.whl", 
hash = "sha256:907d35b25dc4070910e8254bf2f5c928348af1cf8a1f1e8259e06c666e902cff", size = 8436 }, +] + [[package]] name = "sympy" version = "1.13.3" @@ -4735,17 +5222,12 @@ wheels = [ ] [[package]] -name = "terminado" -version = "0.18.1" +name = "termcolor" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ptyprocess", marker = "os_name != 'nt'" }, - { name = "pywinpty", marker = "os_name == 'nt'" }, - { name = "tornado" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8a/11/965c6fd8e5cc254f1fe142d547387da17a8ebfd75a3455f637c663fb38a0/terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e", size = 32701 } +sdist = { url = "https://files.pythonhosted.org/packages/37/72/88311445fd44c455c7d553e61f95412cf89054308a1aa2434ab835075fc5/termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f", size = 13057 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154 }, + { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755 }, ] [[package]] @@ -4764,6 +5246,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/f8/6425ff800894784160290bcb9737878d910b6da6a08633bfe7f2ed8c9ae3/testcontainers-4.9.0-py3-none-any.whl", hash = "sha256:c6fee929990972c40bf6b91b7072c94064ff3649b405a14fde0274c8b2479d32", size = 105324 }, ] +[[package]] +name = "thinc" +version = "8.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blis" }, + { name = "catalogue" }, + { name = "confection" }, + { name = 
"cymem" }, + { name = "murmurhash" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "preshed" }, + { name = "pydantic" }, + { name = "setuptools" }, + { name = "srsly" }, + { name = "wasabi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/ff/60c9bcfe28e56c905aac8e61a838c7afe5dc3073c9beed0b63a26ace0bb7/thinc-8.3.4.tar.gz", hash = "sha256:b5925482498bbb6dca0771e375b35c915818f735891e93d93a662dab15f6ffd8", size = 193903 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/c8/13db2e346d2e199f679fc3f620da53af561ea74b43b38e5b4a0a79a12860/thinc-8.3.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:916ea79a7c7462664be9435679b7769b4fc1ecea3886db6da6118e4eb5cc8c8b", size = 843884 }, + { url = "https://files.pythonhosted.org/packages/ff/32/c25d68b5030f91c8506dfbba706f24b1cd1d0d4950cb0e3de17d176a5411/thinc-8.3.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6c985ce9cf82a611f4f348c721372d073537ca0e8b7bbb8bd865c1598ddd79d1", size = 779384 }, + { url = "https://files.pythonhosted.org/packages/5d/5f/8a88959191f8c9f7eed61a7efec45f0222720c6318c09f9a058609810128/thinc-8.3.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fff4b30f8513832d13a31486e9074a7020de3d48f8a3d1527e369c242d6ebe9", size = 3673814 }, + { url = "https://files.pythonhosted.org/packages/6f/4f/ea998b85cece6c2441a2416c795476776a5c11f7f2c7fb478a00d407d7f6/thinc-8.3.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a9ee46d19b9f4cac13a5539f97978c857338a31e4bf8d9b3a7741dcbc792220f", size = 4685083 }, + { url = "https://files.pythonhosted.org/packages/0b/d0/295add6fcac8b633877a3a8d4b323e8cac4f4078f4f48910deb8c29666cb/thinc-8.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:d08529d53f8652e15e4f3c0f6953e73f85cc71d3b6e4750d2d9ace23616dbe8f", size = 1492082 }, + { url = "https://files.pythonhosted.org/packages/85/47/68187c78a04cdc31cbd3ae393068f994b60476b5ecac6dfe7d04b124aacf/thinc-8.3.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:a8bb4b47358a1855803b375f4432cefdf373f46ef249b554418d2e77c7323040", size = 839320 }, + { url = "https://files.pythonhosted.org/packages/49/ea/066dd415e61fcef20083bbca41c2c02e640fea71326531f2619708efee1e/thinc-8.3.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:00ed92f9a34b9794f51fcd48467c863f4eb7c5b41559aef6ef3c980c21378fec", size = 774196 }, + { url = "https://files.pythonhosted.org/packages/8c/68/36c1a92a374891e0d496677c59f5f9fdc1e57bbb214c487bb8bb3e9290c2/thinc-8.3.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85691fca84a6a1506f7ddbd2c1706a5524d56f65582e76b2e260a06d9e83e86d", size = 3922504 }, + { url = "https://files.pythonhosted.org/packages/ec/8a/48e463240a586e91f83c87660986e520aa91fbd839f6631ee9bc0fbb3cbd/thinc-8.3.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eae1573fc19e514defc1bfd4f93f0b4bfc1dcefdb6d70bad1863825747f24800", size = 4932946 }, + { url = "https://files.pythonhosted.org/packages/d9/98/f910b8d8113ab9b955a68e9bbf0d5bd0e828f22dd6d3c226af6ec3970817/thinc-8.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:81e8638f9bdc38e366674acc4b63cf7c6267266a15477963a5db21b3d9f1aa36", size = 1490133 }, + { url = "https://files.pythonhosted.org/packages/90/ff/d1b5d7e1a7f95581e9a736f50a5a9aff72327ddbbc629a68070c36acefd9/thinc-8.3.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c9da6375b106df5186bd2bfd1273bc923c01ab7d482f8942e4ee528a28965c3a", size = 825099 }, + { url = "https://files.pythonhosted.org/packages/ce/0b/d207c917886dc40671361de0880ec3ea0443a718aae9dbb0a50ac0849f92/thinc-8.3.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:07091c6b5faace50857c4cf0982204969d77388d0a6f156dd2442297dceeb838", size = 761024 }, + { url = "https://files.pythonhosted.org/packages/4b/a3/3ec5e9d7cbebc3257b8223a3d188216b91ab6ec1e66b6fdd99d22394bc62/thinc-8.3.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd40ad71bcd8b1b9daa0462e1255b1c1e86e901c2fd773966601f44a95878032", size = 3710390 }, + 
{ url = "https://files.pythonhosted.org/packages/40/ee/955c74e4e6ff2f694c99dcbbf7be8d478a8868503aeb3474517277c07667/thinc-8.3.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb10823b3a3f1c6440998b11bf9a3571dd859feaed0fdb510a1c1097d9dc6a86", size = 4731524 }, + { url = "https://files.pythonhosted.org/packages/a4/44/3786431e5c1eeebed3d7a4c97122896ca6d4a502b03d02c2171c417052fd/thinc-8.3.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5e5e7bf5dae142fd50ed9785971292c4aab4d9ed18e4947653b6a0584d5227c", size = 1455883 }, +] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -5063,12 +5582,36 @@ wheels = [ ] [[package]] -name = "types-python-dateutil" -version = "2.9.0.20241206" +name = "types-beautifulsoup4" +version = "4.12.0.20250204" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-html5lib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/11/6c/00fd71754ac3babe121c73b52e0de7ec05acd627edcb7ee652223c084d69/types_beautifulsoup4-4.12.0.20250204.tar.gz", hash = "sha256:f083d8edcbd01279f8c3995b56cfff2d01f1bb894c3b502ba118d36fbbc495bf", size = 16641 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/ec/9097e9f7f5901e4d7834c7e0bc8f775f9ffa448ae31471457a1ebafeb4c5/types_beautifulsoup4-4.12.0.20250204-py3-none-any.whl", hash = "sha256:57ce9e75717b63c390fd789c787d267a67eb01fa6d800a03b9bdde2e877ed1eb", size = 17061 }, +] + +[[package]] +name = "types-html5lib" +version = "1.1.11.20241018" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a9/60/47d92293d9bc521cd2301e423a358abfac0ad409b3a1606d8fbae1321961/types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb", size = 13802 } +sdist = { url = "https://files.pythonhosted.org/packages/b6/9d/f6fbcc8246f5e46845b4f989c4e17e6fb3ce572f7065b185e515bf8a3be7/types-html5lib-1.1.11.20241018.tar.gz", hash = 
"sha256:98042555ff78d9e3a51c77c918b1041acbb7eb6c405408d8a9e150ff5beccafa", size = 11370 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/7c/f862b1dc31268ef10fe95b43dcdf216ba21a592fafa2d124445cd6b92e93/types_html5lib-1.1.11.20241018-py3-none-any.whl", hash = "sha256:3f1e064d9ed2c289001ae6392c84c93833abb0816165c6ff0abfc304a779f403", size = 17292 }, +] + +[[package]] +name = "types-requests" +version = "2.32.0.20241016" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/3c/4f2a430c01a22abd49a583b6b944173e39e7d01b688190a5618bd59a2e22/types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95", size = 18065 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/b3/ca41df24db5eb99b00d97f89d7674a90cb6b3134c52fb8121b6d8d30f15c/types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53", size = 14384 }, + { url = "https://files.pythonhosted.org/packages/d7/01/485b3026ff90e5190b5e24f1711522e06c79f4a56c8f4b95848ac072e20f/types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747", size = 15836 }, ] [[package]] @@ -5094,12 +5637,12 @@ wheels = [ ] [[package]] -name = "uri-template" -version = "1.3.0" +name = "tzdata" +version = "2025.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/31/c7/0336f2bd0bcbada6ccef7aaa25e443c118a704f828a0620c6fa0207c1b64/uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7", size = 21678 } +sdist = { url = "https://files.pythonhosted.org/packages/43/0f/fa4723f22942480be4ca9527bbde8d43f6c3f2fe8412f00e7f5f6746bc8b/tzdata-2025.1.tar.gz", hash = 
"sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694", size = 194950 } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363", size = 11140 }, + { url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762 }, ] [[package]] @@ -5177,6 +5720,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/9a/0962b05b308494e3202d3f794a6e85abe471fe3cafdbcf95c2e8c713aabd/uvloop-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5c39f217ab3c663dc699c04cbd50c13813e31d917642d459fdcec07555cc553", size = 4660018 }, ] +[[package]] +name = "wasabi" +version = "1.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/f9/054e6e2f1071e963b5e746b48d1e3727470b2a490834d18ad92364929db3/wasabi-1.1.3.tar.gz", hash = "sha256:4bb3008f003809db0c3e28b4daf20906ea871a2bb43f9914197d540f4f2e0878", size = 30391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/7c/34330a89da55610daa5f245ddce5aab81244321101614751e7537f125133/wasabi-1.1.3-py3-none-any.whl", hash = "sha256:f76e16e8f7e79f8c4c8be49b4024ac725713ab10cd7f19350ad18a8e3f71728c", size = 27880 }, +] + [[package]] name = "watchdog" version = "6.0.0" @@ -5284,12 +5839,23 @@ wheels = [ ] [[package]] -name = "webcolors" -version = "24.11.1" +name = "weasel" +version = "0.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7b/29/061ec845fb58521848f3739e466efd8250b4b7b98c1b6c5bf4d40b419b7e/webcolors-24.11.1.tar.gz", 
hash = "sha256:ecb3d768f32202af770477b8b65f318fa4f566c22948673a977b00d589dd80f6", size = 45064 } +dependencies = [ + { name = "cloudpathlib" }, + { name = "confection" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "smart-open" }, + { name = "srsly" }, + { name = "typer" }, + { name = "wasabi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/1a/9c522dd61b52939c217925d3e55c95f9348b73a66a956f52608e1e59a2c0/weasel-0.4.1.tar.gz", hash = "sha256:aabc210f072e13f6744e5c3a28037f93702433405cd35673f7c6279147085aa9", size = 38417 } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/e8/c0e05e4684d13459f93d312077a9a2efbe04d59c393bc2b8802248c908d4/webcolors-24.11.1-py3-none-any.whl", hash = "sha256:515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9", size = 14934 }, + { url = "https://files.pythonhosted.org/packages/2a/87/abd57374044e1f627f0a905ac33c1a7daab35a3a815abfea4e1bafd3fdb1/weasel-0.4.1-py3-none-any.whl", hash = "sha256:24140a090ea1ac512a2b2f479cc64192fd1d527a7f3627671268d08ed5ac418c", size = 50270 }, ] [[package]]