#### Run searxng locally if you don't have a hosted instance.

```
# Create the host directory that will receive the generated searxng config
mkdir -p temp/searxng
cd temp
# Host port on which the container's port 8080 is published
export PORT=8080
docker run --rm \
             -d -p ${PORT}:8080 \
             -v "${PWD}/searxng:/etc/searxng:z" \
             -e "BASE_URL=http://localhost:$PORT/" \
             -e "INSTANCE_NAME=my-instance" \
             --name "searxng" \
             searxng/searxng
```

When you run the container for the first time, it creates the searxng config files in the provided directory. The default searxng config disables JSON output for the API endpoints. To enable it:

Stop the container: `docker stop searxng`.
Open `./temp/searxng/settings.yml` and add `- json` under the `search` > `formats` section (after `- html`).

Run the docker run command again.

In [1]:
%load_ext autoreload

%autoreload 1

In [2]:
from pprint import pprint
import nest_asyncio

# Patch asyncio so an event loop can be re-entered while Jupyter's own loop is running.
nest_asyncio.apply()

### Search tools

In [3]:
# search tools
from akd.tools.search import (
    SearxNGSearchTool,
    SearxNGSearchToolConfig,
    SearxNGSearchToolInputSchema,
)

In [4]:
# scraper tools
from akd.tools.scrapers.web_scrapers import SimpleWebScraper, Crawl4AIWebScraper
from akd.tools.scrapers import ScraperToolConfig, ScraperToolInputSchema, DoclingScraper

from akd.tools.scrapers.pdf_scrapers import (
    SimplePDFScraper,
)
from akd.tools.scrapers.composite import CompositeScraper


from akd.tools.scrapers.resolvers import (
    ArxivResolver,
    ADSResolver,
    IdentityResolver,
    ResolverInputSchema,
)
from akd.tools.scrapers.composite import ResearchArticleResolver

In [17]:
from akd.agents.factory import create_query_agent
from akd.agents.extraction import (
    IntentBasedExtractionSchemaMapper,
    EstimationExtractionAgent,
    ExtractionInputSchema,
)

from akd.agents.litsearch import LitAgent, LitAgentInputSchema
from akd.agents.intents import Intent, IntentInputSchema, IntentAgent
import instructor

# Build Tools and Agents

In [6]:
# Search tool pointed at the local SearxNG instance; the port in base_url matches
# the PORT=8080 published by the docker command at the top of this notebook.
SEARCH_TOOL = SearxNGSearchTool(
    config=SearxNGSearchToolConfig(
        base_url="http://localhost:8080",
        max_results=5,
        engines=["google", "arxiv", "google_scholar"],
        debug=True,
    )
)

In [9]:
# Smoke-test the search tool with a single query.
# NOTE(review): the debug log for this call reports max_results=10 although the tool
# config sets max_results=5 — confirm which limit actually applies.
await SEARCH_TOOL.arun(SearxNGSearchToolInputSchema(queries=["landslides nepal"]))

[32m2025-07-29 15:31:35.521[0m | [34m[1mDEBUG   [0m | [36makd._base[0m:[36marun[0m:[36m231[0m - [34m[1mRunning SearxNGSearchTool with params: queries=['landslides nepal'] category='science' max_results=10[0m
[32m2025-07-29 15:31:35.521[0m | [34m[1mDEBUG   [0m | [36makd.tools.search[0m:[36m_fetch_search_results_paginated[0m:[36m274[0m - [34m[1mFetching page 1 for query: landslides nepal[0m
[32m2025-07-29 15:31:36.633[0m | [34m[1mDEBUG   [0m | [36makd.tools.search[0m:[36m_fetch_search_results_paginated[0m:[36m283[0m - [34m[1mFetched 58 results for page 1[0m
[32m2025-07-29 15:31:36.736[0m | [34m[1mDEBUG   [0m | [36makd.tools.search[0m:[36m_fetch_search_results_paginated[0m:[36m302[0m - [34m[1mFetched 24 results across 1 pages for query: landslides nepal[0m
[32m2025-07-29 15:31:36.742[0m | [34m[1mDEBUG   [0m | [36makd.tools.search[0m:[36m_arun[0m:[36m358[0m - [34m[1m[{'template': 'paper.html', 'url': 'http://arxiv.org/abs

SearxNGSearchToolOutputSchema(results=[SearchResultItem(url=AnyUrl('http://arxiv.org/abs/2507.08742v1'), title='Influence of river incision on landslides triggered in Nepal by the Gorkha earthquake: Results from a pixel-based susceptibility model using inlabru', query='landslides nepal', pdf_url=AnyUrl('http://arxiv.org/pdf/2507.08742v1'), content='This study presents a comprehensive framework for modelling earthquake-induced landslides (EQILs) through a channel-based analysis of landslide centroid distributions. A key innovation is the incorporation of the normalised channel steepness index ($k_{sn}$) as a physically meaningful and novel covariate, inferring hillslope erosion and fluvial incision processes. Used within spatial point process models, $k_{sn}$ supports the generation of landslide susceptibility maps with quantified uncertainty. To address spatial data misalignment between covariates and landslide observations, we leverage the inlabru framework, which enables coherent int

In [10]:
# NOTE(review): scraper_cfg is not passed to anything in the visible cells —
# confirm whether it is still needed.
scraper_cfg = ScraperToolConfig()
# Composite of four scrapers. The debug log of the scrape below shows DoclingScraper
# (listed first) being run first; presumably the rest are fallbacks — TODO confirm.
SCRAPER = CompositeScraper(
    DoclingScraper(),
    Crawl4AIWebScraper(),
    SimpleWebScraper(),
    SimplePDFScraper(),
    debug=True,
)

[32m2025-07-29 15:31:38.862[0m | [34m[1mDEBUG   [0m | [36makd.tools.scrapers.omni[0m:[36m_setup_converter[0m:[36m148[0m - [34m[1mDocling format options :: {<InputFormat.HTML: 'html'>: HTMLFormatOption(pipeline_cls=<class 'docling.pipeline.simple_pipeline.SimplePipeline'>, pipeline_options=PipelineOptions(create_legacy_output=True, document_timeout=None, accelerator_options=AcceleratorOptions(num_threads=4, device='auto', cuda_use_flash_attention2=False), enable_remote_services=False, allow_external_plugins=False), backend=<class 'docling.backend.html_backend.HTMLDocumentBackend'>), <InputFormat.PDF: 'pdf'>: PdfFormatOption(pipeline_cls=<class 'docling.pipeline.standard_pdf_pipeline.StandardPdfPipeline'>, pipeline_options=PdfPipelineOptions(create_legacy_output=True, document_timeout=None, accelerator_options=AcceleratorOptions(num_threads=4, device='auto', cuda_use_flash_attention2=False), enable_remote_services=False, allow_external_plugins=False, artifacts_path=None, ima

In [13]:
# Scrape a sample PDF through the composite scraper and pretty-print the extracted text.
_result = await SCRAPER.arun(
    ScraperToolInputSchema(
        # url="https://www.sciencedirect.com/science/article/pii/S0048969722074824"
        url="https://www.wordstemplates.org/wp-content/uploads/2021/06/Statement-of-Purpose-Format.pdf"
    )
)
pprint(_result.content)

[32m2025-07-29 15:32:00.048[0m | [34m[1mDEBUG   [0m | [36makd._base[0m:[36marun[0m:[36m231[0m - [34m[1mRunning CompositeScraper with params: url=AnyUrl('https://www.wordstemplates.org/wp-content/uploads/2021/06/Statement-of-Purpose-Format.pdf') include_links=True[0m
[32m2025-07-29 15:32:00.049[0m | [34m[1mDEBUG   [0m | [36makd.tools.scrapers.composite[0m:[36m_arun[0m:[36m81[0m - [34m[1mRunning scraper=DoclingScraper for url=AnyUrl('https://www.wordstemplates.org/wp-content/uploads/2021/06/Statement-of-Purpose-Format.pdf') include_links=True[0m
[32m2025-07-29 15:32:00.050[0m | [34m[1mDEBUG   [0m | [36makd._base[0m:[36marun[0m:[36m231[0m - [34m[1mRunning DoclingScraper with params: url=HttpUrl('https://www.wordstemplates.org/wp-content/uploads/2021/06/Statement-of-Purpose-Format.pdf') include_links=True[0m
[32m2025-07-29 15:32:03.156[0m | [34m[1mDEBUG   [0m | [36makd._base[0m:[36marun[0m:[36m364[0m - [34m[1mRunning _DoclingMetadataEx

('## Statement of Purpose\n'
 '\n'
 'Philip Guo (pgbovine@mit.edu)\n'
 '\n'
 'I want to pursue a Ph.D. in Computer Science, and my career aspiration is to '
 'become a professor. My research interests include program analysis, software '
 'engineering, operating systems, and human-computer interaction (HCI).\n'
 '\n'
 'My motivation for pursuing research related to program analysis stems from '
 'learning about the challenges of software development during 3 summers spent '
 'as a software engineering intern (1 at Codehost, a start-up, and 2 at '
 'Teradyne, a large corporation). Although I found the actual development work '
 'to be educational, what really sparked my interest in research was '
 'experiencing first-hand the challenges that software engineers face daily.\n'
 '\n'
 'I aspire to create tools that can increase programmer productivity and '
 'improve software quality and security. As a first step towards this goal, '
 'for the past 2 years I have been working with Prof. Mi

In [18]:
# Resolver chain: arXiv, then ADS, then an identity resolver.
# Presumably resolvers are tried in the order given, with IdentityResolver as the
# pass-through fallback — TODO confirm.
ARTICLE_RESOLVER = ResearchArticleResolver(
    ArxivResolver(), ADSResolver(), IdentityResolver()
)

In [19]:
# Resolve an arXiv abstract URL; the output below shows it mapped to the PDF URL.
await ARTICLE_RESOLVER.arun(ResolverInputSchema(url="https://arxiv.org/abs/2411.08181"))

[32m2025-07-29 15:32:46.825[0m | [34m[1mDEBUG   [0m | [36makd.tools.scrapers.composite[0m:[36mresolve[0m:[36m117[0m - [34m[1mUsing resolver=ArxivResolver for url=https://arxiv.org/abs/2411.08181[0m


ResolverOutputSchema(url=HttpUrl('https://arxiv.org/pdf/2411.08181.pdf'), resolver='ArxivResolver')

In [20]:
# INTENT_AGENT = create_intent_agent()
# Build the intent agent with its default configuration.
INTENT_AGENT = IntentAgent()
# Check that the agent's client is instructor's async client.
isinstance(INTENT_AGENT.client, instructor.client.AsyncInstructor)

True

In [21]:
# Classify a keyword-style query (reported as Intent.GENERAL in the output below).
await INTENT_AGENT.arun(IntentInputSchema(query="landslide nepal"))

IntentOutputSchema(intent=<Intent.GENERAL: 'General'>)

In [22]:
# Classify a question that asks for a number (reported as Intent.ESTIMATION below).
await INTENT_AGENT.arun(
    IntentInputSchema(query="What is the estimated population of London?")
)

IntentOutputSchema(intent=<Intent.ESTIMATION: 'Estimation'>)

In [24]:
# Agent that turns the user query into search queries (see its memory dump further down).
QUERY_AGENT = create_query_agent()
# Mapper from a detected intent to the schema used for extraction.
SCHEMA_MAPPER = IntentBasedExtractionSchemaMapper()

In [25]:
# Look up the extraction schema associated with the ESTIMATION intent.
await SCHEMA_MAPPER.arun(Intent.ESTIMATION)

typing.List[akd.structures.SingleEstimation]

In [26]:
# Instantiate the estimation extraction agent with its defaults.
EXTRACTION_AGENT = EstimationExtractionAgent()

In [27]:
# Sanity-check extraction on a tiny hand-written snippet and show the estimations.
# NOTE(review): this is the only synchronous `.run` call in the notebook; every other
# agent/tool call awaits `.arun` — confirm the sync path is intended here.
_result = EXTRACTION_AGENT.run(
    ExtractionInputSchema(
        query="Hello i am nishan pantha.",
        content="I am nishan. But call me Nish. You can find me at example.com",
    )
)
_result.estimations

[SingleEstimation(answer='The name is Nishan Pantha, also referred to as Nish.', related_knowledge=['User prefers to be called Nish.', 'User can be located through example.com.'], research_data=ResearchData(data_format='N/A', origin="User's personal introduction", data_url=None), methodology='Text extraction from user input', assumptions=[], confidence_level=None, validation_method=None)]

In [28]:
# Wire the agents and tools built in the cells above into one literature-search agent.
lit_agent = LitAgent(
    intent_agent=INTENT_AGENT,
    # Reuse the mapper built earlier rather than constructing a second instance,
    # consistent with how every other component is passed in.
    schema_mapper=SCHEMA_MAPPER,
    query_agent=QUERY_AGENT,
    extraction_agent=EXTRACTION_AGENT,
    search_tool=SEARCH_TOOL,
    web_scraper=SCRAPER,
    article_resolver=ARTICLE_RESOLVER,
)
# Reset history so the run starts from empty agent memories (printed in the next cell).
lit_agent.clear_history()



In [29]:
# Inspect both agent memories right after clear_history(); the output below shows them empty.
print(lit_agent.intent_agent.memory)
print(lit_agent.query_agent.memory)

[]
[]


In [30]:
# Research question fed to the literature agent; the commented lines are alternative queries.
# query = "mangrove forest statuses in cameroon"
# query = "flash flood in south east asia land cover"
query = "methods and estimation to map landslides in Nepal"

In [31]:
# Run the literature agent on the query. Per the log below, this generates search
# queries, scrapes the hits and extracts answers from each source.
result = await lit_agent.arun(LitAgentInputSchema(query=query))

[32m2025-07-29 15:33:43.379[0m | [1mINFO    [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m100[0m - [1mAnalyzing input query to generate relevant search queries...[0m
[32m2025-07-29 15:33:45.227[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m105[0m - [34m[1mGenerated search queries:[0m
[32m2025-07-29 15:33:45.228[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m107[0m - [34m[1mQuery 1: methods and estimation to map landslides in Nepal[0m
[32m2025-07-29 15:33:45.229[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m107[0m - [34m[1mQuery 2: landslide mapping methods Nepal[0m
[32m2025-07-29 15:33:45.229[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m107[0m - [34m[1mQuery 3: estimation techniques landslide susceptibility Nepal[0m
[32m2025-07-29 15:33:45.229[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[

[32m2025-07-29 15:34:59.979[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m140[0m - [34m[1mResult 3: Landslide susceptibility mapping using an integration of ... | https://www.tandfonline.com/doi/full/10.1080/19475705.2024.2396908 | [Skip to Main Content](https://www.tandfonline.com/doi/full/10.1080/19475705.2024.2396908#top-conten.. | words=15114[0m
[32m2025-07-29 15:34:59.980[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m123[0m - [34m[1mResult 4 : Scraping the url https://hdl.handle.net/11392/2358838[0m
[32m2025-07-29 15:34:59.981[0m | [34m[1mDEBUG   [0m | [36makd.tools.scrapers.composite[0m:[36mresolve[0m:[36m117[0m - [34m[1mUsing resolver=ArxivResolver for url=https://hdl.handle.net/11392/2358838[0m
[32m2025-07-29 15:34:59.981[0m | [31m[1mERROR   [0m | [36makd._base[0m:[36marun[0m:[36m239[0m - [31m[1mError running ArxivResolver: Not a valid arxiv url[0m
[32m2025-07-29 15:34:59.

[32m2025-07-29 15:35:06.531[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m140[0m - [34m[1mResult 5: Landslide susceptibility mapping using the weight of evidence method in the Tinau watershed, Nepal | https://hdl.handle.net/20.500.14017/8df80ddb-1fa4-4abd-929e-79249a9b579a | * [Skip to main navigation](https://researchportal.vub.be/en/publications/landslide-susceptibility-m.. | words=1258[0m
[32m2025-07-29 15:35:14.884[0m | [34m[1mDEBUG   [0m | [36makd.agents.litsearch[0m:[36m_arun[0m:[36m153[0m - [34m[1mSource=https://arxiv.org/pdf/2507.08742v1.pdf | Answer=estimations=[SingleEstimation(answer='The study provides a spatial modelling framework for earthquake-induced landslides (EQILs) in Nepal, utilizing the normalised channel steepness index (k_sn) as a key covariate to create susceptibility maps with quantified uncertainty.', related_knowledge=['Elevated k_sn is associated with increased landslide susceptibility but not with landsli

In [32]:
# Unwrap the agent output into its list of per-source extraction results.
# getattr falls back to `result` itself, so re-running this cell after the value
# has already been unwrapped is a no-op instead of an AttributeError.
result = getattr(result, "results", result)
pprint(result)
# Estimations extracted from the first source
result[0].result.estimations

[ExtractionDTO(source='https://arxiv.org/pdf/2507.08742v1.pdf', result=EstimationExtractionOutputSchema(estimations=[SingleEstimation(answer='The study provides a spatial modelling framework for earthquake-induced landslides (EQILs) in Nepal, utilizing the normalised channel steepness index (k_sn) as a key covariate to create susceptibility maps with quantified uncertainty.', related_knowledge=['Elevated k_sn is associated with increased landslide susceptibility but not with landslide magnitude.', 'The methodology incorporates rigorous model evaluation using proper scoring rules for accurate prediction.', 'The model is validated through cross-validation, ensuring robustness for predicting in unobserved geographical regions.'], research_data=ResearchData(data_format='GeoJSON', origin='Landslide inventory from Valagussa et al. (2022), integrating various sources covering the Gorkha earthquake events.', data_url=AnyUrl('https://example.com/landslide_inventory.geojson')), methodology='The 

[SingleEstimation(answer='The study provides a spatial modelling framework for earthquake-induced landslides (EQILs) in Nepal, utilizing the normalised channel steepness index (k_sn) as a key covariate to create susceptibility maps with quantified uncertainty.', related_knowledge=['Elevated k_sn is associated with increased landslide susceptibility but not with landslide magnitude.', 'The methodology incorporates rigorous model evaluation using proper scoring rules for accurate prediction.', 'The model is validated through cross-validation, ensuring robustness for predicting in unobserved geographical regions.'], research_data=ResearchData(data_format='GeoJSON', origin='Landslide inventory from Valagussa et al. (2022), integrating various sources covering the Gorkha earthquake events.', data_url=AnyUrl('https://example.com/landslide_inventory.geojson')), methodology='The approach leverages spatial point process modelling with inlabru for susceptibility mapping, integrating terrain feat

In [33]:
# Inspect agent memories after the run; the output below shows only the query agent recorded a turn.
print(lit_agent.intent_agent.memory)
print(lit_agent.query_agent.memory)

[]
[{'role': 'user', 'content': '{"query":"methods and estimation to map landslides in Nepal","num_queries":3}'}, {'role': 'assistant', 'content': '{"queries":["landslide mapping methods Nepal","estimation techniques landslide susceptibility Nepal","remote sensing landslide mapping Nepal"],"category":"science"}'}]


# Display result

In [34]:
from typing import List, TypeVar
from pydantic import BaseModel

# Type variable restricted to pydantic BaseModel subclasses.
T = TypeVar("T", bound=BaseModel)

In [35]:
def display_result(models_list: List[T], indent: int = 2) -> None:
    """
    Print a list of Pydantic models as indented JSON, one numbered entry each.

    A falsy list prints "[]". Otherwise a "[N items]" header is printed,
    followed by every model's JSON under its own "Item <k>:" heading.

    Args:
        models_list: Pydantic model instances to render
        indent: Indentation width forwarded to model_dump_json (default: 2)
    """
    if models_list:
        print(f"[{len(models_list)} items]")

        for position, entry in enumerate(models_list, start=1):
            # Serialize first so a failing model never leaves a dangling heading
            rendered = entry.model_dump_json(indent=indent)
            print(f"\nItem {position}:", rendered, sep="\n")
    else:
        print("[]")

In [36]:
# For each source: print its URL, then its estimations as JSON, then a separator rule.
for res in result:
    print(f"{res.source}")
    display_result(res.result.estimations)
    print("-" * 42)

https://arxiv.org/pdf/2507.08742v1.pdf
[1 items]

Item 1:
{
  "answer": "The study provides a spatial modelling framework for earthquake-induced landslides (EQILs) in Nepal, utilizing the normalised channel steepness index (k_sn) as a key covariate to create susceptibility maps with quantified uncertainty.",
  "related_knowledge": [
    "Elevated k_sn is associated with increased landslide susceptibility but not with landslide magnitude.",
    "The methodology incorporates rigorous model evaluation using proper scoring rules for accurate prediction.",
    "The model is validated through cross-validation, ensuring robustness for predicting in unobserved geographical regions."
  ],
  "research_data": {
    "data_format": "GeoJSON",
    "origin": "Landslide inventory from Valagussa et al. (2022), integrating various sources covering the Gorkha earthquake events.",
    "data_url": "https://example.com/landslide_inventory.geojson"
  },
  "methodology": "The approach leverages spatial point 

# Relevancy Check

In [37]:
from akd.tools.relevancy import (
    RelevancyChecker,
    RelevancyCheckerConfig,
    RelevancyCheckerInputSchema,
    # RelevancyCheckerSwappedInputSchema
)

In [38]:
# Relevancy checker configured with debug logging enabled.
relevancy_checker = RelevancyChecker(
    RelevancyCheckerConfig(
        debug=True,
    )
)

In [39]:
# Check the first extraction result against the original query.
# str() passes the whole result object's string form as the content (visible in the log below).
relevancy_result = await relevancy_checker.arun(
    RelevancyCheckerInputSchema(query=query, content=str(result[0]))
)

[32m2025-07-29 15:36:28.022[0m | [34m[1mDEBUG   [0m | [36makd._base[0m:[36marun[0m:[36m231[0m - [34m[1mRunning RelevancyChecker with params: query='methods and estimation to map landslides in Nepal' content="source='https://arxiv.org/pdf/2507.08742v1.pdf' result=EstimationExtractionOutputSchema(estimations=[SingleEstimation(answer='The study provides a spatial modelling framework for earthquake-induced landslides (EQILs) in Nepal, utilizing the normalised channel steepness index (k_sn) as a key covariate to create susceptibility maps with quantified uncertainty.', related_knowledge=['Elevated k_sn is associated with increased landslide susceptibility but not with landslide magnitude.', 'The methodology incorporates rigorous model evaluation using proper scoring rules for accurate prediction.', 'The model is validated through cross-validation, ensuring robustness for predicting in unobserved geographical regions.'], research_data=ResearchData(data_format='GeoJSON', origin='

In [40]:
# Show the checker's reasoning steps and score as a plain dict.
pprint(relevancy_result.model_dump())

{'reasoning_steps': ['The query specifically asks about methods and estimation '
                     'to map landslides in Nepal.',
                     'The content provided details a study that describes a '
                     'spatial modelling framework for landslides in Nepal, '
                     'including methodologies and estimations relevant to the '
                     'mapping of these landslides.',
                     'The information includes the use of the normalized '
                     'channel steepness index as a covariate, methodologies '
                     'for validating predictions, and the research data used, '
                     'all of which directly address the query.'],
 'score': 1.0}
