# In [None]:
import os
from paperqa import Settings, Docs

async def rag_agent(question, vector_store, rag_model) -> str:
    """
    Answer a question with the PaperQA2 RAG over an already-populated document store.

    The module-level ``settings`` object is updated in place (temperature forced
    to 0.0, ``llm`` set to ``rag_model``) and then handed to the query, so these
    two values persist on ``settings`` after the call returns.

    Args:
        question: Question to be answered
        vector_store: Object exposing an awaitable ``aquery(query=..., settings=...)``
            (a PaperQA2 document collection) that is searched for the answer
        rag_model: LLM assigned to ``settings.llm`` to power the PaperQA2 RAG
    Returns:
        The "answer" entry of the dumped query response
    """
    # Configure the shared settings before querying: zero temperature, caller's model.
    settings.temperature = 0.0
    settings.llm = rag_model

    query_result = await vector_store.aquery(query=question, settings=settings)

    # Only the answer text is handed back; the rest of the dumped response is dropped.
    return query_result.model_dump()["answer"]

# "directory" is the directory in which the papers used by the RAG can be found
directory = "/home/adrian/Documents/University Work/Part III Project/cmbagent_dataset/Source_Papers"

# Shared PaperQA2 settings: used below while indexing the papers and read
# (and updated in place) by rag_agent() at query time.
settings = Settings(
    parsing={
        "use_doc_details": False,  # Disable metadata extraction
        "disable_doc_valid_check": True,  # Skip document validation
    },
    #agent={
        #"return_paper_metadata": False,  # Don't include metadata in results
        #"agent_config": {  # Must initialize as dict
        #    "metadata_client": None  # Explicitly disable client
        #}
    #}
)

vector_store = Docs()
# valid extensions include .pdf, .txt, .md, and .html
# os.listdir() returns every directory entry in arbitrary order, so keep only
# regular files (a sub-directory cannot be added as a paper) and sort the paths
# so that the indexing order is reproducible between runs.
full_paths = sorted(
    path
    for path in (
        os.path.abspath(os.path.join(directory, f)) for f in os.listdir(directory)
    )
    if os.path.isfile(path)
)

# NOTE(review): Docs.add appears to be the synchronous counterpart of the async
# API used in rag_agent(); inside an already-running event loop (e.g. a notebook
# cell) `await vector_store.aadd(...)` may be needed instead - confirm.
for doc in full_paths:
    vector_store.add(doc, settings=settings)