# Combination tests

Some tests of how well an agent performs when given multiple tools.

In [11]:
# Settings

import os
from dotenv import load_dotenv

# --- Zotero settings ---
# Read the .env file first so the credentials below can be looked up in the environment.
load_dotenv()

# Zotero API key, sourced from .env - read-only access is enough.
zotero_api_key = os.getenv("ZOTERO_API_KEY_READ")
# Library ID of the Zotero library to be accessed; here a test group library.
zotero_library_id = os.getenv("ZOTERO_TEST_GROUP_ID")
# Library type, either 'user' or 'group'. Must correspond to the library ID
# ('user' for your personal library).
zotero_library_type = "group"
# Number of documents returned from the Zotero library.
zotero_docs_returned = 20
# Return full text from documents in the Zotero library, if available.
zot_get_fulltext = False
# "top" (top level items) or "items" (incl. sub level items, e.g. deleted).
zot_type = "top"

# --- PDF retriever settings ---
# Directory with the literature to load for the RAG.
lit_directory = "./test_data/"
# Directory in which the vector store is saved.
db_directory = "./test_chroma_db"
# Regex that extracts author / year / title metadata from the file naming scheme
# used in the literature directory. Adjust as needed!
pattern = r"[^\\/]*[\\/](?P<author>.+?) (?P<year>\d{4}) (?P<title>.+)\.pdf"
# Shorten documents from whole pages to chunks? (The chunk size is set where the splitter is created.)
short_docs = True
# Number of docs returned by the retriever(s).
docs_returned = 6
# Retrieval strategy: "similarity" or "mmr".
retriever_type = "mmr"


# --- Model settings ---

# Embedding model used for retrieval.
embedding_model = "nomic-embed-text"

# Text generation model: generates answers to questions, with retrieved documents as context.
# Alternatives noted by the author:
#   "llama3.2"
#   "phi4"  - rather powerful, but requires more GPU compute (slower than llama3.2 if GPU memory is short)
textgen_model = "o3-mini-2025-01-31"

# Reasoning model: decides whether or not to query the retriever.
# Alternatives noted by the author:
#   "nemotron-mini"  - smaller, but still capable of the simple retrieve-or-not decision
#   "llama3.2"       - fast, but not very good at reasoning tasks
#   "qwq"            - powerful experimental reasoning model; rather large (20b), so it needs a fair
#                      bit of GPU memory or will be very slow
reasoning_model = "o3-mini-2025-01-31"

# Self-query model: generates the queries for the retriever.
# Alternatives noted by the author:
#   "phi4"              - seems better at query construction than llama3.2, but may be slower
#   "deepseek-r1:1.5b"
selfquery_model = "o3-mini-2025-01-31"


# Maximum number of tokens to generate, shared across models.
max_tokens = 2048
# Sampling temperature, shared across models.
temperature = 0

# Print additional information on routing decisions etc.
verbose = True


## Setup

In [12]:
# set up the models

from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from openai import OpenAI
from langchain_openai import ChatOpenAI

load_dotenv()  # re-read .env so this cell can also find OPENAI_API_KEY when run on its own

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) # get API key from .env file

# IDs reported by the OpenAI client's model listing. A configured model that is not in this
# list is instantiated through Ollama instead.
openai_models = [model.id for model in client.models.list()]

models = [textgen_model, reasoning_model, selfquery_model]
names = ["textgen_llm", "reasoning_llm", "selfquery_llm"]

# Build one chat model per role and expose it as a notebook-level variable
# (textgen_llm, reasoning_llm, selfquery_llm) for the cells below.
for name, model in zip(names, models):
    if model in openai_models:
        # Substring checks on the model ID. Both operands have to be string literals:
        # a bare `o1` is an undefined name and would raise NameError for every OpenAI
        # model whose ID does not contain "o3".
        if "o3" in model or "o1" in model:
            # These models are created without a temperature argument.
            print(f"Temperature parameter not supported for model {model}.")
            llm = ChatOpenAI(
                model = model,
                max_tokens = max_tokens, # max number of tokens to generate
            )
        else:
            llm = ChatOpenAI(
                model = model,
                temperature = temperature,
                max_tokens = max_tokens, # max number of tokens to generate
            )
    else:
        llm = ChatOllama(
            model = model,
            temperature = temperature,
            num_predict = max_tokens, # max number of tokens to generate
        )
    globals()[name] = llm

embeddings = OllamaEmbeddings(model = embedding_model)

Temperature parameter not supported for model o3-mini-2025-01-31.
Temperature parameter not supported for model o3-mini-2025-01-31.
Temperature parameter not supported for model o3-mini-2025-01-31.


In [13]:
# set up the zotero retriever

from pyzotero import zotero
## get local ZoteroRetriever implementation until it is added to langchain_community
import os
import sys

# Location of the local langchain-zotero-retriever sources. The default is the original
# machine-specific path; set ZOTERO_RETRIEVER_SRC (e.g. in .env) to point somewhere else.
zotero_retriever_src = os.environ.get(
    "ZOTERO_RETRIEVER_SRC",
    "D:/academicCloud/Python Scripts/langchain-zotero-retriever/src",
)
if zotero_retriever_src not in sys.path: # do not stack duplicate entries when the cell is re-run
    sys.path.append(zotero_retriever_src)
from langchain_zotero_retriever.retrievers import ZoteroRetriever

zot = zotero.Zotero(zotero_library_id, zotero_library_type, zotero_api_key) # zot for direct lib access

## pass settings for retriever
zotero_retriever = ZoteroRetriever(k = zotero_docs_returned,
                                   get_fulltext = zot_get_fulltext,
                                   library_id = zotero_library_id,
                                   library_type = zotero_library_type,
                                   api_key = zotero_api_key,
                                   type = zot_type,)


In [14]:
# Set up the PDF vector store

from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from re import match
import os

## check if the folder db_directory already exists. If not, create it and load the documents into the vector store. Else, use the existing vector store. Makes a new DB for shortened docs

# Chunked documents get their own database directory. The endswith guard keeps the cell
# idempotent: without it every re-run would append another "_short" and build yet another DB.
if short_docs and not db_directory.endswith("_short"):
    db_directory += "_short"

# Decide this before the store is opened, in case opening it creates the directory.
needs_ingest = not os.path.exists(db_directory)

if needs_ingest:
    # load documents
    loader = PyPDFDirectoryLoader(lit_directory)
    docs = loader.load() # metadata tracks paper and page number; each page is a single document

    # extract metadata from file names (according to the pattern specified above);
    # documents whose source path does not match the pattern keep only the loader's metadata
    docs = [
        Document(
            doc.page_content,
            metadata={
                **doc.metadata,
                **({
                    "author_name": metadata.group("author"),
                    "year": metadata.group("year"),
                    "title": metadata.group("title")
                } if (metadata := match(pattern, doc.metadata["source"])) else {})
            }
        )
        for doc in docs
    ]

    # optional step: split the docs into smaller chunks to fit into context window of the model (model dependent, necessary for small models) -!! test this, shorter chunks may lead to bad retrieval results !!-
    #           potential remedy: use whole pages, but use the model to summarise each page before chaining it into the context
    if short_docs:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,  # chunk size (characters)
            chunk_overlap=200,  # chunk overlap (characters)
            add_start_index=True,  # track index in original document
        )

        docs = text_splitter.split_documents(docs)

# make doc vector store. as the vector store can get quite large (and takes time to initialize in memory), we use a chroma database to store the vectors
# The store is opened only after loading/splitting succeeded, the same order as before.
vector_store = Chroma(
    collection_name="lit_helper_test",
    embedding_function=embeddings,
    persist_directory=db_directory,  # save data locally
)

if needs_ingest:
    vector_store.add_documents(docs) # add docs


In [15]:
# set up the self querying retriever

from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.chains.query_constructor.base import get_query_constructor_prompt
from langchain.chains.query_constructor.base import StructuredQueryOutputParser
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_community.query_constructors.chroma import ChromaTranslator

metadata_field_info = [
    # note that the attribute infos can have a vast influence on the performance of the retriever (likely model dependent)
    #   The name seems to be most important when the model chooses the filter - with notable side effects for other attributes when changing the name of one!
    #   Beware that the names here need to correspond to the names in the metadata of the documents set above (changes in the name here require updating the chroma database)
    AttributeInfo(
        name = "author_name",
        description="The paper author's last name. Abbreviated with et al. for more than two authors.",
        type="string",
    ),
    AttributeInfo(
        name = "year",
        description="The year of publication",
        type="string",
    ),
    AttributeInfo(
        name = "title",
        description="The title of the paper",
        type="string",
    ),
    # NOTE(review): "page" is declared as a string here; the PDF loader may store it as an
    # integer, in which case string filters on it would not match - confirm against the stored metadata.
    AttributeInfo(
        name = "page",
        description="The page number of the paper",
        type="string",
    ),
    AttributeInfo(
        name = "source",
        description="The file path of the paper",
        type="string",
    ),
]

document_content_description = "A passage from a scientific paper"


retrieval_prompt = get_query_constructor_prompt( # we're using a premade prompt for the query constructor
    document_contents=document_content_description,
    attribute_info=metadata_field_info,
    # examples = retriever examples, # we can optionally add examples for the model to fine tune the retrieval. See https://github.com/langchain-ai/langchain/blob/master/cookbook/self_query_hotel_search.ipynb
)

output_parser = StructuredQueryOutputParser.from_components( # we're also using a premade output parser
    fix_invalid=True, # this automatically fixes invalid queries (that is, it should avoid errors being thrown when the model constructs an invalid query)
    )

query_constructor = retrieval_prompt | selfquery_llm | output_parser # make the query constructor chain

## pass settings (adjust as needed)
if retriever_type == "mmr":
    search_type="mmr" # MMR (Maximal Marginal Relevance) aims to diversify search results. the amount of diversification is set via the lambda_mult parameter
    search_kwargs={"k": docs_returned, # make sure the number of documents passed (k) fits into the context window
                       "fetch_k": docs_returned * 5, # could be adjusted, potentially run tests
                       "lambda_mult": 0.8 # amount of diversification, with 0 being maximum diversity
                       }
elif retriever_type == "similarity":
    search_type="similarity" # similarity score; optionally with threshold ("similarity_score_threshold" with "score_threshold" kwarg)
    search_kwargs={"k": docs_returned} # make sure the number of documents passed (k) fits into the context window
else:
    # Fail loudly: otherwise search_type/search_kwargs would be undefined on a fresh kernel,
    # or silently reused from an earlier run of this cell.
    raise ValueError(
        f'Unsupported retriever_type {retriever_type!r}; expected "mmr" or "similarity".'
    )


self_query_retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=vector_store,
    structured_query_translator=ChromaTranslator(), # we need to specify the translator for our database scheme, e.g. Chroma
    search_type=search_type,
    search_kwargs=search_kwargs,
)

if verbose: print(retrieval_prompt.format(query="{query}")) # print out the prompt used for the query constructor

Your goal is to structure the user's query to match the request schema provided below.

<< Structured Request Schema >>
When responding use a markdown code snippet with a JSON object formatted in the following schema:

```json
{
    "query": string \ text string to compare to document contents
    "filter": string \ logical condition statement for filtering documents
}
```

The query string should contain only text that is expected to match the contents of documents. Any conditions in the filter should not be mentioned in the query as well.

A logical condition statement is composed of one or more comparison and logical operation statements.

A comparison statement takes the form: `comp(attr, val)`:
- `comp` (eq | ne | gt | gte | lt | lte | contain | like | in | nin): comparator
- `attr` (string):  name of attribute to apply the comparison to
- `val` (string): is the comparison value

A logical operation statement takes the form `op(statement1, statement2, ...)`:
- `op` (and | or | not