# Conversation Agent

An agent answering questions, able to retrieve the provided literature, in a convesation style

In [1]:
import os

from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama

from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA

from langchain_core.prompts import PromptTemplate

In [2]:
# set model
# run ollama serve for local API instance
# currently running llama 3.2 3b

embeddings = OllamaEmbeddings(model = "llama3.2")

llm = ChatOllama(
    model = "llama3.2",
    temperature = 0.0,
    num_predict = 512, # max number of tokens to generate
    )

## Set up  the Vectorstore Retriever

In [3]:
# check if the folder db_directory already exists. If not, creat it and load the documents into the vector store. Else, use the existing vector store. Makes a new DB for shortened docs

db_directory = "./test_chroma_db" # directory to save the vector store

short_docs = True # shorten document from pages to chunks? (set chunk size below)

retriever_type = "mmr" # "similarity"  or "mmr"

if short_docs:
    db_directory += "_short"

if not os.path.exists(db_directory):
    # load documents
    loader = PyPDFDirectoryLoader(
    "./test_data/")
    docs = loader.load() # metadata tracks paper and page number; each page is a single document

    # optional step: split the docs into smaller chunks to fit into context window of the model (model dependant, necessary for small models) -!! test this, shorter chunks may lead to bad retrieval results !!-
    #           potential remedy: use whole pages, but use the model to summarise each page before chaining it into the context
    if short_docs:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,  # chunk size (characters)
            chunk_overlap=200,  # chunk overlap (characters)
            add_start_index=True,  # track index in original document
            )
        docs = text_splitter.split_documents(docs)

    # make doc vector store. as the vector store can get quite large (and takes time to initialize in memory), we use a chroma database to store the vectors    
    vector_store = Chroma(
        collection_name="lit_helper_test",
        embedding_function=embeddings,
        persist_directory=db_directory,  # save data locally
        )
    vector_store.add_documents(docs) # add docs

else:
    vector_store = Chroma(
        collection_name="lit_helper_test",
        embedding_function=embeddings,
        persist_directory=db_directory,  # save data locally
    )


docs_returned = 6 # number of docs returned by the retriever(s)

# turn the vector store into retriever(s)

if retriever_type == "mmr":
    retriever = vector_store.as_retriever(
        search_type="mmr", # MMR (Maximal Marginal Relevance) aims to diversify search results. the amount of diversification is set via the lambda_mult parameter
        search_kwargs={"k": docs_returned, "fetch_k": 30, "lambda_mult": 0.8}, # make sure the number of documents passed (k) fits into the context window
    )

if retriever_type == "similarity":
    retriever = vector_store.as_retriever(
        search_type="similarity", # similarity score; optionally with threshold ("similarity_score_threshold" with "score_threshold" kwarg)
        search_kwargs={"k": docs_returned}, # make sure the number of documents passed (k) fits into the context window
    )

## RAG tool

Note that we're leveraging tool calling and message states in this approach. Among other things, this approach lets the model generate the query by itself (and to skip the retrieval step if the user input does not require it), rather than passing the user question as the retriever query

In [6]:
# we're using the MessageState to represent the different parts of our agent in a conversation

from langgraph.graph import MessagesState, StateGraph

graph_builder = StateGraph(MessagesState)

In [7]:
# retriever as a tool

from langchain_core.tools import tool


@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query."""
    retrieved_docs = retriever.invoke(query)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\n" f"Content: {doc.page_content}") # double check if the model is able to correctly report source and page from this format
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs


In [8]:
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode


# AIMessage
def query_or_respond(state: MessagesState):
    """Generate tool call for retrieval or respond."""
    llm_with_tools = llm.bind_tools([retrieve])
    response = llm_with_tools.invoke(state["messages"])
    # MessagesState appends messages to state instead of overwriting
    return {"messages": [response]}


# retrieval tool
tools = ToolNode([retrieve])


# Generate a response
def generate(state: MessagesState):
    """Generate answer."""
    # Get generated ToolMessages
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type == "tool":
            recent_tool_messages.append(message)
        else:
            break
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt
    docs_content = "\n\n".join(doc.content for doc in tool_messages)
    system_message_content = (
        "You are a helpful assistant for finding relevant text passages in scientific literature." 
        "Use the following pieces of retrieved context to answer the question." 
        "Each piece of content starts with the indicators 'Source' and 'Page'. Always provide these in your answer when using one of the retrieved passages."
        "If you don't know the answer, just say that you don't know, don't try to make up an answer."
        "\n\n"
        f"{docs_content}"
    )
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    # Run
    response = llm.invoke(prompt)
    return {"messages": [response]}

In [9]:
# set up the graph, including a conditional pass on the tools-based generation nodes

from langgraph.graph import END
from langgraph.prebuilt import ToolNode, tools_condition

graph_builder.add_node(query_or_respond)
graph_builder.add_node(tools)
graph_builder.add_node(generate)

graph_builder.set_entry_point("query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {END: END, "tools": "tools"},
)
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

graph = graph_builder.compile()

In [10]:
input_message = "Hello" # it seems the model is rather bad at deciding when to use the tool

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Hello
Tool Calls:
  retrieve (7fb84f48-3b1d-4a02-8284-e1a022ffa6a3)
 Call ID: 7fb84f48-3b1d-4a02-8284-e1a022ffa6a3
  Args:
    query: Hello
Name: retrieve

Source: {'page': 78, 'source': 'test_data\\Jungherr & Schroeder 2021 Digital Transformations of the Public Arena.pdf', 'start_index': 0}
Content: Sehl, A., Simon, F. M., and Schroeder, R. (2020). The Populist Campaigns
against European Public Service Media: Hot Air or Existential Threat?
International Communication Gazette.
Settle, J. E. (2018). Frenemies: How Social Media Polarizes America .
Cambridge: Cambridge University Press.
Shoemaker, P. J., and Reese, S. D. (2014).Mediating the Message in the 21st
Century, 3rd ed. New York: Routledge.
Shoemaker, P. J., and V os, T. P. (2009).Gatekeeping Theory. New York:
Routledge.
Sides, J., Tesler, M., and Vavreck, L. (2018). Identity Crisis: The 2016
Presidential Campaign and the Battle for the Meaning of America .
Princeton, NJ: Princeton University Press.
Skocpol, T., and Tervo, C. (Ed

In [14]:
# add memory for prolongued converstations. For now, saving it in memory is sufficient

from langgraph.checkpoint.memory import MemorySaver

memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

# Specify an ID for the thread
config = {"configurable": {"thread_id": "test_thread"}}

In [16]:
input_message = "What is the difference between the Public Sphere and the Public Arena?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()


What is the difference between the Public Sphere and the Public Arena?
Tool Calls:
  retrieve (864dfb90-0f63-4073-a1db-9aa3bc0496f5)
 Call ID: 864dfb90-0f63-4073-a1db-9aa3bc0496f5
  Args:
    query: The difference between the Public Sphere and the Public Arena
Name: retrieve

Source: {'page': 10, 'source': 'test_data\\Jungherr & Schroeder 2021 Digital Transformations of the Public Arena.pdf', 'start_index': 812}
Content: contemporary public arena introduces new actors and new tensions between
states, companies, and publics. A more differentiated view of grouping coun-
tries with shared conditions is needed, as is a more dynamic view of the
continuous competition for inﬂuence among states and their civil societies,
including in various macro-regions (Mann 1986; 2013; Mann & Riley 2006).
Thus we focus less on eternal system-level conditions but instead on the
transformations of the structures that constitute the public arena.
New Media Infrastructures and Their Impact on the Public Aren

In [17]:
input_message = "According to Jungherr & Schröder, what is the role of contestation in the public arena?"
# there should be a way to implement metadata search in a way that would only retrieve the Jungherr & Schröder text chunks

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()


According to Jungherr & Schröder, what is the role of contestation in the public arena?
Tool Calls:
  retrieve (4046fa8b-75c3-46b0-8244-c680c05c7256)
 Call ID: 4046fa8b-75c3-46b0-8244-c680c05c7256
  Args:
    query: role of contestation in public arena according to Jungherr & Schröder
Name: retrieve

Source: {'page': 10, 'source': 'test_data\\Jungherr & Schroeder 2021 Digital Transformations of the Public Arena.pdf', 'start_index': 812}
Content: contemporary public arena introduces new actors and new tensions between
states, companies, and publics. A more differentiated view of grouping coun-
tries with shared conditions is needed, as is a more dynamic view of the
continuous competition for inﬂuence among states and their civil societies,
including in various macro-regions (Mann 1986; 2013; Mann & Riley 2006).
Thus we focus less on eternal system-level conditions but instead on the
transformations of the structures that constitute the public arena.
New Media Infrastructures and Their 

to do:
- structured query for author, year, title, incl. metadata annotation in vector store (check how to pass filters to retriever!)
- set up an agent and see how well it does in handling the tools