In [1]:
#Need to run 'ollama run llama3-chatqa' to start the server
#All imports needed for test
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from typing_extensions import List, TypedDict
from langgraph.graph import START, StateGraph

In [2]:
#Define Model, Embeddings, and Embeddings Storage
#One constant for the Ollama model tag so the LLM and the embeddings always use the same model
OLLAMA_MODEL_NAME = "llama3-chatqa"

model = OllamaLLM(model=OLLAMA_MODEL_NAME)
embeddings = OllamaEmbeddings(model=OLLAMA_MODEL_NAME)
#Embeddings are kept in an in-memory store, so it has to be refilled after every kernel restart
vector_store = InMemoryVectorStore(embeddings)

In [3]:
#Read the combined parliament reports CSV through the langchain CSV document loader
loader = CSVLoader(file_path="../data/combined_parliament_reports.csv")
parliament_data = loader.load()

In [4]:
#Too many documents to fit in the model context, so set up RAG: split the data into smaller chunks and load them into the vector store
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(parliament_data)

#Deterministic ids keep this cell idempotent: re-running it replaces the stored chunks
#instead of adding a second copy of every chunk under a new random id
chunk_ids = [f"parliament-chunk-{position}" for position in range(len(all_splits))]
document_ids = vector_store.add_documents(documents=all_splits, ids=chunk_ids)

In [5]:
#Custom prompt that asks the model to generate a summary from an input entity and its retrieved context
def create_entity_summary_prompt() -> str:
    """
    Build the template text used to ask the model for a summary of an entity.

    The text carries two placeholders, {entity} and {context}, that are filled in
    when the prompt is invoked.

    Returns:
    str: The prompt template text with entity and context as input variables.
    """
    # Every non-empty line keeps its four leading spaces and the text starts with a
    # newline, so the joined result matches the template character for character.
    prompt_lines = (
        "",
        "    You are a service that allows users to query parliamentary meeting records by an entity that can be a name, topic or anything else the user types.",
        "    You are to generate a answer that will give insight into the entity that the user has entered.",
        "    You will be given context which is from the parliamentary meeting records that are relevant to the entity, use this context to help you .",
        "",
        "    The user has entered the entity: '{entity}'",
        "    Below is the context that you can use to generate the answer: '{context}'",
    )
    return "\n".join(prompt_lines)

# Wrap the raw template text in a PromptTemplate that expects "entity" and "context".
entity_summary_prompt = PromptTemplate(
    template=create_entity_summary_prompt(),
    input_variables=["entity", "context"],
)


In [6]:
#Create a class for response template
class Response(TypedDict):
    """
    State passed between the graph steps for a single entity query.

    Variables:
    entity (str): The entity text typed by the user.
    context (List[Document]): Documents retrieved for the entity, used as context for the answer.
    entity_summary_answer (str): The summary text produced by the model.
    """
    entity: str
    context: List[Document]
    entity_summary_answer: str

In [7]:
#Create two methods, one to retrieve the context (RAG) and the other to generate the answer
def retrieve(response: Response, k: int = 15) -> dict:
    """
    Look up the stored chunks most similar to the entity the user entered (the retrieval step of RAG).

    Parameters:
    response (Response): Current state; only its "entity" value is read here.
    k (int): Number of results requested from the vector store. Defaults to 15.

    Returns:
    dict: A dictionary with the single key "context" holding the retrieved documents.
    """
    entity_query = response["entity"]
    matching_chunks = vector_store.similarity_search(entity_query, k=k)
    return {"context": matching_chunks}


def generate(response: Response) -> dict:
    """
    Ask the model for a summary of the entity, using the retrieved documents as context.

    Parameters:
    response (Response): Current state; its "entity" and "context" values are read here.

    Returns:
    dict: A dictionary with the single key "entity_summary_answer" holding the model output.
    """
    # Concatenate the text of every retrieved document, separated by a blank line.
    page_texts = [doc.page_content for doc in response["context"]]
    combined_context = "\n\n".join(page_texts)

    filled_prompt = entity_summary_prompt.invoke(
        {"entity": response["entity"], "context": combined_context}
    )
    answer = model.invoke(filled_prompt)
    return {"entity_summary_answer": answer}

In [8]:
#Create a graph that connects the two steps; LangGraph is used for async and batch calls and for better integration with langchain
graph_builder = StateGraph(Response)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [9]:
#Run the graph for a single entity and print each field of the result in turn
result = graph.invoke({"entity": "Convener"})

for field_name in ("entity", "context", "entity_summary_answer"):
    print(result[field_name])

Convener
Convener is the person who moderates parliamentary meetings and ensures that they run smoothly and efficiently.
