# LLM application with langchain

## Simple chat model

In [5]:
import getpass
import os

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

In [2]:
%pip install -qU "langchain[google-vertexai]"

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Ensure your VertexAI credentials are configured

from langchain.chat_models import init_chat_model

model = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

In [None]:
# ollama old base llm
from langchain_ollama.llms import OllamaLLM

# Instantiate the Ollama class directly.
model = OllamaLLM(model="llama3.2")

In [None]:
# ollama chat model
from langchain_ollama.chat_models import ChatOllama
model = ChatOllama(
    model="llama3.2",
    temperature=0,    
)

In [11]:
model

OllamaLLM(model='llama3.2')

In [13]:
from langchain_core.messages import HumanMessage, SystemMessage

messages = [
    SystemMessage(""),
    HumanMessage("I want to know the version of your model"),
]

model.invoke(messages)

'My knowledge cutoff is currently December 2023, but I was trained on a much larger dataset that includes information up to 2022.'

## Prompt template

In [None]:
from langchain_core.prompts import ChatPromptTemplate
# A template for RAG
system_template = (
 "You are an assistant responsible for answering questions. "
 "Please think carefully about the question and then perform a search. "
 "Use the following retrieved context to answer the question: "
 "Context: {context} "
 "Question: {input} "
 "If you don't know the answer, say you don't know; do not provide irrelevant answers. "
 "Please answer keep your answers concise."
)

prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_template), ("user", "{input}")]
)

In [12]:
prompt = prompt_template.invoke({"context": "gamechest is a company working on designing a quiz platform.",
                                 "input": "what is gamechest?"})

prompt.to_messages()

[SystemMessage(content="You are an assistant responsible for answering questions. Please think carefully about the question and then perform a search. Use the following retrieved context to answer the question: Context: gamechest is a company working on designing a quiz platform. Question: what is gamechest? If you don't know the answer, say you don't know; do not provide irrelevant answers. Please answer keep your answers concise.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='what is gamechest?', additional_kwargs={}, response_metadata={})]

In [13]:
response = model.invoke(prompt)
print(response.content)

GameChest is a company that designs and develops a quiz platform.


## Semantic search

### Document loader

In [None]:
from langchain_community.document_loaders import PyPDFLoader

file_path = "./nke-10k-2023.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()
# PyPDFLoader loads one Document object per PDF page. For each, we can easily access:
# The string content of the page;
# Metadata containing the file name and page number.
print(len(docs))

107


In [16]:
print(f"{docs[0].page_content[:200]}\n")
print(docs[0].metadata)

Table of Contents
UNITED STATES
SECURITIES AND EXCHANGE COMMISSION
Washington, D.C. 20549
FORM 10-K
(Mark One)
☑  ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(D) OF THE SECURITIES EXCHANGE ACT OF 1934
F

{'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': './nke-10k-2023.pdf', 'total_pages': 107, 'page': 0, 'page_label': '1'}


### Text splitter

In [17]:
# Split our documents into chunks of 1000 characters with 200 characters of overlap between chunks
# The overlap helps mitigate the possibility of separating a statement from important context related to it
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

516

### Embeddings into vectors

In [None]:
from langchain_ollama import OllamaEmbeddings

# Need to first pull the model from the Ollama API. llama3.2 is around 2GB.
embeddings = OllamaEmbeddings(model="llama3.2")

In [29]:
vector_1 = embeddings.embed_query(all_splits[0].page_content)
vector_2 = embeddings.embed_query(all_splits[1].page_content)

assert len(vector_1) == len(vector_2)
print(f"Generated vectors of length {len(vector_1)}\n")
print(vector_1[:10])

Generated vectors of length 3072

[0.028497243, -0.013744897, -0.019749938, -0.030489199, 0.010665384, -0.027153589, -0.01878525, -0.011177818, -0.0024496312, -0.0041826717]


### Vector stores

In [30]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embeddings)

In [31]:
ids = vector_store.add_documents(documents=all_splits)

In [35]:
results = await vector_store.asimilarity_search("When was Nike incorporated?")

print(results[0])

page_content='Table of Contents
INTERNATIONAL MARKETS
For fiscal 2023, non-U.S. NIKE Brand and Converse sales accounted for approximately 57% of total revenues, compared to 60% and 61% for fiscal 2022 and fiscal 2021,
respectively. We sell our products to retail accounts through our own NIKE Direct operations and through a mix of independent distributors, licensees and sales
representatives around the world. We sell to thousands of retail accounts and ship products from 67 distribution centers outside of the United States. Refer to Item 2.
Properties for further information on distribution facilities outside of the United States. During fiscal 2023, NIKE's three largest customers outside of the United States
accounted for approximately 14% of total non-U.S. sales.
In addition to NIKE-owned and Converse-owned digital commerce platforms in over 40 countries, our NIKE Direct and Converse direct to consumer businesses operate
the following number of retail stores outside the United States:

### Retrievers

In [36]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    return vector_store.similarity_search(query, k=1)


retriever.batch(
    [
        "How many distribution centers does Nike have in the US?",
        "When was Nike incorporated?",
    ],
)

[[Document(id='fd506ee3-1c04-4201-ba3b-bb6d724125dc', metadata={'producer': 'EDGRpdf Service w/ EO.Pdf 22.0.40.0', 'creator': 'EDGAR Filing HTML Converter', 'creationdate': '2023-07-20T16:22:00-04:00', 'title': '0000320187-23-000039', 'author': 'EDGAR Online, a division of Donnelley Financial Solutions', 'subject': 'Form 10-K filed on 2023-07-20 for the period ending 2023-05-31', 'keywords': '0000320187-23-000039; ; 10-K', 'moddate': '2023-07-20T16:22:08-04:00', 'source': './nke-10k-2023.pdf', 'total_pages': 107, 'page': 5, 'page_label': '6', 'start_index': 0}, page_content="Table of Contents\nINTERNATIONAL MARKETS\nFor fiscal 2023, non-U.S. NIKE Brand and Converse sales accounted for approximately 57% of total revenues, compared to 60% and 61% for fiscal 2022 and fiscal 2021,\nrespectively. We sell our products to retail accounts through our own NIKE Direct operations and through a mix of independent distributors, licensees and sales\nrepresentatives around the world. We sell to thous

## Retrival Augmented Generation

In [8]:
# Ollama local chat model
from langchain_ollama.chat_models import ChatOllama
model = ChatOllama(model="llama3.2",temperature=0)

In [9]:
from langchain.chat_models import init_chat_model
model = init_chat_model("gemini-2.0-flash-001", model_provider="google_vertexai")

In [2]:
# Embeddings model
from langchain_google_vertexai import VertexAIEmbeddings
embeddings = VertexAIEmbeddings(model="text-embedding-004")

# Vector store
from langchain_core.vectorstores import InMemoryVectorStore
vector_store = InMemoryVectorStore(embeddings)

In [17]:
from langchain_core.prompts import PromptTemplate

template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use 5 sentences maximum and keep the answer as concise as possible.
Context: {context}
Question: {question}
Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

In [10]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load and chunk contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Splitting documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

#  Embed the contents of each document split and insert these embeddings into a vector store
_ = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
# prompt = hub.pull("rlm/rag-prompt")

# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str


# Define Nodes/application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = custom_rag_prompt.invoke({"question": state["question"], "context": docs_content})    
    response = model.invoke(messages)
    return {"answer": response.content}


# Compile application and test/connect steps
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [16]:
result = graph.invoke({"question": "What are limitations of LLM agent?"})
print(f'Context: {result["context"]}\n\n')
print(f'Answer: {result["answer"]}')

Context: [Document(id='f3ffd328-ba0c-4a8c-b1d3-368b2c3b9788', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='}\n]\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:'), Document(id='6d944d40-f7aa-4632-9c64-98934509660e', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='}\n]\nChallenges#\nAfter going through key ideas and demos of building LLM-centered agents, I start to see a couple common limitations:'), Document(id='5ac1150b-e944-4d1f-bee7-46e91823ab40', metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Finite context length: The restricted context capacity limits the inclusion of historical information, detailed instructions, API call context, and responses. The design of the system has to work with this limited communication bandwidth, while mechanisms like self-reflection to lea