## Initialize the local model

In [1]:
from langchain_community.llms import Ollama 

# Text-generation model ("mistral") accessed through the Ollama wrapper;
# every generation step in this notebook goes through this object.
llm = Ollama(model="mistral")

In [2]:
# Send one prompt to the model and display its reply (quick check that the
# local model responds before the rest of the pipeline is built).
llm.invoke("Tell me a short joke")

" Why don't scientists trust atoms?\n\nBecause they make up everything!"

## Load data for RAG

In [3]:
from langchain_community.document_loaders import WebBaseLoader 

# Source page for the RAG corpus; `docs` holds whatever the loader returns
# for this URL and is what gets split into chunks afterwards.
loader = WebBaseLoader(web_path="https://blog.langchain.dev/langgraph/")
docs = loader.load()

## Index the data

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter 

# Chunking configuration: chunks of size 1000 that overlap by 200, with
# add_start_index enabled so each chunk carries its start position.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)

In [5]:
# Break the loaded documents into chunks with the splitter configured above.
all_splits = text_splitter.split_documents(docs)

In [6]:
from langchain_community import embeddings 

# Import the class directly: reaching it through `embeddings.ollama` only works
# when that submodule has already been loaded as an attribute of the package.
from langchain_community.embeddings import OllamaEmbeddings

# Embedding model used to vectorise the chunks for the vector store.
embedding = OllamaEmbeddings(model="nomic-embed-text")

In [7]:
from langchain_community.vectorstores import Chroma 

# Build a Chroma vector store from the chunks, using `embedding` to vectorise them.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding)

In [8]:
# Expose the store as a retriever: similarity search with k=6 per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [9]:
# Query the retriever through the Runnable `invoke` API; the older
# `get_relevant_documents` method is deprecated in recent langchain-core releases.
retriever.invoke("What is LangGraph?")

[Document(page_content='"model",\n    should_continue,\n    {\n        "end": END,\n        "continue": "tools"\n    }\n)CompileAfter we define our graph, we can compile it into a runnable! This simply takes the graph definition we\'ve created so far an returns a runnable. This runnable exposes all the same method as LangChain runnables (.invoke, .stream, .astream_log, etc) allowing it to be called in the same manner as a chain.app = graph.compile()Agent ExecutorWe\'ve recreated the canonical LangChain AgentExecutor with LangGraph. This will allow you to use existing LangChain agents, but allow you to more easily modify the internals of the AgentExecutor. The state of this graph by default contains concepts that should be familiar to you if you\'ve used LangChain agents: input, chat_history, intermediate_steps (and agent_outcome to represent the most recent agent outcome)from typing import TypedDict, Annotated, List, Union\nfrom langchain_core.agents import AgentAction, AgentFinish', m

## Create a chain to contextualize the question

In [10]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 

# System instruction for the question-rewriting step: turn a follow-up into a
# standalone question without answering it. The pieces below concatenate into
# a single line of text (no embedded newlines).
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

In [11]:
# Message layout for the rewriting step: system instruction first, then the
# prior turns (filled from "chat_history"), then the newest user question.
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

In [12]:
from langchain_core.output_parsers import StrOutputParser

# Compose prompt -> llm -> StrOutputParser into a single runnable chain that
# takes {"chat_history", "question"} as input.
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [13]:
from langchain_core.messages import AIMessage, HumanMessage

# Try the rewriting chain on a follow-up ("large") that only makes sense in
# light of the earlier turns supplied as chat history.
contextualize_q_chain.invoke({
    "chat_history": [
        HumanMessage(content="What does LLM stand for?"),
        AIMessage(content="Large language model"),
    ],
    "question": "What is meant by large?",
})

' Question: What do you mean by "large" in the context of a "large language model"?'

## Create a question-answering chain with chat history

In [14]:
# System instruction for the answering step. The instruction sentences form a
# single line; the retrieved text is substituted for {context} on the next line.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n{context}"
)

In [15]:
# Message layout for the answering step: system instruction (with the
# retrieved context), then prior turns, then the current user question.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

In [16]:
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

In [17]:
def contextualized_question(input: dict):
    """Pick what should be handed to the retriever for this request.

    Args:
        input: Chain input; reads the optional "chat_history" entry and the
            required "question" entry.

    Returns:
        The raw "question" value when the chat history is missing or empty.
        Otherwise ``contextualize_q_chain`` itself is returned un-invoked; the
        surrounding chain is expected to run it on the same input.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        # Nothing to resolve against: the question already stands alone.
        return input["question"]
    return contextualize_q_chain

In [18]:
from langchain_core.runnables import RunnablePassthrough

# Full pipeline. The assign step adds a "context" entry to the input: it
# settles on a search query (see contextualized_question), retrieves matching
# chunks and joins their text. The enriched input then fills qa_prompt, whose
# output is sent to the model.
rag_chain = (
    RunnablePassthrough.assign(context=contextualized_question | retriever | format_docs)
    | qa_prompt
    | llm
)

## Inference

In [19]:
# First turn: the history is empty, so the question is used for retrieval as-is.
chat_history = []
question = "What is LangGraph?"

ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

In [20]:
# Display the answer returned by the RAG chain for the first question.
ai_msg 

' LangGraph is a module built on top of LangChain, allowing for the creation of cyclical graphs which is useful for agent runtimes. It provides an interface to create state machines specified as graphs and exposes a narrow interface over LangChain.'

In [21]:
# Record the finished turn. `llm` is a text-completion model, so `ai_msg` is a
# plain string; wrap it in AIMessage so the history keeps the assistant role
# (a bare string in the message list is not tagged as an assistant turn).
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=ai_msg),
    ]
)

In [22]:
# Follow-up turn: "it" refers to the previous exchange, and with a non-empty
# history the question goes through the rewriting chain before retrieval.
second_question = "What is it used for?"

rag_chain.invoke({"question": second_question, "chat_history": chat_history})

' LangGraph is a Python module built on top of LangChain, designed to enable the creation of cyclical graphs, which is particularly useful for agent runtimes. It provides an interface to create state machines specified as graphs and exposes a narrow interface over LangChain. LangGraph allows for modifications such as forcing tool calls, adding human-in-the-loop steps, managing agent steps, and returning output in specific formats, among other things.'

Let's connect: [YouTube](http://youtube.com/tirendazacademy) | [Medium](http://tirendazacademy.medium.com) | [X](http://x.com/tirendazacademy) | [LinkedIn](https://www.linkedin.com/in/tirendaz-academy) 😎