In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
mistral_api_key = os.environ["MISTRAL_API_KEY"]

In [2]:
from langchain_mistralai import ChatMistralAI

chatModel = ChatMistralAI(
    mistral_api_key=mistral_api_key,
    model="mistral-large-latest",
    temperature=1,
    max_retries=2,
    # other params...
)

In [3]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)


In [5]:
docs = loader.load()

In [6]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

In [7]:
splits = text_splitter.split_documents(docs)

In [8]:
vectorstore = Chroma.from_documents(documents=splits, embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"))

  from tqdm.autonotebook import tqdm, trange


In [9]:
retriever = vectorstore.as_retriever()

prompt = hub.pull("rlm/rag-prompt")

### rlm/rag-prompt

https://smith.langchain.com/hub/rlm/rag-prompt?organizationId=1e4ec621-6e4f-4e64-bff1-6a2b0619ab75

Prompt:
    
    human
    
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
    
    Question: {question} 
    
    Context: {context} 
    
    Answer:


In [10]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [12]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | chatModel
    | StrOutputParser()
)

In [13]:
response = rag_chain.invoke("What is Task Decomposition?")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [14]:
response

'Task decomposition is a strategy where a complex task is broken down into smaller, simpler steps. This is often done by instructing a model to "think step by step," transforming big tasks into multiple manageable ones. It can be achieved through simple prompting, task-specific instructions, or human inputs.'