In [3]:
import getpass
import os

# Turn on LangSmith tracing through its environment flag.
os.environ["LANGSMITH_TRACING"] = "true"
# Prompt only when no key is configured yet, so re-running this cell (or
# starting Jupyter with LANGSMITH_API_KEY already exported) does not overwrite
# a working key. The prompt is labelled so it is clear which secret is being
# requested; same pattern as the Mistral key prompt further down.
if not os.environ.get("LANGSMITH_API_KEY"):
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass(
        "Enter API key for LangSmith: ")

In [5]:
from langchain_mistralai import MistralAIEmbeddings, ChatMistralAI
import getpass
import os

# Ask for the Mistral key interactively unless one is already present
# (an unset variable and an empty one both trigger the prompt).
if not os.getenv("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass.getpass("Enter API key for Mistral AI: ")

# Chat model used later to answer questions.
llm = ChatMistralAI(model="mistral-large-latest")

# Embedding model handed to the vector store.
embeddings = MistralAIEmbeddings(model="mistral-embed")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from langchain_chroma import Chroma

# Vector store that embeds documents and queries with the Mistral `embeddings`
# model defined earlier. No persist_directory is passed here, so the
# collection presumably lives only for the lifetime of the kernel -- TODO confirm.
vector_store = Chroma(embedding_function=embeddings)

In [10]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Fetch the blog post, parsing only the elements whose CSS class is one of
# the post title, header or content classes.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)
docs = loader.load()

# Split the loaded page into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

# Index chunks.
# Give every chunk a deterministic ID (source + position) instead of letting
# the store generate a random one. Re-running this cell then overwrites the
# same entries rather than appending duplicate copies, which would otherwise
# crowd the similarity-search results with identical chunks.
split_ids = [
    f"{split.metadata.get('source', 'doc')}#{position}"
    for position, split in enumerate(all_splits)
]
_ = vector_store.add_documents(documents=all_splits, ids=split_ids)

# Define prompt for question-answering: pull the "rlm/rag-prompt" template
# from the LangChain hub. generate() invokes it with the keys "question" and
# "context". Presumably requires network access -- TODO confirm.
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """State dictionary shared by the graph steps retrieve() and generate()."""
    # Question text supplied by the caller when the graph is invoked.
    question: str
    # Documents written by retrieve() and read by generate().
    context: List[Document]
    # Content of the model response, written by generate().
    answer: str


# Define application steps
def retrieve(state: State):
    """Look up documents similar to the question in the vector store.

    Reads ``state["question"]`` and returns a partial state update whose
    ``"context"`` entry holds the search results.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}


def generate(state: State):
    """Answer the question from the retrieved context.

    Joins the page content of every document in ``state["context"]`` with
    blank lines, fills the prompt with that text and the question, sends the
    result to the chat model, and returns the response content under
    ``"answer"``.
    """
    context_text = "\n\n".join([doc.page_content for doc in state["context"]])
    prompt_inputs = {"question": state["question"], "context": context_text}
    filled_prompt = prompt.invoke(prompt_inputs)
    reply = llm.invoke(filled_prompt)
    return {"answer": reply.content}


# Wire the two steps into a graph: retrieve runs first, then generate.
graph_builder = StateGraph(State)
graph_builder = graph_builder.add_sequence([retrieve, generate])
# The entry edge points at the node named after the retrieve function.
graph_builder.add_edge(START, "retrieve")
# Compile the builder into the runnable application.
graph = graph_builder.compile()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [11]:
# Run the whole graph for one question and show the generated answer.
question = "What is Task Decomposition?"
response = graph.invoke({"question": question})
print(response["answer"])

Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=cc31a19f-a23d-43f6-9c3b-43d71133bd79; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=4e50fb2b-66fe-48c9-8291-29fa5bab2397; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=9ff20909-b9f1-4fd2-bd22-cbbfcc10be0f; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=79b28c83-534e-4ac3-9b32-82ca021a4940
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=7c777dfd-e6ed-4e95-81ba-3f6b3e8

Task Decomposition is a strategy where a complex task is broken down into smaller, more manageable steps. This can be achieved through methods like Chain of Thought prompting, where a model is instructed to "think step by step," or through more structured approaches like Tree of Thoughts, which explores multiple reasoning paths. It can be done by large language models with prompting, using task-specific instructions, or with human inputs.


Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=270b83a8-f0ed-4a69-9c51-851a06d922cf; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=0b04f164-d696-4b30-ba15-457550ac231f; trace=7c777dfd-e6ed-4e95-81ba-3f6b3e868a61,id=a2275630-8e0d-40eb-a805-e6c90a15ca7e


In [13]:
from langchain_community.document_loaders import PyPDFLoader

# PDF to load; the path is relative to the notebook's working directory.
file_path = "./data/Plan_type_mémoire_d_étude.pdf"
# NOTE(review): `loader` and `docs` rebind the names that held the web-page
# loader and its documents in the earlier RAG cell; after this cell they
# refer to the PDF instead.
loader = PyPDFLoader(file_path)

# Presumably one Document per PDF page (the metadata printed in the next cell
# carries a 'page' key) -- TODO confirm.
docs = loader.load()

# Number of Documents loaded.
print(len(docs))

6


In [14]:
# Peek at the first loaded document: its first 200 characters, then its metadata.
first_page = docs[0]
preview = first_page.page_content[:200]
print(f"{preview}\n")
print(first_page.metadata)

PLAN TYPE D’UN RAPPORT DE MISSION DE FIN D’ETUDES 
EN MASTER OF SCIENCE BIHAR 
Vous trouverez ci -dessous le plan type d ’un rapport de  mission de fin d ’études ( MFE) en 
entreprise, pour un projet 

{'source': './data/Plan_type_mémoire_d_étude.pdf', 'page': 0}


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the PDF documents with the same chunk_size/chunk_overlap as the blog
# post. add_start_index=True presumably records each chunk's start offset in
# its metadata -- TODO confirm against the splitter documentation.
# NOTE(review): this rebinds `text_splitter` and `all_splits` from the earlier
# RAG cell, and in the cells visible here the new chunks are not added to
# `vector_store`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

# Number of chunks produced (displayed as the cell result).
len(all_splits)