In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_KEY")
api_key = os.getenv("API_KEY")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.output_parsers import StrOutputParser
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langsmith import traceable
import bs4
from langchain_community.document_loaders import WebBaseLoader

memory = MemorySaver()
search = TavilySearchResults(max_results=2)
# print(search_results)
# If we want, we can create other tools.
# Once we have all the tools we want, we can put them in a list that we will reference later.
tools = [search]

model = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo")
# model_with_tools = model.bind_tools(tools)
agent_executor = create_react_agent(model, tools,checkpointer=memory)
config = {"configurable": {"thread_id": "abc120"}}

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [6]:
from langchain.document_loaders import PyMuPDFLoader
bs4_strainer = bs4.SoupStrainer(class_=("node node--type-article node--view-mode-full","list-results__classification","list-results__subtitle","species-category -sm -icon species-category--lc"))
pdf_paths = ["report-iucn.pdf","2024-012-En.pdf"]
pdf_docs = []
for pdf_path in pdf_paths:
    pdf_loader = PyMuPDFLoader(pdf_path)
    pdf_docs.extend(pdf_loader.load())

web_loader = WebBaseLoader(
    web_paths=("https://wwf.be/fr/communiques-de-presse/rapport-planete-vivante-du-wwf-les-populations-danimaux-sauvages-connaissent",
               "https://www.iucnredlist.org/search/list?taxonLevel=Amazing&searchType=species"
               ),

    bs_kwargs={"parse_only": bs4_strainer},
)
web_docs = web_loader.load()
all_docs = web_docs + pdf_docs
print(f"Nombre de documents chargés: {len(web_docs)}")

Nombre de documents chargés: 2


In [8]:
len(all_docs[1].page_content)

0

In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(all_docs)

len(all_splits)
len(all_splits[0].page_content)
all_splits[0].metadata

{'source': 'report-iucn.pdf',
 'file_path': 'report-iucn.pdf',
 'page': 0,
 'total_pages': 24,
 'format': 'PDF 1.7',
 'title': '',
 'author': '',
 'subject': '',
 'keywords': '',
 'creator': 'Adobe InDesign 16.3 (Macintosh)',
 'producer': 'Adobe PDF Library 15.0',
 'creationDate': "D:20210811115201+01'00'",
 'modDate': "D:20210811115230+01'00'",
 'trapped': '',
 'start_index': 0}

In [None]:
from langchain_community.vectorstores import FAISS 
from langchain_openai import OpenAIEmbeddings

if not os.path.exists("faiss_index"):
    vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(model="text-embedding-3-large"))
    vectorstore.save_local("faiss_index")
else:
    local_index=FAISS.load_local("faiss_index", embeddings=OpenAIEmbeddings(model="text-embedding-3-large"),allow_dangerous_deserialization=True)

# docsearch = FAISS.load_local("faiss_index", embeddings)


In [17]:
retriever = local_index.as_retriever(search_type="similarity", search_kwargs={"k": 6})

retrieved_docs = retriever.invoke("quel est le déclin moyen des populations d'animaux sauvages ?")

len(retrieved_docs)
# print(retrieved_docs[0].page_content)

6

In [15]:
# from langchain_chroma import Chroma
# from langchain_openai import OpenAIEmbeddings
# sample_splits = all_splits[:100]
# vectorstore = Chroma.from_documents(documents=sample_splits, embedding=OpenAIEmbeddings(model="text-embedding-3-large"))

In [16]:
from langchain_core.messages import HumanMessage, SystemMessage
import textwrap
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_core.prompts import PromptTemplate
load_dotenv()

Q = "quel est le déclin moyen des populations d'animaux sauvages ?"

template = """Use the following pieces of context and the tools at your disposition to answer the question at the end.
If you don't know the answer, just say that you don't know.
Use five sentences maximum and keep the answer as concise as possible.
{context}"""
# prompt = hub.pull("rlm/rag-prompt")
# custom_rag_prompt = PromptTemplate.from_template(template)
prompt_template = ChatPromptTemplate.from_messages(
    [("system", template), ("user", "{question}" )]
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain =  {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt_template | agent_executor 

Input = input("avez-vous des Questions ?")
@traceable
def process(user_input: str):
    response = chain.invoke(user_input,config)
    final_content = response["messages"][-1].content
    answer= textwrap.fill(final_content,width=100)
    print(answer)
    return response['messages'] 

process(Input)

Please provide a question or topic that you would like assistance with.


[SystemMessage(content="Use the following pieces of context and the tools at your disposition to answer the question at the end.\nIf you don't know the answer, just say that you don't know.\nUse five sentences maximum and keep the answer as concise as possible.\nRing-tailed Lemur (Lemur catta) ©Zwennie CC BY-SA 2.0\nImage credits: South-Western Black Rhino (Diceros bicornis occidentalis) ©Dave Hamman\nRainbow Eucalyptus (Eucalyptus deglupta) ©Thomas Caldwell\nWater monitor (Varanus salvator) ©Stewart MacDonald\nSclater's guenon (Cercopithecus sclateri) ©Lynne R. Baker\n\n26\n27\nIUCN Red List at Regional and National Levels\nA miniature world \nin decline\nEuropean Red List of Mosses, \nLiverworts and Hornworts\nNick Hodgetts, Marta Cálix, Eve Englefield, Nicholas Fettes, Mariana García Criado, Lea Patin, Ana Nieto, Ariel Bergamini,  Irene \nBisang, Elvira Baisheva, Patrizia Campisi, Annalena Cogoni, Tomas Hallingbäck, Nadya Konstantinova, Neil Lockhart, Marko \nSabovljevic, Norbert Sc