In [124]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

os.environ["LANGCHAIN_TRACING_V2"] = "true"

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError as soon as one variable is missing. Copy only the
# values that are actually set and report the ones that are not.
for target_name, source_name in (
    ("LANGCHAIN_API_KEY", "LANGCHAIN_API_KEY"),
    ("OPENAI_API_KEY", "API_KEY"),
    ("TAVILY_API_KEY", "TAVILY_KEY"),
):
    source_value = os.getenv(source_name)
    if source_value is None:
        print(f"Warning: environment variable {source_name} is not set")
    else:
        os.environ[target_name] = source_value

# Kept as module-level names because later cells read them
# (api_key -> ChatOpenAI, token -> IUCN Authorization header).
api_key = os.getenv("API_KEY")
token = os.getenv("IUCN_API_KEY")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langsmith import traceable
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.agents import tool

In [125]:
from langchain.document_loaders import PyMuPDFLoader
# Restrict HTML parsing to elements whose class attribute matches one of these
# values; the strainer is handed to BeautifulSoup through bs_kwargs below.
bs4_strainer = bs4.SoupStrainer(
    class_=(
        "node node--type-article node--view-mode-full",
        "list-results__classification",
        "list-results__subtitle",
        "species-category -sm -icon species-category--lc",
    )
)
web_loader = WebBaseLoader(
    web_paths=(
        "https://wwf.be/fr/communiques-de-presse/rapport-planete-vivante-du-wwf-les-populations-danimaux-sauvages-connaissent",
    ),
    bs_kwargs={"parse_only": bs4_strainer},
)
web_docs = web_loader.load()

# Local PDF reports, resolved relative to the notebook's working directory.
pdf_paths = ["report-iucn.pdf", "2024-012-En.pdf"]
pdf_docs = []
for pdf_path in pdf_paths:
    pdf_loader = PyMuPDFLoader(pdf_path)
    pdf_docs += pdf_loader.load()

# Single corpus fed to the text splitter: web page first, then the PDFs.
all_docs = web_docs + pdf_docs
# print(f"Nombre de documents chargés: {len(web_docs)}")
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Cut every loaded document into chunks of at most 1000 characters with a
# 200-character overlap. add_start_index=True stores each chunk's offset in
# its metadata under "start_index".
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(all_docs)
# The former mid-cell debug expressions were removed: they displayed nothing,
# and indexing all_splits[200] raised IndexError on corpora with fewer chunks.
from langchain_community.vectorstores import FAISS 
from langchain_openai import OpenAIEmbeddings

# One embeddings client shared by index construction and index loading, so the
# model name cannot drift between the two.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

# Build and persist the FAISS index only when no saved copy exists yet.
if not os.path.exists("faiss_index"):
    print("folder not found")
    vectorstore = FAISS.from_documents(documents=all_splits, embedding=embedding_model)
    vectorstore.save_local("faiss_index")
else:
    print("folder found")

# The index is always (re)loaded from disk, exactly as before, so both paths
# end up with the same kind of object.
# NOTE(security): allow_dangerous_deserialization=True lets load_local unpickle
# the saved docstore. Only use it on an index directory this notebook created.
local_index = FAISS.load_local(
    "faiss_index",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

# Similarity search returning the 6 closest chunks per query.
retriever = local_index.as_retriever(search_type="similarity", search_kwargs={"k": 6})


# Smoke test: query the retriever with a sample French question
# ("what is the average decline of wild animal populations?") and print the hits.
q = "quel est le déclin moyen des populations d'animaux sauvages ?"
retrieved_docs = retriever.invoke(q)
print(retrieved_docs)
def format_docs(documents):
    """Concatenate the text of several documents into one string.

    :param documents: Iterable of objects exposing a ``page_content`` attribute.
    :return: The ``page_content`` values joined by a blank line; "" when empty.
    """
    page_texts = [document.page_content for document in documents]
    return "\n\n".join(page_texts)

def rag_formatter(input):
    """Retrieve the chunks matching a query and return them as one text block.

    :param input: Query string passed to the module-level ``retriever``.
    :return: The retrieved documents' text, formatted by ``format_docs``.
    """
    return format_docs(retriever.invoke(input))


# NOTE: LangChain's @tool decorator uses the docstring below as the tool
# description shown to the model, so editing it changes agent behaviour.
@tool
def rag_tool(input: str):
    """
    LangChain tool that utilizes a Retrieval-Augmented Generation (RAG) system to provide
    fact-based answers by retrieving relevant documents from a knowledge base.

    :param input: The user's query or input string.
    :return: A string containing relevant retrieved information that complements the input query.
    """
    # Delegates to rag_formatter, which queries the retriever and joins the hits.
    return rag_formatter(input)

# Short alias used when the agent's tool list is assembled.
RAG = rag_tool

folder found
[Document(metadata={'source': 'report-iucn.pdf', 'file_path': 'report-iucn.pdf', 'page': 6, 'total_pages': 24, 'format': 'PDF 1.7', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'creator': 'Adobe InDesign 16.3 (Macintosh)', 'producer': 'Adobe PDF Library 15.0', 'creationDate': "D:20210811115201+01'00'", 'modDate': "D:20210811115230+01'00'", 'trapped': '', 'start_index': 4639}, page_content='whether monitored by natural and social\nscientists or by Indigenous Peoples and local\ncommunities, are declining. These include the\nnumber and population size of wild species,\nthe number of local varieties of domesticated\nspecies, the distinctness of ecological com-\nmunities, and the extent and integrity of\nmany terrestrial and aquatic ecosystems. As\na consequence, nature’s capacity to provide\ncrucial benefits has also declined, including\nenvironmental processes underpinning hu-\nman health and nonmaterial contributions\nto human quality of life. The costs are dis-

In [126]:
import http.client
import json
from urllib.parse import urlencode

from bs4 import BeautifulSoup

def _iucn_get_json(conn, path, headers):
    """Send a GET on an open connection and return the decoded JSON object.

    Returns None when the body is not valid UTF-8 JSON or is not a JSON object.
    """
    conn.request("GET", path, headers=headers)
    response = conn.getresponse()
    # The body must be read in full before the connection can send another request.
    body = response.read()
    try:
        parsed = json.loads(body.decode("utf-8"))
    except ValueError:  # covers both UnicodeDecodeError and json.JSONDecodeError
        return None
    return parsed if isinstance(parsed, dict) else None


def _english_description(node):
    """Return node["description"]["en"], or "N/A" when node or its description is missing."""
    description = node.get("description") if isinstance(node, dict) else None
    if not isinstance(description, dict):
        return "N/A"
    return description.get("en", "N/A")


def _strip_html(fragment):
    """Return the plain text of an HTML fragment; None or empty input gives ""."""
    return BeautifulSoup(fragment or "", "html.parser").get_text()


def get_species_assessment(genus, species):
    """Fetch a species' Red List assessment from the IUCN API (v4) and summarise it.

    Two requests are made on one HTTPS connection: the first resolves the
    scientific name to an assessment id, the second fetches that assessment.

    :param genus: Genus part of the scientific name (e.g. "Panthera").
    :param species: Species part of the scientific name (e.g. "leo").
    :return: A dict with the keys "assessment_id", "Scientifique name", "Order",
        "Class", "Kingdom", "Level of danger", "Population trend", "Habitat" and
        "Threats"; or a dict with a single "message" key when the API token is
        missing or no usable data came back.
    """
    not_found = {"message": "No data found for this assessment ID."}

    # A None header value makes http.client raise; report it instead.
    if not token:
        return {"message": "IUCN_API_KEY is not configured."}
    headers = {'Authorization': token}

    # The names come from the model/user, so they are URL-encoded rather than
    # interpolated raw into the request line.
    query = urlencode({"genus_name": genus, "species_name": species})

    conn = http.client.HTTPSConnection("api.iucnredlist.org")
    try:
        # Step 1: resolve the scientific name to an assessment id.
        taxon_lookup = _iucn_get_json(conn, f"/api/v4/taxa/scientific_name?{query}", headers)
        assessments = (taxon_lookup or {}).get("assessments") or []
        if not assessments or not isinstance(assessments[0], dict):
            return not_found
        # NOTE(review): the first listed assessment is used, as before. Confirm
        # against the API docs whether that is always the most recent one.
        assessment_id = assessments[0].get("assessment_id")
        if assessment_id is None:
            return not_found

        # Step 2: fetch the assessment details for that id.
        assessment = _iucn_get_json(conn, f"/api/v4/assessment/{assessment_id}", headers)
    finally:
        # Always release the socket, including on network errors.
        conn.close()

    if not assessment or "taxon" not in assessment:
        return not_found

    taxon = assessment["taxon"] or {}
    documentation = assessment.get("documentation") or {}

    # Dictionary keys are part of the tool's output and are kept verbatim.
    return {
        "assessment_id": assessment_id,
        "Scientifique name": taxon.get("scientific_name", "N/A"),
        "Order": taxon.get("order_name", "N/A"),
        "Class": taxon.get("class_name", "N/A"),
        "Kingdom": taxon.get("kingdom_name", "N/A"),
        # These sections can be absent or null in a response; fall back to "N/A".
        "Level of danger": _english_description(assessment.get("red_list_category")),
        "Population trend": _english_description(assessment.get("population_trend")),
        # Habitat/threat texts are HTML fragments; keep only their text.
        "Habitat": _strip_html(documentation.get("habitats")),
        "Threats": _strip_html(documentation.get("threats")),
    }


In [127]:
# NOTE: LangChain's @tool decorator uses the docstring below as the tool
# description shown to the model, so editing it changes agent behaviour.
@tool
def iucn_redlist_tool(genus: str, species: str):
    """
    LangChain tool that uses the IUCN Redlist API to retrieve information about a species.
    
    :param genus: The genus of the species (e.g. Panthera)
    :param species: The species name (e.g. leo)
    :return: A dictionary containing information about the species
    """
    # Thin wrapper: all request and parsing logic lives in get_species_assessment.
    return get_species_assessment(genus, species)

# Short alias used when the agent's tool list is assembled.
redlist = iucn_redlist_tool


In [128]:
# Tools and agent initialisation.

# In-memory checkpointer handed to the agent below.
memory = MemorySaver()

# Web search tool.
search = TavilySearchResults(
    max_results=5,
    search_depth="advanced",
    include_answer=True,
    include_raw_content=True,
    include_images=False,
)

# Everything the agent may call: web search, IUCN Red List lookup, document RAG.
tools = [search, redlist, RAG]
model = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo")

agent_executor = create_react_agent(model, tools, checkpointer=memory)

# Passed to chain.invoke by Ask_AI; every call uses this one thread id.
config = {"configurable": {"thread_id": "abc123"}}

In [134]:
import textwrap
from langchain_core.runnables import RunnablePassthrough

load_dotenv()

# System instructions sent ahead of every user question.
template = """Use the tools at your disposition to answer the question.
If you don't know the answer, just say that you don't know.
Use a few sentences maximum and keep the answer concise.
"""
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", template),
        ("user", "{question}"),
    ]
)

# The raw input string becomes {question}; the rendered prompt is then handed
# to the agent.
# NOTE(review): the agent is built with a checkpointer, so each invoke
# presumably appends this system message to the thread history again. Confirm.
chain = {"question": RunnablePassthrough()} | prompt_template | agent_executor


@traceable
def Ask_AI(user_input: str):
    """Send a question through the chain, print the answer, and return the messages.

    :param user_input: The question to ask.
    :return: The ``messages`` list from the chain's response. The content of its
        last message is what gets printed, wrapped to 100 columns.
    """
    messages = chain.invoke(user_input, config)["messages"]
    print(textwrap.fill(messages[-1].content, width=100))
    return messages


In [136]:
# Read a question from the user and send it to the agent. Ask_AI prints the
# wrapped answer; because the call is the cell's last expression, its return
# value (the message list) is displayed as well.
Input = input("Do you have Questions ?")
Ask_AI(Input)

Yes, there are species that are in a recovering process. For example, the Echo Parakeet
(Alexandrinus eques) has shown improvement from Critically Endangered to Vulnerable status due to
successful conservation efforts, including captive breeding programs. Similarly, freshwater fish
species like the Australian Trout Cod (Maccullochella macquariensis) have improved their status from
Endangered to Vulnerable through conservation actions such as reintroductions and translocations.
These examples highlight the positive outcomes of conservation efforts in helping species recover
from endangered states.


[SystemMessage(content="Use the following pieces of context and the tools at your disposition to answer the question at the end.\nIf you don't know the answer, just say that you don't know.\nUse a few sentences maximum and keep the answer concise.\n", additional_kwargs={}, response_metadata={}, id='9efc6afa-0e8d-48e4-a1e8-181ee69d9024'),
 HumanMessage(content='how much are the polar bears endangered', additional_kwargs={}, response_metadata={}, id='19c231dc-c487-47b8-8e8a-312d60930751'),
 AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_yHx7KCPHzxCBbPXKvVYKqnRL', 'function': {'arguments': '{"genus":"Ursus","species":"maritimus"}', 'name': 'iucn_redlist_tool'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 299, 'total_tokens': 326, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'au