In [110]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_KEY")
api_key = os.getenv("API_KEY")
token= os.getenv("IUCN_API_KEY")

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.output_parsers import StrOutputParser
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langsmith import traceable
import bs4
from langchain_community.document_loaders import WebBaseLoader

memory = MemorySaver()
search = TavilySearchResults(max_results=2)

In [111]:
from langchain.document_loaders import PyMuPDFLoader
bs4_strainer = bs4.SoupStrainer(class_=("node node--type-article node--view-mode-full","list-results__classification","list-results__subtitle","species-category -sm -icon species-category--lc"))
pdf_paths = ["report-iucn.pdf","2024-012-En.pdf"]
pdf_docs = []
for pdf_path in pdf_paths:
    pdf_loader = PyMuPDFLoader(pdf_path)
    pdf_docs.extend(pdf_loader.load())

web_loader = WebBaseLoader(
    web_paths=("https://wwf.be/fr/communiques-de-presse/rapport-planete-vivante-du-wwf-les-populations-danimaux-sauvages-connaissent",
               "https://www.iucnredlist.org/search/list?taxonLevel=Amazing&searchType=species"
               ),

    bs_kwargs={"parse_only": bs4_strainer},
)
web_docs = web_loader.load()
all_docs = web_docs + pdf_docs
print(f"Nombre de documents chargés: {len(web_docs)}")

Nombre de documents chargés: 2


In [112]:
len(all_docs[1].page_content)

0

In [113]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(all_docs)

len(all_splits)
len(all_splits[0].page_content)
all_splits[0].metadata

{'source': 'report-iucn.pdf',
 'file_path': 'report-iucn.pdf',
 'page': 0,
 'total_pages': 24,
 'format': 'PDF 1.7',
 'title': '',
 'author': '',
 'subject': '',
 'keywords': '',
 'creator': 'Adobe InDesign 16.3 (Macintosh)',
 'producer': 'Adobe PDF Library 15.0',
 'creationDate': "D:20210811115201+01'00'",
 'modDate': "D:20210811115230+01'00'",
 'trapped': '',
 'start_index': 0}

In [114]:
from langchain_community.vectorstores import FAISS 
from langchain_openai import OpenAIEmbeddings

if not os.path.exists("faiss_index"):
    vectorstore = FAISS.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(model="text-embedding-3-large"))
    vectorstore.save_local("faiss_index")
else:
    local_index=FAISS.load_local("faiss_index", embeddings=OpenAIEmbeddings(model="text-embedding-3-large"),allow_dangerous_deserialization=True)

# docsearch = FAISS.load_local("faiss_index", embeddings)


In [115]:
retriever = local_index.as_retriever(search_type="similarity", search_kwargs={"k": 6})

retrieved_docs = retriever.invoke("quel est le déclin moyen des populations d'animaux sauvages ?")

len(retrieved_docs)
# print(retrieved_docs[0].page_content)

6

In [123]:
import http.client
import json
from bs4 import BeautifulSoup

def get_species_assessment(genus, species):
    """
    Cette fonction récupère les informations détaillées d'une espèce à partir de l'API IUCN Redlist.
    
    :param genus: Le genre de l'espèce
    :param species: Le nom de l'espèce
    :return: Un dictionnaire contenant les détails de l'espèce ou un message d'erreur
    """
    
    # Connexion à l'API
    conn = http.client.HTTPSConnection("api.iucnredlist.org")
    payload = ''
    headers = {
        'Authorization': token
    }

    # Étape 1 : Récupérer l'assessment_id
    conn.request("GET", f"/api/v4/taxa/scientific_name?genus_name={genus}&species_name={species}", payload, headers)
    res = conn.getresponse()
    data = res.read()
    json_data = data.decode("utf-8")
    parsed_data = json.loads(json_data)

    # Vérifier si l'assessment_id est présent
    if "assessments" in parsed_data and len(parsed_data["assessments"]) > 0:
        # Récupérer l'assessment_id
        assessment_id = parsed_data["assessments"][0]["assessment_id"]
        # print(f"Assessment ID pour {genus} {species} : {assessment_id}")

        # Étape 2 : Utiliser l'assessment_id pour récupérer les détails
        conn.request("GET", f"/api/v4/assessment/{assessment_id}", payload, headers)
        res = conn.getresponse()
        data = res.read()
        json_data = data.decode("utf-8")
        parsed_data = json.loads(json_data)

        # Vérifier que les données sont présentes
        if "taxon" in parsed_data:
            result = parsed_data["taxon"]
            racine = parsed_data
            species_info = {
                "assessment_id" : assessment_id,
                "Nom scientifique": result.get("scientific_name", "N/A"),
                "Ordre": result.get("order_name", "N/A"),
                "Classe": result.get("class_name", "N/A"),
                "Royaume": result.get("kingdom_name", "N/A"),
                "Niveau de danger": racine['red_list_category']['description'].get('en', "N/A"),
                "Tendance de la population": racine['population_trend']['description'].get('en', "N/A")
            }

            # Nettoyage des habitats et menaces
            habitats = racine['documentation'].get('habitats', "")
            threats = racine['documentation'].get('threats', "")
            
            # Nettoyer le HTML des habitats et des menaces
            cleaned_habitats = BeautifulSoup(habitats, "html.parser").get_text()
            cleaned_threats = BeautifulSoup(threats, "html.parser").get_text()

            species_info["Habitat"] = cleaned_habitats
            species_info["Menaces"] = cleaned_threats

            return species_info

    return {"message": "Aucune donnée trouvée pour cet assessment ID."}


In [117]:
from langchain.agents import tool
from langchain.agents import Tool

@tool
def iucn_redlist_tool(genus: str, species: str):
    """
    Outil LangChain qui utilise l'API IUCN Redlist pour obtenir des informations sur une espèce.
    
    :param genus: Le genre de l'espèce (ex. Panthera)
    :param species: Le nom de l'espèce (ex. leo)
    :return: Un dictionnaire contenant les informations sur l'espèce
    """
    return get_species_assessment(genus, species)

redlist = iucn_redlist_tool


In [118]:

tools = [search,redlist]

model = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo")
# model_with_tools = model.bind_tools(tools)
agent_executor = create_react_agent(model, tools,checkpointer=memory)
config = {"configurable": {"thread_id": "abc120"}}

In [124]:
from langchain_core.messages import HumanMessage, SystemMessage
import textwrap
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_core.prompts import PromptTemplate
load_dotenv()

Q = "quel est le déclin moyen des populations d'animaux sauvages ?"

template = """Use the following pieces of context and the tools at your disposition to answer the question at the end.
If you don't know the answer, just say that you don't know.
Use five sentences maximum and keep the answer as concise as possible.
{context}"""
# prompt = hub.pull("rlm/rag-prompt")
# custom_rag_prompt = PromptTemplate.from_template(template)
prompt_template = ChatPromptTemplate.from_messages(
    [("system", template), ("user", "{question}" )]
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


chain =  {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt_template | agent_executor 

Input = input("avez-vous des Questions ?")
@traceable
def process(user_input: str):
    response = chain.invoke(user_input,config)
    final_content = response["messages"][-1].content
    answer= textwrap.fill(final_content,width=100)
    print(answer)
    return response['messages'] 

process(Input)

Les noms communs couramment utilisés pour l'espèce Ailurus fulgens sont le "Panda roux" ou le
"Renard de feu".


[SystemMessage(content="Use the following pieces of context and the tools at your disposition to answer the question at the end.\nIf you don't know the answer, just say that you don't know.\nUse five sentences maximum and keep the answer as concise as possible.\n24\n25\nSpecies that are recovering\nSpecies that are declining\nKinabalu Slender Litter Frog \nLeptobrachella arayai\nThis amphibian moved from Vulnerable to \nLeast Concern in 2018. It is known from \nKinabalu National Park and Crocker Range \nNational Park (Trus Madi) in Malaysia. In \n2004, the major threat to this species was \nrapid logging of sub-montane forests for \ntimber. However, industrial logging activities \nat Trus Madi have nearly ceased and are not \noccurring at elevations where this species \noccurs. Both of the national parks are well \nprotected and well-managed, and Trus Madi \nis a High Conservation Value Forest.\nMadagascar's lemurs\nwith 103 of the 107 surviving species \nthreatened with extinction, ma

In [None]:
# import http.client
# import json
# from bs4 import BeautifulSoup

# # Paramètres : Remplacer par votre propre genus, species et assessment_id
# genus = "Ailurus"
# species = "fulgens"
# token = "ct5nfRuiaAzSPBdHWRcn1rFXi2F6stGSz6uD"

# # Étape 1 : Récupérer l'assessment_id
# conn = http.client.HTTPSConnection("api.iucnredlist.org")
# payload = ''
# headers = {
#     'Authorization': token
# }

# # Faire une requête pour obtenir l'assessment_id
# conn.request("GET", f"/api/v4/taxa/scientific_name?genus_name={genus}&species_name={species}", payload, headers)
# res = conn.getresponse()
# data = res.read()
# json_data = data.decode("utf-8")
# parsed_data = json.loads(json_data)

# # Récupérer l'assessment_id
# assessment_id = parsed_data["assessments"][0]["assessment_id"]
# print(f"Assessment ID pour {genus} {species} : {assessment_id}")

# # Étape 2 : Utiliser l'assessment_id pour récupérer les détails
# conn.request("GET", f"/api/v4/assessment/{assessment_id}", payload, headers)
# res = conn.getresponse()
# data = res.read()
# json_data = data.decode("utf-8")
# parsed_data = json.loads(json_data)

# # Afficher les détails de l'espèce
# if "taxon" in parsed_data:
#     result = parsed_data["taxon"]
#     racine = parsed_data
#     print(f"Nom scientifique : {result['scientific_name']}")
#     print(f"Ordre : {result['order_name']}")
#     print(f"Classe : {result['class_name']}")
#     print(f"Royaume : {result['kingdom_name']}")
#     print(f"Niveau de danger : {racine['red_list_category']['description']['en']}")
#     print(f"Tendance de la population : {racine['population_trend']['description']['en']}")
#     habitats = racine['documentation']['habitats']
#     soup = BeautifulSoup(habitats, "html.parser")
#     cleaned_habitats = soup.get_text()
#     print(f"Habitat : {cleaned_habitats}")
#     threats = racine['documentation']['threats']
#     soup = BeautifulSoup(threats, "html.parser")
#     cleaned_threats = soup.get_text()
#     print(f"Menaces : {cleaned_threats}")
# else:
#     print("Aucune donnée trouvée pour cet assessment ID.")


Assessment ID pour Ailurus fulgens : 110023718
Nom scientifique : Ailurus fulgens
Ordre : CARNIVORA
Classe : MAMMALIA
Royaume : ANIMALIA
Niveau de danger : Endangered
Tendance de la population : Decreasing
Habitat : Red Panda is closely associated with montane forests (oak mixed; mixed broad-leaf conifer; and conifer) with dense bamboo-thicket understorey (Roberts and Gittleman 1984). Conifer/fir forests seem to be preferred (Yonzon and Hunter 1991). Habitats above the tree-line are probably not consistently occupied given that Red Panda is essentially arboreal (Choudhury 2001). A dead Red Panda at 4,325 m asl in Arunachal Pradesh, in an area where the species is not generally known and far from any typical Red Panda habitat, was presumably a dispersant (Dorjee et al. 2014).  Six studies reported that Red Panda prefers to live near (typically within 100–200 m of) water (e.g., Pradhan et al. 2001). Five indicated a preference for canopy cover above 30%, with some suggesting cover as hig