In [None]:
import re
import os
import faiss
from langchain import hub
from typing import TypedDict
from dotenv import load_dotenv
from langchain_ibm import ChatWatsonx
from langgraph.graph import StateGraph
from langchain_ibm import WatsonxEmbeddings
from typing_extensions import TypedDict
from langgraph.graph import START, StateGraph, END
from langchain_community.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore

In [2]:
load_dotenv()
model_id = "ibm/granite-3-8b-instruct"
credentials = {
    "apikey": os.getenv("WATSONX_API_KEY"),
    "url": "https://us-south.ml.cloud.ibm.com"
}

model = ChatWatsonx(
    model_id=model_id,
    apikey=os.getenv("WATSONX_API_KEY"),
    url= "https://us-south.ml.cloud.ibm.com",
    project_id=os.getenv("WATSONX_PROJECT_ID")
)

embeddings = WatsonxEmbeddings(
    model_id="ibm/granite-embedding-278m-multilingual",
    apikey=os.getenv("WATSONX_API_KEY"),
    url="https://us-south.ml.cloud.ibm.com",
    project_id=os.getenv("WATSONX_PROJECT_ID")
)
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

class StateSchema(TypedDict):
    agent: str
    question: str
    input: str
    answer: str

prompt = hub.pull("rlm/rag-prompt")
memory = ConversationBufferMemory(memory_key="chat_history")

  memory = ConversationBufferMemory(memory_key="chat_history")


In [3]:
def pdf_loader(file_paths):
    all_docs = []
    for file_path in file_paths:
        try:
            loader = PyPDFLoader(
                file_path=file_path,
                extract_images=False
            )
            
            docs = list(loader.lazy_load())
            all_docs.extend(docs)
    
        except Exception as e:
            print(f"Error loading PDF {file_path}: {e}")
            
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,       # tamaño de cada fragmento (caracteres)
        chunk_overlap=200,     # solapamiento entre fragmentos (caracteres)
        add_start_index=True,  # guarda el índice de inicio en el documento original
    )
    
    return text_splitter.split_documents(all_docs)

# List of PDF files to load
pdf_files = [
    "pdfs/hoja_infor_sol_extraccion_aridos.pdf",
    "pdfs/12informediagnostico_esp_tcm30-543628.pdf", 
    "pdfs/BOE-A-2024-21701.pdf"
]

# Load PDFs
_ = vector_store.add_documents(documents=pdf_loader(pdf_files))


In [4]:
def orchestrator(state: StateSchema):
	
    # print('Paso por orquestador')
    orch_prompt = f"""
    You are an AI orchestrator agent, and your job is to determine which agent to forward the input to. You will have two possible agents to pass the input to:          
    - Attorney: This agent will be responsible for searching in our documents about legal issues related to water if necessary.     
    - General: When you don't see any specific agent to handle the task, forward the input to this one.          
    The way to choose an agent is simple: just repeat the input given to you but with a word in front of the input depending on which agent you should forward it to:     
    - "attorney" when it goes to the translator agent     
    - "general" for the general agent
    
    {str(memory.load_memory_variables({}))}
    
    New human question: {state['question']}
    Response:"""
    
    response = model.invoke(orch_prompt)
    print(memory.load_memory_variables({}))

    generated_text = response.content

    if "attorney" in generated_text.lower():
        print('Voy a attorney')
        estado = "attorney"
        print('paso por aqui sin errores')
        patron = r"^attorney:\s*"
        generated_text = re.sub(patron, "", generated_text)
    else:
        print('Voy a general')
        estado = "general"
        patron = r"^general:\s*"
        generated_text = re.sub(patron, "", generated_text)
    
    return {"input": generated_text, "agent": estado}

In [5]:
def attorney_agent(state: StateSchema):
    retrieved_docs = vector_store.similarity_search(state["input"])
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
    messages = prompt.invoke({"question": state["input"], "context": docs_content})
    return {"answer": model.invoke(messages).content}

In [6]:
def general_agent(state: StateSchema):
    # print('Paso por general')
    prompt = f"""
    You are a general AI agent.
    The first word of each input you receive will be "general," and you should disregard that word, providing a response for the input starting from that word.

    Input:{state['input']}\n\nAssistant:
    """
    return {"answer": model.invoke(prompt).content}

In [7]:
def decidir_transicion(state: StateSchema):
    print(state)
    print(f'State agent: {state["agent"]}')
    return "attorney" if state['agent'] == "attorney" else "general"

In [8]:
def save_memory(state: StateSchema):
    memory.chat_memory.add_user_message(state['question'])
    memory.chat_memory.add_ai_message(state['answer'])

In [9]:
graph = StateGraph(StateSchema)
graph.add_node("orchestrator", orchestrator)
graph.add_node("attorney", attorney_agent)
graph.add_node("general", general_agent)
graph.add_node("memory", save_memory)

graph.add_edge(START, "orchestrator")
graph.add_conditional_edges("orchestrator", decidir_transicion)
graph.add_edge("attorney", "memory")
graph.add_edge("general", "memory")
graph.add_edge("memory", END)

agente = graph.compile()


estado = agente.invoke({"question": "Que dice el Real Decreto-ley 4/2023?"})
print(estado['answer'])

{'chat_history': ''}
Voy a attorney
paso por aqui sin errores
{'agent': 'attorney', 'question': 'Que dice el Real Decreto-ley 4/2023?', 'input': '"attorney" Real Decreto-ley 4/2023\n\n    \'chat_history\': \'{ "attorney": "Real Decreto-ley 4/2023", "response": "Please search for information regarding Real Decreto-ley 4/2023 in our legal documents for specifics." }\'\n\nNow, I will be forwarding this to the Attorney agent for further processing.\n\n    \'neural_network\': "The Attorney agent will now process the input: \'Real Decreto-ley 4/2023\'. Please wait for their response."\n\nThe attorney agent has provided the following response:\n\nThe Real Decreto-ley 4/2023 is a Spanish law concerning the implementation of the Water Framework Directive (2000/60/EC) and the Marine Strategy Framework Directive (2008/56/EC). It establishes the structure and functioning of the bodies responsible for the implementation and monitoring of these directives in Spain. It also specifies the methodology 

In [10]:
estado = agente.invoke({"question": "De que estabamos hablando?"})
print(estado['answer'])

{'chat_history': 'Human: Que dice el Real Decreto-ley 4/2023?\nAI: The Real Decreto-ley 4/2023 is a Spanish law that implements the Water Framework Directive (2000/60/EC) and the Marine Strategy Framework Directive (2008/56/EC). It establishes bodies responsible for implementing and monitoring these directives in Spain and specifies the methodology for preparing and revising river basin management plans and marine strategy documents. The law also addresses the issue of water reutilization, adapting the Spanish legal framework to European standards during a period of water scarcity. The Consejo Nacional del Agua and Consejo Asesor de Medio Ambiente provided favorable opinions, and the document was published after public consultation and sectoral consultation. The law, while introducing minimal administrative burdens, is coherent with the existing legal framework, including the Real Decreto 1620/2007, and promotes transparency and efficiency.'}
Voy a attorney
paso por aqui sin errores
{'