In [4]:
#Instalar las librarias de uso
!pip install langchain
!pip install cohere
!pip install unstructured
!pip install "unstructured[pdf]"
!pip install qdrant-client



In [5]:
!sudo apt-get install poppler-utils

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
poppler-utils is already the newest version (22.02.0-2ubuntu0.8).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [6]:
# Instalar la extensión de langchain para el chatbot con Cohere

pip install -U langchain-community



In [7]:
#Importar los módulos, particularmente los de pregunta-respuesta de Cohere

import os
from getpass import getpass

from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA
from langchain.vectorstores import Qdrant
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.retrievers import TFIDFRetriever
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFDirectoryLoader


In [85]:
# Prompt configuration (written in English for better accuracy).
# It carries the instructions plus three placeholders: {context} (the script
# document), {question} (the user's question) and {history} (the chat history).

prompt_template = """

## Instructions
You are an AI personal friendly assistant named Cortana, designed to answer document-based questions about the radar of the IA. You MUST only support Spanish for questions and answers. Your responses should be concise and directly address the specific question.
Answer based solely on the content of the provided documents. Do not generate an answer that is not supported by the documents.
If you cannot find the answer to the user's question in the documents provided, respond by stating that the information is beyond your scope in Spanish. If the user asks for the above, the question is the one received immediately before.

Use the following documents and chat history to answer the question in Spanish:

Question:{question}

Documents: {context}

Chat History: {history}

"""

# The variable names listed here must match the placeholders used in the template.
PROMPT = PromptTemplate(
    input_variables=["context", "question", "history"],
    template=prompt_template,
)

In [59]:
# Load the PDF content of the docs folder (the script document, "Guión_HMI").
# DOCS_DIR is the single place to change when the notebook runs outside Colab.
DOCS_DIR = "/content/docs"

loader = PyPDFDirectoryLoader(DOCS_DIR)
docs = loader.load()

# Stop here if nothing was loaded: every later cell needs at least one document,
# and failing now gives a clearer message than an error further down.
if not docs:
    raise FileNotFoundError(f"No PDF documents could be loaded from {DOCS_DIR!r}")



In [60]:
# Split the loaded documents into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents=docs)

# Number of chunks produced.
len(texts)

140

In [61]:
# Cohere API key for the chatbot.
# It is read from the COHERE_API_KEY environment variable, falling back to an
# interactive prompt; a key written in this cell would be exposed in every
# shared or committed copy of the notebook.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated.
cohere_api_key = os.environ.get("COHERE_API_KEY") or getpass("Cohere API key: ")

# Create the embeddings with Cohere's free "multilingual-22-12" model, for use
# with Spanish text.
embeddings = CohereEmbeddings(
    model="multilingual-22-12",
    cohere_api_key=cohere_api_key,
    user_agent="my-app",
)

In [62]:
# Show only the model name: the full repr of `embeddings` prints the API key in clear text.
embeddings.model

'multilingual-22-12'

In [63]:
# Build the context provider: a TF-IDF retriever created directly from the text chunks.
db = TFIDFRetriever.from_documents(documents=texts)

### Respuestas con Cohere (Parte inicial)

In [83]:
# Build the chatbot: a "stuff" RetrievalQA chain that combines the retriever,
# the custom prompt and a conversation memory.
# Model "command-r-08-2024" was chosen for having the best qualities in Spanish.
cohere_llm = Cohere(
    model="command-r-08-2024",
    temperature=0.4,
    cohere_api_key=cohere_api_key,
    max_tokens=300,
)

# memory_key="history" feeds the {history} placeholder of the prompt;
# input_key="question" names the chain input that is recorded in the memory.
chat_memory = ConversationBufferMemory(
    memory_key="history",
    input_key="question",
)

qa = RetrievalQA.from_chain_type(
    llm=cohere_llm,
    chain_type="stuff",
    retriever=db,
    verbose=False,
    chain_type_kwargs={
        "verbose": False,
        "prompt": PROMPT,
        "memory": chat_memory,
    },
)

In [88]:
# Smoke test: send one question through the chain and print the raw result dict.
answer = qa.invoke(input={"query": "¿Cómo estás?"})
print(answer)

{'query': '¿Cómo estás?', 'result': 'Me alegra que te hayas interesado en mi estado. Estoy bien, ¡gracias por preguntar!'}
