# Testing Prompt

This notebook is for testing and improvement the system prompt for a better performance of the RAG assitant.
It is used the vectorstore previously generated.

Some strategies are:
- Mentioning to perform calculations with the retrieved information to answer the answer more accurately in the prompt.
- Another strategy would be Few-shots prompting, including some examples of question-answer in the prompt to guide the model to prepare the answers.
- A final strategy not tested would be Chain-of-Thougts, guiding the model in the process of preparing the answer.

In [3]:
from langchain.text_splitter import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI


In [6]:
# Embeddings model and LLM
embedding=OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0.2, api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
# RAG prompt that works finding the correct information requested, even if it does not perform calculations with the information.
# It pulls the prompt from langchain-hub
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [17]:
# Printing prompt to reproduce to improve the langchain-hub prompt 
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [7]:
# Reproduction of the prompt, including asking for calculations
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import PromptTemplate

template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, 
just say that you don't know. Use three sentences maximum and keep the answer concise. Make the proper calculations when it asked the result of 
operations with data from the context.\nQuestion: {question} \nContext: {context} \nAnswer:"""

prompt_pre = PromptTemplate(input_variables=['context', 'question'], template=template)
prompt = ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=prompt_pre)])                                                                                                                                                                                                                                                                  
                                                                                                                                                                                                

In [8]:
# Load the previously created Chroma index from the "chroma_db" directory
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embedding)

# RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt}
)

In [9]:
# Question and invoking the chain for quering
question = "¿Puedes indicarme el coste de recibir una transferencia de 5.000€ en Yenes?"
result = qa_chain.invoke({"query": question})
result

{'query': '¿Puedes indicarme el coste de recibir una transferencia de 5.000€ en Yenes?',
 'result': 'No puedo indicar el coste de recibir una transferencia de 5.000€ en Yenes basándome en la información proporcionada.'}

In [10]:
question_2 = "¿Noruega tiene convenio con la UE?"
result_2 = qa_chain.invoke({"query": question_2})
result_2

{'query': '¿Noruega tiene convenio con la UE?',
 'result': 'No, Noruega no tiene convenio con la Unión Europea.'}

### The assistant is not answering correctly

## Few-shot prompting (still in development)

It is not working, needs a little bit more development

In [11]:
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import FewShotPromptTemplate

examples = [
    {
       "Pregunta": "¿Puedes calcularme las comisiones para una transferencia fuera de la Zona €,de 10.000€ con la comisiones a cargo del socio?",
       "Respuesta": "total: 60€",
    },
    {
        "Pregunta": "¿Puedes indicarme el coste de recibir una transferencia de 5.000€ en Yenes?",
        "Respuesta": "12,50€",
    },
    {
        "Pregunta": "¿Qué exposición tiene el fondo de inversión, CI Environment ISR en RV?", 
        "Respuesta": "50-75%, resto RF"
    },
]


In [34]:
# RAG prompt that works finding the correct information requested, even if it does not perform calculations with the information.
# It pulls the prompt from langchain-hub
# Reproduction of the prompt, including asking for calculations
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import PromptTemplate

_prompt_1 = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, 
just say that you don't know. Use three sentences maximum and keep the answer concise. Make the proper calculations when it asked the result of 
operations with data from the context."""

_PROMPT_SUFFIX_1 = """
Question: {question}
Context: {context}
Answer:

"""

#prompt_pre = PromptTemplate(input_variables=['context', 'question'], template=template)
#prompt = ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=prompt_pre)])    

example_prompt = PromptTemplate.from_template("User input: {question}\nContext: {context}")
_PROMPT = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    prefix= _prompt_1,
    suffix= _PROMPT_SUFFIX_1,
    input_variables=["question", "context"],
    )

In [35]:
# Load the previously created Chroma index from the "chroma_db" directory
vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embedding)

# RetrievalQA
qa_chain_2 = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": _PROMPT}
)

In [36]:
# Question and invoking the chain for quering
question = "¿Puedes indicarme el coste de recibir una transferencia de 5.000€ en Yenes?"
result_3 = qa_chain_2.invoke({"query": question})
result_3

KeyError: 'context'