In [94]:
import os
from dotenv import load_dotenv, find_dotenv
from pprint import pprint
import openai

In [2]:
os.getcwd()

'/root/work'

In [3]:
os.chdir('/root/projects/PythonProjects/hsbc_task/RAG-with-LLMs')
os.getcwd()

'/root/projects/PythonProjects/hsbc_task/RAG-with-LLMs'

In [4]:
# Load the environment variables.
# load_dotenv(find_dotenv(), override=True)
load_dotenv('.env', override=True)

True

In [5]:
api_type = os.environ.get('OPENAI_API_TYPE')
api_version = os.environ.get('OPENAI_API_VERSION')
api_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]  # os.environ.get('OPENAI_API_BASE')
api_key = os.environ.get('AZURE_OPENAI_API_KEY')
deployment_name = os.environ.get('DEPLOYMENT')

# 载入文档

In [7]:
from langchain.document_loaders import UnstructuredFileLoader

# loader = UnstructuredFileLoader('./data/2303.18223.pdf')
loader = UnstructuredFileLoader('./data/Mediterranean-diet.pdf')
documents = loader.load()

2023-12-05 08:12:52.069265: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# documents

# 数据预处理：文档切分成chunks，编码成embeddings，存储到vectorstore (Chroma)

In [9]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents=documents)

In [10]:
# texts

In [11]:
from langchain.embeddings import OpenAIEmbeddings, AzureOpenAIEmbeddings
from langchain.vectorstores import Chroma

# embeddings = OpenAIEmbeddings()
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=api_endpoint,
    openai_api_key=api_key,
    # azure_deployment=deployment_name, 
    deployment="text-embedding-ada-002",
    openai_api_version=api_version)

# load it into Chroma
doc_search = Chroma.from_documents(texts, embeddings)


In [133]:
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI

llm = AzureChatOpenAI(
    azure_deployment=deployment_name,
    temperature=0
)
# VectorStoreRetriever initialized from this VectorStore
retriever = doc_search.as_retriever(search_type='similarity', search_kwargs={'k': 3})

chain = RetrievalQA.from_chain_type(llm=llm, chain_type='stuff', retriever=retriever)

# 文档问答任务

## Basic Examples

In [270]:
query = 'What are the effects of Mediterranean diet? List with bullet points.'
print(chain.run(query))

The effects of the Mediterranean diet are:

- Reducing the risk of subsequent cardiovascular events in patients who have had myocardial infarction.
- Reducing the risk of cardiovascular events in patients who are at high risk of cardiovascular disease.
- Lowering the risk or mitigating the severity of type 2 diabetes.
- Reducing the risk of colorectal, breast, and other cancers.
- Managing weight (despite high fat content).
- Increasing compliance due to the tastier and more filling nature of the diet.
- Adaptable in any cuisine.
- Readily available in Australia.
- No specific fat restriction, as long as fat is mostly derived from fatty fish and plant sources (particularly olive oils or nuts).


In [271]:
query = 'What does Mediterranean diet contain?'
print(chain.run(query))
# llm

The Mediterranean diet contains high monounsaturated (e.g. olive oil) to saturated (e.g. fatty red meat) ratio, high intake of legumes, fruits, vegetables, grains, and cereals, moderate quantities of fish, white meat, and low-fat dairy, low to moderate consumption of red wine, and low intake of red meat, processed meat, eggs, sweets, sweet desserts, and sweet drinks.


In [272]:
query = 'How to implement Mediterranean diet?'
print(chain.run(query))

To implement the Mediterranean diet, you can follow these simple guidelines:

1. Assess adherence to a Mediterranean diet using the PREDIMED 14-item Questionnaire.

2. Introduce simple changes to your diet, such as using olive oil instead of other oils/fats for cooking and dressing salads and cooked vegetables.

3. Increase your intake of legumes, fruits, vegetables, grains, and cereals.

4. Consume moderate quantities of fish, white meat, and low-fat dairy.

5. Limit your consumption of red wine, red meat, processed meat, and eggs.

6. Limit your intake of sweets, sweet desserts, and sweet drinks.

7. Follow the suggested meal plan, which includes foods like sourdough bread, chopped tomato, red onion, avocado, poached egg, beans, tuna, salmon, Greek-style natural yogurt, baked fish, and baked potato.

Remember that the Mediterranean diet is not a one-size-fits-all approach, and total energy intake should be adapted to meet individual needs.


In [70]:
query = 'What is the recommended portions for vegetables?'
print(chain.run(query))

According to the Australian Guide to Healthy Eating, the recommended portions for vegetables are 5 serves (2½ cups or 375 g) for women and 6 serves (3 cups or 450 g) for men.


In [71]:
query = 'How much vegetables should we eat per day?'
print(chain.run(query))

According to the Australian Guide to Healthy Eating, women should consume 5 serves (2½ cups or 375 g) of vegetables, legumes, and beans per day, while men should consume 6 serves (3 cups or 450 g) per day. It is also recommended to consume vegetables with every meal, including leafy greens and tomatoes.


## Examples with CoT prompting

Relevant part in the document:

Australian Guide to Healthy Eating recommended portions

Vegetables, legumes and beans - 
Women: 5 serves (2½ cups or 375 g) 
Men: 6 serves (3 cups or 450 g)

In [281]:
from langchain.evaluation import load_evaluator, EvaluatorType

evaluator_qa = load_evaluator(EvaluatorType.QA, llm=llm)

evaluator_cot_qa = load_evaluator(EvaluatorType.COT_QA, llm=llm)

# Load the scoring evaluator
evaluator_lss = load_evaluator(EvaluatorType.LABELED_SCORE_STRING, llm=llm)
# Define the criteria for evaluation
criteria = {
    "accuracy": "Rate the accuracy of the answer on a scale of 1 to 10.",
}


In [282]:
# Provide the question and answer as input to the evaluator
question = 'How much grams of vegetables should a married couple eat per week?'
answer1 = chain.run(query)
reference = "5775 g of vegetables per week." # (375 + 450) * 7

print(answer1, '\n', '--' * 20, )

# evaluator_qa.evaluate_strings(
#     prediction=answer1,
#     input=question,
#     reference=reference,
# )

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer1,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

The recommended amount of fruit for women is 2 medium pieces or 300 g, and for men, it is 2 medium pieces or 300 g. Therefore, for a family consisting of 5 males and 6 females, the recommended amount of fruit would be 33 pieces or 4.95 kg (11 lbs) of fruit per day. 
 ----------------------------------------
{'reasoning': "Explanation: The AI assistant's response is not relevant to the "
              "user's question and does not provide any helpful or accurate "
              'information about the recommended amount of vegetables for a '
              'married couple. The response only talks about the recommended '
              'amount of fruit for men and women, which is not related to the '
              'question. \n'
              '\n'
              'Rating: [[1]]',
 'score': 1}


In [283]:
# Zero-shot-CoT

answer2 = chain.run(question + "Let's think step by step.")
print(answer2, '\n', '--' * 20)

evaluation_result = evaluator_cot_qa.evaluate_strings(
    prediction=answer2,
    input=question,
    reference=reference,
)
pprint(evaluation_result)

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer2,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

To answer this question, we need to look at the information provided in the context. The Australian Guide to Healthy Eating recommends that women consume 5 serves (2½ cups or 375 g) of vegetables, legumes, and beans per day, while men consume 6 serves (3 cups or 450 g) per day. 

Assuming a married couple consists of one man and one woman, they should aim to consume a total of 11 serves (5½ cups or 825 g) of vegetables, legumes, and beans per day. Over the course of a week, this would amount to 77 serves (38.5 cups or 5.4 kg) of vegetables, legumes, and beans for the couple. 

It's important to note that this is just a general recommendation and individual needs may vary based on factors such as age, gender, and activity level. It's always a good idea to consult with a healthcare professional or registered dietitian for personalized nutrition advice. 
 ----------------------------------------
{'reasoning': 'The question asks for the amount of grams of vegetables a '
              'marr

Relevant part in the document:

The Mediterranean diet comprises:
high monounsaturated (eg olive oil) to saturated (eg fatty red meat) ratio – at least 2:1
...

In [137]:
question = 'If a person consumes 6 g of monounsaturated fat daily, at most how much saturated he or she should take for 4 days?'
answer1 = chain.run(question)
reference = "The person can consume at most 12 g of saturated fat over 4 days."

print(answer1, '\n', '--' * 20)

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer1,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

The context does not provide enough information to answer this question. The text mentions the recommended ratio of monounsaturated to saturated fat in the Mediterranean diet, but it does not provide specific information about how much saturated fat a person should consume based on their intake of monounsaturated fat. 
 ----------------------------------------
{'reasoning': 'Explanation: The AI assistant correctly identifies that the '
              'given context does not provide enough information to answer the '
              'question. It also explains why it cannot provide a specific '
              'answer. However, it does not provide any further insights or '
              'suggestions to help the user find the answer elsewhere.\n'
              '\n'
              'Rating: [[7]]',
 'score': 7}


In [138]:
answer2 = chain.run(question + "Let's think step by step.")
print(answer2, '\n', '--' * 20)

# evaluator_cot_qa.evaluate_strings(
#     prediction=answer2,
#     input=question,
#     reference=reference,
# )

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer2,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

According to the Mediterranean diet, the ratio of monounsaturated to saturated fat should be at least 2:1. This means that for every 2 grams of monounsaturated fat, a person can consume 1 gram of saturated fat. 

If a person consumes 6 grams of monounsaturated fat daily, then they can consume up to 3 grams of saturated fat daily (since the ratio is 2:1). 

For 4 days, the maximum amount of saturated fat they can consume would be:

3 grams/day x 4 days = 12 grams of saturated fat over 4 days. 
 ----------------------------------------
{'reasoning': 'This response is helpful, relevant, correct, and demonstrates '
              'depth of thought. The assistant provides a clear explanation of '
              'the ratio of monounsaturated to saturated fat in the '
              'Mediterranean diet and how it applies to the given scenario. '
              'The calculation provided is accurate and factual. Overall, the '
              'response is well-structured and informative. \n'
        

Relevant part in the document:

Fruit - Women: 2 medium pieces or 300 g; Men: 2 medium pieces or 300 g

In [275]:
question = ' For a family consisting 5 males and 6 females, what is the recommended amount of fruit?'

answer1 = chain.run(question)
reference = "The recommended amount of fruit for the family is 22 pieces or 3.3 kg per day."

print(answer1, '\n', '--' * 20)

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer1,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

The recommended amount of fruit for women is 2 medium pieces or 300 g, and for men, it is 2 medium pieces or 300 g. Therefore, for a family consisting of 5 males and 6 females, the recommended amount of fruit would be 33 pieces or 4.95 kg (11 lbs) of fruit per day. 
 ----------------------------------------
{'reasoning': "Explanation: The AI assistant's response is not correct as it "
              'provides the recommended amount of fruit for men and women '
              'separately, but not for a family consisting of both. The given '
              'information in the ground truth states the recommended amount '
              'of fruit for a family, which is 22 pieces or 3.3 kg per day. '
              "Therefore, the AI assistant's response is not relevant or "
              'correct.\n'
              '\n'
              'Rating: [[2]]',
 'score': 2}


In [276]:
answer2 = chain.run(question + "Let's think step by step.")
print(answer2, '\n', '--' * 20)

# Evaluate the answer
evaluation_result = evaluator_lss.evaluate_strings(
    prediction=answer2,
    input=question,
    criteria=criteria,
    reference=reference
)
pprint(evaluation_result)

According to the table provided, the recommended amount of fruit for women is 2 medium pieces or 300 g, and the same amount is recommended for men. Therefore, for a family consisting of 5 males and 6 females, the recommended amount of fruit would be:

(5 males x 2 medium pieces) + (6 females x 2 medium pieces) = 22 medium pieces or 3.3 kg of fruit. 
 ----------------------------------------
{'reasoning': 'The response is helpful, relevant, correct, and demonstrates '
              'depth of thought. The AI assistant accurately refers to the '
              'table provided and calculates the recommended amount of fruit '
              'for a family consisting of 5 males and 6 females. The response '
              'is also helpful and insightful as it provides a clear '
              'calculation for the recommended amount of fruit. Therefore, I '
              'would rate this response as: "Rating: [[9]]".',
 'score': 9}


## Examples with Query Transformation (Rewrite-Retrieve-Read)
https://arxiv.org/pdf/2305.14283.pdf?ref=blog.langchain.dev


In [259]:
from langchain.evaluation import Criteria


hh_criteria = {
    Criteria.HELPFULNESS: "Is the submission helpful, insightful, and appropriate?",
    Criteria.HARMFULNESS: "Is the submission harmful, offensive, or inappropriate?",
    Criteria.DEPTH: "Does the submission demonstrate depth of thought?",
    Criteria.CREATIVITY: "Does the submission demonstrate novelty or unique ideas?",
    Criteria.DETAIL: "Does the submission demonstrate attention to detail?",
}

evaluator_ss = load_evaluator(EvaluatorType.SCORE_STRING, criteria=hh_criteria, llm=llm)

This chain was only tested with GPT-4. Performance may be significantly worse with other models.


In [260]:
question = 'What drinks are bad / good for MedDiet?'

answer1 = chain.run(question)
print(answer1, '\n', '--' * 20)

# Evaluate the answer
evaluation_result = evaluator_ss.evaluate_strings(
    prediction=answer1,
    input=question,
)
pprint(evaluation_result)

The Mediterranean diet recommends low to moderate consumption of red wine and low intake of sweets, sweet desserts, and sweet drinks. Herbal tea and short black coffee are good options for the Mediterranean diet. Water is also an excellent choice for hydration. 
 ----------------------------------------
{'reasoning': "The AI assistant's response is helpful and appropriate as it "
              "directly answers the user's question about what drinks are good "
              'or bad for the Mediterranean diet. The response demonstrates '
              'attention to detail by specifying the recommended intake of '
              'certain drinks and the avoidance of others. However, the '
              'response lacks depth and creativity as it only provides basic '
              'information without any unique ideas or insights. \n'
              '\n'
              'Rating: [[7]]',
 'score': 7}


In [261]:
# from langchain.prompts import ChatPromptTemplate
# 
# template = """Provide a better search query for \
# search engine to answer the given question, end \
# the queries with '**'. Question: \
# {x} Answer:
# """
# rewrite_prompt = ChatPromptTemplate.from_template(template)

from langchain import hub
rewrite_prompt = hub.pull("langchain-ai/rewrite")

In [262]:
print(rewrite_prompt.template)

Provide a better search query for web search engine to answer the given question, end the queries with ’**’.  Question {x} Answer:


In [263]:
def _parse(text):
    return text.strip("**")

In [264]:
from langchain.schema.output_parser import StrOutputParser

rewriter = rewrite_prompt | llm | StrOutputParser() | _parse

In [265]:
rewriter.invoke({"x": question})

'Which beverages are recommended / not recommended for Mediterranean diet?'

In [266]:
answer2 = chain.run(rewriter.invoke({"x": question}))

print(answer2, '\n', '--' * 20)

# Evaluate the answer
evaluation_result = evaluator_ss.evaluate_strings(
    prediction=answer2,
    input=question,
)
pprint(evaluation_result)

The Mediterranean diet recommends a low to moderate consumption of red wine and a low intake of sweet drinks. However, it is important to note that excessive alcohol consumption is not recommended for overall health. Other recommended beverages include water, herbal tea, and short black coffee. There are no specific beverages that are not recommended as long as they are consumed in moderation and do not contain excessive amounts of added sugars. 
 ----------------------------------------
{'reasoning': "The AI assistant's response is helpful and appropriate as it "
              "provides a clear and concise answer to the user's question. It "
              'also offers additional information on the recommended beverages '
              'for the Mediterranean diet and the importance of moderation. '
              'However, it lacks depth and creativity as it only provides '
              'basic information without any unique ideas or insights. The '
              'response demonstrates 