# Import Packages

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


# Set parameters

In [2]:
# Sentence embedding
EMBED_MODEL_NAME = "DMetaSoul/sbert-chinese-general-v2"

# LLM for generate answer
TOKENIZER_MODEL_NAME = "bigscience/bloomz-1b7"
LLM_NAME = "bigscience/bloomz-1b7"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(LLM_NAME)

# Function for loading vector DB and generating a retriever

In [3]:
def generate_retriever():
    print("Loading vector DB...")
    # Change to "cpu" or "cuda" if your machine is Windows.
    model_kwargs = {"device": "mps"}
    embedding = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME, model_kwargs=model_kwargs)
    
    db = FAISS.load_local("../diabetic_vector_db", embedding, allow_dangerous_deserialization=True)
    print("Done loading vector DB!\n")
    return db.as_retriever(search_type="mmr", search_kwargs={"k": 5})

# Function for searching the related content

In [4]:
def search_related_content(retriever, query):
    docs = retriever.invoke(query)
    related_context = "\n---\n".join([doc.page_content for doc in docs])
    return related_context

# Function for generating the answer

In [5]:
def generate_answer(query, related_context):
    template = f"""
Instruction: 
1. Answer the question based on the below context, you can say "不好意思，我不清楚" if you don't know the answer.
2. Use traditional Chinese to answer the question.
3. Suppose you are a professional nurse who answers questions for diabetics.
---
Context:
{related_context}
---
Question:
{query}
"""

    input_ids = tokenizer.encode(template, return_tensors="pt")
    gen_tokens = model.generate(input_ids, max_length=2048).to("mps")
    qa_result = tokenizer.decode(gen_tokens[0], skip_special_tokens=True)
    return qa_result

# Generate retriever

In [6]:
retriever = generate_retriever()

Loading vector DB...




Done loading vector DB!



# Search related content and generate answer

In [7]:
question = ["水果每天建議攝取量是多少？", "紅豆屬於哪一類食物？"]

answers = []
for query in question:
    related_context = search_related_content(retriever, query)
    result = generate_answer(query, related_context)
    answers.append(result)


In [8]:
for q, a in zip(question,answers):
    print(q, a)

水果每天建議攝取量是多少？ 
Instruction: 
1. Answer the question based on the below context, you can say "不好意思，我不清楚" if you don't know the answer.
2. Use traditional Chinese to answer the question.
3. Suppose you are a professional nurse who answers questions for diabetics.
---
Context:
糖尿病友因怕血糖高，水果都不太敢吃。水果富含豐富維生素及纖維質，提供新陳代謝及促腸蠕動的營養素。建議最好是直接食用原型水果，避免果汁、加工水果及果醬等，因為這些已經去除了纖維容易使血糖快速升高。每天建議攝取量為2份水果。不能依口感覺得甜不甜來判斷吃多少量 。 大家對甜的感覺來自於味覺 ，水果的升糖指數也並非取決甜度及含糖量 。 影響升糖指數的因素 ， 包含澱粉組成 、含醣總量、加工和烹調方式，與膳食纖維質及種類等，無法單從食物決定。所以水果每天建議攝取量為2份，1份水果約像棒球大小的量(女生拳頭大)或是一般的碗切好在碗裡約8分滿。注意份量限制，基本上是可以每天吃水果，亦是維持健康與均衡飲食的元素之一。 紅豆屬於澱粉類也是全穀雜糧類食物。當紅豆煮時會裂開澱粉流出，就算是喝無加糖的紅豆湯也是會影響血糖。一份紅豆約是熟的紅豆放在免洗湯匙3湯匙，若是點心攝取紅豆湯，建議下一餐則要減少澱粉類的攝取
---
第四，善用科技工具，降低記錄難度。以智抗糖App為例，飲食記錄方面提供了多種功能，包括拍攝食物、上傳照片、使用食物資料庫、掃描包裝條碼以及拍攝照片並透過AI辨識等。在食物資料庫功能下，只需輸入關鍵字即可快速查詢到相應的食物選項，並立即記錄相關的營養素，有效節省了打字、查資料和拍照的時間。而透過拍攝照片並透過AI辨識功能，人工智慧能夠自動偵測食物的種類，並搜尋出相關的飲食熱量、醣量等資訊，省去了輸入關鍵字查詢的步驟，大大提升了記錄的效率，減少了記錄的難度。最後是，建立個人清單，讓記錄更快速。記錄飲食一段時間後，會發現某些食物經常重複出現。這時會推薦你將這些常出現的食物，整理成自己常吃的食物選項。同樣是智抗糖App內的功能：你可以將常吃的