In [1]:
import pickle
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from tqdm import tqdm

from multiwoz_utils.dialog_iterator import iterate_dialogues
from multiwoz_utils.data_loader import load_multiwoz

from prompts import (
    STATE_EXTRACTION_PROMPT,
    RESPONSE_GENERATION_PROMPT,
    DOMAIN_RECOGNITION_PROMPT
)

from multiwoz_utils.database import default_database
# results = default_database.query("hotel", {"area": "north", "parking": "yes"})
# print(results)

In [2]:
def process_examples(examples, input_keys, output_keys, num_examples=2):
    """Render retrieved examples as a few-shot text block for a prompt.

    Only the last ``num_examples`` entries of ``examples`` are rendered. Each
    rendered example consists of a dashed separator, an ``Example {n}:`` header
    (``n`` counts from 0 within the rendered slice), one ``key: value`` line per
    input key, a blank line, and one ``key: value`` line per output key.

    Args:
        examples: Sequence of mappings; each must be indexable by every name in
            ``input_keys`` and ``output_keys``.
        input_keys: Keys rendered in the input part of each example. The key
            ``'full_state'`` is labelled ``state`` in the rendered text.
        output_keys: Keys rendered in the output part of each example.
        num_examples: How many trailing examples to render. Defaults to 2, the
            value that used to be hard-coded. ``0`` renders no examples.

    Returns:
        str: The rendered examples wrapped in a leading and a trailing newline;
        ``"\\n\\n"`` when nothing is rendered.

    Raises:
        ValueError: If ``num_examples`` is negative.
    """
    if num_examples < 0:
        raise ValueError(f"num_examples must be >= 0, got {num_examples}")

    # ``examples[-0:]`` would select the *whole* list, so zero needs its own branch.
    selected = examples[-num_examples:] if num_examples else []

    rendered = []
    for n, ex in enumerate(selected):
        input_str = "\n".join(
            f"{'state' if key == 'full_state' else key}: {ex[key]}" for key in input_keys
        )
        output_str = "\n".join(f"{key}: {ex[key]}" for key in output_keys)
        # NOTE(review): the separator runs straight into the header with no
        # newline in between ("-----Example 0:"). Kept as-is because existing
        # prompts may depend on this exact layout -- confirm whether intended.
        rendered.append(
            "---------------------"
            f"Example {n}:\n"
            f"{input_str}\n"
            f"\n{output_str}\n"
        )
    return "\n" + "".join(rendered) + "\n"

In [4]:
# Preview cell: walk the first few MultiWOZ turns, retrieve similar contexts
# from the FAISS store, and print the domain-recognition prompt for each turn.
data = load_multiwoz()  # loads the 'train' split by default

# Parameters
vec_file_path = "multiwoz-context-db.vec"
top_k = 5  # number of retrieval candidates
# Stop after this many turns have been processed; set to None to run every turn.
# At least one turn is always processed because the check happens at loop end.
MAX_PREVIEW_TURNS = 3

# Load the embedding model.
# NOTE(review): `embeddings` is not referenced again in this cell; presumably the
# pickled store carries its own embedding function -- confirm before removing.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Load the FAISS vector store from a local pickle file.
# SECURITY: pickle.load can execute arbitrary code while unpickling. Only load
# a .vec file that was produced locally / comes from a trusted source.
with open(vec_file_path, "rb") as f:
    vector_store = pickle.load(f)

print("Loaded FAISS vector store from", vec_file_path)

last_dial_id = None
history = []

for it, turn in enumerate(tqdm(iterate_dialogues(data, default_database), desc="Retrieving", unit="turns")):
    dialog_id = turn['dialogue_id']

    # A new dialogue id means a new conversation: start with an empty history.
    if dialog_id != last_dial_id:
        history = []
        last_dial_id = dialog_id

    # Full history so far, excluding the current utterance.
    # Only consumed by the disabled prompt variants below.
    history_text = "\n".join(history)

    # Similarity search against the vector store.
    query_text = turn['page_content']
    results = vector_store.similarity_search(query_text, k=top_k)

    # Build few-shot examples from the metadata of the retrieved documents.
    # Only consumed by the disabled prompt variants below.
    examples = [{
        'context': doc.metadata.get('context', ''),
        'state': doc.metadata.get('state', ''),
        'full_state': doc.metadata.get('full_state', ''),
        'response': doc.metadata.get('response', ''),
        'database': doc.metadata.get('database', ''),
        'domain': doc.metadata.get('domain', '')
    } for doc in results]

    # --- Disabled variant: state-extraction prompt ---
    # fewshot_examples = process_examples(
    #     examples, ['context'], ['state']
    # )

    # # Use the dialogue history in place of the original context
    # final_prompt = STATE_EXTRACTION_PROMPT.format(
    #     fewshot_examples,
    #     history_text,
    #     turn['question'].strip()
    # )

    # --- Disabled variant: response-generation prompt ---
    # fewshot_examples = process_examples(
    #     examples, ["context", "full_state", "database"], ["response"]
    # )

    # # Use the dialogue history in place of the original context
    # final_prompt = RESPONSE_GENERATION_PROMPT.format(
    #     fewshot_examples,
    #     history_text,
    #     turn['question'].strip(),
    #     turn['gt_state'],
    #     turn['metadata']['database']
    # )

    # --- Active variant: domain-recognition prompt ---
    # Only the last two history lines are passed as context.
    history_text_domain = "\n".join(history[-2:])
    # Use the dialogue history in place of the original context
    final_prompt = DOMAIN_RECOGNITION_PROMPT.format(
        history_text_domain,
        turn['question'].strip()
    )

    print(final_prompt)
    print("==" * 50)

    # Update the dialogue history with the current exchange.
    history.append(f"Customer: {turn['question']}")
    history.append(f"Assistant: {turn['metadata']['response']}")

    # Preview cut-off (replaces the former magic `it == 2` check).
    if MAX_PREVIEW_TURNS is not None and it + 1 >= MAX_PREVIEW_TURNS:
        break

Loaded FAISS vector store from multiwoz-context-db.vec


Retrieving: 2turns [00:00,  6.69turns/s]


Determine which domain is considered in the following dialogue situation.
Choose one domain from this list:
 - restaurant
 - hotel
 - attraction
 - taxi
 - train
Answer with only one word, the selected domain from the list.
You have to always select the closest possible domain.
Consider the last domain mentioned, so focus mainly on the last utterance.

-------------------
Example1:
Customer: I need a cheap place to eat
Assistant: We have several not expensive places available. What food are you interested in?
Customer: Chinese food.

Domain: restaurant

-------

Example 2:
Customer: I also need a hotel in the north.
Assistant: Ok, can I offer you the Molly's place?
Customer: What is the address?

Domain: hotel

---------

Example 3:
Customer: What is the address?
Assistant: It's 123 Northfolk Road.
Customer: That's all. I also need a train from London.

Domain: train

-------------------
Now complete the following example:


Customer: i need a place to dine in the center thats expensi

Retrieving: 4turns [00:00,  8.20turns/s]


Determine which domain is considered in the following dialogue situation.
Choose one domain from this list:
 - restaurant
 - hotel
 - attraction
 - taxi
 - train
Answer with only one word, the selected domain from the list.
You have to always select the closest possible domain.
Consider the last domain mentioned, so focus mainly on the last utterance.

-------------------
Example1:
Customer: I need a cheap place to eat
Assistant: We have several not expensive places available. What food are you interested in?
Customer: Chinese food.

Domain: restaurant

-------

Example 2:
Customer: I also need a hotel in the north.
Assistant: Ok, can I offer you the Molly's place?
Customer: What is the address?

Domain: hotel

---------

Example 3:
Customer: What is the address?
Assistant: It's 123 Northfolk Road.
Customer: That's all. I also need a train from London.

Domain: train

-------------------
Now complete the following example:

Customer: i need a place to dine in the center thats expensiv

Retrieving: 5turns [00:00,  7.33turns/s]


Determine which domain is considered in the following dialogue situation.
Choose one domain from this list:
 - restaurant
 - hotel
 - attraction
 - taxi
 - train
Answer with only one word, the selected domain from the list.
You have to always select the closest possible domain.
Consider the last domain mentioned, so focus mainly on the last utterance.

-------------------
Example1:
Customer: I need a cheap place to eat
Assistant: We have several not expensive places available. What food are you interested in?
Customer: Chinese food.

Domain: restaurant

-------

Example 2:
Customer: I also need a hotel in the north.
Assistant: Ok, can I offer you the Molly's place?
Customer: What is the address?

Domain: hotel

---------

Example 3:
Customer: What is the address?
Assistant: It's 123 Northfolk Road.
Customer: That's all. I also need a train from London.

Domain: train

-------------------
Now complete the following example:

Customer: i need a place to dine in the center thats expensiv


