In [1]:
import json
# Read the whole JSON file into memory. The encoding is given explicitly
# because open() otherwise falls back to the platform's locale encoding,
# which can mis-decode non-ASCII text on systems whose default is not UTF-8.
with open("hotpot_dev_distractor_v1.json", encoding="utf-8") as f:
    datasets = json.load(f)

In [2]:
# Keep only the first 500 records; everything after them is discarded.
datasets = datasets[:500]

In [3]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Load environment variables via python-dotenv, then build the OpenAI client
# using the key stored in OPENAI_API_KEY (None is passed if it is unset).
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def llm_create(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: The full prompt text, sent as the content of one "user" message.

    Returns:
        The ``message.content`` of the first choice in the chat completion
        produced by the "gpt-4o-mini" model.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [4]:
# Prompt template for the answering step. It has two str.format placeholders:
# {question} for the question text and {docs} for the supporting material.
answer_prompt = """Base on the following document and answer the question.
Only answer, no explanation.

Question:{question}

Document:{docs}
"""

In [5]:
from long_memory.component import WeaviateLongMemory

# Long-memory component used for storing and retrieving document sentences,
# created for the "Hotpot" user.
# NOTE(review): the exact effect of time_sort=False is defined in
# long_memory.component and is not visible here — confirm before changing.
long_mem = WeaviateLongMemory(user="Hotpot", time_sort=False)

Detect existed Hotpot user group memory space, loading...
Detect existed Hotpot user child memory space, loading...


            Consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [8]:
import pandas as pd

# Empty results table: one row per processed question is added later.
df = pd.DataFrame()
for _col in (
    'question_id',
    'question',
    'gold_answer',
    'answer',
    'org_res_from_long_mem',
):
    # Assigning None on an empty frame just registers the column.
    df[_col] = None

In [10]:
# Evaluate dataset rows 63..99: store each question's documents in long
# memory, retrieve for the question, and ask the LLM for an answer.
# Indices whose processing raised are collected in error_list.
error_list = []
for i in range(63, 100):
    try:
        print(f"---{i}---")
        # Clear the memory store before ingesting this question's documents.
        long_mem.del_memory()

        sample = datasets[i]
        doc_list = sample["context"]
        question_id = sample["_id"]
        gold_answer = sample['answer']
        question = sample["question"]

        df.loc[i, 'question_id'] = question_id
        df.loc[i, 'question'] = question
        df.loc[i, 'gold_answer'] = gold_answer

        for doc in doc_list:
            # doc[1] is iterated as a sequence of strings; strip double quotes
            # and leading/trailing '.' and "'" characters from each one.
            cleaned = (raw.replace('"', "").strip(".'") for raw in doc[1])
            # Filter out meaningless fragments: anything of 7 characters or
            # fewer, plus one known leftover reference tag.
            article = [
                {"text": sentence}
                for sentence in cleaned
                if len(sentence) > 7
                and sentence != "<ref name=Ahmed Rashid/The Telegraph> </ref>"
            ]
            long_mem.add_article(article)

        res = long_mem.get_memory(question, recall=True)
        df.loc[i, 'org_res_from_long_mem'] = str(res)

        filled_prompt = answer_prompt.format(question=question, docs=res)
        answer_res = llm_create(filled_prompt)
        df.loc[i, 'answer'] = answer_res
    except Exception as err:
        # Best effort: report the failure, remember the index, keep going.
        print(f"-----error{err}-----")
        error_list.append(i)

    # Checkpoint the results table after every 10th index.
    if (i + 1) % 10 == 0:
        df.to_json("hotpot_df_v2.json", orient="records", lines=True)

# Final write once the loop has finished.
df.to_json("hotpot_df_v2.json", orient="records", lines=True)

---63---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
---64---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSave article to long memory done.[0m
[34mSav

In [11]:
error_list # check for errors, then save

[]