In [1]:
import json
# Load the HotpotQA dev (distractor) file from the working directory.
# The encoding is pinned to UTF-8 so the JSON is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("hotpot_dev_distractor_v1.json", encoding="utf-8") as f:
    datasets = json.load(f)

In [2]:
# Upper bound on how many examples the rest of the notebook works with;
# change it here rather than editing the slice.
MAX_EXAMPLES = 500
datasets = datasets[:MAX_EXAMPLES]

In [3]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Read a local .env file (if any) into the process environment, then build the
# OpenAI client from the key found there.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api_key)

def llm_create(prompt, model="gpt-4o-mini"):
    """Send one user message to the chat-completions API and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: Name of the chat model to call. Defaults to the model this
            notebook was previously hard-wired to, so existing calls behave
            the same.

    Returns:
        The ``content`` of the first choice's message.
    """
    messages = [{"role": "user", "content": prompt}]
    # Uses the module-level `client` created in the setup cell.
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return completion.choices[0].message.content

In [21]:
# Prompt template asking the model which retrieved documents support a query.
# Fill it with str.format(query=..., docs=...). The doubled braces ({{ and }})
# are format escapes, so the JSON example is rendered with literal single braces.
# The reply is expected as a ```json fenced object with a "support_fact" list
# of doc_id strings.
find_support_fact_prompts = """You are given a query and a list of evidence document.
Your task is to find the support fact from the evidence document for the query.
Put doc_id into support_fact list if relative to the query.

Query:{query}

Document:{docs}

Output following json format, example:
```json
{{
    "support_fact": [
        "a4488301-71ee-47e4-98a1-a26d93ca38a0",
        "62861a3d-87a7-408b-98bd-688abc8583a6"
    ]
}}
```"""

# Prompt template for the final answer. Fill it with
# str.format(question=..., docs=...). It instructs the model to reply with the
# bare answer only, or the literal word None when the documents do not contain it.
answer_prompt = """Base on the following document and answer the question.
Only answer with no description, answer None if you don't know the answer.

Question:{question}

Document:{docs}
"""

In [5]:
from long_memory.hotpot_component import HotPotWeaviateLongMemory

# Shared long-memory helper used by the evaluation loop below.
# NOTE(review): judging by this cell's output, construction detects and loads
# existing "group" and "child" memory spaces -- confirm in
# long_memory.hotpot_component.
long_mem = HotPotWeaviateLongMemory()

Detect existed deafult user group memory space, loading...
Detect existed deafult user child memory space, loading...


In [6]:
import pandas as pd

# Empty results table; the evaluation loop fills one row per processed example.
RESULT_COLUMNS = (
    'question_id',
    'answer',
    'support_fact',
    'org_res_from_long_mem',
)

df = pd.DataFrame()
for column_name in RESULT_COLUMNS:
    df[column_name] = None

In [10]:
from weaviate.classes.query import Filter, QueryReference
import re

def _parse_support_fact_ids(response):
    """Extract the ``support_fact`` id list from an LLM reply.

    The prompt asks for a ```json fenced object. If the fence is missing, the
    whole reply is tried as JSON instead. A reply that cannot be parsed, or
    that has no ``support_fact`` list, is reported and yields an empty list,
    so one malformed reply does not abort the whole batch.

    Args:
        response: Raw text returned by the model (may be None).

    Returns:
        The list stored under ``"support_fact"``, or ``[]`` if unavailable.
    """
    text = response or ""
    fenced = re.search(r"```json(.*?)```", text, re.DOTALL)
    payload = fenced.group(1) if fenced else text
    try:
        parsed = json.loads(payload.strip())
    except json.JSONDecodeError as exc:
        print(f"can't parse support facts from LLM response: {exc}")
        return []
    support_fact = parsed.get("support_fact") if isinstance(parsed, dict) else None
    if not isinstance(support_fact, list):
        print("LLM response has no usable 'support_fact' list.")
        return []
    return support_fact


# For each example: load its context into long memory, retrieve for the
# question, ask the LLM for an answer and for the supporting doc ids, then
# resolve those ids to [doc_name, sentence_number] pairs and store the row.
# NOTE(review): the range starts at 1, so datasets[0] is not processed by this
# cell -- confirm whether that is intended.
for i in range(1, 10):
    print(f"---{i}---")
    sp_fact = []
    # Clear memory before ingesting this example's context.
    # NOTE(review): assumes del_memory() empties the store -- confirm.
    long_mem.del_memory()
    doc_list = datasets[i]["context"]
    question_id = datasets[i]["_id"]
    df.loc[i, 'question_id'] = question_id
    question = datasets[i]["question"]
    for doc in doc_list:
        long_mem.add_hotpot_doc(doc)
    res = long_mem.get_memory(question, recall=True)
    # Stored as the Python repr so a later cell can turn it back into a dict.
    df.loc[i, 'org_res_from_long_mem'] = str(res)
    answer_res = llm_create(answer_prompt.format(question=question, docs=res))
    df.loc[i, 'answer'] = answer_res
    response = llm_create(find_support_fact_prompts.format(query=question, docs=res))
    evidence_doc_id = _parse_support_fact_ids(response)
    for doc_id in evidence_doc_id:
        try:
            evidence_doc = long_mem.child_class.query.fetch_objects(
                filters=Filter.by_id().equal(doc_id),
                return_references=[QueryReference(link_on="parent", return_properties=["doc_name"])],
            ).objects[0]
            evidence_doc_number = evidence_doc.properties['doc_id']
            evidence_doc_name = evidence_doc.references['parent'].objects[0].properties['doc_name']
            sp_fact.append([evidence_doc_name, int(evidence_doc_number)])
        except Exception as exc:
            # Best effort: the model may return ids that do not exist. Catching
            # Exception (not a bare except) keeps Ctrl-C working, and printing
            # the error shows why the lookup failed.
            print(f"can't find this document {doc_id}. ({type(exc).__name__}: {exc})")
    df.loc[i, 'support_fact'] = str(sp_fact)
    # Periodic checkpoint so a crash loses at most the current batch of ten.
    if (i+1) % 10 == 0:
        df.to_json("hotpot_df.json", orient="records", lines=True)

# Final save so a trailing partial batch is persisted as well.
df.to_json("hotpot_df.json", orient="records", lines=True)
        

---1---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---2---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---3---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---4---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---5---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---6---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---7---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---8---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
---9---
Detect empty group memory, create memory space...
Detect empty child memory, create memory space...
can't find this document Loc

In [13]:
# Display the results table accumulated so far.
df

Unnamed: 0,question_id,answer,support_fact,org_res_from_long_mem
0,5a8b57f25542995d1e6f1371,Yes,"[['Scott Derrickson', 0], ['Ed Wood', 0]]","{'search times': 2, 'used queries': ['Were Sco..."
1,5a8c7595554299585d9e36b6,,[],"{'search times': 4, 'used queries': ['What gov..."
2,5a85ea095542994775f606a8,Animorphs,"[['Animorphs', 0], ['Animorphs', 1]]","{'search times': 1, 'used queries': ['What sci..."
3,5adbf0a255429947ff17385a,,"[['Laleli Mosque', 0]]","{'search times': 2, 'used queries': ['Are the ..."
4,5a8e3ea95542995a26add48d,,"[['Big Stone Gap (film)', 0]]","{'search times': 1, 'used queries': ['The dire..."
5,5abd94525542992ac4f382d2,,"[['2014 S/S', 0]]","{'search times': 1, 'used queries': ['2014 S/S..."
6,5a85b2d95542997b5ce40028,Eenasul Fateh,"[['Eenasul Fateh', 0]]","{'search times': 1, 'used queries': ['Who was ..."
7,5a87ab905542996e4f3088c1,3677,"[['Androscoggin Bank Colisée', 0]]","{'search times': 1, 'used queries': ['The aren..."
8,5a7bbb64554299042af8f7cc,Terry Richardson,"[['Terry Richardson', 0], ['Annie Morton', 0]]","{'search times': 2, 'used queries': ['Who is o..."
9,5a8db19d5542994ba4e3dd00,Yes,"[['For Against', 0]]","{'search times': 2, 'used queries': ['Are Loca..."


In [15]:
import ast

# Row to inspect.
index = 5
# The column stores str(dict) written by the evaluation loop.
# ast.literal_eval only accepts Python literals, so unlike eval() a modified or
# reloaded cell value cannot execute code. It raises ValueError if the stored
# text is not a plain literal.
res = ast.literal_eval(df.loc[index, 'org_res_from_long_mem'])
res

{'search times': 1,
 'used queries': ['2014 S/S is the debut album of a South Korean boy group that was formed by who?'],
 'searched memory': [{'doc_name': '2014 S/S',
   'text': 'WINNER\'s debut album, "2014 S/S," was released on August 12, 2014, by YG Entertainment. The group members contributed to the lyrics and composition of most songs on the album.'}],
 'thought': "WINNER is the South Korean boy group formed by YG Entertainment, and their debut album '2014 S/S' was released on August 12, 2014.",
 'evidence': [{'text': '2014 S/S is the debut album of South Korean group WINNER.',
   'doc_id': '5d322fc9-52e5-4e16-a951-ff8810657196',
   'doc_name': '2014 S/S'},
  {'text': "It was released on August 12, 2014 by the group's record label, YG Entertainment.",
   'doc_id': 'c9de059e-2cf5-4e32-ac6b-c2d999a518c9',
   'doc_name': '2014 S/S'},
  {'text': "The members were credited for writing the lyrics and composing the majority of the album's songs.",
   'doc_id': 'ada1f252-522f-462b-8284-0

In [28]:
# Re-ask the answer prompt for the inspected example, passing only the
# retrieved evidence sentences as the document context.
probe_question = '2014 S/S is the debut album of a South Korean boy group that was formed by who?'
probe_prompt = answer_prompt.format(question=probe_question, docs=res['evidence'])
llm_create(probe_prompt)

'None'