### Long memory

Generate datasets

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Build the long-memory evaluation dataset from the MSC self-instruct dump.
df = pd.read_json("msc_self_instruct.jsonl", lines=True)

# Column layout of the evaluation frame. Only the first three columns are
# filled in this cell; the result/answer/metric columns stay empty here.
answer_columns = [
    'dialog', 'question', 'gold_answer',
    'long_mem_result', 'long_mem_answer',
    'long_mem_f1', 'long_mem_rc', 'long_mem_pre',
    'long_mem_recall_result', 'long_mem_recall_answer',
    'long_mem_recall_f1', 'long_mem_recall_rc', 'long_mem_recall_pre',
]

# Use at most 500 samples, but never index past the end of the source file
# (a shorter file used to fail with a KeyError part-way through).
sample_count = min(500, len(df))

# Collect one dict per sample and build the frame once at the end instead of
# growing it row by row with .loc.
records = []
for i in range(sample_count):
    # Every sample restarts its synthetic clock at the same instant; each
    # exchange then advances it by one minute, continuing across sessions.
    current_time = datetime.strptime("2024/11/1 12:00", "%Y/%m/%d %H:%M")
    dialog_data = []
    for dialog_session in df['previous_dialogs'][i]:
        turns = dialog_session['dialog']
        session = []
        # Pair consecutive turns: even positions are written as Allen, odd
        # positions as Jack. A trailing unpaired turn is dropped.
        for count in range(len(turns) // 2):
            session.append({
                "text": f"Allen:{turns[2*count]['text']}, Jack:{turns[2*count+1]['text']}",
                "time": current_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
            })
            current_time = current_time + timedelta(minutes=1)
        dialog_data.append(session)

    records.append({
        # Stored as the Python repr of the nested list of sessions.
        'dialog': str(dialog_data),
        # Question is asked by Jack ('B'); the gold answer is the 'A' entry.
        'question': f"Jack:{df['self_instruct'][i]['B']}",
        'gold_answer': f"{df['self_instruct'][i]['A']}",
    })

answer_df = pd.DataFrame(records, columns=answer_columns)
answer_df.to_json("MSC_datasets.json", orient="records", lines=True)

In [41]:
import pandas as pd

# Load the generated dataset (one JSON record per line) as the working frame.
answer_df = pd.read_json("MSC_datasets.json", lines=True)
# Alternative: load the file the evaluation loop writes, to inspect or resume.
# answer_df = pd.read_json("MSC_eval.json", lines=True)

In [None]:
from long_memory.component import WeaviateLongMemory
# Instantiate the project's long-memory component with its default settings.
# NOTE(review): presumably backed by a Weaviate instance (per the class name) — confirm.
long_mem = WeaviateLongMemory()

In [4]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Load environment variables from a .env file, if one is present.
load_dotenv()
# The API key is read from the OPENAI_API_KEY environment variable rather than hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def llm_create(prompt):
    """Send `prompt` as a single user message to gpt-4o-mini and return the reply text.

    Uses the module-level `client`; the content of the first returned choice
    is handed back unchanged.
    """
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
    )
    first_choice = response.choices[0]
    return first_choice.message.content

# Extra instruction text passed as `other_instruct` to long_mem.get_memory(...)
# when recall is enabled. The wording is runtime prompt text; edit with care.
other_instruct="""Your role is Allen, Jack will ask some question about you, you need to find relative memory in Allen's memory.
here's some guide for you. similar_snippets is original memory, related_summaries and closest_summary are compressed memory for suggestion, 
If you see some relative content from related_summaries, you can use jump to related_summaries and get original memory,
when you use retry, you can use specific things to search not just a question.
when you need to write evidence, try put original dialog into evidence field, not compressed memory"""

# Template for the final answering call; filled with str.format using the
# `question` and `docs` placeholders.
answer_prompt = """Base on the following document and answer the question.
You are Allen, try use origin text in the evidence field to repeat a brief answer.

Question:{question}

Document:{docs}
"""

In [None]:
import ast

# Columns this loop fills with text. After the JSON round trip they contain
# only nulls and typically come back with a non-object dtype; cast them up
# front so assigning strings does not rely on pandas' deprecated silent upcast.
for column in ('long_mem_result', 'long_mem_answer',
               'long_mem_recall_result', 'long_mem_recall_answer'):
    if column in answer_df.columns:
        answer_df[column] = answer_df[column].astype(object)

# Indices of rows whose processing raised; inspected after the run.
error_list = []
for i in range(100):
    print(f'==={i}===')
    try:
        # generate result
        # Start every sample from an empty memory store.
        long_mem.del_memory()
        dialogs = []
        # 'dialog' holds the repr of a nested list of plain dicts/strings, so
        # literal_eval parses it without executing arbitrary code. This
        # replaces a bare eval() on text loaded from disk.
        for dialog_session in ast.literal_eval(answer_df.loc[i, 'dialog']):
            dialogs.extend(dialog_session)
        long_mem.add_chat_logs(dialogs)
        question = f"{answer_df.loc[i, 'question']}"
        # Retrieve once without and once with the recall option.
        answer_df.loc[i, 'long_mem_result'] = str(long_mem.get_memory(question, recall=False))
        answer_df.loc[i, 'long_mem_recall_result'] = str(long_mem.get_memory(question, recall=True, other_instruct=other_instruct))

        # response
        # Answer the question from each retrieval result separately.
        p = answer_prompt.format(question=answer_df.loc[i, 'question'], docs=answer_df.loc[i, 'long_mem_result'])
        long_mem_answer = llm_create(p)
        answer_df.loc[i, 'long_mem_answer'] = long_mem_answer

        p = answer_prompt.format(question=answer_df.loc[i, 'question'], docs=answer_df.loc[i, 'long_mem_recall_result'])
        long_mem_recall_answer = llm_create(p)
        answer_df.loc[i, 'long_mem_recall_answer'] = long_mem_recall_answer
    except Exception as e:
        # Best effort: record the failing row and keep going.
        error_list.append(i)
        print(f'----error:{e}----')
    # Checkpoint every 20 rows so a crash does not lose finished work.
    if (i+1)%20==0:
        answer_df.to_json("MSC_eval.json", orient="records", lines=True)
answer_df.to_json("MSC_eval.json", orient="records", lines=True)

In [55]:
# Display the row indices whose evaluation raised an exception.
error_list

[]

In [66]:
# Row to inspect by hand.
index=6
# Turn the stored string back into a Python object for display.
# NOTE(review): eval() executes whatever text is stored in this cell; only use it on files this notebook produced.
eval(answer_df['long_mem_recall_result'][index])

{'search times': 1,
 'used queries': ['Jack:Hey, remember that time we talked about our favorite movies? What was yours?'],
 'searched memory': [{'text': 'Allen and Jack discuss their preferences in movie genres, primarily comedies and nature documentaries.',
   'time': '2024/11/01 12:15'}],
 'thought': "I found detailed memories of our conversation about movie preferences, highlighting that I enjoy nature documentaries, particularly the BBC series 'Blue Planet II', while Jack enjoys comedies like 'Clueless'.",
 'evidence': [{'text': "Allen: I haven't. I'm not really into comedies, because I don't really have a sophisticated sense of humor. What genre do you think your first book will be?, Jack: it would be the kind of comedy that clueless is. Do you not think of clueless as a comedy?"},
  {'text': "Allen: Yeah, I think so. But what I enjoy the most is nature documentaries. I like learning new things, and so I'll sometimes spend the whole day knitting and watching wildlife on tv!"},
  

In [12]:
# Preview the first rows of the evaluation frame.
answer_df.head()

Unnamed: 0,dialog,question,gold_answer,long_mem_result,long_mem_answer,long_mem_f1,long_mem_rc,long_mem_pre,long_mem_recall_result,long_mem_recall_answer,long_mem_recall_f1,long_mem_recall_rc,long_mem_recall_pre
0,"[[{'text': ""Allen:Hi! How are you doing tonigh...","Jack:Hey, remember that time we talked about m...",Taylor Swift!,{'closest_summary': {'text': 'Allen and Jack t...,The document does not mention any specific art...,,,,"{'search times': 4, 'used queries': ['Jack:Hey...",I mentioned that I could get into Taylor Swift.,,,
1,"[[{'text': 'Allen:Hello, how are you doing?, J...","Jack:Hey, remember that time we talked about o...",I eat a fresh and raw diet to save on groceries.,{'closest_summary': {'text': 'Allen shares his...,Allen saved money by primarily eating a fresh ...,,,,"{'search times': 1, 'used queries': ['Jack:Hey...",I mentioned that I mostly eat a fresh and raw ...,,,
2,"[[{'text': 'Allen:Hello what are doing today?,...","Jack:Hey, remember that time we talked about o...",I used to work in the human services field.,"{'closest_summary': {'text': ""Allen and Jack d...",I used to work in the human services field.,,,,"{'search times': 1, 'used queries': ['Jack:Hey...",Allen used to work in the human services field.,,,
3,"[[{'text': ""Allen:How are you? I'm tired of my...","Jack:Hey, remember that time we talked about o...",Burger King!,{'closest_summary': {'text': 'Conversation abo...,Allen: I have a part-time job at Burger King.,,,,"{'search times': 1, 'used queries': [""Jack:Hey...",Sure! I work at Burger King.,,,
4,"[[{'text': 'Allen:Hi, how are you doing today?...","Jack:Hey, remember that time we talked about o...",Three miles!,{'closest_summary': {'text': 'Allen and Jack d...,I mentioned that I like to walk three miles fo...,,,,"{'search times': 1, 'used queries': ['Jack:Hey...",I mentioned that I like to walk for a small wo...,,,


### Short memory

In [None]:
import pandas as pd

# Imported here so this section does not depend on the long-memory cell having
# been run first (the names were previously used without an import).
from datetime import datetime, timedelta

# Build the short-memory evaluation dataset from the MSC self-instruct dump.
df = pd.read_json("msc_self_instruct.jsonl", lines=True)

# Column layout of the evaluation frame. Only the first three columns are
# filled in this cell; the result/answer/metric columns stay empty here.
s_answer_columns = [
    'dialog', 'question', 'gold_answer',
    'short_mem_result', 'short_mem_answer',
    'short_mem_f1', 'short_mem_rc', 'short_mem_pre',
]

# Use at most 500 samples, but never index past the end of the source file
# (a shorter file used to fail with a KeyError part-way through).
sample_count = min(500, len(df))

# Collect one dict per sample and build the frame once at the end instead of
# growing it row by row with .loc.
records = []
for i in range(sample_count):
    # Every sample restarts its synthetic clock at the same instant; each
    # exchange then advances it by one minute, continuing across sessions.
    current_time = datetime.strptime("2024/11/1 12:00", "%Y/%m/%d %H:%M")
    dialog_data = []
    for dialog_session in df['previous_dialogs'][i]:
        turns = dialog_session['dialog']
        session = []
        # Pair consecutive turns: even positions go under "assistant", odd
        # positions under "user". A trailing unpaired turn is dropped.
        for count in range(len(turns) // 2):
            session.append({
                "assistant": f"{turns[2*count]['text']}",
                "user": f"{turns[2*count+1]['text']}",
                "time": current_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
            })
            current_time = current_time + timedelta(minutes=1)
        dialog_data.append(session)

    records.append({
        # Stored as the Python repr of the nested list of sessions.
        'dialog': str(dialog_data),
        # Question comes from the 'B' entry; the gold answer is the 'A' entry.
        'question': f"user:{df['self_instruct'][i]['B']}",
        'gold_answer': f"{df['self_instruct'][i]['A']}",
    })

s_answer_df = pd.DataFrame(records, columns=s_answer_columns)
s_answer_df.to_json("s_MSC_datasets.json", orient="records", lines=True)

In [None]:
import pandas as pd

# Load the short-memory dataset (one JSON record per line).
# Note: this rebinds `answer_df`, replacing the long-memory frame of the same name.
answer_df = pd.read_json("s_MSC_datasets.json", lines=True)
# Alternative: load the file the short-memory evaluation loop writes.
# answer_df = pd.read_json("s_MSC_eval.json", lines=True)

In [None]:
from short_memory.component import WeaviateShortMemory
# Instantiate the project's short-memory component with its default settings.
# NOTE(review): presumably backed by a Weaviate instance (per the class name) — confirm.
short_mem = WeaviateShortMemory()

In [None]:
from dotenv import load_dotenv
from openai import OpenAI
import os

# Load environment variables from a .env file, if one is present.
load_dotenv()
# The API key is read from the OPENAI_API_KEY environment variable rather than hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def llm_create(prompt):
    """Ask gpt-4o-mini to respond to `prompt` and return the text of its reply.

    The prompt is sent as the only message, with the "user" role, through the
    module-level `client`; the first choice's message content is returned.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    reply = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
    )
    return reply.choices[0].message.content

# Template for the answering call in the short-memory run; filled with
# str.format using the `question` and `docs` placeholders.
# This rebinds `answer_prompt`, replacing the long-memory variant of the same name.
answer_prompt = """Base on the following document and answer the question.
Try use origin text in the evidence field to repeat a brief answer.

Question:{question}

Document:{docs}
"""

In [None]:
import ast

s_answer_df = pd.read_json("s_MSC_datasets.json", lines=True)

# Columns this loop fills with text. After the JSON round trip they contain
# only nulls and typically come back with a non-object dtype; cast them up
# front so assigning strings does not rely on pandas' deprecated silent upcast.
for column in ('short_mem_result', 'short_mem_answer'):
    if column in s_answer_df.columns:
        s_answer_df[column] = s_answer_df[column].astype(object)

# Indices of rows whose processing raised; inspected after the run.
error_list = []
for i in range(100):
    print(f'==={i}===')
    try:
        # generate result
        # Start every sample from an empty memory store.
        short_mem.del_memory()
        dialogs = []
        # 'dialog' holds the repr of a nested list of plain dicts/strings, so
        # literal_eval parses it without executing arbitrary code. This
        # replaces a bare eval() on text loaded from disk.
        for dialog_session in ast.literal_eval(s_answer_df.loc[i, 'dialog']):
            dialogs.extend(dialog_session)
        short_mem.add_chatlogs(dialogs)
        # Read the question from the same frame the results are written to.
        question = f"{s_answer_df.loc[i, 'question']}"
        s_answer_df.loc[i, 'short_mem_result'] = str(short_mem.get_memory(question))

        # response
        # The prompt must use the result stored just above in s_answer_df.
        # It used to read `answer_df`, a different frame that never receives
        # the retrieval result, so the model answered from an empty document.
        p = answer_prompt.format(question=s_answer_df.loc[i, 'question'], docs=s_answer_df.loc[i, 'short_mem_result'])
        short_mem_answer = llm_create(p)
        s_answer_df.loc[i, 'short_mem_answer'] = short_mem_answer
    except Exception as e:
        # Best effort: record the failing row and keep going.
        error_list.append(i)
        print(f'----error:{e}----')
    # Checkpoint every 20 rows so a crash does not lose finished work.
    if (i+1)%20==0:
        s_answer_df.to_json("s_MSC_eval.json", orient="records", lines=True)
s_answer_df.to_json("s_MSC_eval.json", orient="records", lines=True)