In [1]:
from agent.memory import *
from agent.student import *
from agent.agent_memory import *
from student.agent.tools.tools_memory import *

In [2]:
import mllm

# Test memory agent functions

## Function tests

In [4]:
a = MemoryAgent(provider="anthropic")
mllm.config.default_models.expensive = "claude-sonnet-4-20250514"

In [5]:
a.full_summary("""Pi is not a prime number since it is not an integer. An integer is a number with no decimal places. Pi and e are irrational numbers """)

['Pi is not a prime number.',
 'Pi is not a prime number since it is not an integer.',
 'Pi is not a prime number since it is not an integer. Pi is an irrational number.',
 'Pi is not a prime number since it is not an integer. An integer is a number with no decimal places. Pi and e are irrational numbers ']

In [5]:

#a.extract_keys("Pi is not a prime number since it is not an integer")
print(a.extract_keys("What is my name?"))
print(a.decompose("Pi is not a prime number since it is not an integer"))
print(a.filter_information("What is my name?", "The username is not Kevin"))

["name", "my name", "identity", "personal information"]
['Pi is not a prime number', 'Pi is not an integer', 'Prime numbers must be integers']
<nothing/>

The information provided only tells me what your name is NOT (not Kevin), but doesn't contain any positive information about what your actual name is.


In [6]:
s = "RASPA is a classical simulation toolbox which can for example do monte carlo simulations. It can be used to calculate the abosorption properties of materials like MOFs for different gases"

In [7]:
a.summarize('"RASPA is a classical simulation toolbox that can calculate the absorption properties of materials like MOFs for different gases."'
)

'"RASPA is a simulation toolbox for calculating gas absorption in materials like MOFs."'

In [8]:
a.full_summary(s)

['"RASPA is a simulation toolbox."',
 '"RASPA is a simulation toolbox for monte carlo simulations."',
 '"RASPA is a simulation toolbox for monte carlo simulations and calculating absorption properties of materials."',
 '"RASPA is a classical simulation toolbox for monte carlo simulations and calculating absorption properties of materials like MOFs."',
 'RASPA is a classical simulation toolbox which can for example do monte carlo simulations. It can be used to calculate the abosorption properties of materials like MOFs for different gases']

## Ask

In [6]:
a.ask("What is my name?")
a.render_conversation()

## Modify Updated

In [7]:
#a = MemoryAgent(provider="anthropic")
inp = "The user name is Peter"
a.run(f"Add - {inp} -  to memory with the keys: {a.extract_keys(inp)}")

'Successfully added the information "The user name is Peter" to memory with ID d3fda800. The memory is now stored with the keys: user, Peter, username, name.'

In [11]:
new = f"My name is Henrik"

In [12]:
e = ExtendedModifyMemory(a.memory, a.single_run)

In [13]:
node = list(a.memory.memory.values())[0]
e.run(node.id, new)
a.memory.render_html()

## Learn

In [3]:
a = MemoryAgent(provider="anthropic")

In [4]:
a.learn("My name is Peter")

'Based on the new information "My name is Peter", I have added an entry to my memory with the keys "name" and "Peter". I was able to recall this information successfully using those keys, confirming it has been properly stored.\n\nSince there was no previous conflicting information in my memory, no clarification is needed. The new information has been integrated smoothly.'

In [6]:
a.learn("My name is Henrik")

'<thoughts>\nThe memory system has successfully integrated the new information "My name is Henrik" using the keys "introduction", "Henrik", and "name". The memory was able to be recalled using the stimuli "Henrik" and "name", confirming that the keys chosen are appropriate for retrieving this information in the future.\n\nThere does not appear to be any conflicting information present, as the memory system did not indicate that this new information clashed with any existing memories. The integration seems to have occurred smoothly without any issues.\n\nBased on the memory changes provided, no further clarification is needed. The new information has been captured and stored in a way that allows for easy retrieval when needed.\n</thoughts>\n\n<response>\nThe new information "My name is Henrik" has been successfully added to my memory. I was able to store and recall it using the keys "introduction", "Henrik", and "name". \n\nThere were no conflicts with existing memories during this proc

In [7]:
a.render_conversation()

### Learn breakdown

In [18]:
a = MemoryAgent(provider="anthropic")
inp = "The user name is Peter"
a.run(f"Add - {inp} -  to memory with the keys: {a.extract_keys(inp)}")

'Okay, I have added the information "The user name is Peter" to memory, associated with the keys ["user name", "Peter"]. Let me know if you need anything else!'

In [19]:
self = a
question = "the user name is henrik"

recall = []
extracted_keys = []
updates = []

rev_summaries = self.full_summary(question)     # decompose into abstraction level

for summary in rev_summaries:                   # iterate by summarizing
    self.set_prompt(type="learning", version="v4")

    # ask
    keys = self.extract_keys(summary)           # ask for context
    extracted_keys.append(keys)                 # store extracted keys

    input = f"Retrieve all knowledge related to this input: {q(context)}"    
    input += f"Use these or similar keys as stimuli: {keys}"
    
    prompt = self.get_prompt(type="retrieval", version="v3", json=False, general=False)
    prompt += input
    mem = self.run(prompt, remove_tools=self.get_question_mask())
    recall.append(mem)

    # learn
    prompt = self.get_prompt(type="update_mem", version="v1", json=False, general=False)
    prompt = prompt.format(new_information = summary, recalled=mem)
    update = self.run(prompt)
    updates.append(update)

    # TODO: refactor quality control tool for key selection - automatic recall of used keywords
    #     ALWAYS try to recall memory after adding to evaluate if the keys need to be modified.

answer = self.learning_answer(updates, context)
print(answer)

I apologize, but it looks like there was an error when trying to integrate the new information into my memory. The content of the new information appears to be "<function context at 0x12a813910>", which is not meaningful information that I can process or store.

Could you please clarify what information you intended to add to my memory? I'd be happy to try again with the correct information. Let me know if you have any other questions!


In [20]:
a.render_conversation()

In [21]:
a.ask("What is my name?")
a.render_chat_html()

# Old

In [None]:
a = MemoryAgent(provider="anthropic")
m = a.memory
tools = a.tools
add = tools['add'].run
recall = tools['recall'].run
modify = tools['modify'].run

In [None]:
add(stimuli=["Jonas Rios"], content="B | Jonas Rios| 33 | Documentary filmmaker | Never travels without his drone")

'<tool response name=add>Added:\n\t\n        <memory id="f12fa345">\n            <stimuli>Jonas Rios</stimuli>\n            <content>B | Jonas Rios| 33 | Documentary filmmaker | Never travels without his drone</content>\n        </memory>\n        \n</tool response>'

In [76]:
a.load_memory("memory/test.txt")
m.render_html()

# Memory Search

In [81]:
from mllm.embedding import get_embeddings
len(get_embeddings(["test"])[0])

3072

In [5]:
def search(m, queries, sensitivity, thres = 0):
    res = m._recall(queries, sensitivity=sensitivity, max_recall=10, thres=thres)
    return {
        m.get_node(id).keys.__str__() : f"{score:.2g}" for id, score in res.items()
    }

import pandas as pd

def compare_search(m, keys, s, t):

    df = pd.DataFrame({ k.__str__():  pd.Series(search(m, k, s, t)) for k in keys})

    df.index.name = 'key'    
    df.sort_index(inplace=True)  
    return df

def compare_s(m, keys, s : List[float], t: float):

        
    df = pd.DataFrame({ s_i:  pd.Series(search(m, keys, s_i, t)) for s_i in s})

    df.index.name = 'key'    
    df.sort_index(inplace=True)  
    return df

In [9]:
search(m,["Jonas"], 0.3)

{"{'Rios', 'Jonas'}": '0.5',
 "{'name', 'kevin'}": '0.34',
 "{'Documentary filmmaker', 'drone', 'Jonas Rios'}": '0.22',
 "{'Eva Kim', 'long-distance partners', 'Jonas Rios', 'reef-filming trip'}": '0.17',
 "{'client', 'Jonas Rios', 'Priya Desai', 'custom camera-drones'}": '0.17',
 "{'professional friction', 'Liam O’Connor', 'Jonas Rios', 'drone flights'}": '0.17'}

# Sensitivity / thres

In [107]:
compare_s(m, ["Jonas", "Rios"], s=[0, 0.2, 0.4, 0.6], t=0)

Unnamed: 0_level_0,0.0,0.2,0.4,0.6
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"{'Eva Kim', 'Marine biologist', 'coral restoration'}",,0.17,,
"{'Eva Kim', 'coral-health papers', 'research collaborators', 'Priya Desai'}",0.23,,,
"{'Eva Kim', 'reef-filming trip', 'long-distance partners', 'Jonas Rios'}",0.33,0.3,0.17,0.17
"{'Eva Kim', 'restricted seabed maps', 'Liam O’Connor'}",0.26,0.26,,
{'Jonas Rios'},0.67,0.67,0.67,0.67
"{'Jonas', 'Rios'}",1.0,1.0,1.0,1.0
"{'custom camera-drones', 'Priya Desai', 'Jonas Rios', 'client'}",0.32,0.29,0.17,0.17
"{'drone flights', 'Liam O’Connor', 'Jonas Rios', 'professional friction'}",0.31,0.24,0.17,0.17
"{'drone', 'Documentary filmmaker', 'Jonas Rios'}",0.37,0.37,0.22,0.22
"{'ex-navy', 'Liam O’Connor', 'Harbor master'}",0.24,0.18,,


In [108]:
compare_s(m, ["Jonas", "Rios"], s=[0, 0.2, 0.4, 0.6], t=0.3)

Unnamed: 0_level_0,0.0,0.2,0.4,0.6
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"{'Eva Kim', 'reef-filming trip', 'long-distance partners', 'Jonas Rios'}",0.33,,,
{'Jonas Rios'},0.67,0.67,0.67,0.67
"{'Jonas', 'Rios'}",1.0,1.0,1.0,1.0
"{'custom camera-drones', 'Priya Desai', 'Jonas Rios', 'client'}",0.32,,,
"{'drone flights', 'Liam O’Connor', 'Jonas Rios', 'professional friction'}",0.31,,,
"{'drone', 'Documentary filmmaker', 'Jonas Rios'}",0.37,0.37,,
"{'name', 'kevin'}",0.34,0.34,,


### Compare S(A) / S(B) / S(A,B)

In [109]:
A = "Jonas"
B = "drone"
s = 0.2
t = 0.2

compare_search(m, [[A], [B], [A, B]], s, t)

Unnamed: 0_level_0,['Jonas'],['drone'],"['Jonas', 'drone']"
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"{'Eva Kim', 'reef-filming trip', 'long-distance partners', 'Jonas Rios'}",0.24,,0.24
{'Jonas Rios'},0.67,,0.67
"{'Jonas', 'Rios'}",0.65,,0.65
"{'custom camera-drones', 'Priya Desai', 'Jonas Rios', 'client'}",0.28,0.21,0.43
"{'drone flights', 'Liam O’Connor', 'Jonas Rios', 'professional friction'}",0.24,,0.4
"{'drone', 'Documentary filmmaker', 'Jonas Rios'}",0.37,0.4,0.63
"{'ex-navy', 'Liam O’Connor', 'Harbor master'}",,,0.24
"{'name', 'kevin'}",0.34,0.26,0.34
"{'repaired drone', 'Liam O’Connor', 'grateful', 'Priya Desai'}",,,0.26
"{'underwater drones', 'Robotics PhD student', 'Priya Desai'}",,0.26,0.33


### Compare S(A) / S(A, A)

In [110]:
s = 0.3
t = 0
keys = [
    ["Jonas", "drone"],
    ["Jonas", "Jonas", "Jonas", "drone"],
]
compare_search(m, keys, s,t)

Unnamed: 0_level_0,"['Jonas', 'drone']","['Jonas', 'Jonas', 'Jonas', 'drone']"
key,Unnamed: 1_level_1,Unnamed: 2_level_1
"{'Eva Kim', 'reef-filming trip', 'long-distance partners', 'Jonas Rios'}",0.17,0.17
{'Jonas Rios'},0.67,0.67
"{'Jonas', 'Rios'}",0.5,0.5
"{'custom camera-drones', 'Priya Desai', 'Jonas Rios', 'client'}",0.3,0.3
"{'drone flights', 'Liam O’Connor', 'Jonas Rios', 'professional friction'}",0.33,0.33
"{'drone', 'Documentary filmmaker', 'Jonas Rios'}",0.56,0.56
"{'name', 'kevin'}",0.34,0.34
"{'repaired drone', 'Liam O’Connor', 'grateful', 'Priya Desai'}",0.14,0.14
"{'underwater drones', 'Robotics PhD student', 'Priya Desai'}",0.17,0.17


# Recall Function Behavior

For a set of stimuli $S$, the score function 
$R_S(s): [s] \rightarrow \N$ and $s_1 \subset s_2\subset S$:
$$
R_S(s_1) < R_S(s_2)
$$
$$
R_S(A,B) \ne R_S(A) + R_S(B)
$$
$$
R_S(A,A) = R_S(A)
$$
$$
R_S(A,B) \ne R_S(A+B) 
$$

With $S_1 = \{A, B\}$ and $S_2 = \{A+B, C\}$:
$$
R_{S_1}(A) = R_{S_1}(A+B) > R_{S_2}(A)
$$
$$
R_{S_1}(A+B) < R_{S_2}(A+B) < R_{S_1}(A, B) 
$$

In [None]:
from agent.memory import Memory
m = Memory()
m.add_from_dict({"keys" : ["Animal", "Bread"], "content":"e"})
m.add_from_dict({"keys" : ["Animal Bread"], "content":"e"})
m.add_from_dict({"keys" : ["Bread Animal"], "content":"e"})
m.add_from_dict({"keys" : ["Bread"], "content":"e"})
m.add_from_dict({"keys" : ["Chemistry"], "content":"e"})

In [125]:
s = 0.3
t = 0.0

compare_search(m, [["Animal"], ["Bread"], ["Animal", "Bread"], ["Animal Bread"]], s, t)

Unnamed: 0_level_0,['Animal'],['Bread'],"['Animal', 'Bread']",['Animal Bread']
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
{'Animal Bread'},0.43,0.69,0.69,1.0
{'Bread Animal'},0.53,0.7,0.7,0.8
"{'Bread', 'Animal'}",0.5,0.5,1.0,0.56
{'Bread'},,1.0,1.0,0.69


In [168]:
from agent.bm25_indexing import get_bm25_score
from mllm.embedding import get_embeddings
import pandas as pd
from typing import List
import numpy as np

def get_embedding_score(query : List[str], keys: List[str], sensitivity=0.4):
    q_emb = np.array(get_embeddings(query))
    k_emb = np.array(get_embeddings(keys))
    
    
    similarity = np.dot(q_emb, k_emb.T) # len(query) x len(keys)
    q = np.linalg.norm(q_emb, axis=1)
    k = np.linalg.norm(k_emb, axis=1)

    norm = q * k + (q-k)**2 + 1
    similarity = 2 * similarity / norm
    '''
    for i in range(len(query)):
        q = np.linalg.norm(q_emb[i])
        for j in range(len(keys)):
            k = np.linalg.norm(k_emb[i])

            similarity[i][j] /= k * q +((q-k)**2 + 1)
            similarity[i][j] *= 2
    '''     

    similarity = similarity * (similarity > sensitivity)
    return similarity 


def get_score(query : List[str], keys: List[str], sensitivity=0.4):
    emb = get_embedding_score(query, keys, sensitivity)
    return np.mean(emb)


query = ["chemistry"]
keys = [["chemistry", "physics"],["chemistry"],["physical chemistry"],["chemical physics"],["biology"], ["elefant"]]

records = [
    {
        "key": k,   
        "bm25": get_bm25_score(query, k),
        "emb":  get_embedding_score(query, k)
    }
    for k in keys
]

df = pd.DataFrame(records)

print("Query: ", query)
df


Query:  ['chemistry']


Unnamed: 0,key,bm25,emb
0,"[chemistry, physics]","[-0.2746530721670274, 0.0]","[[1.0000004, 0.6005238]]"
1,[chemistry],[-0.2746530721670274],[[0.99999994]]
2,[physical chemistry],[-0.2746530721670274],[[0.60559857]]
3,[chemical physics],[0.0],[[0.6239381]]
4,[biology],[0.0],[[0.64003897]]
5,[elefant],[0.0],[[0.11265001]]


In [69]:
recall(m, ["Jonas"])

{'0.15': {'Jonas Rios', 'Priya Desai', 'client', 'custom camera-drones'},
 '0.12': {'Documentary filmmaker', 'Jonas Rios', 'drone'},
 '0.094': {'Jonas', 'Rios'}}

In [159]:
from __future__ import annotations

import math
import heapq
from dataclasses import dataclass, field
from typing import Dict, Iterable, List, Sequence, Set, Tuple

import numpy as np


def log2_weight(df: int, clip: float = 1e-9) -> float:
    """
    Integer weight  =  ceil(-log₂ df)
    df=0 is impossible in use, but clip avoids log(0) during build-time.
    """
    return math.ceil(-math.log2(max(df, clip)))


def cosine(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))


def scoring_kernel(
    query_embs: Sequence[np.ndarray],
    node_embs : Sequence[np.ndarray],     # 1-to-1 with query order
    weights   : Sequence[float],
    sim       = cosine,
) -> float:
    # R_S = Σ w_i · sim(q_i , v_i)    (no “best over variants” step)
    sims = [sim(q, v) for q, v in zip(query_embs, node_embs)]
    return float(np.dot(weights, sims))
def _df(self, token: str) -> int:
        return sum(1 for n in self.memory.values() if token in n.embeddings)

def _weights(self, tokens: Sequence[str]) -> List[float]:
    return [log2_weight(_df(self, t)) for t in tokens]


def recall(
    self : Memory,
    queries: List[str],
    max_recall: int = 5,
    sensitivity: float = 1e-3,
) -> Dict[str, str]:
    """
    Vectorised O(N) recall that honours the axioms.
    Returns  {node_id: content}
    """
    if not self.memory:
        return {}

    # 1) prepare query
    q_vecs = [get_embeddings(q) for q in queries]
    w = _weights(self, queries)

    # 2) score every node (NumPy handles WL part, Python loop per node)
    scores = np.zeros(len(self.memory), dtype=float)
    nodes = self.get_nodes()
    for i, node in enumerate(nodes):
        node_embs = get_embeddings(queries)
        scores[i] = scoring_kernel(q_vecs, node_embs, w)

    # 3) top-k with argpartition
    k = min(max_recall, len(scores))
    if k == 0:
        return {}

    top_idx = np.argpartition(-scores, k - 1)[:k]
    top_idx = top_idx[np.argsort(-scores[top_idx])]

    # 4) collect results above sensitivity
    hits: Dict[str, str] = {}
    for idx in top_idx:
        if scores[idx] < sensitivity:
            break
        n = nodes[idx]
        hits[f"{scores[idx]:.2g}"] = n.keys
    return hits


In [148]:
import random

rng = np.random.default_rng(0)

M = Memory()
# ─ build toy corpus
for i in range(50):
    k = random.choice(["A", "B", "AB", "C", "D", "BC", "BX", "XAB", "CX", "XD", "BXC"])
    M.add_from_dict({"keys": k, "content": f"Document {i} about {k}"})

# ─ query
q = ["A", "A", "A"]
hits = recall(M, q)
hits

  return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))


array([90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90.,
       90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90.,
       90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90.,
       90., 90., 90., 90., 90., 90., 90., 90., 90., 90., 90.])

$$

$$

In [23]:
#def eval_recall_single_entry():
# scores = 2x2 matrix

# add memory(keys)
# for changing hyperparameter:
    # for subset of keys
        # recall(keys)
        # score[subset length][hyperparameter] = recall score

In [35]:
ag = StudentAgent()
ag.load_memory("memory/test.txt")
ag.memory.render_html()