Skip to content

Commit

Permalink
Implement loading MemoryItems from file in JSONFileMemory
Browse files Browse the repository at this point in the history
Further changes:
* remove `init` param from `get_memory()`, replace usages by `memory.clear()`
* make token length calculation optional in `MemoryItem.dump()`
  • Loading branch information
Pwuts committed Jun 15, 2023
1 parent 6e6e7fc commit f16d7ba
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 20 deletions.
2 changes: 1 addition & 1 deletion autogpt/commands/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def ingest_file(

# TODO: differentiate between different types of files
file_memory = MemoryItem.from_text_file(content, filename)
logger.debug(f"Created memory: {file_memory.dump()}")
logger.debug(f"Created memory: {file_memory.dump(True)}")
memory.add(file_memory)

logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
Expand Down
3 changes: 2 additions & 1 deletion autogpt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ def run_auto_gpt(

# Initialize memory and make sure it is empty.
# this is particularly important for indexing and referencing pinecone memory
memory = get_memory(cfg, init=True)
memory = get_memory(cfg)
memory.clear()
logger.typewriter_log(
"Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}"
)
Expand Down
4 changes: 2 additions & 2 deletions autogpt/memory/vector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
# MilvusMemory = None


def get_memory(cfg: Config, init=False) -> VectorMemory:
def get_memory(cfg: Config) -> VectorMemory:
memory = None

match cfg.memory_backend:
Expand All @@ -60,7 +60,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory:
# )
# else:
# memory = PineconeMemory(cfg)
# if init:
# if clear:
# memory.clear()

case "redis":
Expand Down
48 changes: 38 additions & 10 deletions autogpt/memory/vector/memory_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,21 @@ def from_ai_action(ai_message: Message, result_message: Message):
# The result_message contains either user feedback
# or the result of the command specified in ai_message

if ai_message["role"] != "assistant":
raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}")
if ai_message.role != "assistant":
raise ValueError(f"Invalid role on 'ai_message': {ai_message.role}")

result = (
result_message["content"]
if result_message["content"].startswith("Command")
result_message.content
if result_message.content.startswith("Command")
else "None"
)
user_input = (
result_message["content"]
if result_message["content"].startswith("Human feedback")
result_message.content
if result_message.content.startswith("Human feedback")
else "None"
)
memory_content = (
f"Assistant Reply: {ai_message['content']}"
f"Assistant Reply: {ai_message.content}"
"\n\n"
f"Result: {result}"
"\n\n"
Expand All @@ -145,11 +145,14 @@ def from_webpage(content: str, url: str, question: str | None = None):
question_for_summary=question,
)

def dump(self) -> str:
token_length = count_string_tokens(self.raw_content, Config().embedding_model)
def dump(self, calculate_length=False) -> str:
if calculate_length:
token_length = count_string_tokens(
self.raw_content, Config().embedding_model
)
return f"""
=============== MemoryItem ===============
Length: {token_length} tokens in {len(self.e_chunks)} chunks
Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks
Metadata: {json.dumps(self.metadata, indent=2)}
---------------- SUMMARY -----------------
{self.summary}
Expand All @@ -158,6 +161,31 @@ def dump(self) -> str:
==========================================
"""

def __eq__(self, other: object) -> bool:
    """Field-by-field equality for MemoryItems.

    Embeddings can be stored either as list[float] (e.g. after a round-trip
    through the JSON index file) or as np.ndarray[float32], so both sides are
    normalized to float32 ndarrays before comparison.
    """
    if not isinstance(other, MemoryItem):
        # Let Python try the reflected comparison instead of raising
        # AttributeError on an arbitrary object.
        return NotImplemented

    def _as_array(embedding) -> np.ndarray:
        # Normalize list[float] -> np.ndarray[float32]; np.array_equal needs
        # comparable array types on both sides.
        if isinstance(embedding, np.ndarray):
            return embedding
        return np.array(embedding, dtype=np.float32)

    return (
        self.raw_content == other.raw_content
        and self.chunks == other.chunks
        and self.chunk_summaries == other.chunk_summaries
        and np.array_equal(_as_array(self.e_summary), _as_array(other.e_summary))
        # Normalizing per chunk (instead of probing e_chunks[0]) avoids an
        # IndexError when a MemoryItem has no chunks.
        and np.array_equal(
            [_as_array(chunk) for chunk in self.e_chunks],
            [_as_array(chunk) for chunk in other.e_chunks],
        )
    )


@dataclasses.dataclass
class MemoryItemRelevance:
Expand Down
23 changes: 21 additions & 2 deletions autogpt/memory/vector/providers/json_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@ def __init__(self, cfg: Config) -> None:
workspace_path = Path(cfg.workspace_path)
self.file_path = workspace_path / f"{cfg.memory_index}.json"
self.file_path.touch()
logger.debug(f"Initialized {__name__} with index path {self.file_path}")
logger.debug(
f"Initialized {__class__.__name__} with index path {self.file_path}"
)

self.memories = []
self.save_index()
try:
self.load_index()
logger.debug(f"Loaded {len(self.memories)} MemoryItems from file")
except Exception as e:
logger.warn(f"Could not load MemoryItems from file: {e}")
self.save_index()

def __iter__(self) -> Iterator[MemoryItem]:
    """Yield the stored MemoryItems in insertion order."""
    yield from self.memories
Expand All @@ -48,6 +55,7 @@ def __len__(self) -> int:

def add(self, item: MemoryItem):
    """Store a MemoryItem in the index and persist the index to disk.

    Returns the new number of items in the index.
    """
    logger.debug(f"Adding item to memory: {item.dump()}")
    self.memories.append(item)
    self.save_index()
    return len(self.memories)

Expand All @@ -62,6 +70,17 @@ def clear(self):
self.memories.clear()
self.save_index()

def load_index(self):
    """Read the backing index file and populate self.memories.

    Deserialization errors propagate to the caller (``__init__`` catches
    them and rewrites the index), so no error handling is done here.
    """
    if not self.file_path.is_file():
        logger.debug(f"Index file '{self.file_path}' does not exist")
        return

    logger.debug(f"Loading memories from index file '{self.file_path}'")
    with self.file_path.open("r") as f:
        raw_index = f.read()
    deserialized = orjson.loads(raw_index)
    self.memories.extend(MemoryItem(**item_data) for item_data in deserialized)

def save_index(self):
logger.debug(f"Saving memory index to file {self.file_path}")
with self.file_path.open("wb") as f:
Expand Down
4 changes: 3 additions & 1 deletion data_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ def main() -> None:
args = parser.parse_args()

# Initialize memory
memory = get_memory(cfg, init=args.init)
memory = get_memory(cfg)
if args.init:
memory.clear()
logger.debug("Using memory of type: " + memory.__class__.__name__)

if args.file:
Expand Down
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ def agent(config: Config, workspace: Workspace) -> Agent:
ai_config.command_registry = command_registry

config.set_memory_backend("json_file")
memory_json_file = get_memory(config, init=True)
memory_json_file = get_memory(config)
memory_json_file.clear()

system_prompt = ai_config.construct_full_prompt()

Expand Down
4 changes: 3 additions & 1 deletion tests/integration/agent_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ def memory_json_file(agent_test_config: Config):
was_memory_backend = agent_test_config.memory_backend

agent_test_config.set_memory_backend("json_file")
yield get_memory(agent_test_config, init=True)
memory = get_memory(agent_test_config)
memory.clear()
yield memory

agent_test_config.set_memory_backend(was_memory_backend)

Expand Down
22 changes: 21 additions & 1 deletion tests/integration/memory/test_json_file_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ def test_json_memory_init_with_backing_empty_file(config: Config, workspace: Wor
assert index_file.read_text() == "[]"


def test_json_memory_init_with_backing_file(config: Config, workspace: Workspace):
def test_json_memory_init_with_backing_invalid_file(
config: Config, workspace: Workspace
):
index_file = workspace.root / f"{config.memory_index}.json"
index_file.touch()

Expand Down Expand Up @@ -78,6 +80,24 @@ def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embed
assert retrieved.memory_item == memory_item


def test_json_memory_load_index(config: Config, memory_item: MemoryItem):
    """load_index should restore previously saved MemoryItems from disk."""
    index = JSONFileMemory(config)
    index.add(memory_item)

    # Sanity-check the fixture setup before exercising load_index itself,
    # so a broken add()/save_index() is reported as a setup error.
    try:
        assert index.file_path.exists(), "index was not saved to file"
        assert len(index) == 1, f"index contains {len(index)} items instead of 1"
        assert index.memories[0] == memory_item, "item in index != added mock item"
    except AssertionError as e:
        # Chain the cause so the failing assertion is visible in the traceback.
        raise ValueError(f"Setting up for load_index test failed: {e}") from e

    # Drop the in-memory copy and reload from the file written by add().
    index.memories = []
    index.load_index()

    assert len(index) == 1
    assert index.memories[0] == memory_item


@pytest.mark.vcr
@requires_api_key("OPENAI_API_KEY")
def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None:
Expand Down

0 comments on commit f16d7ba

Please sign in to comment.