In [1]:
! pip install -qU gitingest voyageai openai openai-agents pymongo galileo langgraph-checkpoint-mongodb langgraph


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import getpass
import os


# Function to securely get and set environment variables
def set_env_securely(var_name, prompt):
    value = getpass.getpass(prompt)
    os.environ[var_name] = value

In [3]:
set_env_securely("GALILEO_API_KEY", "Enter your Galileo API key: ")

In [4]:
# Optional, set a default Project
GALILEO_PROJECT = "CODING_ASSISTANT_AGENT"

# Optional, set a default Log Stream
GALILEO_LOG_STREAM = "CODING_ASSISTANT_AGENT_STREAM"

In [5]:
from galileo import GalileoLogger

logger = GalileoLogger(project=GALILEO_PROJECT, log_stream=GALILEO_LOG_STREAM)

In [6]:
codebase_metadata = {}

Provide the url of the codebase you want to process

In [7]:
codebase_metadata["url"] = "https://github.com/RichmondAlake/memorizz"

In [8]:
# Step 1 Convert github repo to markdown
from gitingest import ingest_async

summary, tree, content = await ingest_async(codebase_metadata.get("url"))

In [9]:
print(summary)

Repository: richmondalake/memorizz
Files analyzed: 57

Estimated tokens: 101.0k


In [10]:
attrs = {}
for line in summary.splitlines():
    line = line.strip()
    if not line or ":" not in line:
        continue
    key, val = line.split(":", 1)
    attrs[key.lower().replace(" ", "_")] = val.strip()

In [11]:
codebase_metadata["repository"] = attrs.get("repository")
codebase_metadata["analyzed_count"] = attrs.get("files_analyzed")
codebase_metadata["estimated_tokens"] = attrs.get("estimated_tokens")

In [12]:
import pprint

pprint.pprint(codebase_metadata)

{'analyzed_count': '57',
 'estimated_tokens': '101.0k',
 'repository': 'richmondalake/memorizz',
 'url': 'https://github.com/RichmondAlake/memorizz'}


In [13]:
codebase_metadata["tree"] = tree

In [14]:
print(codebase_metadata["tree"])

Directory structure:
└── richmondalake-memorizz/
    ├── README.md
    ├── LICENCE.txt
    ├── pyproject.toml
    ├── eval/
    │   ├── README.md
    │   └── longmemeval/
    │       ├── README.md
    │       ├── download_dataset.py
    │       ├── evaluate_delegate_pattern.py
    │       ├── evaluate_hierarchical_pattern.py
    │       ├── evaluate_memorizz.py
    │       └── README_evaluation_architectures.md
    ├── examples/
    │   ├── knowledge_base.ipynb
    │   ├── memagent_single_agent.ipynb
    │   ├── memagents_multi_agents.ipynb
    │   ├── persona.ipynb
    │   ├── test-ollama-embed.ipynb
    │   ├── test-openai-embed.ipynb
    │   ├── toolbox.ipynb
    │   └── workflow.ipynb
    └── src/
        └── memorizz/
            ├── __init__.py
            ├── memagent.py
            ├── multi_agent_orchestrator.py
            ├── task_decomposition.py
            ├── context_window_management/
            │   ├── __init__.py
            │   └── cwm.py
            ├── database/
 

Generate the description for the codebase and add it to the codebase_metadata


In [15]:
set_env_securely("OPENAI_API_KEY", "Enter your OPENAI_API_KEY: ")

In [17]:
from typing import Dict

from galileo.openai import openai

openai_client = openai.OpenAI()


def generate_repo_description(
    metadata: Dict[str, str],
    content: str,
    model: str = "gpt-4.1",
    snippet_length: int = 1000,
) -> str:
    """
    Generate a Markdown README-style description of a GitHub repo using the
    OpenAI Response API. The description will:
      - Name the repo and link to it
      - Mention how many files were analyzed and the approximate token count
      - Summarize the high-level directory structure (major folders)
      - Infer the repository's purpose and main goals

    Args:
        metadata: dict with keys:
            - repository: "owner/name"
            - url: repo URL
            - analyzed_count: number of files analyzed (string or int)
            - estimated_tokens: e.g. "101.0k"
            - tree: full directory structure string
        model: which OpenAI model to call (default "gpt-4.1")

    Returns:
        A Markdown-formatted description string.
    """
    # Extract just the top-level entries for brevity
    tree_lines = metadata.get("tree", "").splitlines()
    top_level = "\n".join(tree_lines[:4])  # e.g. the first three indented lines

    prompt = f"""
You are a documentation assistant. Using the metadata and a snippet of the repository's content,
write a concise, engaging README-style paragraph in Markdown that:
- Names the repository and links to it
- Mentions how many files were analyzed and the approximate token count
- Summarizes the high-level directory structure (major folders)
- Infers the repository's purpose and primary goals
- Highlights any notable code patterns or core modules from the provided content snippet


Metadata:
Repository: {metadata.get('repository')}
URL: {metadata.get('url')}
Files analyzed: {metadata.get('analyzed_count')}
Estimated tokens: {metadata.get('estimated_tokens')}

Top-level directory structure:
{top_level}

Content snippet:
{content.strip()[:snippet_length]}

Please produce one well-crafted paragraph.
"""

    response = openai_client.responses.create(model=model, input=prompt)
    return response.output_text.strip()

In [18]:
codebase_metadata["description"] = generate_repo_description(codebase_metadata, content)

In [19]:
pprint.pprint(codebase_metadata["description"])

('[**richmondalake/memorizz**](https://github.com/RichmondAlake/memorizz) is '
 'an experimental Python library focused on advanced memory management for AI '
 'agents, providing core infrastructure to store, retrieve, and utilize '
 'contextual knowledge in agent-based systems. With a codebase spanning 57 '
 'files and approximately 101,000 tokens, the repository is structured with '
 'essential top-level files like `README.md` and `LICENCE.txt`, making entry '
 'points clear for new contributors or users. The primary aim of Memorizz is '
 'to facilitate research and education on agent memory architectures, '
 'Notable patterns include strong modularization for memory operations and '
 'clear emphasis on both extensibility and educational clarity. The project’s '
 'core modules likely revolve around agent memory APIs and storage mechanisms, '
 'supporting experimentation in cognitive AI system design.')


Extracting the codebase_content data

In [20]:
print(content[0:1000])

FILE: README.md
<div align="center">

# Memorizz 🧠

📊 **[Agent Memory Presentation](https://docs.google.com/presentation/d/1iSu667m5-pOXMrJq_LjkfnfD4V0rW1kbhGaQ2u3TKXQ/edit?usp=sharing)** | 🎥 **[AIEWF Richmond's Talk](https://youtu.be/W2HVdB4Jbjs?si=faaI3cMLc71Efpeu)**

[![PyPI version](https://badge.fury.io/py/memorizz.svg)](https://badge.fury.io/py/memorizz)
[![PyPI downloads](https://img.shields.io/pypi/dm/memorizz.svg)](https://pypistats.org/packages/memorizz)

</div>

> 
> **MemoRizz is an EXPERIMENTAL library intended for EDUCATIONAL PURPOSES ONLY.**
> 
> **Do NOT use in production environments or with sensitive data.**
> 
> This library is under active development, has not undergone security audits, and may contain bugs or breaking changes in future releases.

## Overview

MemoRizz is an advanced memory management framework designed for AI agents, enabl


In [21]:
import os
import re


def split_content_to_files(content_blob: str):
    # Split on the "====\nFILE: " marker
    parts = re.split(r"^={2,}\nFILE: ", content_blob, flags=re.MULTILINE)
    files = []
    for part in parts[1:]:
        # part starts with: "<path>\n====\n<rest of file>"
        lines = part.splitlines()
        full_path = lines[0].strip()  # e.g. "README.md" or "src/foo.py"
        file_name = os.path.basename(full_path)  # e.g. "README.md" or "foo.py"
        # Skip the next separator line, then the rest is content
        content = "\n".join(lines[2:]).rstrip()
        files.append({"file_name": file_name, "content": content})
    return files

In [22]:
files = split_content_to_files(content)

In [23]:
pprint.pprint(files[0])

{'content': '<div align="center">\n'
            '\n'
            '# Memorizz 🧠\n'
            '\n'
            '📊 **[Agent Memory '
            'Presentation](https://docs.google.com/presentation/d/1iSu667m5-pOXMrJq_LjkfnfD4V0rW1kbhGaQ2u3TKXQ/edit?usp=sharing)** '
            "| 🎥 **[AIEWF Richmond's "
            'Talk](https://youtu.be/W2HVdB4Jbjs?si=faaI3cMLc71Efpeu)**\n'
            '\n'
            '[![PyPI '
            'version](https://badge.fury.io/py/memorizz.svg)](https://badge.fury.io/py/memorizz)\n'
            '[![PyPI '
            'downloads](https://img.shields.io/pypi/dm/memorizz.svg)](https://pypistats.org/packages/memorizz)\n'
            '\n'
            '</div>\n'
            '\n'
            '> \n'
            '> **MemoRizz is an EXPERIMENTAL library intended for EDUCATIONAL '
            'PURPOSES ONLY.**\n'
            '> \n'
            '> **Do NOT use in production environments or with sensitive '
            'data.**\n'
            '> \n'
            '> Thi

In [24]:
def generate_file_description(
    file_name: str, content: str, model: str = "gpt-4.1", snippet_length: int = 100
) -> str:
    """
    Generate a brief description of a single file using the OpenAI Response API.

    Args:
        file_name: Name of the file (e.g. "memagent.py")
        content:   Full text of the file
        model:     OpenAI model to use (default "gpt-4.1")
        snippet_length: Number of characters from the start of `content` to include in prompt

    Returns:
        A short Markdown sentence describing the file purpose.
    """
    # Grab a small snippet for context
    snippet = content.strip()[:snippet_length].replace("\n", " ")

    prompt = f"""
    You are a code documentation assistant. Write a one-sentence Markdown
    description of the file "{file_name}", based on this snippet:

    ```txt
    {snippet}

    Description:
    """.strip()

    # Call the OpenAI Response API
    response = openai_client.responses.create(model=model, input=prompt)

    return response.output_text.strip()

Quick test of the description function

In [25]:
files[0]["description"] = generate_file_description(
    files[0]["file_name"], files[0]["content"]
)

In [26]:
files[0]["description"]

'A Markdown file introducing Memorizz, featuring a centered header, emoji, and a link to the Agent Memory Presentation.'

In [27]:
# Generate the descriptions for the codebase_metadata and the codebase_files
for file in files:
    file["description"] = generate_file_description(file["file_name"], file["content"])

Generate the emebdding for the codebase_metadata and the codebase_files

We are genearting embeddings with openai and voyage ai

In [28]:
set_env_securely("VOYAGE_API_KEY", "Enter your Voyage API Key: ")

In [29]:
from typing import List

import voyageai

vo = voyageai.Client()


def get_voyage_embedding(data: str) -> List:
    """
    Get Voyage AI embeddings for text.

    Args:
        data (str): A text to embed.
        input_type (str): Input type, either "document" or "query".

    Returns:
        List: Embeddings as a list.
    """
    embedding = vo.embed(
        texts=data, model="voyage-code-3", truncation=False, output_dimension=2048
    ).embeddings[0]
    return embedding

  from .autonotebook import tqdm as notebook_tqdm


In [30]:
def get_openai_embedding(data: str) -> List:
    """
    Get OpenAI embeddings for text.

    Args:
        data (str): A text to embed.

    Returns:
        List: Embeddings as a list.
    """
    response = openai_client.embeddings.create(
        input=data, model="text-embedding-3-large", dimensions=3072
    )
    return response.data[0].embedding

In [31]:
# Create the embeddings for the codebase_metadata
# convert the codebase_metadata to dataframe

import pandas as pd

codebase_metadata_df = pd.DataFrame([codebase_metadata])
codebase_metadata_df

Unnamed: 0,url,repository,analyzed_count,estimated_tokens,tree,description
0,https://github.com/RichmondAlake/memorizz,richmondalake/memorizz,57,101.0k,Directory structure:\n└── richmondalake-memori...,[**richmondalake/memorizz**](https://github.co...


In [32]:
# Create the embeddings for the codebase_metadata
# convert the codebase_metadata to dataframe
from tqdm.auto import tqdm

# Enable the pandas “progress_apply” method
tqdm.pandas()

codebase_metadata_df["openai_embeddings"] = codebase_metadata_df[
    "description"
].progress_apply(get_openai_embedding)
codebase_metadata_df["voyage_embeddings"] = codebase_metadata_df[
    "description"
].progress_apply(get_voyage_embedding)

100%|██████████| 1/1 [00:00<00:00,  2.73it/s]
100%|██████████| 1/1 [00:00<00:00,  2.19it/s]


In [33]:
codebase_metadata_df

Unnamed: 0,url,repository,analyzed_count,estimated_tokens,tree,description,openai_embeddings,voyage_embeddings
0,https://github.com/RichmondAlake/memorizz,richmondalake/memorizz,57,101.0k,Directory structure:\n└── richmondalake-memori...,[**richmondalake/memorizz**](https://github.co...,"[-0.013910239562392235, 0.0077960304915905, -0...","[-0.01897663064301014, 0.019311921671032906, -..."


In [34]:
# Generate embeddings for the codebase_files
code_base_files_df = pd.DataFrame(files)
code_base_files_df.head()

Unnamed: 0,file_name,content,description
0,README.md,"<div align=""center"">\n\n# Memorizz 🧠\n\n📊 **[A...",A detailed overview and guide for the Memorizz...
1,LICENCE.txt,MIT License\n\nCopyright (c) 2024 Richmond Ala...,"This file contains the MIT License, granting p..."
2,pyproject.toml,"[build-system]\nrequires = [""hatchling""]\nbuil...",The pyproject.toml file specifies build system...
3,README.md,# Memorizz Evaluation Framework\n\nThis direct...,Description: This README.md provides an overvi...
4,README.md,# LongMemEval Evaluation for Memorizz\n\nThis ...,The `README.md` file provides an overview and ...


In [35]:
code_base_files_df["openai_embeddings"] = code_base_files_df[
    "description"
].progress_apply(get_openai_embedding)

code_base_files_df["voyage_embeddings"] = code_base_files_df[
    "description"
].progress_apply(get_voyage_embedding)

100%|██████████| 57/57 [00:22<00:00,  2.50it/s]
100%|██████████| 57/57 [00:15<00:00,  3.67it/s]


In [36]:
code_base_files_df.head()

Unnamed: 0,file_name,content,description,openai_embeddings,voyage_embeddings
0,README.md,"<div align=""center"">\n\n# Memorizz 🧠\n\n📊 **[A...",A detailed overview and guide for the Memorizz...,"[-0.022689061239361763, -0.024230889976024628,...","[-0.014513802714645863, 0.020234525203704834, ..."
1,LICENCE.txt,MIT License\n\nCopyright (c) 2024 Richmond Ala...,"This file contains the MIT License, granting p...","[0.0009140886832028627, 0.0029062831308692694,...","[-0.024665705859661102, 0.049052704125642776, ..."
2,pyproject.toml,"[build-system]\nrequires = [""hatchling""]\nbuil...",The pyproject.toml file specifies build system...,"[-0.06007716432213783, 0.013143442571163177, -...","[-0.03296789899468422, 0.020944518968462944, -..."
3,README.md,# Memorizz Evaluation Framework\n\nThis direct...,Description: This README.md provides an overvi...,"[-0.020649829879403114, 0.005917594768106937, ...","[-0.027502847835421562, -0.0006511090905405581..."
4,README.md,# LongMemEval Evaluation for Memorizz\n\nThis ...,The `README.md` file provides an overview and ...,"[-0.02544913813471794, -0.0007917052134871483,...","[-0.036813315004110336, 0.011114790104329586, ..."


Connect to mongodb and create collections

In [37]:
set_env_securely("MONGODB_URI", "Enter your MongoDB URI: ")

In [38]:
import pymongo


def get_mongo_client(mongo_uri):
    """Establish and validate connection to the MongoDB."""

    client = pymongo.MongoClient(
        mongo_uri,
        appname="devrel.showcase.partners.galileo.ai_hallucination_detection_and_reduction",
    )

    # Validate the connection
    ping_result = client.admin.command("ping")
    if ping_result.get("ok") == 1.0:
        # Connection successful
        print("Connection to MongoDB successful")
        return client
    else:
        print("Connection to MongoDB failed")
    return None

In [39]:
DB_NAME = "code_repository_data"
db_client = get_mongo_client(os.environ.get("MONGODB_URI"))
db = db_client[DB_NAME]

Connection to MongoDB successful


In [40]:
# Collection names
CODEBASE_METADATA = "codebase_metadata"
CODEBASE_FILES = "codebase_files"

In [41]:
# Create collections
def create_collections():
    existing_collections = db.list_collection_names()
    print(f"Existing collections: {existing_collections}")

    # If the collection does not exist, create it
    if CODEBASE_METADATA not in existing_collections:
        db.create_collection(CODEBASE_METADATA)
        print(f"Created collection: {CODEBASE_METADATA}")
    else:
        print(f"Collection {CODEBASE_METADATA} already exists")

    if CODEBASE_FILES not in existing_collections:
        db.create_collection(CODEBASE_FILES)
        print(f"Created collection: {CODEBASE_FILES}")
    else:
        print(f"Collection {CODEBASE_FILES} already exists")

In [42]:
create_collections()

Existing collections: []
Created collection: codebase_metadata
Created collection: codebase_files


In [43]:
# Create Indexes
import time

from pymongo.operations import SearchIndexModel


# Create vector search index if it doesn't exist
def create_vector_search_index(
    collection,
    vector_index_name,
    dimensions=1024,
    quantization="scalar",
    embedding_path="embedding",
):
    # Check if index already exists
    try:
        existing_indexes = collection.list_search_indexes()
        for index in existing_indexes:
            if index["name"] == vector_index_name:
                print(f"Vector search index '{vector_index_name}' already exists.")
                return
    except Exception as e:
        print(f"Could not list search indexes: {e}")
        return

    index_definition = {
        "fields": [
            {
                "type": "vector",
                "path": embedding_path,
                "numDimensions": dimensions,
                "similarity": "cosine",
            }
        ]
    }

    if quantization == "scalar":
        index_definition["fields"][0]["quantization"] = quantization

    if quantization == "binary":
        index_definition["fields"][0]["quantization"] = quantization

    # Create vector search index
    search_index_model = SearchIndexModel(
        definition=index_definition,
        name=vector_index_name,
        type="vectorSearch",
    )

    try:
        result = collection.create_search_index(model=search_index_model)
        print(f"New search index named '{result}' is building.")
    except Exception as e:
        print(f"Error creating vector search index: {e}")
        return

    # Wait for initial sync to complete
    print(
        f"Polling to check if the index '{result}' is ready. This may take up to a minute."
    )
    predicate = lambda index: index.get("queryable") is True

    while True:
        try:
            indices = list(collection.list_search_indexes(result))
            if indices and predicate(indices[0]):
                break
            time.sleep(5)
        except Exception as e:
            print(f"Error checking index readiness: {e}")
            time.sleep(5)

    print(f"{result} is ready for querying.")

In [44]:
create_vector_search_index(
    db[CODEBASE_METADATA],
    "vector_search_index_scalar_openai",
    quantization="scalar",
    embedding_path="openai_embeddings",
    dimensions=3072,
)

create_vector_search_index(
    db[CODEBASE_METADATA],
    "vector_search_index_scalar_voyage",
    quantization="scalar",
    embedding_path="voyage_embeddings",
    dimensions=2048,
)

create_vector_search_index(
    db[CODEBASE_FILES],
    "vector_search_index_scalar_voyage",
    quantization="scalar",
    embedding_path="voyage_embeddings",
    dimensions=2048,
)

create_vector_search_index(
    db[CODEBASE_FILES],
    "vector_search_index_scalar_openai",
    quantization="scalar",
    embedding_path="openai_embeddings",
    dimensions=3072,
)

New search index named 'vector_search_index_scalar_openai' is building.
Polling to check if the index 'vector_search_index_scalar_openai' is ready. This may take up to a minute.
vector_search_index_scalar_openai is ready for querying.
New search index named 'vector_search_index_scalar_voyage' is building.
Polling to check if the index 'vector_search_index_scalar_voyage' is ready. This may take up to a minute.
vector_search_index_scalar_voyage is ready for querying.
New search index named 'vector_search_index_scalar_voyage' is building.
Polling to check if the index 'vector_search_index_scalar_voyage' is ready. This may take up to a minute.
vector_search_index_scalar_voyage is ready for querying.
New search index named 'vector_search_index_scalar_openai' is building.
Polling to check if the index 'vector_search_index_scalar_openai' is ready. This may take up to a minute.
vector_search_index_scalar_openai is ready for querying.


In [45]:
def create_text_search_index(collection, index_definition, index_name):
    """
    Create a search index for a MongoDB Atlas collection.

    Args:
    collection: MongoDB collection object
    index_definition: Dictionary defining the index mappings
    index_name: String name for the index

    Returns:
    str: Result of the index creation operation
    """

    try:
        search_index_model = SearchIndexModel(
            definition=index_definition, name=index_name
        )

        result = collection.create_search_index(model=search_index_model)
        print(f"Search index '{index_name}' created successfully")
        return result
    except Exception as e:
        print(f"Error creating search index: {e!s}")
        return None

In [46]:
codebase_metadata_index_definition = {
    "mappings": {
        "dynamic": True,
        "fields": {
            "repository": {
                "type": "string",
            },
            "url": {
                "type": "string",
            },
            "description": {
                "type": "string",
            },
        },
    }
}

In [47]:
codebase_files_index_definition = {
    "mappings": {
        "dynamic": True,
        "fields": {
            "file_path": {
                "type": "string",
            },
            "description": {
                "type": "string",
            },
            "content": {
                "type": "string",
            },
        },
    }
}

In [48]:
create_text_search_index(
    db[CODEBASE_METADATA],
    codebase_metadata_index_definition,
    "codebase_metadata_index",
)

create_text_search_index(
    db[CODEBASE_FILES],
    codebase_files_index_definition,
    "codebase_files_index",
)

Search index 'codebase_metadata_index' created successfully
Search index 'codebase_files_index' created successfully


'codebase_files_index'

In [49]:
# Ensure the collections are empty
db[CODEBASE_METADATA].delete_many({})
db[CODEBASE_FILES].delete_many({})

DeleteResult({'n': 0, 'electionId': ObjectId('7fffffff0000000000000003'), 'opTime': {'ts': Timestamp(1751631946, 1), 't': 3}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1751631946, 1), 'signature': {'hash': b"\xa9\x0f\xc5\x85\xa1['g\x90F\x14*\xc2Yx\xdd%;\xe6|", 'keyId': 7520068280199938053}}, 'operationTime': Timestamp(1751631946, 1)}, acknowledged=True)

In [50]:
# Insert the codebase metadata documents into the collection
codebase_metadata_documents = codebase_metadata_df.to_dict(orient="records")
codebase_files_documents = code_base_files_df.to_dict(orient="records")

db[CODEBASE_METADATA].insert_many(codebase_metadata_documents)
db[CODEBASE_FILES].insert_many(codebase_files_documents)

InsertManyResult([ObjectId('6867c84f3947e1e6a1b6db8d'), ObjectId('6867c84f3947e1e6a1b6db8e'), ObjectId('6867c84f3947e1e6a1b6db8f'), ObjectId('6867c84f3947e1e6a1b6db90'), ObjectId('6867c84f3947e1e6a1b6db91'), ObjectId('6867c84f3947e1e6a1b6db92'), ObjectId('6867c84f3947e1e6a1b6db93'), ObjectId('6867c84f3947e1e6a1b6db94'), ObjectId('6867c84f3947e1e6a1b6db95'), ObjectId('6867c84f3947e1e6a1b6db96'), ObjectId('6867c84f3947e1e6a1b6db97'), ObjectId('6867c84f3947e1e6a1b6db98'), ObjectId('6867c84f3947e1e6a1b6db99'), ObjectId('6867c84f3947e1e6a1b6db9a'), ObjectId('6867c84f3947e1e6a1b6db9b'), ObjectId('6867c84f3947e1e6a1b6db9c'), ObjectId('6867c84f3947e1e6a1b6db9d'), ObjectId('6867c84f3947e1e6a1b6db9e'), ObjectId('6867c84f3947e1e6a1b6db9f'), ObjectId('6867c84f3947e1e6a1b6dba0'), ObjectId('6867c84f3947e1e6a1b6dba1'), ObjectId('6867c84f3947e1e6a1b6dba2'), ObjectId('6867c84f3947e1e6a1b6dba3'), ObjectId('6867c84f3947e1e6a1b6dba4'), ObjectId('6867c84f3947e1e6a1b6dba5'), ObjectId('6867c84f3947e1e6a1b6db

Create search methods (vector, hybrid)

In [51]:
# Implementing vector search
def semantic_search(
    user_query,
    collection,
    top_n=5,
    vector_search_index_name="vector_search_index",
    path="embedding",
    embedding_function=get_voyage_embedding,
):
    """
    Perform a vector search in the MongoDB collection based on the user query.

    Args:
    user_query (str): The user's query string.
    collection (MongoCollection): The MongoDB collection to search.
    top_n (int): The number of top results to return.
    vector_search_index_name (str): The name of the vector search index.

    Returns:
    list: A list of matching documents.
    """

    # Retrieve the pre-generated embedding for the query from our dictionary
    # This embedding represents the semantic meaning of the query as a vector
    query_embedding = embedding_function(user_query)

    # Check if we have a valid embedding for the query
    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    # Define the vector search stage using MongoDB's $vectorSearch operator
    # This stage performs the semantic similarity search
    vector_search_stage = {
        "$vectorSearch": {
            "index": vector_search_index_name,  # The vector index we created earlier
            "queryVector": query_embedding,  # The numerical vector representing our query
            "path": path,  # The field containing document embeddings
            "numCandidates": 100,  # Explore this many vectors for potential matches
            "limit": top_n,  # Return only the top N most similar results
        }
    }

    # Define which fields to include in the results and their format
    project_stage = {
        "$project": {
            "_id": 0,  # Exclude MongoDB's internal ID
            "openai_embeddings": 0,
            "voyage_embeddings": 0,
            "score": {
                "$meta": "vectorSearchScore"  # Include similarity score from vector search
            },
        }
    }

    # Combine the search and projection stages into a complete pipeline
    pipeline = [vector_search_stage, project_stage]

    # Execute the pipeline against our collection and get results
    results = collection.aggregate(pipeline)

    # Convert cursor to a Python list for easier handling
    return list(results)

In [52]:
user_query = "Get me the root function of the memagent, where it is defined."

In [53]:
search_results_openai_embeddings = semantic_search(
    user_query,
    db[CODEBASE_FILES],
    top_n=5,
    vector_search_index_name="vector_search_index_scalar_openai",
    path="openai_embeddings",
    embedding_function=get_openai_embedding,
)

In [54]:
pprint.pprint(search_results_openai_embeddings)

[{'content': 'from .toolbox import Toolbox\n'
             'from .llms.openai import OpenAI\n'
             'from .persona import Persona\n'
             'from typing import Optional, Union, List, Dict, Any\n'
             'import json\n'
             'from .memory_component import MemoryComponent, '
             'ConversationMemoryComponent\n'
             'from datetime import datetime\n'
             'import uuid\n'
             'from .memory_provider import MemoryProvider\n'
             'from .memory_component.memory_mode import MemoryMode\n'
             'from .memory_provider.memory_type import MemoryType\n'
             'import logging\n'
             'from pydantic import BaseModel\n'
             'from .toolbox.tool_schema import ToolSchemaType\n'
             'from typing import Callable\n'
             'from .workflow.workflow import Workflow, WorkflowOutcome\n'
             'from .context_window_management.cwm import CWM\n'
             'from .long_term_memory import Knowl

In [55]:
search_results_voyageai_embeddings = semantic_search(
    user_query,
    db[CODEBASE_FILES],
    top_n=5,
    vector_search_index_name="vector_search_index_scalar_voyage",
    path="voyage_embeddings",
    embedding_function=get_voyage_embedding,
)

In [56]:
pprint.pprint(search_results_voyageai_embeddings)

[{'content': 'from typing import List\n'
             '# from ..memagent import MemAgent\n'
             'from ..memory_provider.memory_type import MemoryType\n'
             '\n'
             '# Can take in an agent and then return a prompt that informs the '
             'agent on how to manage the context window\n'
             'class CWM:\n'
             '    # def __init__(self, agent: MemAgent):\n'
             '    #     self.agent = agent\n'
             '    \n'
             '    @staticmethod\n'
             '    def get_prompt_from_memory_types(memory_types: '
             'List[MemoryType]):\n'
             '        prompt = "You are an AI Agent endowed with a powerful, '
             'multi-tiered memory augmentation system. Your mission is to use '
             'all available memory modalities to deliver consistent, accurate, '
             'and context-rich responses. The aim is to esure that through '
             'augmented memory, you become belivable, capable, and re

In [219]:
# Let's compare the results of the two embeddings

In [57]:
import pandas as pd


def compare_search_results(openai_results, voyage_results):
    """
    Compare two lists of search-result dicts side-by-side.

    Args:
        openai_results (list of dict): Results from OpenAI embeddings search.
        voyage_results (list of dict): Results from Voyage embeddings search.

    Returns:
        pandas.DataFrame: Table with columns for file name, description, and score
                          from each embedding source.
    """
    max_len = max(len(openai_results), len(voyage_results))
    rows = []
    for i in range(max_len):
        oa = openai_results[i] if i < len(openai_results) else {}
        va = voyage_results[i] if i < len(voyage_results) else {}
        rows.append(
            {
                "file_name_openai": oa.get("file_name"),
                "description_openai": oa.get("description"),
                "score_openai": oa.get("score"),
                "file_name_voyage": va.get("file_name"),
                "description_voyage": va.get("description"),
                "score_voyage": va.get("score"),
            }
        )
    return pd.DataFrame(rows)

In [58]:
search_results_comparison_df = compare_search_results(
    search_results_openai_embeddings, search_results_voyageai_embeddings
)

In [59]:
search_results_comparison_df

Unnamed: 0,file_name_openai,description_openai,score_openai,file_name_voyage,description_voyage,score_voyage
0,memagent.py,```markdown\nmemagent.py defines a memory-driv...,0.706605,cwm.py,`cwm.py` provides functionality related to mem...,0.802608
1,cwm.py,`cwm.py` provides functionality related to mem...,0.69299,memagent.py,```markdown\nmemagent.py defines a memory-driv...,0.793248
2,README.md,A detailed overview and guide for the Memorizz...,0.691128,test_memagent_enhanced_tools.py,Description: This file contains tests for the ...,0.791329
3,test_memagent_enhanced_tools.py,Description: This file contains tests for the ...,0.668708,toolbox.py,`toolbox.py` provides utility functions and ty...,0.77631
4,README.md,The `README.md` file provides an overview and ...,0.644289,memagent_single_agent.ipynb,A Jupyter notebook that demonstrates using the...,0.769624


Vector Search is not all you need 

In [60]:
def hybrid_search(
    user_query,
    collection,
    top_n=5,
    vector_search_index_name="vector_search_index_scalar",
    text_search_index_name="text_search_index",
    vector_weight=0.7,
    text_weight=0.3,
    embedding_function=get_voyage_embedding,
    embedding_path="embedding",
    text_path="text",
):
    """
    Perform hybrid search using both vector and text search with MongoDB RankFusion.

    Args:
        user_query (str): The user's query or search term.
        collection (Collection): MongoDB collection object.
        top_n (int): Number of results to return.
        vector_search_index_name (str): Name of the vector search index.
        text_search_index_name (str): Name of the text search index.
        vector_weight (float): Weight for vector search results (0.0-1.0).
        text_weight (float): Weight for text search results (0.0-1.0).
        embedding_function (function): Function to generate embeddings.
        path (str): Path to the embedding field.

    Returns:
        List[Dict]: List of search results with scores and details.
    """

    # Convert user query to embedding for vector search
    query_embedding = embedding_function(user_query)

    # Build the RankFusion aggregation pipeline
    rank_fusion_stage = {
        "$rankFusion": {
            "input": {
                "pipelines": {
                    "vectorPipeline": [
                        {
                            "$vectorSearch": {
                                "index": vector_search_index_name,
                                "path": embedding_path,
                                "queryVector": query_embedding,
                                "numCandidates": 100,
                                "limit": 20,
                            }
                        }
                    ],
                    "textPipeline": [
                        {
                            "$search": {
                                "index": text_search_index_name,
                                "phrase": {
                                    "query": user_query,
                                    "path": text_path,
                                },
                            }
                        },
                        {"$limit": 20},
                    ],
                }
            },
            "combination": {
                "weights": {
                    "vectorPipeline": vector_weight,
                    "textPipeline": text_weight,
                }
            },
            "scoreDetails": True,
        }
    }

    # Project stage to select desired fields and include score details
    project_stage = {
        "$project": {
            "_id": 0,
            "openai_embeddings": 0,
            "voyage_embeddings": 0,
            "score": {"$meta": "scoreDetails"},
        }
    }

    # Final limit stage
    limit_stage = {"$limit": top_n}

    # Combine all stages into the complete aggregation pipeline
    pipeline = [rank_fusion_stage, project_stage, limit_stage]

    try:
        # Execute the pipeline against the collection
        results = list(collection.aggregate(pipeline))

        print(f"Found {len(results)} results for query: '{user_query}'")

        return results

    except Exception as e:
        print(f"Error executing hybrid search: {e}")
        return []

In [65]:
hybrid_search_results_voyage_embeddings = hybrid_search(
    user_query,
    db[CODEBASE_FILES],
    top_n=5,
    vector_weight=0.5,
    text_weight=0.5,
    vector_search_index_name="vector_search_index_scalar_voyage",
    text_search_index_name="codebase_files_index",
    embedding_path="voyage_embeddings",
    text_path="content",
    embedding_function=get_voyage_embedding,
)

Found 5 results for query: 'Get me the root function of the memagent, where it is defined.'


In [66]:
hybrid_search_results_openai_embeddings = hybrid_search(
    user_query,
    db[CODEBASE_FILES],
    top_n=5,
    vector_weight=0.5,
    text_weight=0.5,
    vector_search_index_name="vector_search_index_scalar_openai",
    text_search_index_name="codebase_files_index",
    embedding_path="openai_embeddings",
    text_path="content",
    embedding_function=get_openai_embedding,
)

Found 5 results for query: 'Get me the root function of the memagent, where it is defined.'


In [67]:
search_results_comparison_df = compare_search_results(
    hybrid_search_results_openai_embeddings, hybrid_search_results_voyage_embeddings
)

In [69]:
search_results_comparison_df

Unnamed: 0,file_name_openai,description_openai,score_openai,file_name_voyage,description_voyage,score_voyage
0,memagent.py,```markdown\nmemagent.py defines a memory-driv...,"{'value': 0.00819672131147541, 'description': ...",cwm.py,`cwm.py` provides functionality related to mem...,"{'value': 0.00819672131147541, 'description': ..."
1,cwm.py,`cwm.py` provides functionality related to mem...,"{'value': 0.008064516129032258, 'description':...",memagent.py,```markdown\nmemagent.py defines a memory-driv...,"{'value': 0.008064516129032258, 'description':..."
2,README.md,A detailed overview and guide for the Memorizz...,"{'value': 0.007936507936507936, 'description':...",test_memagent_enhanced_tools.py,Description: This file contains tests for the ...,"{'value': 0.007936507936507936, 'description':..."
3,test_memagent_enhanced_tools.py,Description: This file contains tests for the ...,"{'value': 0.0078125, 'description': 'value out...",toolbox.py,`toolbox.py` provides utility functions and ty...,"{'value': 0.0078125, 'description': 'value out..."
4,README.md,The `README.md` file provides an overview and ...,"{'value': 0.007692307692307693, 'description':...",memagent_single_agent.ipynb,A Jupyter notebook that demonstrates using the...,"{'value': 0.007692307692307693, 'description':..."


In [70]:
def generate_answer_from_search(
    user_query: str,
    model: str = "gpt-4.1",
    max_snippet_chars: int = 200,
    embedding_model_provider: str = "voyage",
    num_search_results: int = 5,
) -> str:
    """
    Use code search results as context to answer a user query via the OpenAI Response API.

    Args:
        search_results: List of dicts, each with keys:
            - file_name (str)
            - description (str)
            - content (str)
            - score (float)
        user_prompt: The user's question or instruction.
        model: OpenAI model to call (default "gpt-4.1").
        max_snippet_chars: How many chars of each file's content to include.

    Returns:
        The assistant's answer as a string.
    """

    # Let's get the search reults

    search_results = []

    if embedding_model_provider == "voyage":
        # Feel free to change the search result method to hybrid search
        search_results = semantic_search(
            user_query,
            db[CODEBASE_FILES],
            top_n=num_search_results,
            vector_search_index_name="vector_search_index_scalar_voyage",
            path="voyage_embeddings",
            embedding_function=get_voyage_embedding,
        )
    elif embedding_model_provider == "openai":
        search_results = semantic_search(
            user_query,
            db[CODEBASE_FILES],
            top_n=num_search_results,
            vector_search_index_name="vector_search_index_scalar_openai",
            path="openai_embeddings",
            embedding_function=get_openai_embedding,
        )

    # Build a contextual prompt
    context_entries = []
    for i, res in enumerate(search_results, start=1):
        fname = res.get("file_name", "")
        desc = res.get("description", "").strip()
        snippet = res.get("content", "").strip().replace("```", "`​``")
        snippet = snippet[:max_snippet_chars]
        context_entries.append(f"{i}. **{fname}** — {desc}\n```{snippet}```")

    context_block = "\n\n".join(context_entries)

    prompt = (
        "You are a helpful assistant. Use the following code search results as context "
        "to answer the user query. Be precise and reference filenames when relevant.\n\n"
        f"### Context\n{context_block}\n\n"
        f"### User Query\n{user_query}\n\n"
        "### Answer\n"
    )

    # Call the Response API
    response = openai_client.responses.create(model=model, input=prompt)

    return search_results, response.output_text.strip()

In [71]:
user_query = "Get me the file responsibe for the defintion of the MemAgent class"

search_results, answer = generate_answer_from_search(
    user_query, embedding_model_provider="voyage"
)
print(answer)

The file responsible for the definition of the `MemAgent` class is **memagent.py**.

**Reference:**
- The context states: "memagent.py defines a memory-driven agent by integrating persona management, OpenAI language model support, and a customizable toolbox for executing tasks."
- The import statements in `memagent.py` list several components (e.g., `Toolbox`, `Persona`, `MemoryComponent`), suggesting it's the main class implementation for the agent.


In [72]:
search_results, answer = generate_answer_from_search(
    user_query, embedding_model_provider="openai"
)
print(answer)

The file responsible for the definition of the MemAgent class is **memagent.py**. The context indicates that this file "defines a memory-driven agent by integrating persona management, OpenAI language model support, and a customizable toolbox for executing tasks," which directly describes the role of a MemAgent. The imports and structure further confirm it is the module where MemAgent is defined.


ReRanking with VoyageAI (rerank-2-lite)

In [73]:
from typing import Any, Dict, List


def rerank_search_results(
    search_results: List[Dict[str, Any]],
    user_query: str,
    model: str = "rerank-2",
    top_k: int = 5,
) -> List[Dict[str, Any]]:
    """
    Re-rank semantic search results using Voyage AI's rerank API.

    Args:
        search_results: List of dicts with at least a "description" field.
        user_query:     The original user query string.
        model:          The Voyage rerank model to use (e.g. "rerank-2").
        top_k:          If provided, limit to the top_k highest-scoring results.

    Returns:
        A new list of the original result dicts, each augmented with
        "rerank_score", sorted in descending order of that score.
    """

    # Extract the text snippets to rerank
    documents = [res["description"] for res in search_results]

    if top_k is None:
        top_k = len(documents)

    # Call the rerank endpoint
    resp = vo.rerank(user_query, documents, model=model, top_k=top_k)

    # Build a mapping from snippet → score
    score_map = {r.document: r.relevance_score for r in resp.results}

    # Attach scores and sort
    reranked = []
    for orig in search_results:
        desc = orig.get("description")
        score = score_map.get(desc, 0.0)
        entry = orig.copy()
        entry["rerank_score"] = score
        reranked.append(entry)

    # Sort by new rerank_score descending
    reranked.sort(key=lambda x: x["rerank_score"], reverse=True)

    return reranked

In [74]:
reranked_openai_results = rerank_search_results(
    hybrid_search_results_openai_embeddings, user_query, top_k=5
)

In [75]:
for r in reranked_openai_results:
    print(r["file_name"], r["rerank_score"])

memagent.py 0.6953125
test_memagent_enhanced_tools.py 0.5078125
cwm.py 0.484375
README.md 0.4609375
README.md 0.330078125


In [76]:
reranked_voyage_results = rerank_search_results(
    hybrid_search_results_voyage_embeddings, user_query, top_k=5
)

In [77]:
for r in reranked_voyage_results:
    print(r["file_name"], r["rerank_score"])

memagent.py 0.6953125
test_memagent_enhanced_tools.py 0.5078125
cwm.py 0.484375
memagent_single_agent.ipynb 0.4140625
toolbox.py 0.34375


In [100]:
import operator
from typing import Annotated, List, TypedDict

from langchain_core.messages import BaseMessage


class CodeAgentState(TypedDict):
    """State for the code agent."""

    messages: Annotated[List[BaseMessage], operator.add]
    codebase_files: List[dict]
    codebase_metadata: dict
    summary: str
    directory_tree: str
    content: str

Create a tool that can transform github url to markdown content

In [101]:
from langchain.agents import tool


@tool
async def transform_github_url_to_markdown_using_gitingest(github_url: str) -> str:
    """
    Transform a github url into a markdown content.
    """

    summary, tree, content = await ingest_async(github_url)

    return summary, tree, content

Create tool that can prepare metadata for github repo

In [102]:
@tool
def extract_github_metadata_for_ingestion(
    summary: str, directory_tree: str, content: str
) -> str:
    """
    Extract the metadata for ingestion from a github url.
    """

    attrs = {}
    codebase_metadata = {}
    for line in summary.splitlines():
        line = line.strip()
        if not line or ":" not in line:
            continue
        key, val = line.split(":", 1)
        attrs[key.lower().replace(" ", "_")] = val.strip()

    codebase_metadata["repository"] = attrs.get("repository")
    codebase_metadata["analyzed_count"] = attrs.get("files_analyzed")
    codebase_metadata["estimated_tokens"] = attrs.get("estimated_tokens")
    codebase_metadata["tree"] = directory_tree

    # Create the embeddings for the codebase_metadata using OpenAI and VoyageAI
    codebase_metadata["openai_embeddings"] = get_openai_embedding(
        codebase_metadata["description"]
    )
    codebase_metadata["voyage_embeddings"] = get_voyage_embedding(
        codebase_metadata["description"]
    )

    # generate_repo_description is a function created earlier in this notebook
    codebase_metadata["description"] = generate_repo_description(
        codebase_metadata, content
    )

    return codebase_metadata

Create a tool that can process the data for ingestion

In [103]:
@tool
def prepare_data_and_generate_descriptions_and_embeddings_for_ingestion(content):
    """
    Prepare the data for ingestion into the database.
    """

    # From the markdown content, we will split the content into segments that represent files
    files = split_content_to_files(content)

    # Generate the descriptions for the each files
    for file in files:
        file["description"] = generate_file_description(file["content"])

        # Create the embeddings for the each files using OpenAI and VoyageAI
        file["openai_embeddings"] = get_openai_embedding(file["description"])
        file["voyage_embeddings"] = get_voyage_embedding(file["description"])

    return files

Ingest data into MongoDB

In [104]:
@tool
def ingest_codebase_metadata_to_mongodb(codebase_metadata: dict):
    """
    Ingest the codebase metadata into the database.
    """

    # Ensure that the collection exists, if not create it
    if CODEBASE_METADATA not in db.list_collection_names():
        db.create_collection(CODEBASE_METADATA)

    # Insert the codebase metadata documents into the collection
    db[CODEBASE_METADATA].insert_many(codebase_metadata)

    return "Codebase metadata ingested successfully"

In [105]:
@tool
def ingest_codebase_files_metadata_to_mongodb(codebase_files: dict):
    """
    Ingest the codebase files metadata into the database.
    """

    # Ensure that the collection exists, if not create it
    if CODEBASE_FILES not in db.list_collection_names():
        db.create_collection(CODEBASE_FILES)

    # Insert the codebase files documents into the collection
    db[CODEBASE_FILES].insert_many(codebase_files)

    return "Codebase metadata ingested successfully"

In [106]:
@tool
def search_codebase_data_in_mongodb_using_hybrid_search(
    user_query: str, top_n: int = 10
):
    """
    Search the codebase using hybrid search.
    """

    hybrid_search_results = hybrid_search(
        user_query,
        db[CODEBASE_FILES],
        top_n=top_n,
        vector_weight=0.5,
        text_weight=0.5,
        vector_search_index_name="vector_search_index_scalar_voyage",
        text_search_index_name="codebase_files_index",
        embedding_path="voyage_embeddings",
        text_path="content",
        embedding_function=get_voyage_embedding,
    )

    return hybrid_search_results

In [107]:
code_agent_toolbox = [
    search_codebase_data_in_mongodb_using_hybrid_search,
    ingest_codebase_files_metadata_to_mongodb,
    ingest_codebase_metadata_to_mongodb,
    prepare_data_and_generate_descriptions_and_embeddings_for_ingestion,
    extract_github_metadata_for_ingestion,
    transform_github_url_to_markdown_using_gitingest,
]

In [108]:
from langchain.chat_models import init_chat_model

llm = init_chat_model("openai:gpt-4.1")

In [109]:
code_agent = llm.bind_tools(code_agent_toolbox)

In [110]:
import re


def sanitize_name(name: str) -> str:
    """Sanitize the name to match OpenAI's pattern requirements."""
    # Remove any spaces, <, |, \, /, and >
    sanitized = re.sub(r"[\s<|\\/>]", "_", name)
    # Ensure the name isn't empty
    return sanitized or "anonymous"

In [112]:
import functools

from langchain_core.messages import AIMessage, ToolMessage


def agent_node(state, agent, name):
    # Extract just the messages from the state to pass to the agent
    messages = state["messages"]

    # Ensure all message names are properly sanitized before sending to the agent
    for msg in messages:
        if hasattr(msg, "name"):
            msg.name = sanitize_name(msg.name or "anonymous")

    result = agent.invoke(messages)

    if isinstance(result, ToolMessage):
        # Sanitize tool message name
        result.name = sanitize_name(result.name)
    else:
        # Use a fixed, compliant name for the AI
        result = AIMessage(**result.dict(exclude={"type", "name"}), name="assistant")

    return {
        "messages": [result],
        "sender": sanitize_name(name),
    }

In [113]:
from langgraph.prebuilt import ToolNode

code_agent_node = functools.partial(agent_node, agent=code_agent, name="Code Agent")
tool_node = ToolNode(code_agent_toolbox, name="tools")

In [116]:
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import tools_condition

workflow = StateGraph(CodeAgentState)

workflow.add_node("code_agent", code_agent_node)
workflow.add_node("tools", tool_node)

workflow.set_entry_point("code_agent")
workflow.add_conditional_edges(
    "code_agent", tools_condition, {"tools": "tools", END: END}
)

workflow.add_edge("tools", "code_agent")

<langgraph.graph.state.StateGraph at 0x14da05650>

In [117]:
from langgraph.checkpoint.mongodb import AsyncMongoDBSaver
from pymongo import AsyncMongoClient

async_mongodb_client = AsyncMongoClient(os.getenv("MONGODB_URI"))
mongodb_checkpointer = AsyncMongoDBSaver(async_mongodb_client)

graph = workflow.compile(checkpointer=mongodb_checkpointer)

In [None]:
from IPython.display import Image, display

try:
    display(Image(graph.get_graph(xray=True).draw_mermaid_png()))
except Exception:
    # This requires some extra dependencies and is optional
    pass

In [118]:
import asyncio
import time

from langchain_core.messages import AIMessage, HumanMessage


async def chat_loop():
    config = {"configurable": {"thread_id": "0"}}

    while True:
        user_input = await asyncio.get_event_loop().run_in_executor(
            None, input, "User: "
        )
        if user_input.lower() in ["quit", "exit", "q"]:
            print("Goodbye!")
            break

        # Use a sanitized name for the human
        state = {"messages": [HumanMessage(content=user_input, name="human")]}

        print("Assistant: ", end="", flush=True)

        max_retries = 3
        retry_delay = 1

        for attempt in range(max_retries):
            try:
                async for chunk in graph.astream(state, config, stream_mode="values"):
                    if chunk.get("messages"):
                        last_message = chunk["messages"][-1]
                        if isinstance(last_message, AIMessage):
                            # Ensure the AI name is properly sanitized
                            last_message.name = "assistant"
                            print(last_message.content, end="", flush=True)
                        elif isinstance(last_message, ToolMessage):
                            # Sanitize tool names as well
                            tool_name = sanitize_name(last_message.name)
                            print(f"\n[Tool Used: {tool_name}]")
                            print(f"Tool Call ID: {last_message.tool_call_id}")
                            print(f"Content: {last_message.content}")
                            print("Assistant: ", end="", flush=True)
                break
            except Exception as e:
                if attempt < max_retries - 1:
                    print(f"\nAn unexpected error occurred: {e!s}")
                    print(f"\nRetrying in {retry_delay} seconds...")
                    await asyncio.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    print(f"\nMax retries reached. Error: {e!s}")
                    break

        print("\n")  # New line after the complete response

In [119]:
# For Jupyter notebooks and IPython environments
import nest_asyncio

nest_asyncio.apply()

# Run the async function
await chat_loop()

Assistant: 

Error while processing OpenAI request: Error code: 400 - {'error': {'message': "Invalid 'tools[3].function.name': string too long. Expected a string with maximum length 64, but got a string with length 67 instead.", 'type': 'invalid_request_error', 'param': 'tools[3].function.name', 'code': 'string_above_max_length'}}



An unexpected error occurred: Failed to process the OpenAI Request

Retrying in 1 seconds...


Error while processing OpenAI request: Error code: 400 - {'error': {'message': "Invalid 'tools[3].function.name': string too long. Expected a string with maximum length 64, but got a string with length 67 instead.", 'type': 'invalid_request_error', 'param': 'tools[3].function.name', 'code': 'string_above_max_length'}}



An unexpected error occurred: Failed to process the OpenAI Request

Retrying in 2 seconds...


Error while processing OpenAI request: Error code: 400 - {'error': {'message': "Invalid 'tools[3].function.name': string too long. Expected a string with maximum length 64, but got a string with length 67 instead.", 'type': 'invalid_request_error', 'param': 'tools[3].function.name', 'code': 'string_above_max_length'}}



Max retries reached. Error: Failed to process the OpenAI Request


Goodbye!
