In [1]:
import pkg_resources

packages = ["langgraph", "langmem", "sentence-transformers", "torch", "numpy", "openai"]

with open("requirements.txt", "w") as f:
    for pkg in packages:
        try:
            version = pkg_resources.get_distribution(pkg).version
            line = f"{pkg}=={version}\n"
            print(line.strip())  # Print to console
            f.write(line)  # Write to file
        except pkg_resources.DistributionNotFound:
            print(f"{pkg} is not installed.")


langgraph==0.3.2
langmem==0.0.14
sentence-transformers==3.4.1
torch==2.6.0
numpy==2.2.3
openai==1.65.2


  import pkg_resources


In [1]:
from sentence_transformers import SentenceTransformer

# Load model (downloads ~420MB first time)
model = SentenceTransformer('all-mpnet-base-v2')

# Test it
text = "I like Python"
embedding = model.encode(text)
print(f"Embedding shape: {embedding.shape}")  # Should be (768,)
print(f"First few values: {embedding[:5]}")

  from .autonotebook import tqdm as notebook_tqdm


Embedding shape: (768,)
First few values: [-0.01384925  0.06499632 -0.03022257 -0.03865709  0.02064267]


In [4]:
from azure_openai_llm import get_llm

llm = get_llm()

res = llm.invoke(input=[{"role": "user", "content": "Hello, Azure!"}])

print(res.content)

2025-03-04 13:26:48,156 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


Hello! It seems you might be mistaking me for Azure, but I’m here to assist you with any questions or information you need. How can I help you today?


In [1]:
import os
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore
from langmem import create_manage_memory_tool, create_search_memory_tool
from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm

# Load local embedding model
model = SentenceTransformer('all-MiniLM-L12-v2')

# Embedding function for LangMem
def embed_func(text):
    return model.encode(text, convert_to_numpy=True)

# Setup memory store
store = InMemoryStore(index={"dims": 384, "embed": embed_func})

# Get Azure ChatGPT model
llm = get_llm()

# Create memory tools with namespace
manage_memory = create_manage_memory_tool(namespace=("user_1",))
search_memory = create_search_memory_tool(namespace=("user_1",))

# Create agent with memory tools
agent = create_react_agent(
    model=llm, 
    tools=[manage_memory, search_memory],
    store=store
)

def print_stored_memories():
    print("\n--- Stored Memories ---")

    # Check if the store has stored data
    if hasattr(store, "_data") and isinstance(store._data, dict):
        all_memories = list(store._data.keys())  # Get stored keys
    else:
        print("Error: Cannot retrieve stored keys.")
        return

    if not all_memories:
        print("No memories stored yet.")
    else:
        # Fetch values without extra namespace argument
        values = [store.get(key) for key in all_memories]

        for i, (key, value) in enumerate(zip(all_memories, values)):
            print(f"Memory {i+1}: Key={key}, Value={value}")

    print("----------------------\n")

# Chat loop with memory
def chat_with_membot():
    print("MemBot: Hi! Ask me anything. (Type 'exit' to stop)")
    try:
        while True:
            user_input = input("You: ")
            if user_input.lower() == 'exit':
                print("MemBot: Goodbye!")
                break
            
            # Run the agent with memory
            response = agent.invoke({'messages': [{"role": "user", "content": user_input}]})
            
            # Extract AI response (FIXED)
            if isinstance(response, dict) and "messages" in response:
                ai_response = response["messages"][-1].content  # FIXED
            else:
                ai_response = str(response)
            
            print(f"MemBot: {ai_response}")
            
            # Print what’s stored after each turn
            print_stored_memories()

    except Exception as e:
        print("Error occurred: ", e)
    except KeyboardInterrupt:
        print("MemBot: Goodbye!")

if __name__ == "__main__":
    chat_with_membot()


  from .autonotebook import tqdm as notebook_tqdm
2025-03-04 16:17:06,061 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


MemBot: Hi! Ask me anything. (Type 'exit' to stop)
MemBot: Hello! How can I assist you today?

--- Stored Memories ---
No memories stored yet.
----------------------

MemBot: I've noted that you like Python! If you have any questions or topics related to Python that you'd like to discuss, feel free to ask!

--- Stored Memories ---
Error occurred:  BaseStore.get() missing 1 required positional argument: 'key'


In [1]:
import langmem
print(dir(langmem))

['Prompt', 'ReflectionExecutor', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'create_manage_memory_tool', 'create_memory_manager', 'create_memory_searcher', 'create_memory_store_manager', 'create_multi_prompt_optimizer', 'create_prompt_optimizer', 'create_search_memory_tool', 'create_thread_extractor', 'errors', 'knowledge', 'prompts', 'reflection', 'utils']


In [None]:
# In the Hot Path

from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore
from langgraph.utils.config import get_store 
from langmem import (
    # Lets agent create, update, and delete memories 
    create_manage_memory_tool,
)

from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm
from uuid import uuid4

# Embedding model setup
embedding_model = SentenceTransformer('all-MiniLM-L12-v2')

def embed_text(text: str) -> list:
    """Convert text to embeddings using all-MiniLM-L12-v2."""
    return embedding_model.encode(text, convert_to_numpy=True).tolist()

store = InMemoryStore(index={"dims": 384, "embed": embed_text}) 

def prompt(state):
    """Prepare the messages for the LLM."""
    # Get store from configured contextvar; 
    store = get_store() # Same as that provided to `create_react_agent`
    memories = store.search(
        # Search within the same namespace as the one
        # we've configured for the agent
        ("memories",),
        query=state["messages"][-1].content,
    )
    system_msg = f"""You are a helpful assistant.

## Memories
<memories>
{memories}
</memories>
"""
    return [{"role": "system", "content": system_msg}, *state["messages"]]


checkpointer = MemorySaver() # Checkpoint graph state 

agent = create_react_agent( 
    model=get_llm(),
    prompt=prompt,
    tools=[ # Add memory tools 
        # The agent can call "manage_memory" to
        # create, update, and delete memories by ID
        # Namespaces add scope to memories. To
        # scope memories per-user, do ("memories", "{user_id}"): 
        create_manage_memory_tool(namespace=("memories",)),
    ],
    # Our memories will be stored in this provided BaseStore instance
    store=store,
    # And the graph "state" will be checkpointed after each node
    # completes executing for tracking the chat history and durable execution
    checkpointer=checkpointer, 
)

  from .autonotebook import tqdm as notebook_tqdm
2025-03-05 07:58:56,510 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


In [None]:
config = {"configurable": {"thread_id": "thread-a"}}

# Use the agent. The agent hasn't saved any memories,
# so it doesn't know about us
response = agent.invoke(
    {
        "messages": [
            {"role": "user", "content": "Know which display mode I prefer?"}
        ]
    },
    config=config,
)
# Output: "I don't seem to have any stored memories about your display mode preferences..."

agent.invoke(
    {
        "messages": [
            {"role": "user", "content": "dark. Remember that."}
        ]
    },
    # We will continue the conversation (thread-a) by using the config with
    # the same thread_id
    config=config,
)
print(response["messages"][-1].content)

# New thread = new conversation!
new_config = {"configurable": {"thread_id": "thread-b"}}
# The agent will only be able to recall
# whatever it explicitly saved using the manage_memories tool
response = agent.invoke(
    {"messages": [{"role": "user", "content": "Hey there. Do you remember me? What are my preferences?"}]},
    config=new_config,
)
print(response["messages"][-1].content)
# Output: "Based on my memory search, I can see that you've previously indicated a preference for dark display mode..."


I currently don't have any information about your preferred display mode. Would you like me to remember your preference now? If so, please let me know what it is!
I currently don't have any information about your preferred display mode. Would you like me to remember your preference now? If so, please let me know what it is!
Yes, I remember you! You prefer a dark display mode. If you have any other preferences or requests, feel free to let me know!


In [2]:
print(llm.invoke("What is your model name"))

content="I’m based on OpenAI's GPT-3 model. If you have any specific questions or need assistance, feel free to ask!" additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 27, 'total_tokens': 54, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_b705f0c291', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sex

In [6]:
# In the Background

from langchain.chat_models import init_chat_model
from langgraph.func import entrypoint
from langgraph.store.memory import InMemoryStore

from langmem import ReflectionExecutor, create_memory_store_manager
from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm
from uuid import uuid4

# Embedding model setup
embedding_model = SentenceTransformer('all-MiniLM-L12-v2')

def embed_text(text: str) -> list:
    """Convert text to embeddings using all-MiniLM-L12-v2."""
    return embedding_model.encode(text, convert_to_numpy=True).tolist()

store = InMemoryStore(index={"dims": 384, "embed": embed_text})

llm = get_llm()

# Create memory manager Runnable to extract memories from conversations
memory_manager = create_memory_store_manager(
    llm,
    # Store memories in the "memories" namespace (aka directory)
    namespace=("memories",),  # 
)

@entrypoint(store=store)  # Create a LangGraph workflow
async def chat(message: str):
    response = llm.invoke(message)

    # memory_manager extracts memories from conversation history
    # We'll provide it in OpenAI's message format
    to_process = {"messages": [{"role": "user", "content": message}] + [response]}
    await memory_manager.ainvoke(to_process)  # 
    return response.content
# Run conversation as normal
response = await chat.ainvoke(
    "I like dogs. My dog's name is Fido.",
)
print(response)
# Output: That's nice! Dogs make wonderful companions. Fido is a classic dog name. What kind of dog is Fido?

2025-03-05 08:10:37,931 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


That's great! Dogs can be wonderful companions. What kind of breed is Fido, and what do you enjoy doing together?


In [7]:
print(store.search(("memories",)))

[Item(namespace=['memories'], key='cc0f7be7-0fa6-4b05-8d92-4e829916e024', value={'kind': 'Memory', 'content': {'content': 'The user likes dogs and has a dog named Fido.'}}, created_at='2025-03-05T02:40:49.796761+00:00', updated_at='2025-03-05T02:40:49.796761+00:00', score=None)]


In [8]:
import asyncio
from langchain.chat_models import init_chat_model
from langgraph.func import entrypoint
from langgraph.store.memory import InMemoryStore

from langmem import ReflectionExecutor, create_memory_store_manager
from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm

# Embedding model setup
embedding_model = SentenceTransformer('all-MiniLM-L12-v2')

def embed_text(text: str) -> list:
    """Convert text to embeddings using all-MiniLM-L12-v2."""
    return embedding_model.encode(text, convert_to_numpy=True).tolist()

store = InMemoryStore(index={"dims": 384, "embed": embed_text})

llm = get_llm()

# Create memory manager Runnable to extract memories from conversations
memory_manager = create_memory_store_manager(
    llm,
    namespace=("memories",),
)

async def extract_memories_async(to_process):
    """Run memory extraction in the background."""
    loop = asyncio.get_running_loop()
    loop.create_task(memory_manager.ainvoke(to_process))

@entrypoint(store=store)
async def chat(message: str):
    response = llm.invoke(message)

    # Prepare conversation history for memory extraction
    to_process = {"messages": [{"role": "user", "content": message}] + [response]}
    
    # Trigger background memory extraction
    asyncio.create_task(extract_memories_async(to_process))

    return response.content

# Run conversation as normal
response = await chat.ainvoke("I like dogs. My dog's name is Fido.")
print(response)


2025-03-05 08:52:02,015 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


That's great! Dogs make wonderful companions. How long have you had Fido? What breed is he?


In [9]:
print(store.search(("memories",)))

[Item(namespace=['memories'], key='b51aff0e-11dd-4988-b94d-7fe49701af8e', value={'kind': 'Memory', 'content': {'content': 'User likes dogs and has a dog named Fido.'}}, created_at='2025-03-05T03:22:06.234598+00:00', updated_at='2025-03-05T03:22:06.234598+00:00', score=None)]


In [14]:
import asyncio
from langchain.chat_models import init_chat_model
from langgraph.func import entrypoint
from langgraph.store.memory import InMemoryStore
from langmem import create_memory_store_manager
from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm

# Embedding model setup
embedding_model = SentenceTransformer('all-MiniLM-L12-v2')

def embed_text(text: str) -> list:
    """Convert text to embeddings using all-MiniLM-L12-v2."""
    return embedding_model.encode(text, convert_to_numpy=True).tolist()

storee = InMemoryStore(index={"dims": 384, "embed": embed_text})

llm = get_llm()

# Create memory manager to extract memories from conversations
memory_manager = create_memory_store_manager(
    llm,
    namespace=("memories",),
)

# Memory extraction queue
memory_queue = asyncio.Queue()

async def memory_worker():
    """Background worker that processes memory extraction tasks sequentially inside LangGraph context."""
    while True:
        to_process = await memory_queue.get()
        try:
            # Use LangGraph's entrypoint to ensure execution within the proper store context
            @entrypoint(store=storee)
            async def extract_memory(to_process):
                await memory_manager.ainvoke(to_process)

            await extract_memory.ainvoke(to_process)

        except Exception as e:
            print(f"Memory extraction error: {e}")
        finally:
            memory_queue.task_done()

# Start the memory worker in the background
asyncio.create_task(memory_worker())

@entrypoint(store=storee)
async def chat(message: str):
    response = llm.invoke(message)

    # Prepare conversation history for memory extraction
    to_process = {"messages": [{"role": "user", "content": message}] + [response]}
    
    # Add the task to the queue for background processing
    await memory_queue.put(to_process)

    return response.content

# Run conversation as normal
response = await chat.ainvoke("I like cat. My dog's name is Fido.")
print(response)

print('-----------------------')
print(storee.search(("memories",)))

2025-03-05 08:57:57,528 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


That's great! Cats and dogs can make wonderful companions. Do you have any favorite stories or experiences with your cat or Fido that you'd like to share? Or is there something specific you’d like to know or discuss about them?
-----------------------
[]


In [15]:
print(storee.search(("memories",)))

[Item(namespace=['memories'], key='77eb67ec-497e-47ff-a533-e9b4ea24a705', value={'kind': 'Memory', 'content': {'content': 'User enjoys cats and has a dog named Fido.'}}, created_at='2025-03-05T03:28:03.998115+00:00', updated_at='2025-03-05T03:28:03.998115+00:00', score=None)]


In [16]:
"""
MemBot: A context-aware chatbot using LangGraph and LangMem with InMemorySaver.
- Uses Azure ChatGPT for responses.
- Stores every query and response in InMemoryStore with all-MiniLM-L12-v2 embeddings.
- Persists messages state in-memory via thread_id and InMemorySaver.
- Moves memory storage to a background queue for responsiveness.
"""

import asyncio
from langgraph.prebuilt import create_react_agent
from langgraph.checkpoint.memory import MemorySaver
from langgraph.store.memory import InMemoryStore
from langmem import create_manage_memory_tool, create_search_memory_tool
from sentence_transformers import SentenceTransformer
from azure_openai_llm import get_llm

# Embedding model setup
embedding_model = SentenceTransformer('all-MiniLM-L12-v2')

def embed_text(text: str) -> list:
    """Convert text to embeddings using all-MiniLM-L12-v2."""
    return embedding_model.encode(text, convert_to_numpy=True).tolist()

# Memory store and checkpointer setup
NAMESPACE = ("user_1",)
memory_store = InMemoryStore(index={"dims": 384, "embed": embed_text})
checkpointer = MemorySaver()  # In-memory persistence for messages state

# Azure ChatGPT model
llm = get_llm()

# Memory tools
manage_memory_tool = create_manage_memory_tool(namespace=NAMESPACE)
search_memory_tool = create_search_memory_tool(namespace=NAMESPACE)

# System prompt
SYSTEM_PROMPT = """
You are MemBot, a helpful assistant with memory. Your goals:
1. Assist users conversationally.
2. Use `manage_memory_tool` to store EVERY user query and assistant response as a single memory entry (e.g., "User: I like Python | Bot: Noted, you like Python").
3. For questions about past interactions (e.g., "What was my first message?"), ALWAYS use `search_memory_tool` to retrieve relevant memories. Sort memories by order (earliest first) and return the EXACT user input from the first relevant memory. If the tool fails, use the conversation history (messages) to find the first user input.
Keep responses natural and use the full conversation history (passed in messages) for coherence.
"""

# Agent setup
agent = create_react_agent(
    model=llm,
    tools=[manage_memory_tool, search_memory_tool],
    store=memory_store,
    checkpointer=checkpointer,
    prompt=SYSTEM_PROMPT
)

# Queue for background memory storage
memory_queue = asyncio.Queue()

async def memory_worker():
    """Background worker that processes memory storage asynchronously."""
    while True:
        memory_entry = await memory_queue.get()
        try:
            agent.invoke(
                {"messages": [{"role": "system", "content": f"Use manage_memory_tool to store: {memory_entry}"}]},
                config={"configurable": {"thread_id": "user_1_thread"}}
            )
        except Exception as e:
            print(f"Memory storage error: {e}")
        finally:
            memory_queue.task_done()

# Start background memory processing
asyncio.create_task(memory_worker())

def print_stored_memories() -> None:
    """Print all memories stored in InMemoryStore."""
    print("\n--- Stored Memories ---")
    try:
        all_memories = memory_store._data.get(NAMESPACE, {})
        if not all_memories:
            print("No memories stored yet.")
        else:
            for i, (key, item) in enumerate(sorted(all_memories.items(), key=lambda x: x[0]), 1):
                value = getattr(item, "value", "None") if item else "None"
                if isinstance(value, dict) and "content" in value:
                    value = value["content"]
                print(f"Memory {i}: Key={key}, Value={value}")
    except Exception as e:
        print(f"Error retrieving memories: {e}")
    print("----------------------\n")

def chat_with_membot() -> None:
    """Run an interactive chat loop with MemBot."""
    print("MemBot: Hi! Ask me anything. (Type 'exit' to stop)")
    conversation_history = [{"role": "system", "content": SYSTEM_PROMPT}]
    config = {"configurable": {"thread_id": "user_1_thread"}}  # Thread-specific state

    try:
        while True:
            user_input = input("You: ").strip()
            if user_input.lower() == "exit":
                print("MemBot: Goodbye!")
                break

            # Normalize input
            normalized_input = user_input.lower()

            # Add user input to history
            conversation_history.append({"role": "user", "content": user_input})

            # Invoke agent with thread config
            response = agent.invoke({"messages": conversation_history}, config=config)

            # Extract response
            ai_response = (
                response["messages"][-1].content
                if isinstance(response, dict) and "messages" in response
                else str(response)
            )
            print(f"MemBot: {ai_response}")

            # Add bot response to history
            conversation_history.append({"role": "assistant", "content": ai_response})

            # Store memory asynchronously in the background
            memory_entry = f"User: {normalized_input} | Bot: {ai_response}"
            asyncio.create_task(memory_queue.put(memory_entry))  # Queue it

            # Show stored memories
            print_stored_memories()

    except Exception as e:
        print(f"Error occurred: {e}")
    except KeyboardInterrupt:
        print("\nMemBot: Goodbye!")

if __name__ == "__main__":
    chat_with_membot()


2025-03-05 13:13:49,531 - INFO - Client ID and Client Secret found [azure_openai_llm.py:54]


MemBot: Hi! Ask me anything. (Type 'exit' to stop)
MemBot: Hi there! How can I assist you today?

--- Stored Memories ---
Memory 1: Key=a6fa2045-cb2d-4a84-b071-6b735d5fcdd2, Value=User: hi
----------------------

MemBot: Hello! What’s on your mind today?

--- Stored Memories ---
Memory 1: Key=0d82d3d4-5289-46da-a2a3-0be0ea83486b, Value=User: hello | Bot: Hi there! How can I assist you today?
Memory 2: Key=a6fa2045-cb2d-4a84-b071-6b735d5fcdd2, Value=User: hi
----------------------

MemBot: Your first message was "hi". Is there anything specific you would like to know or discuss?

--- Stored Memories ---
Memory 1: Key=0d82d3d4-5289-46da-a2a3-0be0ea83486b, Value=User: hello | Bot: Hi there! How can I assist you today?
Memory 2: Key=a6fa2045-cb2d-4a84-b071-6b735d5fcdd2, Value=User: hi
----------------------

MemBot: Your last question was, "what did I asked." Would you like to ask something else or clarify further?

--- Stored Memories ---
Memory 1: Key=0d82d3d4-5289-46da-a2a3-0be0ea83486b

Memory storage error: Error code: 429 - {'fault': {'faultstring': 'Spike arrest violation. Allowed rate : MessageRate{messagesPerPeriod=15, periodInMicroseconds=60000000, maxBurstMessageCount=1.5}', 'detail': {'errorcode': 'policies.ratelimit.SpikeArrestViolation'}}}
