# Dynamic FAISS Sentence RAG Demo

- FAISS index for vector similarity search
- Runtime insertion of new sentences
- Paragraph query returns top-N most relevant memory sentences


In [10]:
%pip install -q faiss-cpu sentence-transformers numpy ollama

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [11]:
import re
from dataclasses import dataclass
from typing import List

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


In [12]:
# Sentence boundary: whitespace that follows '.', '!' or '?', or any run of newlines.
SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")


def split_into_sentences(text: str) -> List[str]:
    """Split ``text`` into trimmed, non-empty sentence strings.

    Args:
        text: Raw text. ``None`` or an empty string is treated as no input.

    Returns:
        The fragments between sentence boundaries, stripped of surrounding
        whitespace, in their original order. Blank fragments are dropped.
    """
    sentences: List[str] = []
    for fragment in SENTENCE_SPLIT_RE.split(text or ""):
        trimmed = fragment.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences

@dataclass
class SearchHit:
    """One result produced by ``DynamicSentenceMemory.search``."""
    sentence: str  # text of the stored sentence that matched the query
    score: float  # score the index search reported for this sentence

@dataclass
class Memory:
    """A sentence paired with a relevance value."""
    sentence: str  # the memory text
    relevance: float  # NOTE(review): scale is not defined here; the insert tool spec describes 1.0-100.0 — confirm

class DynamicSentenceMemory:
    """Growable sentence store with embedding-based similarity search.

    Sentences are embedded with a SentenceTransformer model (L2-normalised
    vectors) and kept in a FAISS inner-product index, so search scores are
    cosine similarities. The index is created lazily on the first insert.

    Attributes:
        model: The SentenceTransformer used to embed text.
        index: The FAISS index, or ``None`` until the first sentence is added.
        sentences: Stored sentence texts; position ``i`` matches index row ``i``.
        relevances: Relevance value for each stored sentence, parallel to
            ``sentences``.
    """

    # Relevance given to sentences that are added without an explicit value.
    DEFAULT_RELEVANCE = 100.0

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty store."""
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.sentences: List[str] = []
        self.relevances: List[float] = []

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Return normalised float32 embeddings, one row per input text."""
        if isinstance(texts, str):
            # A bare string would be encoded to a 1-D vector; the index needs 2-D input.
            texts = [texts]
        vectors = self.model.encode(
            texts,
            convert_to_numpy=True,
            normalize_embeddings=True,
        ).astype(np.float32)
        return vectors

    def add_sentences(self, sentences: List[str], relevance: float = DEFAULT_RELEVANCE) -> None:
        """Embed and store each non-blank sentence.

        Args:
            sentences: Sentence texts; ``None``/blank entries are skipped.
            relevance: Relevance value recorded for every sentence added by
                this call.
        """
        clean = [s.strip() for s in sentences if s and s.strip()]
        if not clean:
            return

        vectors = self._embed(clean)
        if self.index is None:
            self.index = faiss.IndexFlatIP(vectors.shape[1])
        self.index.add(vectors)
        self.sentences.extend(clean)
        self.relevances.extend([float(relevance)] * len(clean))

    def add_text(self, text: str, relevance: float = DEFAULT_RELEVANCE) -> None:
        """Split ``text`` into sentences and store each of them."""
        self.add_sentences(split_into_sentences(text), relevance)

    def add_memory(self, sentence: str, relevance: float) -> None:
        """Store a single sentence together with its relevance value.

        Blank input is ignored. The sentence is appended as one entry (it is
        wrapped in a list so it is neither embedded as a 1-D vector nor
        spread into individual characters).
        """
        clean = (sentence or "").strip()
        if not clean:
            return
        self.add_sentences([clean], relevance)

    def decay_memory(self, decay_factor: float = 0.9) -> None:
        """Lower every stored relevance by ``decay_factor``, never below 0.

        Despite the parameter name the amount is subtracted, not multiplied,
        matching the arithmetic this method has always used.
        """
        self.relevances = [max(0.0, value - decay_factor) for value in self.relevances]

    def search(self, paragraph: str, top_n: int = 3) -> List["SearchHit"]:
        """Return up to ``top_n`` stored sentences most similar to ``paragraph``.

        Returns an empty list when nothing has been stored or ``top_n`` is
        not positive. Hits are ordered as returned by the index search.
        """
        if self.index is None or not self.sentences or top_n <= 0:
            return []

        query = self._embed([paragraph])
        k = min(top_n, len(self.sentences))
        scores, idxs = self.index.search(query, k)

        hits: List[SearchHit] = []
        for score, idx in zip(scores[0], idxs[0]):
            if idx < 0:
                # Negative ids are skipped: they do not refer to a stored sentence.
                continue
            hits.append(SearchHit(sentence=self.sentences[int(idx)], score=float(score)))
        return hits


In [18]:
# Build a fresh memory store and seed it with background facts about Mitch.
memory = DynamicSentenceMemory()

seed_text = """
Mitch keeps spare boots under his bed.
Mitch hides his knives in the cellar of the copper cup.
Mitch is secretly murdering people in the town.
Mitch and the Wizard hate eachother.
Mitch knows the Wizard is reviving his victims.
Mitch wants to manipulate the player into killing the Wizard for him.
Mitch secretly kills people at night.
"""

# Split the block into one sentence per line and index each of them.
# (The store exposes add_sentences; there is no add_text method on the class.)
memory.add_sentences(split_into_sentences(seed_text))

# Runtime insertion: add new facts during play
memory.add_sentences([
    "Mitch suspects the player stole his boots.",
    "Mitch is barefoot and very angry about it.",
])


for sentence in memory.sentences:
    print(f"- {sentence}")
print(f"Indexed sentences: {len(memory.sentences)}")

- Mitch keeps spare boots under his bed.
- Mitch hides his knives in the cellar of the copper cup.
- Mitch is secretly murdering people in the town.
- Mitch and the Wizard hate eachother.
- Mitch knows the Wizard is reviving his victims.
- Mitch wants to manipulate the player into killing the Wizard for him.
- Mitch secretly kills people at night.
- Mitch suspects the player stole his boots.
- Mitch is barefoot and very angry about it.
Indexed sentences: 9


In [25]:
# Sample queries: one long chat-style paragraph and three single-word probes.
query_paragraph_chat = """
I say to Mitch, whats going on with the shoes there buddy? You're walking around barefoot in the middle of the night; and why do you have blood on your hands. You're also naked in the middle of the night. You have a knife in your hand. What's going on there buddy? You're also naked in the middle of the night. You have a knife in your hand. What 's going on there buddy? You're also naked in the middle of the night.
"""
query_knives = """
Weapons
"""
query_boots = """
clothing
"""
query_murder = """
stab
"""

# Ask for as many hits as there are stored sentences so the full ranking is shown.
top_n = len(memory.sentences)
# Swap in one of the other queries above to see how nuanced the matching can be. -JM
hits = memory.search(query_paragraph_chat, top_n=top_n)

# Print the hits in the order returned, numbered from 1.
for rank, hit in enumerate(hits, start=1):
    print(f"{rank}. score={hit.score:.4f} | {hit.sentence}")


1. score=0.6363 | Mitch is barefoot and very angry about it.
2. score=0.5748 | Mitch secretly kills people at night.
3. score=0.5718 | Mitch keeps spare boots under his bed.
4. score=0.5084 | Mitch hides his knives in the cellar of the copper cup.
5. score=0.5021 | Mitch suspects the player stole his boots.
6. score=0.5001 | Mitch is secretly murdering people in the town.
7. score=0.3652 | Mitch wants to manipulate the player into killing the Wizard for him.
8. score=0.3380 | Mitch knows the Wizard is reviving his victims.
9. score=0.3023 | Mitch and the Wizard hate eachother.


In [24]:
# Same ranking as the previous cell. NOTE(review): this cell's saved output comes
# from an earlier execution (In [24]) than the cell above (In [25]), so the two sets
# of scores differ — presumably the query text was edited in between; re-run both
# cells in order to confirm.
top_n = len(memory.sentences)
# Swap in one of the other queries to see how nuanced the matching can be. -JM
hits = memory.search(query_paragraph_chat, top_n=top_n)

for rank, hit in enumerate(hits, start=1):
    print(f"{rank}. score={hit.score:.4f} | {hit.sentence}")


1. score=0.7300 | Mitch is barefoot and very angry about it.
2. score=0.6022 | Mitch keeps spare boots under his bed.
3. score=0.5448 | Mitch suspects the player stole his boots.
4. score=0.5192 | Mitch secretly kills people at night.
5. score=0.4462 | Mitch is secretly murdering people in the town.
6. score=0.3990 | Mitch hides his knives in the cellar of the copper cup.
7. score=0.3375 | Mitch wants to manipulate the player into killing the Wizard for him.
8. score=0.3330 | Mitch knows the Wizard is reviving his victims.
9. score=0.2989 | Mitch and the Wizard hate eachother.


In [None]:
"""Minimal entity model and memory retrieval tool."""

from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class Entity:
    """A named entity together with the sentence memory the tools operate on."""
    name: str  # display name; register_entity lower-cases and strips it to form the registry key
    description: str  # free-text description of the entity
    memory: DynamicSentenceMemory  # store that retrieve_memory_tool searches and write_memory_tool adds to


# Registered entities, keyed by the lower-cased, stripped entity name.
ENTITY_REGISTRY: Dict[str, Entity] = {}

def build_memory_retrieval_tool_spec(*entity_names: str) -> Dict[str, Any]:
    """Build the tool spec for ``retrieve_memory_tool``.

    Args:
        *entity_names: Optional entity names to offer as the only allowed
            values of ``entity_name``. When none are given the ``enum``
            constraint is omitted entirely, because an empty ``enum`` would
            allow no value at all.

    Returns:
        A function-tool specification dictionary.
    """
    entity_name_schema: Dict[str, Any] = {
        "type": "string",
        "description": "Name of the registered entity.",
    }
    if entity_names:
        entity_name_schema["enum"] = list(entity_names)

    return {
        "type": "function",
        "function": {
            "name": "retrieve_memory_tool",
            "description": "Retrieve memory for a registered entity.",
            "parameters": {
                "type": "object",
                "properties": {
                    "entity_name": entity_name_schema,
                    "context": {
                        "type": "string",
                        "description": "Context used for similarity search.",
                    },
                    "top_n": {
                        "type": "integer",
                        "description": "Maximum number of memories to return.",
                        "minimum": 1,
                        "default": 4,
                    },
                },
                "required": ["entity_name", "context"],
                "additionalProperties": False,
            },
        },
    }


# Static spec without an entity enum. For now static is fine; to constrain the
# model to the registered entities, rebuild the spec before each tool call with
# build_memory_retrieval_tool_spec(*ENTITY_REGISTRY). -JM
OLLAMA_MEMORY_RETRIEVAL_TOOL_SPEC: Dict[str, Any] = build_memory_retrieval_tool_spec()


def build_memory_insert_tool_spec(*entity_names: str) -> Dict[str, Any]:
    """Build the tool spec for ``write_memory_tool``.

    The property names mirror the parameters of ``write_memory_tool``
    (``entity_name``, ``memory``, ``relevance``) so that a tool call can be
    forwarded to the function as keyword arguments.

    Args:
        *entity_names: Optional entity names to offer as the only allowed
            values of ``entity_name``. When none are given the ``enum``
            constraint is omitted, because an empty ``enum`` would allow no
            value at all.

    Returns:
        A function-tool specification dictionary.
    """
    entity_name_schema: Dict[str, Any] = {
        "type": "string",
        "description": "Name of the registered entity.",
    }
    if entity_names:
        entity_name_schema["enum"] = list(entity_names)

    return {
        "type": "function",
        "function": {
            "name": "write_memory_tool",
            "description": "Write memory for a registered entity.",
            "parameters": {
                "type": "object",
                "properties": {
                    "entity_name": entity_name_schema,
                    "memory": {
                        "type": "string",
                        "description": "One sentence memory to add to the entity's memory store.",
                    },
                    "relevance": {
                        # JSON Schema has no "float" type; "number" covers non-integers.
                        "type": "number",
                        "description": "Relevance score for the new memory on a scale of 1.0 to 100.0.",
                        "minimum": 1.0,
                        "maximum": 100.0,
                        "default": 100.0,
                    },
                },
                "required": ["entity_name", "memory"],
                "additionalProperties": False,
            },
        },
    }


# Static spec without an entity enum. For now static is fine; to constrain the
# model to the registered entities, rebuild the spec before each tool call with
# build_memory_insert_tool_spec(*ENTITY_REGISTRY). -JM
OLLAMA_MEMORY_INSERT_SPEC: Dict[str, Any] = build_memory_insert_tool_spec()

def write_memory_tool(entity_name: str, memory: str, relevance: float = 100.0) -> Dict[str, Any]:
    """Add one memory sentence to a registered entity's memory store.

    Args:
        entity_name: Name of the entity; matched case-insensitively and
            ignoring surrounding whitespace.
        memory: The sentence to store.
        relevance: Relevance value stored with the sentence. Defaults to
            100.0, the default advertised by the insert tool spec, so a tool
            call that omits it still succeeds.

    Returns:
        On success: ``success=True`` plus the entity name, the stored text and
        its relevance. On failure (unknown entity or blank memory text):
        ``success=False`` with a ``message`` and an empty ``memories`` list.
    """
    entity = ENTITY_REGISTRY.get(entity_name.lower().strip())
    if entity is None:
        return {
            "success": False,
            "message": f"Entity '{entity_name}' is not registered.",
            "memories": [],
        }

    sentence = (memory or "").strip()
    if not sentence:
        # Refuse blank input rather than reporting success for nothing stored.
        return {
            "success": False,
            "message": "Memory text is empty.",
            "memories": [],
        }

    entity.memory.add_memory(sentence, relevance)
    return {
        "success": True,
        "entity_name": entity.name,
        "memory": sentence,
        "relevance": relevance,
    }

def register_entity(entity: Entity) -> None:
    """Store ``entity`` in the registry under its lower-cased, stripped name.

    Registering a second entity whose normalised name is the same replaces
    the earlier entry.
    """
    registry_key = entity.name.lower().strip()
    ENTITY_REGISTRY[registry_key] = entity


def retrieve_memory_tool(entity_name: str, context: str, top_n: int = 4) -> Dict[str, Any]:
    """Look up an entity and return its memories most similar to ``context``.

    Args:
        entity_name: Name of the entity; matched case-insensitively and
            ignoring surrounding whitespace.
        context: Text used as the similarity-search query.
        top_n: Maximum number of memories to return.

    Returns:
        ``success=True`` with the entity name and a ``memories`` list of
        ``{"sentence", "score"}`` dictionaries, or ``success=False`` with a
        ``message`` and an empty ``memories`` list when the entity is unknown.
    """
    registry_key = entity_name.lower().strip()
    entity = ENTITY_REGISTRY.get(registry_key)
    if entity is None:
        return {
            "success": False,
            "message": f"Entity '{entity_name}' is not registered.",
            "memories": [],
        }

    memories = []
    for hit in entity.memory.search(context, top_n=top_n):
        memories.append({"sentence": hit.sentence, "score": float(hit.score)})

    return {
        "success": True,
        "entity_name": entity.name,
        "memories": memories,
    }


In [17]:
from pathlib import Path
import sys

# Locate the repository root so the `orchestrator` package can be imported.
# If the current working directory has no `orchestrator` folder but its parent
# does, use the parent instead (the notebook may be started from a sub-folder). -JM
repo_root = Path.cwd()
if not (repo_root / "orchestrator").exists() and (repo_root.parent / "orchestrator").exists():
    repo_root = repo_root.parent
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

from orchestrator.llm_interaction.adapter import LLMAdapter
from orchestrator.runtime_flow.step_registry import build_steps

# Make the seeded memory store reachable by the tool functions under the name "Mitch".
register_entity(
    Entity(
        name="Mitch",
        description="Suspicious townsman with hidden motives.",
        memory=memory,
    )
)

# NOTE(review): the meaning of default_options / stage_options is defined by
# LLMAdapter, which is not shown here — presumably per-stage sampling overrides; confirm.
adapter = LLMAdapter(
    model="llama3.1:8b",
    default_options={"temperature": 0, "top_p": 0.9},
    stage_options={"narrate": {"temperature": 0, "top_p": 0.93}},
)

steps = build_steps()
query_text = query_paragraph_chat.strip()

# Prompt: tells the model how to call the retrieval tool, fixes the reply format,
# and appends the player's input.
payload_text = f"""
You have access to a tool called retrieve_memory_tool.
If you need relevant facts about Mitch, call retrieve_memory_tool with:
- entity_name: "Mitch"
- context: the player input (or a refined query)
- top_n: 4

After tool results are returned, write your response.

Format exactly:
Thoughts: <hidden reasoning>
Narrative: <story prose>

# Player Input
{query_text}
""".strip()

# The Python function itself (not the OLLAMA_* spec dictionary) is passed as the tool.
# The step returns a pair; only the first element is used here.
narrative, _ = steps["narrate"].run(
    adapter,
    payload_text,
    tools=[retrieve_memory_tool],
)

print(narrative)


Mitch looked down at his feet, a mixture of confusion and embarrassment on his face. "Oh, I... uh... I must have taken off my shoes when I was walking around earlier," he stammered. He glanced down at his hands, which were indeed smeared with a faint red substance. "And... um... I think I might have cut myself on something." Mitch's eyes darted around the room, as if searching for an explanation or an escape from the situation. Please provide further input to progress the story.
