# Dynamic FAISS Sentence RAG Demo

- FAISS index for vector similarity search
- Runtime insertion of new sentences
- Paragraph query returns top-N most relevant memory sentences


In [None]:
%pip install -q faiss-cpu sentence-transformers numpy ollama


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import re
from dataclasses import dataclass
from typing import List

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Sentence boundary: whitespace that follows '.', '!' or '?', or any run of newlines.
SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")


def split_into_sentences(text: str) -> List[str]:
    """Break ``text`` into trimmed, non-empty sentence strings.

    Args:
        text: Raw text to split. ``None`` or an empty string is treated as
            empty input.

    Returns:
        The pieces between sentence boundaries, in their original order, each
        stripped of surrounding whitespace. Pieces that are empty after
        stripping are dropped.
    """
    sentences: List[str] = []
    for fragment in SENTENCE_SPLIT_RE.split(text or ""):
        trimmed = fragment.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences

@dataclass
class SearchHit:
    """One result row produced by ``DynamicSentenceMemory.search``."""
    # The stored memory sentence that matched the query.
    sentence: str
    # Similarity score taken from the index search result, as a plain float.
    score: float

class DynamicSentenceMemory:
    """Growable sentence store backed by a FAISS inner-product index.

    Sentences are embedded with a SentenceTransformer model using normalized
    embeddings, so the inner-product scores returned by the index are cosine
    similarities. New sentences can be appended at any time.
    """

    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model and start with an empty memory.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(model_name)
        # Created lazily on the first insert, once the embedding width is known.
        self.index = None
        # sentences[i] is the text of the i-th vector added to the index.
        self.sentences: List[str] = []

    def _embed(self, texts: List[str]) -> np.ndarray:
        """Encode ``texts`` into normalized float32 vectors, one row per text."""
        vectors = self.model.encode(
            texts,
            convert_to_numpy=True,
            normalize_embeddings=True,
        ).astype(np.float32)
        return vectors

    def add_sentences(self, sentences: List[str]) -> None:
        """Append sentences to the memory and index them.

        Args:
            sentences: Candidate sentences. Each is stripped; entries that are
                falsy or whitespace-only are ignored. If nothing remains the
                call is a no-op.
        """
        clean = [s.strip() for s in sentences if s and s.strip()]
        if not clean:
            return

        vectors = self._embed(clean)
        if self.index is None:
            self.index = faiss.IndexFlatIP(vectors.shape[1])
        # Add to the index and the text list together so row ids stay aligned.
        self.index.add(vectors)
        self.sentences.extend(clean)

    def add_text(self, text: str) -> None:
        """Split ``text`` with ``split_into_sentences`` and add every piece."""
        self.add_sentences(split_into_sentences(text))

    def search(self, paragraph: str, top_n: int = 3) -> List[SearchHit]:
        """Return the stored sentences most similar to ``paragraph``.

        Args:
            paragraph: Query text; it is embedded as a single vector.
            top_n: Maximum number of hits to return. Values below 1 yield an
                empty list.

        Returns:
            Up to ``top_n`` hits (never more than the number of stored
            sentences), in the order FAISS returns them. Empty when nothing
            has been indexed yet.
        """
        if self.index is None or not self.sentences:
            return []

        k = min(top_n, len(self.sentences))
        if k < 1:
            # FAISS requires k > 0; asking for no results is simply "no hits",
            # and checking first also avoids embedding the query for nothing.
            return []

        query = self._embed([paragraph])
        scores, idxs = self.index.search(query, k)

        # FAISS marks missing results with a negative id; skip those slots.
        return [
            SearchHit(sentence=self.sentences[int(idx)], score=float(score))
            for score, idx in zip(scores[0], idxs[0])
            if idx >= 0
        ]


In [4]:
# Build a fresh memory each time this cell runs, so re-running it does not
# insert the same sentences twice.
memory = DynamicSentenceMemory()

# Seed facts. add_text() splits this block on sentence punctuation/newlines,
# so each line below becomes one memory sentence.
# NOTE(review): "Mictch" and "eachother" look like typos; they are left as-is
# here because the recorded outputs were produced with this exact text.
seed_text = """
Mitch keeps spare boots under his bed.
Mictch hides his knives in the cellar of the copper cup.
Mitch is secretly murdering people in the town.
Mitch and the Wizard hate eachother.
Mitch knows the Wizard is reviving his victims.
Mitch wants to manipulate the player into killing the Wizard for him.
Mitch secretly kills people at night.
"""

memory.add_text(seed_text)

# Runtime insertion: add new facts during play. These are appended to the
# same index that already holds the seed sentences.
memory.add_sentences([
    "Mitch suspects the player stole his boots.",
    "Mitch is barefoot and very angry about it.",
])


# Show everything stored, in insertion order, plus the total count.
for sentence in memory.sentences:
    print(f"- {sentence}")
print(f"Indexed sentences: {len(memory.sentences)}")

- Mitch keeps spare boots under his bed.
- Mictch hides his knives in the cellar of the copper cup.
- Mitch is secretly murdering people in the town.
- Mitch and the Wizard hate eachother.
- Mitch knows the Wizard is reviving his victims.
- Mitch wants to manipulate the player into killing the Wizard for him.
- Mitch secretly kills people at night.
- Mitch suspects the player stole his boots.
- Mitch is barefoot and very angry about it.
Indexed sentences: 9


In [None]:
# Example queries of different lengths: one full player utterance and three
# single-word probes. Each literal keeps its leading/trailing newline; this
# cell passes the chosen query to search() without stripping it.
query_paragraph_chat = """
I say to Mitch, whats going on with the shoes there buddy? You're walking around barefoot in the middle of the night; and why do you have blood on your hands.
"""
query_knives = """
Weapons
"""
query_boots = """
clothing
"""
query_murder = """
stab
"""

# Number of memory sentences to retrieve.
top_n = 4
hits = memory.search(query_boots, top_n=top_n)  # Swap in any of the query_* values above to see how nuanced the matching can be. -JM

# Print the hits in returned order, numbered from 1, with the score to 4 decimals.
for rank, hit in enumerate(hits, start=1):
    print(f"{rank}. score={hit.score:.4f} | {hit.sentence}")


1. score=0.2383 | Mitch keeps spare boots under his bed.
2. score=0.2120 | Mitch suspects the player stole his boots.
3. score=0.1546 | Mitch is barefoot and very angry about it.
4. score=0.1205 | Mictch hides his knives in the cellar of the copper cup.


In [None]:
"""Minimal entity model and memory retrieval tool."""

from dataclasses import dataclass
from typing import Any, Dict, List


@dataclass
class Entity:
    """A named entity together with the sentence memory the tool searches."""
    # Display name; register_entity() lower-cases and strips it to form the registry key.
    name: str
    # Free-text description; not read by any code in this cell.
    description: str
    # Sentence store that retrieve_memory_tool() queries for this entity.
    memory: DynamicSentenceMemory


# Registered entities, keyed by normalized (lower-cased, stripped) name.
ENTITY_REGISTRY: Dict[str, Entity] = {}

# Schema fragments for the three tool arguments, assembled into the spec below.
_ENTITY_NAME_PARAM: Dict[str, Any] = {
    "type": "string",
    "description": "Name of the registered entity.",
    # "enum": list(ENTITY_REGISTRY.keys()),  # Uncomment this to add the entity keys as options, could help coherence -JM
}
_CONTEXT_PARAM: Dict[str, Any] = {
    "type": "string",
    "description": "Context used for similarity search.",
}
_TOP_N_PARAM: Dict[str, Any] = {
    "type": "integer",
    "description": "Maximum number of memories to return.",
    "minimum": 1,
    "default": 4,
}

# Function-tool description for retrieve_memory_tool.
# The spec is built once, when this cell runs. If the entity registry is ever
# passed in to keep tool calls accurate, the spec must be rebuilt before each
# tool call. For now static is fine. -JM
OLLAMA_TOOL_SPEC: Dict[str, Any] = {
    "type": "function",
    "function": {
        "name": "retrieve_memory_tool",
        "description": "Retrieve memory for a registered entity.",
        "parameters": {
            "type": "object",
            "properties": {
                "entity_name": _ENTITY_NAME_PARAM,
                "context": _CONTEXT_PARAM,
                "top_n": _TOP_N_PARAM,
            },
            "required": ["entity_name", "context"],
            "additionalProperties": False,
        },
    },
}


def register_entity(entity: Entity) -> None:
    """Store ``entity`` in ``ENTITY_REGISTRY`` under its normalized name.

    The key is the entity's name lower-cased and stripped of surrounding
    whitespace. Registering a second entity with the same normalized name
    replaces the earlier one.
    """
    registry_key = entity.name.lower().strip()
    ENTITY_REGISTRY[registry_key] = entity


def retrieve_memory_tool(entity_name: str, context: str, top_n: int = 4) -> Dict[str, Any]:
    """Return top-N memory hits for a registered entity.

    The arguments may come straight from a model's tool call, so they are
    validated here rather than trusted to follow the tool schema. Every
    problem is reported as a structured failure instead of an exception.

    Args:
        entity_name: Name of the entity; matched case-insensitively and
            ignoring surrounding whitespace.
        context: Text used as the similarity-search query.
        top_n: Maximum number of memories to return. Anything ``int()``
            accepts is allowed (e.g. ``"3"``), but the value must be >= 1.

    Returns:
        On success: ``{"success": True, "entity_name": <registered name>,
        "memories": [{"sentence": str, "score": float}, ...]}``.
        On failure (unknown entity or invalid ``top_n``):
        ``{"success": False, "message": str, "memories": []}``.
    """
    # A non-string name (e.g. null from a tool call) cannot match any key.
    lookup_key = entity_name.lower().strip() if isinstance(entity_name, str) else None
    entity = ENTITY_REGISTRY.get(lookup_key) if lookup_key is not None else None
    if entity is None:
        return {
            "success": False,
            "message": f"Entity '{entity_name}' is not registered.",
            "memories": [],
        }

    # Coerce top_n; values that are not integer-like are treated as invalid.
    try:
        limit = int(top_n)
    except (TypeError, ValueError, OverflowError):
        limit = 0
    if limit < 1:
        # Reject here so an invalid count never reaches the search call,
        # where it would surface as an exception instead of a tool result.
        return {
            "success": False,
            "message": f"top_n must be an integer >= 1, got {top_n!r}.",
            "memories": [],
        }

    hits = entity.memory.search(context, top_n=limit)
    return {
        "success": True,
        "entity_name": entity.name,
        "memories": [
            {"sentence": hit.sentence, "score": float(hit.score)}
            for hit in hits
        ],
    }


In [7]:
from pathlib import Path
import sys

# Make the project-local `orchestrator` package importable: start from the
# current working directory and, if it has no `orchestrator` folder but its
# parent does, use the parent instead. -JM
repo_root = Path.cwd()
if not (repo_root / "orchestrator").exists() and (repo_root.parent / "orchestrator").exists():
    repo_root = repo_root.parent
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

# Project-local imports; they resolve only after the sys.path adjustment above.
from orchestrator.llm_interaction.adapter import LLMAdapter
from orchestrator.runtime_flow.step_registry import build_steps

# Expose the memory built earlier in the notebook under the entity name "Mitch".
register_entity(
    Entity(
        name="Mitch",
        description="Suspicious townsman with hidden motives.",
        memory=memory,
    )
)

# Use the long chat query, stripped of its surrounding newlines, as both the
# retrieval context and the player input.
query_text = query_paragraph_chat.strip()
tool_result = retrieve_memory_tool("Mitch", query_text, top_n=4)
if not tool_result["success"]:
    # Stop here rather than prompting the model without any retrieved memory.
    raise ValueError(tool_result.get("message", "Memory retrieval failed."))

# One bullet per retrieved memory; falls back to "- None" when nothing was retrieved.
memory_context = "\n".join(
    f"- {item['sentence']} (score={item['score']:.4f})"
    for item in tool_result["memories"]
) or "- None"

# NOTE(review): LLMAdapter is defined outside this notebook; default_options
# and stage_options look like sampling settings (global and per-stage) —
# confirm against the adapter.
adapter = LLMAdapter(
    model="gemma3:27b",
    default_options={"temperature": 0, "top_p": 0.9},
    stage_options={"narrate": {"temperature": 0, "top_p": 0.93}},
)

# build_steps() is indexed by step name below; only the "narrate" step is used.
steps = build_steps()
# Prompt payload: retrieved memories first, then the player's text.
payload_text = f"""
# Retrieved Memory
{memory_context}

# Player Input
{query_text}
""".strip()

# run() is unpacked as a pair; the first item is the narration text and the
# second is ignored here.
narrative, _ = steps["narrate"].run(adapter, payload_text)

print("Retrieved memory:")
for item in tool_result["memories"]:
    print(f"- {item['score']:.4f}: {item['sentence']}")

print("\nLLM response:")
print(narrative)


Retrieved memory:
- 0.7300: Mitch is barefoot and very angry about it.
- 0.6022: Mitch keeps spare boots under his bed.
- 0.5448: Mitch suspects the player stole his boots.
- 0.5192: Mitch secretly kills people at night.

LLM response:
The chipped linoleum feels cold under your own feet as you speak, a stark contrast to the simmering heat radiating from Mitch. He stops pacing, his jaw tight, and fixes you with a glare that could curdle milk. “What’s going on with the shoes?” he echoes, the question laced with venom. He holds up his hands, examining the crimson staining his palms as if surprised to find it there. “What’s going on? *You* tell *me* what’s going on. You waltz in here in the dead of night, and *I’m* the one who has to explain why I’m comfortable in my own home? And don’t play innocent about the boots. They were right there, under the bed, and now they’re gone. You’re the only one who’s been in here.” He takes a step closer, his voice dropping to a dangerous whisper. “Don’t 