In [1]:
import os
from openai import OpenAI
from google.colab import userdata

# Set up your OpenAI API key
# Make sure to add your API key to Colab secrets under the name 'OPENAI_API_KEY'
# For more info, see https://colab.research.google.com/notebooks/integrations.ipynb
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

# Initialize the OpenAI client
# This client is used to interact with the OpenAI API
client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# 1. Types of Memory

1.  **Contextual (Short-Term) Memory:** Keeps conversations coherent in a single session.
2.  **Long-Term (Persistent) Memory:** Remembers facts and preferences across sessions.
3.  **Semantic Memory:** The model’s built-in factual and conceptual knowledge.
4.  **Working Memory:** Handles multi-step reasoning and problem-solving within a single query.
5.  **External / Tool-Augmented Memory (RAG):** Extends the model’s capacity by retrieving info from external sources.

## 1. Contextual (Short-Term) Memory

Keeps conversations coherent in a single session by remembering recent interactions. This type of memory is crucial for maintaining flow and understanding within a limited timeframe.

In [2]:
# Step 1: First message to set context
# This message introduces the user and sets the initial context for the conversation.
response1 = client.chat.completions.create(
    model="gpt-4o-mini",  # Small, fast model for demo
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "My name is Alex."}
    ]
)
print(response1.choices[0].message.content)

Nice to meet you, Alex! How can I assist you today?


In [3]:
# Step 2: Second message reuses context (conversation continues)
# By including the previous messages, the model remembers the user's name from the first interaction.
response2 = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        # Include the previous messages to maintain context
        {"role": "assistant", "content": response1.choices[0].message.content},
        {"role": "user", "content": "What is my name?"}
    ]
)
print(response2.choices[0].message.content)

You mentioned your name is Alex. How can I help you today, Alex?


## 2. Long-Term (Persistent) Memory

Remembers facts and preferences across sessions, allowing the model to maintain consistency and personalization over extended periods.

In [7]:
# Persistent file for memory
MEMORY_FILE = "memory.txt"

def save_memory(data):
    with open(MEMORY_FILE, "w") as f:
        f.write(data)

def load_memory():
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r") as f:
            return f.read().strip()
    return None

# Step 1: User gives info
user_message = "My name is Alex"

# Step 2: Ask GPT-4 if this should be remembered
decision = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "system", "content": "Decide if the user's message contains important facts to store such as name, preferences and so on. If you do decide to remember the facts begin the answer with 'Fact remembered!'"},
        {"role": "user", "content": user_message}
    ]
)

print("Model decision:", decision.choices[0].message.content)

# Step 3: If model says it's important, trigger 'store_memory'
if "remembered" in decision.choices[0].message.content.lower():
    save_memory(user_message)
    print("Stored in memory:", user_message)

# Step 4: Later, retrieve memory and use it in a new conversation
stored_info = load_memory()
if stored_info:
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a friendly assistant."},
            {"role": "user", "content": f"Previously you learned: {stored_info}. Now greet me using that info."}
        ]
    )
    print(response.choices[0].message.content)


Model decision: Fact remembered! Your name is Alex.
Stored in memory: My name is Alex
Hello, Alex! It's great to meet you! How can I assist you today?


In [8]:
"""
Simple LLM Long Memory System
- Stores user facts in JSON file
- Uses OpenAI function calling to save/recall info
- Perfect for Google Colab experiments
"""

import json
import os
from openai import OpenAI

MEMORY_FILE = "memory.json"  # Where memories are stored

def save_memory(key: str, value: str):
    """Store a key-value pair in memory file"""
    print("Memory updating...")
    memory = {}
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r") as f:
            memory = json.load(f)

    memory[key] = value
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=2)
    return "Stored successfully"

def load_memory(query: str):
    """Search for stored memories matching the query"""
    print("Reading memory...")
    if not os.path.exists(MEMORY_FILE):
        return "No memories stored"

    with open(MEMORY_FILE, "r") as f:
        memory = json.load(f)

    if not memory:
        return "No memories stored"

    # Search for relevant info
    # query_lower = query.lower()
    # relevant = {k: v for k, v in memory.items()
    #            if query_lower in k.lower() or query_lower in str(v).lower()}

    # if relevant:
    #     return json.dumps(relevant)
    # else:
    #     return f"No memory found for: {query}"
    return memory # Return the entire memory dictionary

# Function tools for the AI to call

# Tool definitions
tools = [
    {
        "type": "function",
        "function": {
            "name": "store_fact",
            "description": "Store user info like name, age, etc.",
            "parameters": {
                "type": "object",
                "properties": {
                    "key": {"type": "string"},
                    "value": {"type": "string"}
                },
                "required": ["key", "value"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "recall_fact",
            "description": "Get stored user info",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string"}
                },
                "required": ["query"]
            }
        }
    }
]

def chat_with_memory(user_input: str):
    """Main function that handles conversation with memory capabilities"""
    client = OpenAI()

    messages = [
        {"role": "system", "content": "You have memory tools. When user shares personal info, store it. When they ask about themselves, recall it first."},
        {"role": "user", "content": user_input}
    ]

    # First API call - may trigger tool usage
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        tools=tools,
        tool_choice="auto"
    )

    # Handle tool calls if any
    if response.choices[0].message.tool_calls:
        messages.append(response.choices[0].message)

        for tool_call in response.choices[0].message.tool_calls:
            args = json.loads(tool_call.function.arguments)

            # Execute the appropriate tool
            if tool_call.function.name == "store_fact":
                result = save_memory(args["key"], args["value"])
            elif tool_call.function.name == "recall_fact":
                result = load_memory(args["query"])

            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": str(result) # Ensure the content is a string
            })

        # Second API call with tool results
        final_response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )


        return final_response.choices[0].message.content

    return response.choices[0].message.content

# ==================== DEMO ====================



user_text = input("\n💬 You: ")

print("\n🤖 Assistant:")
response = chat_with_memory(user_text)
print(response)


💬 You: Hello there! I am 20 years old and my name is Alex

🤖 Assistant:
Memory updating...
Memory updating...
Hi Alex! It's great to meet you. How can I assist you today?


## 3. Semantic Memory

The model’s built-in factual and conceptual knowledge, enabling it to understand and generate human-like text based on its training data.

In [9]:
# Define the input message for the model
input_message = "Write a short scientific article about nature." # Using the original prompt content

# Create a response request using the responses endpoint
response = client.responses.create(
    model="gpt-4o-mini",  # Using gpt-4o-mini
    input=input_message,
    max_output_tokens=100
)

# Extract and print the model's output text
print(response.output_text)

# The Interconnectedness of Nature: A Delicate Balance

## Abstract
Nature is a complex web of interactions that sustains life on Earth. This article explores the fundamental elements of ecosystems, the relationship between biodiversity and ecosystem services, and the impact of human activity on natural systems.

## Introduction
Nature, often perceived as a collection of individual species and landscapes, is, in reality, an interconnected network of relationships that sustains life. Ecosystems, defined as communities of living organisms interacting with their


# 2. Embeddings

## 1. Vectors

In [10]:
import numpy as np

def generate_vector(n):
    """Generate a vector with n dimensions filled with random values"""
    return np.random.rand(n)

# Example usage
number = 110
vector = generate_vector(number)

print(f"Input: {number}")
print(f"Vector: {vector}")
print(f"Shape: {vector.shape}")


Input: 110
Vector: [0.70473026 0.69562759 0.31124105 0.85171687 0.98094158 0.93547458
 0.2957988  0.25989481 0.28693497 0.08102088 0.46816844 0.46277758
 0.7408384  0.9831221  0.84089429 0.02214819 0.67074792 0.45428174
 0.26494666 0.88836125 0.31786706 0.78110637 0.13716815 0.76843992
 0.97307726 0.88823262 0.65598551 0.00848529 0.08207384 0.05479042
 0.62375421 0.62953462 0.25157051 0.76645527 0.30245514 0.18304236
 0.61517575 0.03011538 0.78854075 0.25820766 0.48399333 0.75791422
 0.75659371 0.48547041 0.34400719 0.14910311 0.4812495  0.2375033
 0.95979445 0.78444793 0.94132784 0.40159678 0.22056531 0.80020971
 0.56394193 0.42495733 0.95456606 0.51851623 0.48286589 0.15775229
 0.70233872 0.56193559 0.09997356 0.04678185 0.48753984 0.58845103
 0.51950102 0.14273828 0.09004669 0.92832353 0.71492236 0.62950373
 0.22387372 0.34815242 0.54547043 0.02959915 0.49100049 0.70264211
 0.47318651 0.18307098 0.40934184 0.6724792  0.04170277 0.80233729
 0.52886555 0.67457919 0.37131834 0.18778419

## 2. Generate Embeddings

In [11]:
# Cell 1: Generate Embeddings
# This code demonstrates how to generate embeddings for text using the OpenAI API.

def get_embedding(text):
    """
    Generate embeddings for text using OpenAI API
    """
    response = client.embeddings.create(
        model="text-embedding-3-small",  # A common and efficient embedding model
        input=text,
        encoding_format="float"  # Specify the format of the embedding vector
    )

    # The embedding vector is in the 'data' attribute of the response
    # It's a list of embedding objects, we take the first one's embedding
    embedding_vector = response.data[0].embedding
    return embedding_vector

# Example usage
text_to_embed = "This is a sample sentence for creating an embedding."

embedding_vector = get_embedding(text_to_embed)

# Print the embedding vector and its dimension
print("Text:", text_to_embed)
print("Embedding vector (first 10 elements):", embedding_vector[:10])
print("Dimension of the embedding vector:", len(embedding_vector))

Text: This is a sample sentence for creating an embedding.
Embedding vector (first 10 elements): [0.03280215, 0.011288634, 0.02859873, 0.0051769116, -0.003304069, -0.03293109, 0.03349842, -0.016040046, -0.024150325, 0.019882435]
Dimension of the embedding vector: 1536


## 3.Compare Embeddings

In [21]:
# Cell 2: Compare Embeddings

import numpy as np

def cosine_similarity(vec1, vec2):
    """
    Calculate cosine similarity between two vectors
    """
    v1 = np.array(vec1)
    v2 = np.array(vec2)
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))

# Example texts to compare
text1 = "How to train a cat?"
text2 = "How to drive a car?"

# Get embeddings for both texts
embedding1 = get_embedding(text1)
embedding2 = get_embedding(text2)

# Calculate similarity
similarity = cosine_similarity(embedding1, embedding2)

print(f"Text 1: {text1}")
print(f"Text 2: {text2}")
print(f"Similarity: {similarity:.4f}")

Text 1: How to train a cat?
Text 2: How to drive a car?
Similarity: 0.3499


# 3. Vector Database

## 1. Chroma DB

Solo dev / small app / on-prem or hybrid → start with Chroma (OSS, dead-simple API; can move to Chroma Cloud later). Chroma

### 1. Basic Setup

In [22]:
# Cell 1: Setup
!pip install chromadb

import chromadb

chroma = chromadb.Client()
collection = chroma.get_or_create_collection("demo")

def get_embedding(text):
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text
    )
    return response.data[0].embedding

print("✅ Ready!")

Collecting chromadb
  Downloading chromadb-1.0.20-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.36.0-py3-none-any.whl.metadata (2.4 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Downloading PyPika-0.48.9.tar.gz (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [

In [23]:
# Cell 2: Store Embeddings

texts = [
    "How do I cook pasta?",
    "The weather is sunny",
    "AI is amazing"
]

print("Storing embeddings...")

embeddings = [get_embedding(text) for text in texts]

collection.add(
    ids=["1", "2", "3"],
    documents=texts,
    embeddings=embeddings
)

print(f"✅ Stored {len(texts)} documents!")

Storing embeddings...
✅ Stored 3 documents!


In [29]:
# Cell 3: View Stored Data

# Show what's stored
# Explicitly include embeddings, documents, and metadatas
data = collection.get(
    include=['embeddings', 'documents', 'metadatas']
)

ids = data["ids"]
docs = data["documents"]
embs = data.get("embeddings")


print(f"Found {len(ids)} stored items:\n")

# If for some reason embeddings are missing, don't try to iterate over them
if embs is None or any(e is None for e in embs):
    for idx, doc in zip(ids, docs):
        print(f"ID: {idx}")
        print(f"Text: {doc}")
        print("(no embeddings returned)")
        print("-" * 40)
else:
    for idx, doc, emb in zip(ids, docs, embs):
        print(f"ID: {idx}")
        print(f"Text: {doc}")
        print(f"Embedding (first 5 dims): {emb[:5]}")
        print("-" * 40)

Found 3 stored items:

ID: 1
Text: How do I cook pasta?
Embedding (first 5 dims): [-6.04647398e-02 -6.07534535e-02 -1.20228324e-02 -2.46024672e-02
  9.77145100e-05]
----------------------------------------
ID: 2
Text: The weather is sunny
Embedding (first 5 dims): [ 0.00196909 -0.04838687  0.00736109 -0.00663111  0.02561658]
----------------------------------------
ID: 3
Text: AI is amazing
Embedding (first 5 dims): [-0.00992907 -0.00132574 -0.01422693  0.04163846  0.00857541]
----------------------------------------


#3. RAG

## 1. Set up Chroma + OpenAI API

In [30]:
import os
from getpass import getpass


# Create (or get) a collection to hold our chunks
collection = chroma.get_or_create_collection(name="rag_demo")

print("✅ Setup complete.")


✅ Setup complete.


## 2. Prepare and chunk a text

In [31]:
# A small sample text (replace with your own)
text = """
Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our



This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer. This improves accuracy and keeps responses up-to-date with our data.
"""

# Simple sentence-based chunking (groups of ~2–3 sentences)
import re

def chunk_text(t: str, max_sentences=1):
    sents = [s.strip() for s in re.split(r'(?<=[.!?])\s+', t.strip()) if s.strip()]
    chunks, buf = [], []
    for s in sents:
        buf.append(s)
        if len(buf) >= max_sentences:
            chunks.append(" ".join(buf))
            buf = []
    if buf:
        chunks.append(" ".join(buf))
    return chunks

chunks = chunk_text(text, max_sentences=2)
for i, c in enumerate(chunks):
    print(f"[{i}] {c}\n")

print(f"✅ Prepared {len(chunks)} chunks.")


[0] Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

[1] This improves accuracy and keeps responses up-to-date with our



This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

[2] This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and a

## 3. Turn chunks into embeddings

In [32]:
EMBED_MODEL = "text-embedding-3-small"  # 1536-dim, fast & inexpensive

def embed_texts(text_list):
    resp = client.embeddings.create(model=EMBED_MODEL, input=text_list)
    return [d.embedding for d in resp.data]

embeddings = embed_texts(chunks)

print(f"✅ Got {len(embeddings)} embeddings.")
print(f"   Each vector has dimension: {len(embeddings[0])}")


✅ Got 22 embeddings.
   Each vector has dimension: 1536


## 4.  Store in the vector DB (Chroma)

In [33]:
# Use string IDs "0", "1", ...
ids = [str(i) for i in range(len(chunks))]
metadatas = [{"source": "demo", "chunk_id": i} for i in range(len(chunks))]

collection.add(
    ids=ids,
    documents=chunks,
    embeddings=embeddings,
    metadatas=metadatas
)

# Peek at what we stored (without dumping full vectors)
data = collection.get(include=["documents", "metadatas", "embeddings"])  # add "embeddings" if you want them

print("✅ Stored items:")
for i, doc, emb, meta in zip(data["ids"], data["documents"],data['embeddings'], data["metadatas"]):
    print(f"- id={i}, meta={meta}\n  {doc}\n {emb}\n")


✅ Stored items:
- id=0, meta={'chunk_id': 0, 'source': 'demo'}
  Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.
 [-0.01604582  0.02682545  0.03639361 ... -0.00516421  0.02783913
 -0.00210617]

- id=1, meta={'source': 'demo', 'chunk_id': 1}
  This improves accuracy and keeps responses up-to-date with our



This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.
 [-0.00671253  0.03448062  0.05353642 ... -0.00039564  0.02271407
 -0.00155443]

- id=2, meta={'sourc

### Display the data

In [34]:
# Install Rich once in Colab
!pip -q install rich

data = collection.get(include=["documents", "metadatas", "embeddings"])

from rich.console import Console
from rich.table import Table
import json

def fmt_text(s, maxlen=120):
    s = s or ""
    return (s[:maxlen] + "…") if len(s) > maxlen else s

def fmt_emb(e, preview=8):
    if e is None:
        return "—"
    head = ", ".join(f"{x:.4f}" for x in e[:preview])
    tail = " …" if len(e) > preview else ""
    return f"[{head}{tail}]  (dim={len(e)})"

table = Table(show_header=True, header_style="bold magenta")
table.add_column("ID", style="cyan", no_wrap=True)
table.add_column("Text", style="white")
table.add_column("Embedding (preview)", style="green")
table.add_column("Metadata", style="yellow")

for i, doc, emb, meta in zip(data["ids"], data["documents"], data["embeddings"], data["metadatas"]):
    table.add_row(str(i), fmt_text(doc), fmt_emb(emb, 8), json.dumps(meta, ensure_ascii=False))

Console().print("Stored items:", style="bold")
Console().print(table)


In [35]:
# Check what is stored in the Chroma collection
data = collection.get(
    include=['documents', 'metadatas'] # Include documents and metadatas
)

print("✅ Data currently stored in Chroma:")
if data and data['ids']:
    for i, (doc, meta) in enumerate(zip(data['documents'], data['metadatas'])):
        print(f"- ID: {data['ids'][i]}, Metadata: {meta}")
        print(f"  Document: {doc}\n")
else:
    print("No data found in the collection.")

print(f"📊 Total items in collection: {collection.count()}")

✅ Data currently stored in Chroma:
- ID: 0, Metadata: {'chunk_id': 0, 'source': 'demo'}
  Document: Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

- ID: 1, Metadata: {'chunk_id': 1, 'source': 'demo'}
  Document: This improves accuracy and keeps responses up-to-date with our



This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

- ID: 2, Metadata: {'source': 'demo', 'chunk_id': 2}
  Document: This improves accuracy and keeps responses up-to-date with our d

## 2. A simple vector search

In [36]:
# User query → embed → query Chroma
query = "What is RAG?"
q_emb = embed_texts([query])[0]

results = collection.query(
    query_embeddings=[q_emb],
    n_results=3,
    include=["distances", "documents", "metadatas"] # Removed 'ids' from include
)

print("🔎 Top matches:")
for i in range(len(results["ids"][0])):
    doc_id    = results["ids"][0][i]
    distance  = results["distances"][0][i]  # smaller is closer (cosine distance)
    doc_text  = results["documents"][0][i]
    meta      = results["metadatas"][0][i]
    print(f"\nRank {i+1} | id={doc_id} | distance={distance:.4f} | meta={meta}\n{doc_text}")

# From here you could pass the retrieved text to your LLM prompt to 'augment' generation.

🔎 Top matches:

Rank 1 | id=0 | distance=0.9761 | meta={'source': 'demo', 'chunk_id': 0}
Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

Rank 2 | id=1 | distance=0.9927 | meta={'source': 'demo', 'chunk_id': 1}
This improves accuracy and keeps responses up-to-date with our



This improves accuracy and keeps responses up-to-date with our data.Retrieval-Augmented Generation (RAG) helps language models answer questions by looking up
supporting information from a knowledge base. We chunk documents, embed those chunks,
store the vectors in a database, and at query time we retrieve the most similar chunks
to ground the answer.

Rank 3 | id=2 | distance=1.0303 | meta={'chunk_id': 2, 'source': 'demo'}
This improves accuracy and keeps responses up-to-date with 