# Embeddings Service Testing

This notebook tests the embeddings service implementation from `src/services/embeddings_service.py`.

We'll test each storage type:
1. Memory Storage (Basic functionality)
2. SQLite Storage (Persistent local storage)
3. Cloud Storage (GCS integration)

In [1]:
import json
import logging
import os
from pathlib import Path

from src.config.settings import Settings
from src.services.embeddings_service import EmbeddingsService

# Show INFO-level records in the cell output so the service's progress
# messages are visible while the notebook runs.
logging.basicConfig(level="INFO")
logger = logging.getLogger(__name__)

In [2]:
# Initialize one Settings object per storage backend under test.
# All three backends use the same embeddings model, so it is named once here
# to keep the configurations from drifting apart.
EMBEDDINGS_MODEL_NAME = "sentence-transformers/nli-mpnet-base-v2"

# Location of the GCP service-account key. Taken from the standard
# GOOGLE_APPLICATION_CREDENTIALS environment variable so the notebook does not
# depend on one user's home directory. The fallback is relative to the working
# directory - NOTE(review): assumes the notebook runs from the repository root,
# as the relative SQLite path below also does; confirm.
GCP_CREDENTIALS_PATH = os.environ.get(
    "GOOGLE_APPLICATION_CREDENTIALS", "config/service-account.json"
)

memory_settings = Settings(
    EMBEDDINGS_MODEL=EMBEDDINGS_MODEL_NAME,
    EMBEDDINGS_STORAGE_TYPE="memory",
    EMBEDDINGS_CONTENT_PATH=":memory:",
)

sqlite_settings = Settings(
    EMBEDDINGS_MODEL=EMBEDDINGS_MODEL_NAME,
    EMBEDDINGS_STORAGE_TYPE="sqlite",
    EMBEDDINGS_CONTENT_PATH="txtai/test/content.db",
)

# Cloud settings require proper GCP credentials
cloud_settings = Settings(
    EMBEDDINGS_MODEL=EMBEDDINGS_MODEL_NAME,
    EMBEDDINGS_STORAGE_TYPE="cloud",
    GOOGLE_CLOUD_PROJECT="aurite-dev",
    GOOGLE_CLOUD_BUCKET="aurite-txtai-dev",
    GOOGLE_APPLICATION_CREDENTIALS=GCP_CREDENTIALS_PATH,
    EMBEDDINGS_PREFIX="txtai/test",  # Prefix for cloud storage path
)

In [3]:
# Sample documents shared by every storage test below. Each entry carries an
# id, the text to embed, and a metadata dict (type, tags, version).
_database_doc = {
    "id": "doc1",
    "text": "Technical document about database indexing",
    "metadata": {"type": "technical", "tags": ["database", "index"], "version": 1.0},
}

_cloud_doc = {
    "id": "doc2",
    "text": "Guide to cloud storage systems",
    "metadata": {
        "type": "guide",
        "tags": ["cloud", "storage"],
        "version": 1.0,
    },
}

test_docs = [_database_doc, _cloud_doc]

In [4]:
# Test memory storage:
# build a service from the memory settings, create a "memory" index, and add
# the sample documents to it.
# NOTE(review): this cell only checks that the calls complete without raising;
# nothing asserts on the index contents afterwards.
logger.info("Testing memory storage...")
memory_service = EmbeddingsService(memory_settings)
memory_service.create_index("memory")
memory_service.add_documents(test_docs)

INFO:__main__:Testing memory storage...
INFO:src.services.embeddings_service:Creating new embeddings index with memory storage
INFO:src.services.embeddings_service:Adding 2 documents to index


In [5]:
# Test SQLite storage:
# build a service from the SQLite settings, create a "sqlite" index backed by
# the configured content database, and add the sample documents to it.
logger.info("Testing SQLite storage...")

# Take the database path from the settings object instead of repeating the
# literal, so the index always uses the path the service was configured with.
sqlite_content_path = str(sqlite_settings.EMBEDDINGS_CONTENT_PATH)

# SQLite cannot create missing parent directories; make sure the target
# directory exists so the cell also works on a fresh checkout.
Path(sqlite_content_path).parent.mkdir(parents=True, exist_ok=True)

sqlite_service = EmbeddingsService(sqlite_settings)
sqlite_service.create_index("sqlite", sqlite_content_path)
sqlite_service.add_documents(test_docs)

INFO:__main__:Testing SQLite storage...
INFO:src.services.embeddings_service:Creating new embeddings index with sqlite storage
INFO:src.services.embeddings_service:Adding 2 documents to index


In [6]:
# Test cloud storage:
# build a service from the cloud settings, create a "cloud" index, and add the
# sample documents. Needs working GCP credentials (see the cloud settings).
logger.info("Testing cloud storage...")
cloud_service = EmbeddingsService(cloud_settings)
cloud_service.create_index("cloud")
cloud_service.add_documents(test_docs)

# Test cloud persistence: save the index under "<EMBEDDINGS_PREFIX>/test_index".
cloud_path = f"{cloud_settings.EMBEDDINGS_PREFIX}/test_index"
logger.info(f"Saving index to cloud: {cloud_path}")
cloud_service.save_index(cloud_path)

# Create new service and load from cloud, using a separate service instance so
# the load does not reuse the object that performed the save.
# NOTE(review): nothing verifies the loaded index matches what was saved;
# the cell only confirms that save/load complete without raising.
logger.info("Loading index from cloud")
new_cloud_service = EmbeddingsService(cloud_settings)
new_cloud_service.create_index("cloud")
new_cloud_service.load_index(cloud_path)

INFO:__main__:Testing cloud storage...
INFO:src.services.embeddings_service:Creating new embeddings index with cloud storage
INFO:src.services.embeddings_service:Adding 2 documents to index
INFO:__main__:Saving index to cloud: txtai/test/test_index
INFO:src.services.embeddings_service:Saving embeddings index to txtai/test/test_index
INFO:__main__:Loading index from cloud
INFO:src.services.embeddings_service:Creating new embeddings index with cloud storage
INFO:src.services.embeddings_service:Loading embeddings index from txtai/test/test_index


In [7]:
# Round-trip the memory-backed index through a local path: save it from the
# existing service, then load it into a freshly created service.
logger.info("Testing index persistence...")
save_path = Path("txtai/test/saved_index")
saved_index_location = str(save_path)  # the service calls are given a plain string

memory_service.save_index(saved_index_location)

new_service = EmbeddingsService(memory_settings)
new_service.create_index("memory")
new_service.load_index(saved_index_location)

INFO:__main__:Testing index persistence...
INFO:src.services.embeddings_service:Saving embeddings index to txtai/test/saved_index
INFO:src.services.embeddings_service:Creating new embeddings index with memory storage
INFO:src.services.embeddings_service:Loading embeddings index from txtai/test/saved_index
